/*

* The original code performs ( n ) normalizations * and ( n * ( n - 1 ) / 2 ) matches, which hide * the same number of normalizations. The new code * performs the same number of normalizations ( n ) * and ( n * ( n - 1 ) / 2 ) mem compares, far less * expensive than an entire match, if a match is * equivalent to a normalization and a mem compare ... * * This is far more memory expensive than the previous, * but it can heavily improve performances when big * chunks of data are added (typical example is a group * with thousands of DN-syntax members; on my system: * for members of 5-RDN DNs, members orig bvmatch (dirty) new 1000 0m38.456s 0m0.553s 0m0.608s 2000 2m33.341s 0m0.851s 0m1.003s * Moreover, 100 groups with 10000 members each were * added in 37m27.933s (an analogous LDIF file was * loaded into Active Directory in 38m28.682s, BTW). * * Maybe we could switch to the new algorithm when * the number of values overcomes a given threshold? */
2025-03-07 14:18:15 +08:00 · 2002-09-02 19:41:03 +00:00 · 2002-09-02 19:41:03 +00:00 · d3ca441ae8
commit d3ca441ae8
parent eb38db1be5
4 changed files with 230 additions and 63 deletions
--- a/servers/slapd/add.c
+++ b/servers/slapd/add.c
@ -398,10 +398,8 @@ static int slap_mods2entry(
 				for ( i = 0; mods->sml_bvalues[i].bv_val != NULL; i++ ) {
 					/* test asserted values against themselves */
 					for( j = 0; j < i; j++ ) {
-						int rc = ber_bvcmp( &mods->sml_bvalues[i],
-							&mods->sml_bvalues[j] );
-
-						if( rc == 0 ) {
+						if ( bvmatch( &mods->sml_bvalues[i],
+							&mods->sml_bvalues[j] ) ) {
 							/* value exists already */
 							snprintf( textbuf, textlen,
 								"%s: value #%d provided more than once",
@ -412,34 +410,16 @@ static int slap_mods2entry(
 				}

 			} else {
-				for ( i = 0; mods->sml_bvalues[i].bv_val != NULL; i++ ) {
-					int rc, match;
-					const char *text = NULL;
-					struct berval asserted;
+				int		rc;
+				const char	*text = NULL;
+				char		textbuf[ SLAP_TEXT_BUFLEN ]  = { '\0' };
+				
+				rc = modify_check_duplicates( mods->sml_desc, mr,
+						NULL, mods->sml_bvalues,
+						&text, textbuf, sizeof( textbuf ) );

-					rc = value_normalize( mods->sml_desc,
-						SLAP_MR_EQUALITY,
-						&mods->sml_bvalues[i],
-						&asserted,
-						&text );
-
-					if( rc != LDAP_SUCCESS ) return rc;
-
-					for ( j = 0; j < i; j++ ) {
-						int rc = value_match( &match, mods->sml_desc, mr,
-							SLAP_MR_VALUE_SYNTAX_MATCH,
-							&mods->sml_bvalues[j], &asserted, &text );
-
-						if( rc == LDAP_SUCCESS && match == 0 ) {
-							free( asserted.bv_val );
-							snprintf( textbuf, textlen,
-								"%s: value #%d provided more than once",
-								mods->sml_desc->ad_cname.bv_val, j );
-							return LDAP_TYPE_OR_VALUE_EXISTS;
-						}
-					}
-
-					free( asserted.bv_val );
+				if ( rc != LDAP_SUCCESS ) {
+					return rc;
 				}
 			}
 		}
--- a/servers/slapd/mods.c
+++ b/servers/slapd/mods.c
@ -18,6 +18,160 @@

 #include "slap.h"

+#undef QUICK_DIRTY_DUPLICATE_CHECK
+
+int
+modify_check_duplicates(
+	AttributeDescription	*ad,
+	MatchingRule		*mr,
+	BerVarray		vals,
+	BerVarray		mods,
+	const char	**text,
+	char *textbuf, size_t textlen )
+{
+	int		i, j, rc = LDAP_SUCCESS;
+	BerVarray	nvals = NULL, nmods;
+
+	/*
+	 * FIXME: better do the following
+	 * 
+	 *   - count the existing values
+	 *   - count the new values
+	 *   
+	 *   - if the existing values are less than the new ones {
+	 *       // current code
+	 *       - normalize the existing values
+	 *       - for each new value {
+	 *           - normalize
+	 *           - check with existing
+	 *           - cross-check with already normalized new vals
+	 *       }
+	 *   } else {
+	 *       // to be implemented
+	 *       - for each new value {
+	 *           - normalize
+	 *           - cross-check with already normalized new vals
+	 *       }
+	 *       - for each existing value {
+	 *           - normalize
+	 *           - check with already normalized new values
+	 *       }
+	 *   }
+	 *
+	 * The first case is good when adding a lot of new values,
+	 * and significantly at first import of values (e.g. adding
+	 * a new group); the latter case seems to be quite important
+	 * as well, because it is likely to be the most frequently
+	 * used when administering the entry.  The current 
+	 * implementation will always normalize all the existing
+	 * values before checking.  If there's no duplicate, the
+	 * performances should not change; they will in case of error.
+	 */
+
+	if ( vals ) {
+		for ( j = 0; vals[ j ].bv_val != NULL; j++ )
+			/* count existing values */ ;
+
+		nvals = ch_calloc( j + 1, sizeof( struct berval ) );
+
+		/* normalize the existing values first */
+		for ( j = 0; vals[ j ].bv_val != NULL; j++ ) {
+			rc = value_normalize( ad, SLAP_MR_EQUALITY,
+				&vals[ j ], &nvals[ j ], text );
+
+			/* existing attribute values must normalize */
+			assert( rc == LDAP_SUCCESS );
+
+			if ( rc != LDAP_SUCCESS ) {
+				nvals[ j ].bv_val = NULL;
+				goto return_results;
+			}
+		}
+		nvals[ j ].bv_val = NULL;
+	}
+
+	for ( i = 0; mods[ i ].bv_val != NULL; i++ )
+		/* count new values */ ;
+
+	nmods = ch_calloc( i + 1, sizeof( struct berval ) );
+
+	for ( i = 0; mods[ i ].bv_val != NULL; i++ ) {
+
+		rc = value_normalize( ad, SLAP_MR_EQUALITY,
+			&mods[ i ], &nmods[ i ], text );
+
+		if ( rc != LDAP_SUCCESS ) {
+			nmods[ i ].bv_val = NULL;
+			goto return_results;
+		}
+
+		if ( vals ) {
+			for ( j = 0; nvals[ j ].bv_val; j++ ) {
+#ifdef QUICK_DIRTY_DUPLICATE_CHECK
+				if ( bvmatch( &nmods[ i ], &nvals[ j ] ) ) {
+#else /* !QUICK_DIRTY_DUPLICATE_CHECK */
+				int match;
+
+				rc = (mr->smr_match)( &match,
+					SLAP_MR_VALUE_SYNTAX_MATCH,
+					ad->ad_type->sat_syntax,
+					mr, &nmods[ i ], &nvals[ j ] );
+				if ( rc != LDAP_SUCCESS ) {
+					nmods[ i + 1 ].bv_val = NULL;
+					goto return_results;
+				}
+	
+				if ( match == 0 ) {
+#endif /* !QUICK_DIRTY_DUPLICATE_CHECK */
+					snprintf( textbuf, textlen,
+						"%s: value #%d provided more than once",
+						ad->ad_cname.bv_val, i );
+					rc = LDAP_TYPE_OR_VALUE_EXISTS;
+					nmods[ i + 1 ].bv_val = NULL;
+					goto return_results;
+				}
+			}
+		}
+
+		for ( j = 0; j < i; j++ ) {
+#ifdef QUICK_DIRTY_DUPLICATE_CHECK
+			if ( bvmatch( &nmods[ i ], &nmods[ j ] ) ) {
+#else /* !QUICK_DIRTY_DUPLICATE_CHECK */
+			int match;
+
+			rc = (mr->smr_match)( &match,
+				SLAP_MR_VALUE_SYNTAX_MATCH,
+				ad->ad_type->sat_syntax,
+				mr, &nmods[ i ], &nmods[ j ] );
+			if ( rc != LDAP_SUCCESS ) {
+				nmods[ i + 1 ].bv_val = NULL;
+				goto return_results;
+			}
+
+			if ( match == 0 ) {
+#endif /* !QUICK_DIRTY_DUPLICATE_CHECK */
+				snprintf( textbuf, textlen,
+					"%s: value #%d provided more than once",
+					ad->ad_cname.bv_val, j );
+				rc = LDAP_TYPE_OR_VALUE_EXISTS;
+				nmods[ i + 1 ].bv_val = NULL;
+				goto return_results;
+			}
+		}
+	}
+	nmods[ i ].bv_val = NULL;
+
+return_results:;
+	if ( nvals ) {
+		ber_bvarray_free( nvals );
+	}
+	if ( nmods ) {
+		ber_bvarray_free( nmods );
+	}
+
+	return rc;
+}
+
 int
 modify_add_values(
 	Entry	*e,
@ -61,10 +215,9 @@ modify_add_values(
 			/* test asserted values against existing values */
 			if( a ) {
 				for( j = 0; a->a_vals[j].bv_val != NULL; j++ ) {
-					int rc = ber_bvcmp( &mod->sm_bvalues[i],
-						&a->a_vals[j] );
+					if ( bvmatch( &mod->sm_bvalues[i],
+						&a->a_vals[j] ) ) {

-					if( rc == 0 ) {
 						/* value exists already */
 						*text = textbuf;
 						snprintf( textbuf, textlen,
@ -77,10 +230,9 @@ modify_add_values(

 			/* test asserted values against themselves */
 			for( j = 0; j < i; j++ ) {
-				int rc = ber_bvcmp( &mod->sm_bvalues[i],
-					&mod->sm_bvalues[j] );
+				if ( bvmatch( &mod->sm_bvalues[i],
+					&mod->sm_bvalues[j] ) ) {

-				if( rc == 0 ) {
 					/* value exists already */
 					*text = textbuf;
 					snprintf( textbuf, textlen,
@ -92,23 +244,56 @@ modify_add_values(
 		}

 	} else {
-		for ( i = 0; mod->sm_bvalues[i].bv_val != NULL; i++ ) {
-			int rc, match;
-			struct berval asserted;

-			rc = value_normalize( mod->sm_desc,
-				SLAP_MR_EQUALITY,
-				&mod->sm_bvalues[i],
-				&asserted,
-				text );
+		/*
+		 * The original code performs ( n ) normalizations 
+		 * and ( n * ( n - 1 ) / 2 ) matches, which hide
+		 * the same number of normalizations.  The new code
+		 * performs the same number of normalizations ( n )
+		 * and ( n * ( n - 1 ) / 2 ) mem compares, far less
+		 * expensive than an entire match, if a match is
+		 * equivalent to a normalization and a mem compare ...
+		 * 
+		 * This is far more memory expensive than the previous,
+		 * but it can heavily improve performances when big
+		 * chunks of data are added (typical example is a group
+		 * with thousands of DN-syntax members; on my system:
+		 * for members of 5-RDN DNs,

-			if( rc != LDAP_SUCCESS ) return rc;
+		members		orig		bvmatch (dirty)	new
+		1000		0m38.456s	0m0.553s 	0m0.608s
+		2000		2m33.341s	0m0.851s	0m1.003s

-			if( a ) {
-				for ( j = 0; a->a_vals[j].bv_val != NULL; j++ ) {
-					int rc = value_match( &match, mod->sm_desc, mr,
+		 * Moreover, 100 groups with 10000 members each were
+		 * added in 37m27.933s (an analogous LDIF file was
+		 * loaded into Active Directory in 38m28.682s, BTW).
+		 * 
+		 * Maybe we could switch to the new algorithm when
+		 * the number of values overcomes a given threshold?
+		 */
+
+		int		rc;
+		const char	*text = NULL;
+		char		textbuf[ SLAP_TEXT_BUFLEN ] = { '\0' };
+
+		if ( mod->sm_bvalues[ 1 ].bv_val == 0 ) {
+			if ( a != NULL ) {
+				struct berval	asserted;
+				int		i;
+
+				rc = value_normalize( mod->sm_desc, SLAP_MR_EQUALITY,
+					&mod->sm_bvalues[ 0 ], &asserted, &text );
+
+				if ( rc != LDAP_SUCCESS ) {
+					return rc;
+				}
+
+				for ( i = 0; a->a_vals[ i ].bv_val; i++ ) {
+					int	match;
+
+					rc = value_match( &match, mod->sm_desc, mr,
 						SLAP_MR_VALUE_SYNTAX_MATCH,
-						&a->a_vals[j], &asserted, text );
+						&a->a_vals[ i ], &asserted, &text );

 					if( rc == LDAP_SUCCESS && match == 0 ) {
 						free( asserted.bv_val );
@ -117,18 +302,14 @@ modify_add_values(
 				}
 			}

-			for ( j = 0; j < i; j++ ) {
-				int rc = value_match( &match, mod->sm_desc, mr,
-					SLAP_MR_VALUE_SYNTAX_MATCH,
-					&mod->sm_bvalues[j], &asserted, text );
-
-				if( rc == LDAP_SUCCESS && match == 0 ) {
-					free( asserted.bv_val );
-					return LDAP_TYPE_OR_VALUE_EXISTS;
-				}
+		} else {
+			rc = modify_check_duplicates( mod->sm_desc, mr,
+					a ? a->a_vals : NULL, mod->sm_bvalues,
+					&text, textbuf, sizeof( textbuf ) );
+	
+			if ( rc != LDAP_SUCCESS ) {
+				return rc;
 			}
-
-			free( asserted.bv_val );
 		}
 	}

--- a/servers/slapd/proto-slap.h
+++ b/servers/slapd/proto-slap.h
@ -570,6 +570,10 @@ LDAP_SLAPD_F( int ) slap_mods_opattrs(
 /*
 * mods.c
 */
+LDAP_SLAPD_F( int ) modify_check_duplicates(
+	AttributeDescription *ad, MatchingRule *mr, 
+	BerVarray vals, BerVarray mods,
+	const char **text, char *textbuf, size_t textlen );
 LDAP_SLAPD_F( int ) modify_add_values( Entry *e,
 	Modification *mod,
 	const char **text, char *textbuf, size_t textlen );
--- a/servers/slapd/value.c
+++ b/servers/slapd/value.c
@ -330,7 +330,6 @@ int value_find_ex(
 	int	i;
 	int rc;
 	struct berval nval = { 0, NULL };
-	struct berval nval_tmp;
 	MatchingRule *mr = ad->ad_type->sat_equality;

 	if( mr == NULL || !mr->smr_match ) {
@ -351,8 +350,11 @@ int value_find_ex(
 	}

 	if( mr->smr_syntax->ssyn_normalize ) {
+		struct berval nval_tmp;
+
 		rc = mr->smr_syntax->ssyn_normalize(
-			mr->smr_syntax, nval.bv_val == NULL ? val : &nval, &nval_tmp );
+			mr->smr_syntax,
+			nval.bv_val == NULL ? val : &nval, &nval_tmp );

 		free(nval.bv_val);
 		nval = nval_tmp;