Use ber_memalloc from <lber.h> instead of malloc. Add shortcuts for pure-ASCII strings.

Still need to add some kind of isNormalized() check to avoid reworking UTF-8 strings that are already properly normalized.
2025-01-30 13:30:57 +08:00 · 2002-01-03 16:22:53 +00:00 · 2002-01-03 16:22:53 +00:00 · 8c04993cdf
commit 8c04993cdf
parent 33e6f6e65c
1 changed files with 51 additions and 4 deletions
--- a/libraries/liblunicode/ucstr.c
+++ b/libraries/liblunicode/ucstr.c
@ -4,6 +4,12 @@
 #include <ac/string.h>
 #include <ac/stdlib.h>

+#include <lber.h>
+
+#define	malloc(x)	ber_memalloc(x)
+#define	realloc(x,y)	ber_memrealloc(x,y)
+#define	free(x)		ber_memfree(x)
+
 #include <ldap_utf8.h>
 #include <ldap_pvt_uc.h>

@ -99,18 +105,35 @@ char * UTF8normalize(
 	s = bv->bv_val;
 	len = bv->bv_len;

-	if ( len == 0 ) {
-		out = (char *) malloc( 1 );
-		*out = '\0';
+	/* See if the string is pure ASCII so we can shortcut */
+	for ( i=0; i<len; i++ ) {
+		if ( s[i] & 0x80 )	/* non-ASCII */
+			break;
+	}
+
+	/* It's pure ASCII or zero-len */
+	if ( i == len ) {
+		out = malloc( len + 1 );
+		if ( i && !casefold ) {
+			strncpy( out, bv->bv_val, len );
+		} else {
+			for ( j=0; j<i; j++ )
+				out[j] = TOUPPER( s[j] );
+		}
+		out[len] = '\0';
 		return out;
 	}
-	
+
 	outsize = len + 7;
 	out = (char *) malloc( outsize );
 	if ( out == NULL ) {
 		return NULL;
 	}

+	/* FIXME: Should first check to see if string is already in
+	 * proper normalized form.
+	 */
+
 	outpos = 0;

 	/* finish off everything up to character before first non-ascii */
@ -234,6 +257,30 @@ int UTF8normcmp(
 		return *s1 - *s2 > 0 ? 1 : -1;
 	}
 	
+	/* See if we can get away with a straight ASCII compare */
+	len = (l1 < l2) ? l1 : l2;
+	for ( i = 0; i<len; i++ ) {
+		/* Is either char non-ASCII? */
+		if ((s1[i] & 0x80) || (s2[i] & 0x80))
+			break;
+		if (casefold) {
+			char c1 = TOUPPER(s1[i]);
+			char c2 = TOUPPER(s2[i]);
+		    	res = c1 - c2;
+		} else {
+			res = s1[i] - s2[i];
+		}
+		if (res)
+			return res;
+	}
+	/* Strings were ASCII, equal up to minlen */
+	if (i == len)
+		return l1 - l2;
+		
+	/* FIXME: Should first check to see if strings are already in
+	 * proper normalized form.
+	 */
+
 	ucs = (long *) malloc( ( l1 > l2 ? l1 : l2 ) * sizeof(*ucs) );
 	if ( ucs == NULL ) {
 		return l1 > l2 ? 1 : -1; /* what to do??? */