From 729fe5669895890b25ddbbfeec4deb545b0e1df3 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Sat, 29 Dec 2001 02:57:32 +0000 Subject: [PATCH] Made LDAP_UTF8_CHARLEN/ldap_utf8_charlen table-driven. --- include/ldap_pvt_uc.h | 6 ++++-- libraries/libldap/getdn.c | 4 ++-- libraries/libldap/utf-8.c | 40 +++++++++++++-------------------------- 3 files changed, 19 insertions(+), 31 deletions(-) diff --git a/include/ldap_pvt_uc.h b/include/ldap_pvt_uc.h index 5c19491191..bb25f8f6b1 100644 --- a/include/ldap_pvt_uc.h +++ b/include/ldap_pvt_uc.h @@ -76,9 +76,11 @@ LDAP_F (char *) ldap_utf8_strpbrk( const char* str, const char *set); LDAP_F (char*) ldap_utf8_strtok( char* sp, const char* sep, char **last); /* Optimizations */ -#define LDAP_UTF8_ISASCII(p) ( * (const unsigned char *) (p) < 0x80 ) +LDAP_V (const char) ldap_utf8_lentab[128]; + +#define LDAP_UTF8_ISASCII(p) ( *(unsigned char *)(p) ^ 0x80 ) #define LDAP_UTF8_CHARLEN(p) ( LDAP_UTF8_ISASCII(p) \ - ? 1 : ldap_utf8_charlen((p)) ) + ? 1 : ldap_utf8_lentab[*(unsigned char *)(p) ^ 0x80] ) #define LDAP_UTF8_OFFSET(p) ( LDAP_UTF8_ISASCII(p) \ ? 1 : ldap_utf8_offset((p)) ) diff --git a/libraries/libldap/getdn.c b/libraries/libldap/getdn.c index 75ea756b2b..d45a2321a4 100644 --- a/libraries/libldap/getdn.c +++ b/libraries/libldap/getdn.c @@ -1950,7 +1950,7 @@ strval2strlen( struct berval *val, unsigned flags, ber_len_t *len ) } for ( l = 0, p = val->bv_val; p[ 0 ]; p += cl ) { - cl = ldap_utf8_charlen( p ); + cl = LDAP_UTF8_CHARLEN( p ); if ( cl == 0 ) { /* illegal utf-8 char! */ return( -1 ); @@ -2018,7 +2018,7 @@ strval2str( struct berval *val, char *str, unsigned flags, ber_len_t *len ) * of the value */ for ( s = 0, d = 0, end = val->bv_len - 1; s < val->bv_len; ) { - ber_len_t cl = ldap_utf8_charlen( &val->bv_val[ s ] ); + ber_len_t cl = LDAP_UTF8_CHARLEN( &val->bv_val[ s ] ); /* * there might be some chars we want to escape in form diff --git a/libraries/libldap/utf-8.c b/libraries/libldap/utf-8.c index b0c2fd9973..6a97a4fc45 100644 --- a/libraries/libldap/utf-8.c +++ b/libraries/libldap/utf-8.c @@ -76,36 +76,22 @@ int ldap_utf8_offset( const char * p ) * * This function should use a table lookup. */ +const char ldap_utf8_lentab[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0 }; + int ldap_utf8_charlen( const char * p ) { - unsigned c = * (const unsigned char *) p; + if (!(*p & 0x80)) + return 1; - if ((c & 0xfe ) == 0xfc) { - return 6; - } - - if ((c & 0xfc ) == 0xf8) { - return 5; - } - - if ((c & 0xf8 ) == 0xf0) { - return 4; - } - - if ((c & 0xf0 ) == 0xe0) { - return 3; - } - - if ((c & 0xe0 ) == 0xc0) { - return 2; - } - - if ((c & 0x80 ) == 0x80) { - /* INVALID */ - return 0; - } - - return 1; + return ldap_utf8_lentab[*(unsigned char *)p ^ 0x80]; } /* conv UTF-8 to UCS-4, useful for comparisons */