Add UTF-8 wc/mb conversion routines contributed by Novell.

/****************************************************************************** * Copyright (C) 1999, 2000 Novell, Inc. All Rights Reserved. * * THIS WORK IS SUBJECT TO U.S. AND INTERNATIONAL COPYRIGHT LAWS AND * TREATIES. USE, MODIFICATION, AND REDISTRIBUTION OF THIS WORK IS SUBJECT * TO VERSION 2.0.1 OF THE OPENLDAP PUBLIC LICENSE, A COPY OF WHICH IS * AVAILABLE AT HTTP://WWW.OPENLDAP.ORG/LICENSE.HTML OR IN THE FILE "LICENSE" * IN THE TOP-LEVEL DIRECTORY OF THE DISTRIBUTION. ANY USE OR EXPLOITATION * OF THIS WORK OTHER THAN AS AUTHORIZED IN VERSION 2.0.1 OF THE OPENLDAP * PUBLIC LICENSE, OR OTHER PRIOR WRITTEN CONSENT FROM NOVELL, COULD SUBJECT * THE PERPETRATOR TO CRIMINAL AND CIVIL LIABILITY. ******************************************************************************/
2025-01-18 11:05:48 +08:00 · 2000-12-28 02:20:37 +00:00 · 2000-12-28 02:20:37 +00:00 · 5082731e24
commit 5082731e24
parent 2f8f8b588e
4 changed files with 843 additions and 2 deletions
--- a/doc/devel/utfconv.txt
+++ b/doc/devel/utfconv.txt
@ -0,0 +1,291 @@
                                                                Dec 5, 2000
                                                                Dave Steck
                                                                Novell, Inc.
                    UTF-8 Conversion Functions
 1.  Strings in the LDAP C SDK should be encoded in UTF-8 format.
    However, most platforms do not provide APIs for converting to
    this format.  If they do, they are platform-specific.
    As a result, most applications (knowingly or not) use local strings
    with LDAP functions.  This works fine for 7-bit ASCII characters,
    but will fail with 8-bit European characters, Asian characters, etc.
    We propose adding the following platform-independent conversion functions 
    to the OpenLDAP SDK.  There are 4 functions for converting between UTF-8 
    and wide characters, and 4 functions for converting between UTF-8 and 
    multibyte characters.
    For multibyte to UTF-8 conversions, charset translation is necessary.
    While a full charset translator is not practical or appropriate for the
    LDAP SDK, we can pass the translator function in as an argument.
    A NULL for this argument will use the ANSI C functions mbtowc, mbstowcs,
    wctomb, and wcstombs.
 2.  UTF-8 <--> Wide Character conversions
 The following new conversion routines will be added, following the pattern of 
 the ANSI C conversion routines (mbtowc, mbstowcs, etc).  These routines use
 the wchar_t type.  wchar_t is 2 bytes on some systems and 4 bytes on others.  
 However the advantage of using wchar_t is that all the standard wide character 
 string functions may be used on these strings:   wcslen, wcscpy, etc.
   int ldap_x_utf8_to_wc  -  Convert a single UTF-8 encoded character to a wide character.
   int ldap_x_utf8s_to_wcs  -  Convert a UTF-8 string to a wide character string.
   int ldap_x_wc_to_utf8  -  Convert a single wide character to a UTF-8 sequence.
   int ldap_x_wcs_to_utf8s  -  Convert a wide character string to a UTF-8 string.
 2.1  ldap_x_utf8_to_wc  -  Convert a single UTF-8  encoded character to a wide character.
 int ldap_x_utf8_to_wc ( wchar_t *wchar, const char *utf8char )
  wchar		(OUT)	Points to a wide character code to receive the 
                    converted character.
  utf8char	(IN)	Address of the UTF8 sequence of bytes.
 Return Value:
 		If successful, the function returns the length in 
        bytes of the UTF-8 input character.
        If utf8char is NULL or points to an empty string, the
        function returns 1 and a NULL is written to wchar.
        If utf8char contains an invalid UTF-8 sequence -1 is returned.
 2.2  ldap_x_utf8s_to_wcs   -  Convert a UTF-8 string to a wide character string.
 int ldap_x_utf8s_to_wcs (wchar_t *wcstr, const char *utf8str, size_t count)
  wcstr		(OUT)	Points to a wide char buffer to receive the 
                    converted wide char string. The output string will be 
                    null terminated if there is space for it in the 
                    buffer.
  utf8str   (IN)	Address of the null-terminated UTF-8 string to convert.  
  count		(IN)	The number of UTF-8 characters to convert, or
        			equivalently, the size of the output buffer in wide
        			characters.
 Return Value:
    If successful, the function returns the number of wide
    characters written to wcstr, excluding the null termination
    character, if any.
 	If wcstr is NULL, the function returns the number of wide
    characters required to contain the converted string,
    excluding the null termination character.
    If an invalid UTF-8 sequence is encountered, the 
    function returns -1. 
    If the return value equals count, there was not enough space to fit the 
    string and the null terminator in the buffer.  
 2.3  ldap_x_wc_to_utf8  -  Convert a single wide character to a UTF-8 sequence.
 int ldap_x_wc_to_utf8 ( char *utf8char, wchar_t wchar, size_t count )
  utf8char	(OUT)	Points to a byte array to receive the converted UTF-8
        			string.
  wchar		(IN)	The wide character to convert.
  count		(IN)	The maximum number of bytes to write to the output
                    buffer.  Normally set this to LDAP_MAX_UTF8_LEN, which 
                    is defined as 3 or 6 depending on the size of wchar_t.  
                    A partial character will not be written.
 Return Value:
 		If successful, the function returns the length in bytes of
 		the converted UTF-8 output character.
         If wchar is the null wide character (0), the function returns 1 
         and a null byte is written to utf8char.
        If wchar cannot be converted to a UTF-8 character, the 
        function returns -1.
 2.4  int ldap_x_wcs_to_utf8s  -  Convert a wide character string to a UTF-8 string.
 int ldap_x_wcs_to_utf8s (char *utf8str, const wchar_t *wcstr, size_t count)
  utf8str	(OUT)	Points to a byte array to receive the converted 
                    UTF-8 string. The output string will be null 
                    terminated if there is space for it in the 
                    buffer.
  wcstr		(IN)	Address of the null-terminated wide char string to convert.
  count		(IN)	The size of the output buffer in bytes.
 Return Value:
 		If successful, the function returns the number of bytes
 		written to utf8str, excluding the null termination
        character, if any.
 		If utf8str is NULL, the function returns the number of
        bytes required to contain the converted string, excluding 
        the null termination character.  The 'count' parameter is ignored.
        If the function encounters a wide character that cannot 
        be mapped to a UTF-8 sequence, the function returns -1.
        If the return value equals count, there was not enough space to fit 
        the string and the null terminator in the buffer.
 3. Multi-byte <--> UTF-8 Conversions
 These functions convert the string in a two-step process, from multibyte 
 to Wide, then from Wide to UTF8, or vice versa.  This conversion requires a 
 charset translation routine, which is passed in as an argument.
   ldap_x_mb_to_utf8  -  Convert a multi-byte character  to a UTF-8 character.
   ldap_x_mbs_to_utf8s  -  Convert a multi-byte string to a UTF-8 string.
   ldap_x_utf8_to_mb  -  Convert a UTF-8 character to a multi-byte character.
   ldap_x_utf8s_to_mbs  -  Convert a UTF-8 string to a multi-byte string.
 3.1  ldap_x_mb_to_utf8  - Convert a multi-byte character  to a UTF-8 character.
 int ldap_x_mb_to_utf8 ( char *utf8char, const char *mbchar, size_t mbsize, int (*f_mbtowc)(wchar_t *wchar, const char *mbchar, size_t count)  )
  utf8char	(OUT)	Points to a byte buffer to receive the converted 
                    UTF-8 character.  May be NULL.  The output is not
                    null-terminated.
  mbchar    (IN)	Address of a sequence of bytes forming a multibyte character.
  mbsize	(IN)	The maximum number of bytes of the mbchar argument to 
                    check.  This should normally be MB_CUR_MAX.
  f_mbtowc	(IN)	The function to use for converting a multibyte 
                    character to a wide character.  If NULL, the local 
                    ANSI C routine mbtowc is used.
 Return Value:
 		If successful, the function returns the length in bytes of
        the UTF-8 output character.  
         If utf8char is NULL, count is ignored and the function 
         returns the number of bytes that would be written to the 
         output char.
        If count is zero, 0 is returned and nothing is written to
        utf8char.
        If mbchar is NULL or points to an empty string, the 
        function returns 1 and a null byte is written to utf8char.
        If mbchar contains an invalid multi-byte character, -1 is returned.
 3.2  ldap_x_mbs_to_utf8s  - Convert a multi-byte string  to a UTF-8 string.
 int ldap_x_mbs_to_utf8s (char *utf8str, const char *mbstr, size_t count, 
        size_t (*f_mbstowcs)(wchar_t *wcstr, const char *mbstr, size_t count))
 utf8str	    (OUT)	Points to a buffer to receive the converted UTF-8 string.  
                    May be NULL.
  mbstr	(IN)	Address of the null-terminated multi-byte input string.
  count	    (IN)	The size of the output buffer in bytes.
  f_mbstowcs (IN)	The function to use for converting a multibyte string
            		to a wide character string.  If NULL, the local ANSI
            		C routine mbstowcs is used.
 Return Value:
 		If successful, the function returns the length in 
        bytes of the UTF-8 output string, excluding the null
        terminator, if present.
        If utf8str is NULL, count is ignored and the function 
        returns the number of bytes required for the output string, 
        excluding the NULL.
        If count is zero, 0 is returned and nothing is written to utf8str.
        If mbstr is NULL or points to an empty string, the 
        function returns 1 and a null byte is written to utf8str.
        If mbstr contains an invalid multi-byte character, -1 is returned.
        If the returned value is equal to count, the entire null-terminated 
        string would not fit in the output buffer.
 3.3  ldap_x_utf8_to_mb  -  Convert a UTF-8 character to a multi-byte character.
 int ldap_x_utf8_to_mb ( char *mbchar, const char *utf8char,
                        int (*f_wctomb)(char *mbchar, wchar_t wchar) )
 mbchar	(OUT)	Points to a byte buffer to receive the converted multi-byte 
                character.  May be NULL.
  utf8char	(IN)	Address of the UTF-8 character sequence.
  f_wctomb	(IN)	The function to use for converting a wide character 
                    to a multibyte character.  If NULL, the local 
                    ANSI C routine wctomb is used.
 Return Value:
 		If successful, the function returns the length in 
        bytes of the multi-byte output character.  
        If utf8char is NULL or points to an empty string, the 
        function returns 1 and a null byte is written to mbchar.
        If utf8char contains an invalid UTF-8 sequence, -1 is returned.
 3.4  int ldap_x_utf8s_to_mbs  - Convert a UTF-8 string to a multi-byte string.
 int ldap_x_utf8s_to_mbs ( char *mbstr, const char *utf8str, size_t count, 
        size_t (*f_wcstombs)(char *mbstr, const wchar_t *wcstr, size_t count) )
  mbstr		(OUT)	Points to a byte buffer to receive the converted 
                    multi-byte string.  May be NULL.
  utf8str   (IN)	Address of the null-terminated UTF-8 string to convert.
  count		(IN)	The size of the output buffer in bytes.
  f_wcstombs (IN)	The function to use for converting a wide character 
                    string to a multibyte string.  If NULL, the local 
                    ANSI C routine wcstombs is used.
 Return Value:
        If successful, the function returns the number of bytes
 		written to mbstr, excluding the null termination
        character, if any.
         If mbstr is NULL, count is ignored and the function 
         returns the number of bytes required for the output string,
         excluding the NULL.
        If count is zero, 0 is returned and nothing is written to
        mbstr.
        If utf8str is NULL or points to an empty string, the 
        function returns 1 and a null byte is written to mbstr.
        If an invalid UTF-8 character is encountered, the 
        function returns -1.
 The output string will be null terminated if there is space for it in 
 the output buffer.
--- a/include/ldap_utf8.h
+++ b/include/ldap_utf8.h
@ -0,0 +1,86 @@
 /* $OpenLDAP$ */
 /* $Novell: /ldap/src/cldap/include/ldap_utf8.h,v 1.3 2000/12/04 20:23:20 dsteck Exp $ */
 /*
 * Copyright 2000 The OpenLDAP Foundation, Redwood City, California, USA
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted only
 * as authorized by the OpenLDAP Public License.  A copy of this
 * license is available at http://www.OpenLDAP.org/license.html or
 * in file LICENSE in the top-level directory of the distribution.
 */
 /******************************************************************************
 * This notice applies to changes, created by or for Novell, Inc.,
 * to preexisting works for which notices appear elsewhere in this file.
 *
 * Copyright (C) 2000 Novell, Inc. All Rights Reserved.
 *
 * THIS WORK IS SUBJECT TO U.S. AND INTERNATIONAL COPYRIGHT LAWS AND TREATIES.
 * USE, MODIFICATION, AND REDISTRIBUTION OF THIS WORK IS SUBJECT TO VERSION
 * 2.0.1 OF THE OPENLDAP PUBLIC LICENSE, A COPY OF WHICH IS AVAILABLE AT
 * HTTP://WWW.OPENLDAP.ORG/LICENSE.HTML OR IN THE FILE "LICENSE" IN THE
 * TOP-LEVEL DIRECTORY OF THE DISTRIBUTION. ANY USE OR EXPLOITATION OF THIS
 * WORK OTHER THAN AS AUTHORIZED IN VERSION 2.0.1 OF THE OPENLDAP PUBLIC
 * LICENSE, OR OTHER PRIOR WRITTEN CONSENT FROM NOVELL, COULD SUBJECT THE
 * PERPETRATOR TO CRIMINAL AND CIVIL LIABILITY. 
 ******************************************************************************/
 #ifndef _LDAP_UTF8_H
 #define _LDAP_UTF8_H
 LDAP_BEGIN_DECL
 /*  
 * UTF-8 Utility Routines (in utf-8.c)
 */
 #define LDAP_UCS4_INVALID (0x80000000U)
 /*
  * LDAP_MAX_UTF8_LEN is the largest number of bytes a single wchar_t can
  * occupy once encoded as UTF-8: 3 when wchar_t is 2 bytes wide, 6 when it
  * is 4 bytes wide.
  *
  * The expansion is fully parenthesized so that it behaves as one operand
  * when the macro appears inside a larger expression
  * (e.g. "n / LDAP_MAX_UTF8_LEN").
  */
 #define LDAP_MAX_UTF8_LEN  ( sizeof(wchar_t) * 3/2 )
 /*
 * UTF-8 Conversion Routines.   (in utfconv.c)
 */
 /*
  * UTF-8 character to Wide Char.
  * Decodes the UTF-8 sequence at utf8char (a NULL pointer is treated as an
  * empty string) and, when wchar is non-NULL, stores the result in *wchar.
  * Returns the length in bytes of the input sequence, or -1 if the
  * sequence is invalid.
  */
 LDAP_F(int)
 ldap_x_utf8_to_wc ( wchar_t *wchar, const char *utf8char );
 /*
  * UTF-8 string to Wide Char string.
  * Stores at most count wide chars in wcstr; a terminating 0 is appended
  * only if there is room for it.  When wcstr is NULL nothing is stored,
  * count is ignored and only the length is computed.
  * Returns the number of wide chars (excluding the terminator), or -1 on
  * an invalid UTF-8 sequence.
  */
 LDAP_F(int)
 ldap_x_utf8s_to_wcs ( wchar_t *wcstr, const char *utf8str, size_t count );
 /*
  * Wide Char to UTF-8 character.
  * Writes the encoding of wchar to utf8char, never more than count bytes.
  * Returns the encoded length in bytes, 0 if the whole sequence does not
  * fit in count bytes (nothing is written), or -1 if wchar cannot be
  * encoded.  When utf8char is NULL, count is ignored and the required
  * length is returned.
  */
 LDAP_F(int)
 ldap_x_wc_to_utf8 ( char *utf8char, wchar_t wchar, size_t count );
 /*
  * Wide Char string to UTF-8 string.
  * Writes at most count bytes to utf8str; a terminating null byte is
  * appended only if there is room for it.  When utf8str is NULL the
  * function only computes the number of bytes required.
  * Returns the number of bytes (excluding the terminator), or -1 if a wide
  * char cannot be encoded.
  */
 LDAP_F(int)
 ldap_x_wcs_to_utf8s ( char *utf8str, const wchar_t *wcstr, size_t count );
 /*
  * UTF-8 character to MultiByte character.
  * Decodes one UTF-8 character and hands it to f_wctomb (the C library's
  * wctomb when f_wctomb is NULL).  mbchar may be NULL.
  * Returns -1 on an invalid UTF-8 sequence, otherwise the value returned
  * by f_wctomb.
  */
 LDAP_F(int)
 ldap_x_utf8_to_mb ( char *mbchar, const char *utf8char,
 		int (*f_wctomb)(char *mbchar, wchar_t wchar) );
 /*
  * UTF-8 string to MultiByte string.
  * Converts via a temporary wide char string and f_wcstombs (the C
  * library's wcstombs when f_wcstombs is NULL); count is passed on to
  * f_wcstombs as the output size.
  * Returns 0 for a NULL or empty utf8str, -1 on an invalid UTF-8 sequence
  * or allocation failure, otherwise the value returned by f_wcstombs.
  */
 LDAP_F(int)
 ldap_x_utf8s_to_mbs ( char *mbstr, const char *utf8str, size_t count,
 		size_t (*f_wcstombs)(char *mbstr, const wchar_t *wcstr, size_t count) );
 /*
  * MultiByte character to UTF-8 character.
  * Converts at most mbsize bytes of mbchar with f_mbtowc (the C library's
  * mbtowc when f_mbtowc is NULL), then encodes the result as UTF-8 using
  * an output limit of LDAP_MAX_UTF8_LEN bytes.  utf8char may be NULL.
  * Returns 1 for a NULL or empty mbchar, -1 if mbsize is 0 or a conversion
  * fails, otherwise the length in bytes of the UTF-8 character.
  */
 LDAP_F(int)
 ldap_x_mb_to_utf8 ( char *utf8char, const char *mbchar, size_t mbsize,
 		int (*f_mbtowc)(wchar_t *wchar, const char *mbchar, size_t count) );
 /*
  * MultiByte string to UTF-8 string.
  * Converts via a temporary wide char string produced by f_mbstowcs (the
  * C library's mbstowcs when f_mbstowcs is NULL); a NULL mbstr is treated
  * as an empty string.  count is the size of utf8str in bytes.
  * Returns -1 on allocation or conversion failure, otherwise the size of
  * the UTF-8 string in bytes, excluding the null terminator.
  */
 LDAP_F(int)
 ldap_x_mbs_to_utf8s ( char *utf8str, const char *mbstr, size_t count,
 		size_t (*f_mbstowcs)(wchar_t *wcstr, const char *mbstr, size_t count) );
 LDAP_END_DECL
 #endif /* _LDAP_UTF8_H */
--- a/libraries/libldap/Makefile.in
+++ b/libraries/libldap/Makefile.in
@ -18,7 +18,7 @@ SRCS	= bind.c open.c result.c error.c compare.c search.c \
 	request.c os-ip.c url.c sortctrl.c vlvctrl.c \
 	init.c options.c print.c string.c util-int.c schema.c \
 	charray.c tls.c dn.c os-local.c dnssrv.c \
-	utf-8.c
+	utf-8.c utf-8-conv.c
 OBJS	= bind.lo open.lo result.lo error.lo compare.lo search.lo \
 	controls.lo messages.lo references.lo extended.lo cyrus.lo \
 	modify.lo add.lo modrdn.lo delete.lo abandon.lo ufn.lo cache.lo \
@ -28,7 +28,7 @@ OBJS	= bind.lo open.lo result.lo error.lo compare.lo search.lo \
 	request.lo os-ip.lo url.lo sortctrl.lo vlvctrl.lo \
 	init.lo options.lo print.lo string.lo util-int.lo schema.lo \
 	charray.lo tls.lo dn.lo os-local.lo dnssrv.lo \
-	utf-8.lo
+	utf-8.lo utf-8-conv.lo
 LDAP_INCDIR= ../../include       
 LDAP_LIBDIR= ../../libraries
--- a/libraries/libldap/utf-8-conv.c
+++ b/libraries/libldap/utf-8-conv.c
@ -0,0 +1,464 @@
 /* $OpenLDAP$ */
 /*
 * Copyright 2000 The OpenLDAP Foundation, All Rights Reserved.
 * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
 */
 /* $Novell: /ldap/src/cldap/libraries/libldap/utfconv.c,v 1.3 2000/12/11 19:35:37 dsteck Exp $ */
 /******************************************************************************
 * Copyright (C) 1999, 2000 Novell, Inc. All Rights Reserved.
 * 
 * THIS WORK IS SUBJECT TO U.S. AND INTERNATIONAL COPYRIGHT LAWS AND
 * TREATIES. USE, MODIFICATION, AND REDISTRIBUTION OF THIS WORK IS SUBJECT
 * TO VERSION 2.0.1 OF THE OPENLDAP PUBLIC LICENSE, A COPY OF WHICH IS
 * AVAILABLE AT HTTP://WWW.OPENLDAP.ORG/LICENSE.HTML OR IN THE FILE "LICENSE"
 * IN THE TOP-LEVEL DIRECTORY OF THE DISTRIBUTION. ANY USE OR EXPLOITATION
 * OF THIS WORK OTHER THAN AS AUTHORIZED IN VERSION 2.0.1 OF THE OPENLDAP
 * PUBLIC LICENSE, OR OTHER PRIOR WRITTEN CONSENT FROM NOVELL, COULD SUBJECT
 * THE PERPETRATOR TO CRIMINAL AND CIVIL LIABILITY. 
 ******************************************************************************/
 /*
 * UTF-8 Conversion Routines
 *
 * These routines convert between Wide Character and UTF-8,
 * or between MultiByte and UTF-8 encodings.
 *
 * Both single character and string versions of the functions are provided.
 * All functions return -1 if the character or string cannot be converted.
 */
 #include "portable.h"
 #include <limits.h>		/* For MB_LEN_MAX */
 #include <stdio.h>
 #include <ac/stdlib.h>		/* For wctomb, wcstombs, mbtowc, mbstowcs */
 #include <ac/string.h>
 #include <ac/time.h>		/* for time_t */
 #include "ldap-int.h"
 #include <ldap_utf8.h>
 /*
  * Data-bit masks for the first byte of a UTF-8 sequence, indexed by the
  * sequence length in bytes (1..6); index 0 is a placeholder.
  * The table is only ever read, so it is declared const.
  */
 static const unsigned char mask[] = { 0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
 /*-----------------------------------------------------------------------------
 					UTF-8 Format Summary
 ASCII chars 						7 bits
    0xxxxxxx
 2-character UTF-8 sequence:        11 bits
    110xxxxx  10xxxxxx
 3-character UTF-8                  16 bits
    1110xxxx  10xxxxxx  10xxxxxx   
 4-char UTF-8                       21 bits 
    11110xxx  10xxxxxx  10xxxxxx  10xxxxxx
 5-char UTF-8                       26 bits
    111110xx  10xxxxxx  10xxxxxx  10xxxxxx  10xxxxxx
 6-char UTF-8                       31 bits
    1111110x  10xxxxxx  10xxxxxx  10xxxxxx  10xxxxxx  10xxxxxx
 Unicode address space   (0 - 0x10FFFF)    21 bits
 ISO-10646 address space (0 - 0x7FFFFFFF)  31 bits
 Note:  This code does not prevent UTF-8 sequences which are longer than
 	   necessary from being decoded.
 */
 /*------------------------------------------------------------------------------
   Decode one UTF-8 encoded character into a wide char.

   wchar     (OUT) receives the decoded character; may be NULL, in which
                   case the sequence is only validated.
   utf8char  (IN)  start of the UTF-8 sequence; NULL is treated as "".

   Returns the number of input bytes that make up the character, or -1 if
   the sequence is rejected.
 */
 int
 ldap_x_utf8_to_wc ( wchar_t *wchar, const char *utf8char )
 {
 	const char *src = (utf8char != NULL) ? utf8char : "";
 	int nbytes, pos;
 	wchar_t value;

 	/* UTF8_CHARLEN (defined elsewhere) gives the sequence length for the
 	   lead byte; 0 or a length above LDAP_MAX_UTF8_LEN is rejected. */
 	nbytes = UTF8_CHARLEN(src);
 	if( nbytes==0 || nbytes > LDAP_MAX_UTF8_LEN )
 		return -1;

 	/* Strip the length tag from the lead byte to get the top data bits. */
 	value = (wchar_t)(src[0] & mask[nbytes]);

 	pos = 1;
 	while (pos < nbytes)
 	{
 		/* Every trailing byte must look like 10xxxxxx ... */
 		if ((src[pos] & 0xc0) != 0x80)
 			return -1;

 		/* ... and contributes its low 6 bits. */
 		value = (value << 6) | (wchar_t)(src[pos] & 0x3f);
 		pos++;
 	}

 	if (wchar != NULL)
 		*wchar = value;

 	return nbytes;
 }
 /*-----------------------------------------------------------------------------
   Decode a null-terminated UTF-8 string into a wide char string.

   wcstr    (OUT) destination buffer; may be NULL to only measure the input.
   utf8str  (IN)  UTF-8 input; NULL is treated as "".
   count    (IN)  capacity of wcstr in wide chars (ignored if wcstr is NULL).

   At most 'count' wide chars are stored.  A terminating 0 is stored only
   when there is still room for it.
   Returns the number of wide chars stored (or required), not counting the
   terminator, or -1 if an invalid sequence is met.
 */
 int
 ldap_x_utf8s_to_wcs ( wchar_t *wcstr, const char *utf8str, size_t count )
 {
 	const char *src = (utf8str != NULL) ? utf8str : "";
 	size_t nwide = 0;
 	int seqlen, k;
 	wchar_t value;

 	for (;;)
 	{
 		if (*src == '\0')
 			break;						/* end of input */
 		if (wcstr != NULL && nwide >= count)
 			break;						/* output buffer is full */

 		/* Sequence length comes from the lead byte. */
 		seqlen = UTF8_CHARLEN(src);
 		if( seqlen==0 || seqlen > LDAP_MAX_UTF8_LEN )
 			return -1;

 		/* Lead byte without its length tag. */
 		value = (wchar_t)(src[0] & mask[seqlen]);

 		for (k = 1; k < seqlen; k++)
 		{
 			/* Trailing bytes must be 10xxxxxx; each adds 6 data bits. */
 			if ((src[k] & 0xc0) != 0x80)
 				return -1;
 			value = (value << 6) | (wchar_t)(src[k] & 0x3f);
 		}

 		if (wcstr != NULL)
 			wcstr[nwide] = value;

 		src += seqlen;
 		nwide++;
 	}

 	/* Terminate only if a slot is left over. */
 	if (wcstr != NULL && nwide < count)
 		wcstr[nwide] = 0;

 	return nwide;
 }
 /*------------------------------------------------------------------------------
   Encode one wide char as a UTF-8 character.

   utf8char  (OUT) destination bytes; may be NULL to only query the length
                   (count is then ignored).
   wchar     (IN)  the wide char to encode.
   count     (IN)  capacity of utf8char in bytes.

   Returns the length of the UTF-8 character in bytes, 0 if that many bytes
   do not fit in 'count' (nothing is written; no partial characters), or
   -1 if wchar is outside the encodable range.  No terminator is written.
 */
 int
 ldap_x_wc_to_utf8 ( char *utf8char, wchar_t wchar, size_t count )
 {
 	/* Length tag OR-ed into the first byte, indexed by sequence length. */
 	static const unsigned char lead_tag[] =
 		{ 0, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
 	int need, pos;

 	/* Step 1: number of bytes this value needs (-1 = not encodable). */
 	if ( wchar < 0 )
 		need = -1;
 	else if ( wchar < 0x80 )
 		need = 1;
 	else if ( wchar < 0x800 )
 		need = 2;
 	else if ( wchar < 0x10000 )
 		need = 3;
 	else if ( wchar < 0x200000 )
 		need = 4;
 	else if ( wchar < 0x4000000 )
 		need = 5;
 	else if ( wchar < 0x80000000 )
 		need = 6;
 	else
 		need = -1;

 	/* Invalid input, or caller only wanted the size. */
 	if ( need == -1 || utf8char == NULL )
 		return need;

 	/* Step 2: refuse to write a partial character. */
 	if ( count < (size_t)need )
 		return 0;

 	/* Step 3: fill trailing bytes from the last one backwards, 6 bits
 	   at a time, then put what is left under the length tag. */
 	for ( pos = need - 1; pos > 0; pos-- ) {
 		utf8char[pos] = (char)( 0x80 | ( wchar & 0x3f ) );
 		wchar >>= 6;
 	}
 	utf8char[0] = (char)( lead_tag[need] | wchar );

 	return need;
 }
 /*-----------------------------------------------------------------------------
   Encode a null-terminated wide char string as a UTF-8 string.

   utf8str  (OUT) destination buffer; may be NULL to only compute the size.
   wcstr    (IN)  wide char input; NULL is treated as an empty string.
   count    (IN)  capacity of utf8str in bytes.

   At most 'count' bytes are written.  A null terminator is added only if
   there is room.  If a character does not fit, the unused tail of the
   buffer is zero-filled and the original 'count' is returned.
   Returns the number of bytes written (or required), not counting the
   terminator, or -1 if a wide char cannot be encoded.
 */
 int
 ldap_x_wcs_to_utf8s ( char *utf8str, const wchar_t *wcstr, size_t count )
 {
 	wchar_t nul = 0;			/* stands in for L"" */
 	const wchar_t *in = (wcstr != NULL) ? wcstr : &nul;
 	char *out;
 	int rc;

 	/* Size-only mode: add up the encoded length of every wide char. */
 	if (utf8str == NULL)
 	{
 		int total = 0;

 		for (; *in; in++)
 		{
 			rc = ldap_x_wc_to_utf8( NULL, *in, LDAP_MAX_UTF8_LEN);
 			if (rc == -1)
 				return -1;
 			total += rc;
 		}
 		return total;
 	}

 	/* Conversion mode. */
 	out = utf8str;
 	rc = 1;						/* "success" if the input is empty */
 	for (; *in; in++)
 	{
 		rc = ldap_x_wc_to_utf8( out, *in, count);
 		if (rc <= 0)			/* -1: cannot encode, 0: does not fit */
 			break;
 		out += rc;
 		count -= rc;			/* bytes still free in the output */
 	}

 	if (rc == 0)
 	{
 		/* Last character did not fit: zero-fill what is left so that
 		   the return value equals the caller's count (buffer full). */
 		for (; count > 0; count--)
 			*out++ = 0;
 	}
 	else if (count != 0)
 	{
 		*out = 0;				/* room for a terminator */
 	}

 	if (rc == -1)
 		return -1;

 	return (out - utf8str);
 }
 /*-----------------------------------------------------------------------------
   Convert a UTF-8 character to a MultiByte character.

   mbchar    (OUT) receives the multibyte character; may be NULL, in which
                   case the conversion is done into a scratch buffer and
                   only the length is reported.
   utf8char  (IN)  start of the UTF-8 sequence.
   f_wctomb  (IN)  wide char -> multibyte converter; NULL selects the
                   C library's wctomb.

   Returns -1 if the UTF-8 sequence is invalid, otherwise the value
   returned by f_wctomb (the size of the converted character in bytes,
   or its own error value).
 */
 int
 ldap_x_utf8_to_mb ( char *mbchar, const char *utf8char,
 		int (*f_wctomb)(char *mbchar, wchar_t wchar) )
 {
 	wchar_t wchar;
 	int n;
 	/* Scratch output used when the caller supplies no buffer.  Sized
 	   with MB_LEN_MAX, the C library's upper bound on the length of a
 	   multibyte character in any locale; a fixed 6 bytes can be too
 	   small (MB_LEN_MAX is larger than that on some platforms). */
 	char tmp[MB_LEN_MAX];

 	if (f_wctomb == NULL)		/* If no conversion function was given... */
 		f_wctomb = wctomb;		/*    use the local ANSI C function */

 	/* First convert UTF-8 char to a wide char */
 	n = ldap_x_utf8_to_wc( &wchar, utf8char);
 	if (n == -1)
 		return -1;		/* Invalid UTF-8 character */

 	/* Never hand a NULL buffer to the converter: for the ANSI C wctomb
 	   a NULL destination is a query about state-dependent encodings,
 	   not a request for the output length. */
 	if (mbchar == NULL)
 		n = f_wctomb( tmp, wchar );
 	else
 		n = f_wctomb( mbchar, wchar);

 	return n;
 }
 /*-----------------------------------------------------------------------------
   Convert a UTF-8 string to a MultiByte string.

   mbstr       (OUT) receives the multibyte string; may be NULL.
   utf8str     (IN)  null-terminated UTF-8 input.
   count       (IN)  capacity of mbstr in bytes; passed to f_wcstombs.
   f_wcstombs  (IN)  wide string -> multibyte converter; NULL selects the
                     C library's wcstombs.

   The input is first decoded into a temporary wide char string, which is
   released before returning.

   Returns 0 for a NULL or empty utf8str, -1 if the UTF-8 input is invalid
   or the temporary buffer cannot be allocated, otherwise the value
   returned by f_wcstombs (size in bytes, excl null terminator).
 */
 int
 ldap_x_utf8s_to_mbs ( char *mbstr, const char *utf8str, size_t count,
 		size_t (*f_wcstombs)(char *mbstr, const wchar_t *wcstr, size_t count) )
 {
 	wchar_t *wcs;
 	size_t wcsize;
 	int n;

 	if (f_wcstombs == NULL)		/* If no conversion function was given... */
 		f_wcstombs = wcstombs;	/*    use the local ANSI C function */

 	if (utf8str == NULL || *utf8str == 0)	/* NULL or empty input string */
 	{
 		/* Terminate the output only if it can hold at least one byte;
 		   with count == 0 nothing may be written to mbstr. */
 		if (mbstr != NULL && count > 0)
 			*mbstr = 0;
 		return 0;
 	}

 	/* Allocate memory for the maximum size wchar string that we could
 	   get: one wide char per input byte, plus the terminator. */
 	wcsize = strlen(utf8str) + 1;
 	wcs = (wchar_t *)LDAP_MALLOC(wcsize * sizeof(wchar_t));
 	if (wcs == NULL)
 		return -1;				/* Memory allocation failure. */

 	/* First convert the UTF-8 string to a wide char string */
 	n = ldap_x_utf8s_to_wcs( wcs, utf8str, wcsize);

 	/* Then convert wide char string to multi-byte string */
 	if (n != -1)
 	{
 		n = f_wcstombs(mbstr, wcs, count);
 	}

 	LDAP_FREE(wcs);

 	return n;
 }
 /*-----------------------------------------------------------------------------
   Convert a MultiByte character to a UTF-8 character.

   utf8char  (OUT) receives the UTF-8 bytes; may be NULL.
   mbchar    (IN)  start of the multibyte character.
   mbsize    (IN)  how many bytes of 'mbchar' may be examined; 0 is rejected.
   f_mbtowc  (IN)  multibyte -> wide char converter; NULL selects the
                   C library's mbtowc.

   Returns 1 for a NULL or empty mbchar (a null byte is stored if utf8char
   is non-NULL), -1 if mbsize is 0 or f_mbtowc reports -1, otherwise the
   result of encoding the wide char with a limit of LDAP_MAX_UTF8_LEN bytes.
 */
 int
 ldap_x_mb_to_utf8 ( char *utf8char, const char *mbchar, size_t mbsize,
 		int (*f_mbtowc)(wchar_t *wchar, const char *mbchar, size_t count) )
 {
 	wchar_t wide;
 	int rc;

 	/* Fall back to the ANSI C routine when no converter is supplied. */
 	if (f_mbtowc == NULL)
 		f_mbtowc = mbtowc;

 	/* A zero-length look-ahead is not valid. */
 	if (mbsize == 0)
 		return -1;

 	/* NULL or empty input maps to a single null byte. */
 	if (mbchar == NULL || *mbchar == 0)
 	{
 		if (utf8char != NULL)
 			*utf8char = 0;
 		return 1;
 	}

 	/* Stage 1: multibyte -> wide char. */
 	rc = f_mbtowc( &wide, mbchar, mbsize);
 	if (rc == -1)
 		return -1;

 	/* Stage 2: wide char -> UTF-8. */
 	return ldap_x_wc_to_utf8( utf8char, wide, LDAP_MAX_UTF8_LEN);
 }
 /*-----------------------------------------------------------------------------
   Convert a MultiByte string to a UTF-8 string.

   utf8str     (OUT) receives the UTF-8 string; passed on unchanged to
                     ldap_x_wcs_to_utf8s.
   mbstr       (IN)  null-terminated multibyte input; NULL is treated as "".
   count       (IN)  capacity of utf8str in bytes.
   f_mbstowcs  (IN)  multibyte -> wide string converter; NULL selects the
                     C library's mbstowcs.

   The input is first converted into a temporary wide char string, which
   is released before returning.

   Returns -1 if the temporary buffer cannot be allocated or f_mbstowcs
   reports -1, otherwise the result of ldap_x_wcs_to_utf8s (size of the
   converted string in bytes, excl null terminator).
 */
 int
 ldap_x_mbs_to_utf8s ( char *utf8str, const char *mbstr, size_t count,
 		size_t (*f_mbstowcs)(wchar_t *wcstr, const char *mbstr, size_t count) )
 {
 	const char *src = (mbstr != NULL) ? mbstr : "";
 	/* Worst case is one wide char per input byte, plus the terminator. */
 	size_t nwide = strlen(src) + 1;
 	wchar_t *wide;
 	int rc;

 	/* Fall back to the ANSI C routine when no converter is supplied. */
 	if (f_mbstowcs == NULL)
 		f_mbstowcs = mbstowcs;

 	wide = (wchar_t *)LDAP_MALLOC( nwide * sizeof(wchar_t) );
 	if (wide == NULL)
 		return -1;

 	/* Stage 1: multibyte string -> wide char string. */
 	rc = f_mbstowcs(wide, src, nwide);

 	/* Stage 2: wide char string -> UTF-8, unless stage 1 failed. */
 	if (rc != -1)
 		rc = ldap_x_wcs_to_utf8s( utf8str, wide, count);

 	LDAP_FREE(wide);

 	return rc;
 }