netcdf-c/libdispatch/dutf8.c

/*
 *	Copyright 2017, University Corporation for Atmospheric Research
 *      See netcdf/COPYRIGHT file for copying and redistribution conditions.
 */

#include "config.h"
#include <stdlib.h>
#include "netcdf.h"
#include "ncutf8.h"
#include "utf8proc.h"

/* Provide a wrapper around whatever utf8 library we use. */

/*
 * Check validity of a UTF8 encoded null-terminated byte string.
 *
 * name -- null-terminated byte string to check; a NULL pointer is
 *         rejected with NC_EINVAL instead of being dereferenced.
 *
 * The string is walked one codepoint at a time and the walk stops at
 * the first decoding failure.
 *
 * Return codes:
 * NC_NOERR -- string is valid utf8 (this includes the empty string)
 * NC_ENOMEM -- out of memory
 * NC_EINVAL -- invalid argument or internal error
 * NC_EBADNAME-- not valid utf8
 */

int
nc_utf8_validate(const unsigned char* name)
{
    int ncstat = NC_NOERR;
    const nc_utf8proc_uint8_t *str;
    /* Negative => no caller-supplied byte limit is given to the decoder;
       deliberately not called "strlen" so the libc function is not shadowed. */
    const nc_utf8proc_ssize_t nchars = -1;
    nc_utf8proc_int32_t codepoint;
    nc_utf8proc_ssize_t count;

    if(name == NULL) {
	ncstat = NC_EINVAL;
	goto done;
    }

    str = (const nc_utf8proc_uint8_t*)name;
    while(*str) {
	/* count is the number of bytes consumed, or a negative error code */
	count = nc_utf8proc_iterate(str,nchars,&codepoint);
	if(count < 0) {
	    /* Translate the utf8 library error into a netcdf error */
	    switch (count) {
	    case UTF8PROC_ERROR_NOMEM:
	    case UTF8PROC_ERROR_OVERFLOW:
		ncstat = NC_ENOMEM;
		break;
	    case UTF8PROC_ERROR_INVALIDOPTS:
		ncstat = NC_EINVAL;
		break;
	    case UTF8PROC_ERROR_INVALIDUTF8:
	    case UTF8PROC_ERROR_NOTASSIGNED:
	    default:
		ncstat = NC_EBADNAME;
		break;
	    }
	    goto done;
	}
	/* move to next char */
	str += count;
    }
done:
    return ncstat;
}

/*
 * Produce a normalized copy of the null-terminated UTF8 string 'utf8'.
 *
 * utf8    -- null-terminated input string; NULL is rejected with
 *            NC_EINVAL.
 * normalp -- if non-NULL, receives a pointer to the newly allocated
 *            normalized string on success; caller must free.
 *            If NULL, the normalized string is discarded (and released
 *            here), so the call only reports whether normalization
 *            succeeded. *normalp is not written on failure.
 *
 * The mapping is requested with the NULLTERM, STABLE and COMPOSE
 * options of the utf8 library.
 *
 * Return codes:
 * NC_NOERR -- success
 * NC_ENOMEM -- out of memory
 * NC_EINVAL -- illegal argument or internal error
 * NC_EBADNAME -- other failure
 */
int
nc_utf8_normalize(const unsigned char* utf8, unsigned char** normalp)
{
    int ncstat = NC_NOERR;
    const nc_utf8proc_uint8_t* str = (const nc_utf8proc_uint8_t*)utf8;
    nc_utf8proc_uint8_t* retval = NULL;
    nc_utf8proc_ssize_t count;

    if(utf8 == NULL) {
	ncstat = NC_EINVAL;
	goto done;
    }

    /* Length argument is 0 because UTF8PROC_NULLTERM is set */
    count = nc_utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE | UTF8PROC_COMPOSE);
    if(count < 0) {/* error */
	switch (count) {
	case UTF8PROC_ERROR_NOMEM:
	case UTF8PROC_ERROR_OVERFLOW:
	    ncstat = NC_ENOMEM;
	    break;
	case UTF8PROC_ERROR_INVALIDOPTS:
	    ncstat = NC_EINVAL;
	    break;
	case UTF8PROC_ERROR_INVALIDUTF8:
	case UTF8PROC_ERROR_NOTASSIGNED:
	default:
	    ncstat = NC_EBADNAME;
	    break;
	}
	goto done;
    }

    if(normalp) {
	/* Ownership of the buffer passes to the caller */
	*normalp = (unsigned char*)retval;
    } else {
	/* Nobody to hand the buffer to: release it rather than leak it */
	free(retval);
    }
done:
    return ncstat;
}