Renamed the utf8proc data types with an nc_ prefix to avoid name collisions.

This commit is contained in:
Ward Fisher 2017-02-28 11:12:00 -07:00
parent 5883b6587f
commit 773fe80727
3 changed files with 192 additions and 192 deletions

View File

@ -23,12 +23,12 @@ int
nc_utf8_validate(const unsigned char* name)
{
int ncstat = NC_NOERR;
const utf8proc_uint8_t *str;
utf8proc_ssize_t strlen = -1;
utf8proc_int32_t codepoint;
utf8proc_ssize_t count;
const nc_utf8proc_uint8_t *str;
nc_utf8proc_ssize_t strlen = -1;
nc_utf8proc_int32_t codepoint;
nc_utf8proc_ssize_t count;
str = (const utf8proc_uint8_t*)name;
str = (const nc_utf8proc_uint8_t*)name;
while(*str) {
count = nc_utf8proc_iterate(str,strlen,&codepoint);
if(count < 0) {
@ -70,9 +70,9 @@ int
nc_utf8_normalize(const unsigned char* utf8, unsigned char** normalp)
{
int ncstat = NC_NOERR;
const utf8proc_uint8_t* str = (const utf8proc_uint8_t*)utf8;
utf8proc_uint8_t* retval = NULL;
utf8proc_ssize_t count;
const nc_utf8proc_uint8_t* str = (const nc_utf8proc_uint8_t*)utf8;
nc_utf8proc_uint8_t* retval = NULL;
nc_utf8proc_ssize_t count;
count = nc_utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE | UTF8PROC_COMPOSE);
if(count < 0) {/* error */
switch (count) {

View File

@ -45,7 +45,7 @@
#include "utf8proc_data.c"
const utf8proc_int8_t utf8proc_utf8class[256] = {
const nc_utf8proc_int8_t nc_utf8proc_utf8class[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -92,7 +92,7 @@ const char *nc_utf8proc_version(void) {
return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) "";
}
const char *nc_utf8proc_errmsg(utf8proc_ssize_t errcode) {
const char *nc_utf8proc_errmsg(nc_utf8proc_ssize_t errcode) {
switch (errcode) {
case UTF8PROC_ERROR_NOMEM:
return "Memory for processing UTF-8 data could not be allocated.";
@ -110,11 +110,11 @@ const char *nc_utf8proc_version(void) {
}
#define utf_cont(ch) (((ch) & 0xc0) == 0x80)
utf8proc_ssize_t nc_utf8proc_iterate(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
nc_utf8proc_ssize_t nc_utf8proc_iterate(
const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen, nc_utf8proc_int32_t *dst
) {
utf8proc_uint32_t uc;
const utf8proc_uint8_t *end;
nc_utf8proc_uint32_t uc;
const nc_utf8proc_uint8_t *end;
*dst = -1;
if (!strlen) return 0;
@ -158,69 +158,69 @@ utf8proc_ssize_t nc_utf8proc_iterate(
return 4;
}
/* Reports whether uc is an acceptable codepoint value: true when uc, viewed
   as an unsigned 32-bit number, is below 0x110000 and is not in the range
   0xd800..0xdfff.
   The unsigned subtraction of 0xd800 makes every value below 0xd800 wrap to a
   large number, so the single "> 0x07ff" comparison rejects exactly the
   0x800 values starting at 0xd800. A negative uc becomes a large unsigned
   value and fails the "< 0x110000" test.
   The signature and return statement appear twice: once with the
   utf8proc_* type names and once with the nc_utf8proc_* type names. */
utf8proc_bool nc_utf8proc_codepoint_valid(utf8proc_int32_t uc) {
return (((utf8proc_uint32_t)uc)-0xd800 > 0x07ff) && ((utf8proc_uint32_t)uc < 0x110000);
nc_utf8proc_bool nc_utf8proc_codepoint_valid(nc_utf8proc_int32_t uc) {
return (((nc_utf8proc_uint32_t)uc)-0xd800 > 0x07ff) && ((nc_utf8proc_uint32_t)uc < 0x110000);
}
/* Writes the UTF-8 encoding of codepoint uc to dst and returns the number of
   bytes written (1 to 4). Returns 0 without touching dst when uc is negative
   or is 0x110000 or larger.
   Up to dst[3] may be written, so dst must have room for 4 bytes.
   Each statement that mentions a type appears twice: once with the
   utf8proc_* type names and once with the nc_utf8proc_* type names. */
utf8proc_ssize_t nc_utf8proc_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
nc_utf8proc_ssize_t nc_utf8proc_encode_char(nc_utf8proc_int32_t uc, nc_utf8proc_uint8_t *dst) {
if (uc < 0x00) {
return 0;
} else if (uc < 0x80) {
/* one byte: the value itself */
dst[0] = (utf8proc_uint8_t) uc;
dst[0] = (nc_utf8proc_uint8_t) uc;
return 1;
} else if (uc < 0x800) {
/* two bytes: lead byte 0xC0 + high bits, then one 0x80 + 6-bit continuation byte */
dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6));
dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
dst[0] = (nc_utf8proc_uint8_t)(0xC0 + (uc >> 6));
dst[1] = (nc_utf8proc_uint8_t)(0x80 + (uc & 0x3F));
return 2;
// Note: we allow encoding 0xd800-0xdfff here, so as not to change
// the API, however, these are actually invalid in UTF-8
} else if (uc < 0x10000) {
/* three bytes: lead byte 0xE0 + high bits, then two continuation bytes */
dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12));
dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
dst[2] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
dst[0] = (nc_utf8proc_uint8_t)(0xE0 + (uc >> 12));
dst[1] = (nc_utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
dst[2] = (nc_utf8proc_uint8_t)(0x80 + (uc & 0x3F));
return 3;
} else if (uc < 0x110000) {
/* four bytes: lead byte 0xF0 + high bits, then three continuation bytes */
dst[0] = (utf8proc_uint8_t)(0xF0 + (uc >> 18));
dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 12) & 0x3F));
dst[2] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
dst[3] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
dst[0] = (nc_utf8proc_uint8_t)(0xF0 + (uc >> 18));
dst[1] = (nc_utf8proc_uint8_t)(0x80 + ((uc >> 12) & 0x3F));
dst[2] = (nc_utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
dst[3] = (nc_utf8proc_uint8_t)(0x80 + (uc & 0x3F));
return 4;
} else return 0;
}
/* internal "unsafe" version that does not check whether uc is in range */
/* Same byte layout and return values as nc_utf8proc_encode_char, with one
   visible difference: the values 0xFFFF and 0xFFFE are written as the single
   bytes 0xFF and 0xFE instead of as three-byte sequences.
   NOTE(review): despite the comment above, the body shown here still returns
   0 for uc < 0 and for uc >= 0x110000.
   Each statement that mentions a type appears twice: once with the
   utf8proc_* type names and once with the nc_utf8proc_* type names. */
static utf8proc_ssize_t nc_unsafe_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
static nc_utf8proc_ssize_t nc_unsafe_encode_char(nc_utf8proc_int32_t uc, nc_utf8proc_uint8_t *dst) {
if (uc < 0x00) {
return 0;
} else if (uc < 0x80) {
dst[0] = (utf8proc_uint8_t)uc;
dst[0] = (nc_utf8proc_uint8_t)uc;
return 1;
} else if (uc < 0x800) {
dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6));
dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
dst[0] = (nc_utf8proc_uint8_t)(0xC0 + (uc >> 6));
dst[1] = (nc_utf8proc_uint8_t)(0x80 + (uc & 0x3F));
return 2;
} else if (uc == 0xFFFF) {
/* special case: emitted as the lone byte 0xFF */
dst[0] = (utf8proc_uint8_t)0xFF;
dst[0] = (nc_utf8proc_uint8_t)0xFF;
return 1;
} else if (uc == 0xFFFE) {
/* special case: emitted as the lone byte 0xFE */
dst[0] = (utf8proc_uint8_t)0xFE;
dst[0] = (nc_utf8proc_uint8_t)0xFE;
return 1;
} else if (uc < 0x10000) {
dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12));
dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
dst[2] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
dst[0] = (nc_utf8proc_uint8_t)(0xE0 + (uc >> 12));
dst[1] = (nc_utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
dst[2] = (nc_utf8proc_uint8_t)(0x80 + (uc & 0x3F));
return 3;
} else if (uc < 0x110000) {
dst[0] = (utf8proc_uint8_t)(0xF0 + (uc >> 18));
dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 12) & 0x3F));
dst[2] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
dst[3] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
dst[0] = (nc_utf8proc_uint8_t)(0xF0 + (uc >> 18));
dst[1] = (nc_utf8proc_uint8_t)(0x80 + ((uc >> 12) & 0x3F));
dst[2] = (nc_utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
dst[3] = (nc_utf8proc_uint8_t)(0x80 + (uc & 0x3F));
return 4;
} else return 0;
}
/* internal "unsafe" version that does not check whether uc is in range */
static const utf8proc_property_t *nc_unsafe_get_property(utf8proc_int32_t uc) {
static const nc_utf8proc_property_t *nc_unsafe_get_property(nc_utf8proc_int32_t uc) {
/* ASSERT: uc >= 0 && uc < 0x110000 */
return utf8proc_properties + (
utf8proc_stage2table[
@ -229,7 +229,7 @@ static const utf8proc_property_t *nc_unsafe_get_property(utf8proc_int32_t uc) {
);
}
/* Range-checked property lookup. For uc outside 0 .. 0x10FFFF the address of
   the utf8proc_properties table itself (its first entry) is returned;
   otherwise the lookup is delegated to nc_unsafe_get_property.
   The signature appears twice: once with the utf8proc_* type names and once
   with the nc_utf8proc_* type names. */
const utf8proc_property_t *nc_utf8proc_get_property(utf8proc_int32_t uc) {
const nc_utf8proc_property_t *nc_utf8proc_get_property(nc_utf8proc_int32_t uc) {
return uc < 0 || uc >= 0x110000 ? utf8proc_properties : nc_unsafe_get_property(uc);
}
@ -248,7 +248,7 @@ static const utf8proc_property_t *nc_unsafe_get_property(utf8proc_int32_t uc) {
See the special support in grapheme_break_extended, for required bookkeeping by the caller.
*/
static utf8proc_bool nc_grapheme_break_simple(int lbc, int tbc) {
static nc_utf8proc_bool nc_grapheme_break_simple(int lbc, int tbc) {
return
(lbc == UTF8PROC_BOUNDCLASS_START) ? true : // GB1
(lbc == UTF8PROC_BOUNDCLASS_CR && // GB3
@ -282,12 +282,12 @@ static utf8proc_bool nc_grapheme_break_simple(int lbc, int tbc) {
true; // GB999
}
static utf8proc_bool nc_grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t *state)
static nc_utf8proc_bool nc_grapheme_break_extended(int lbc, int tbc, nc_utf8proc_int32_t *state)
{
int lbc_override = lbc;
if (state && *state != UTF8PROC_BOUNDCLASS_START)
lbc_override = *state;
utf8proc_bool break_permitted = nc_grapheme_break_simple(lbc_override, tbc);
nc_utf8proc_bool break_permitted = nc_grapheme_break_simple(lbc_override, tbc);
if (state) {
// Special support for GB 12/13 made possible by GB999. After two RI
// class codepoints we want to force a break. Do this by resetting the
@ -308,8 +308,8 @@ static utf8proc_bool nc_grapheme_break_extended(int lbc, int tbc, utf8proc_int32
return break_permitted;
}
utf8proc_bool nc_utf8proc_grapheme_break_stateful(
utf8proc_int32_t c1, utf8proc_int32_t c2, utf8proc_int32_t *state) {
nc_utf8proc_bool nc_utf8proc_grapheme_break_stateful(
nc_utf8proc_int32_t c1, nc_utf8proc_int32_t c2, nc_utf8proc_int32_t *state) {
return nc_grapheme_break_extended(nc_utf8proc_get_property(c1)->boundclass,
nc_utf8proc_get_property(c2)->boundclass,
@ -317,14 +317,14 @@ static utf8proc_bool nc_grapheme_break_extended(int lbc, int tbc, utf8proc_int32
}
/* Stateless convenience wrapper: forwards c1 and c2 to
   nc_utf8proc_grapheme_break_stateful with a NULL state pointer and returns
   its result.
   The signature appears twice: once with the utf8proc_* type names and once
   with the nc_utf8proc_* type names. */
utf8proc_bool nc_utf8proc_grapheme_break(
utf8proc_int32_t c1, utf8proc_int32_t c2) {
nc_utf8proc_bool nc_utf8proc_grapheme_break(
nc_utf8proc_int32_t c1, nc_utf8proc_int32_t c2) {
return nc_utf8proc_grapheme_break_stateful(c1, c2, NULL);
}
static utf8proc_int32_t nc_seqindex_decode_entry(const utf8proc_uint16_t **entry)
static nc_utf8proc_int32_t nc_seqindex_decode_entry(const nc_utf8proc_uint16_t **entry)
{
utf8proc_int32_t entry_cp = **entry;
nc_utf8proc_int32_t entry_cp = **entry;
if ((entry_cp & 0xF800) == 0xD800) {
*entry = *entry + 1;
entry_cp = ((entry_cp & 0x03FF) << 10) | (**entry & 0x03FF);
@ -333,22 +333,22 @@ static utf8proc_int32_t nc_seqindex_decode_entry(const utf8proc_uint16_t **entry
return entry_cp;
}
/* Decodes the codepoint stored at position seqindex of the
   utf8proc_sequences table by handing a pointer to that entry to
   nc_seqindex_decode_entry. The local entry pointer may be advanced by that
   call; the advance is discarded here.
   The signature and the local declaration appear twice: once with the
   utf8proc_* type names and once with the nc_utf8proc_* type names. */
static utf8proc_int32_t nc_seqindex_decode_index(const utf8proc_uint32_t seqindex)
static nc_utf8proc_int32_t nc_seqindex_decode_index(const nc_utf8proc_uint32_t seqindex)
{
const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex];
const nc_utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex];
return nc_seqindex_decode_entry(&entry);
}
static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqindex, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
utf8proc_ssize_t written = 0;
const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex & 0x1FFF];
static nc_utf8proc_ssize_t nc_seqindex_write_char_decomposed(nc_utf8proc_uint16_t seqindex, nc_utf8proc_int32_t *dst, nc_utf8proc_ssize_t bufsize, nc_utf8proc_option_t options, int *last_boundclass) {
nc_utf8proc_ssize_t written = 0;
const nc_utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex & 0x1FFF];
int len = seqindex >> 13;
if (len >= 7) {
len = *entry;
entry++;
}
for (; len >= 0; entry++, len--) {
utf8proc_int32_t entry_cp = nc_seqindex_decode_entry(&entry);
nc_utf8proc_int32_t entry_cp = nc_seqindex_decode_entry(&entry);
written += nc_utf8proc_decompose_char(entry_cp, dst+written,
(bufsize > written) ? (bufsize - written) : 0, options,
@ -358,35 +358,35 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
return written;
}
/* Returns the lower-case mapping of c. The lowercase_seqindex field of c's
   property record is read; the value UINT16_MAX is treated as "no mapping"
   and c is returned unchanged, otherwise the index is decoded with
   nc_seqindex_decode_index.
   The signature and the local declaration appear twice: once with the
   utf8proc_* type names and once with the nc_utf8proc_* type names. */
utf8proc_int32_t nc_utf8proc_tolower(utf8proc_int32_t c)
nc_utf8proc_int32_t nc_utf8proc_tolower(nc_utf8proc_int32_t c)
{
utf8proc_int32_t cl = nc_utf8proc_get_property(c)->lowercase_seqindex;
nc_utf8proc_int32_t cl = nc_utf8proc_get_property(c)->lowercase_seqindex;
return cl != UINT16_MAX ? nc_seqindex_decode_index(cl) : c;
}
/* Returns the upper-case mapping of c. The uppercase_seqindex field of c's
   property record is read; the value UINT16_MAX is treated as "no mapping"
   and c is returned unchanged, otherwise the index is decoded with
   nc_seqindex_decode_index.
   The signature and the local declaration appear twice: once with the
   utf8proc_* type names and once with the nc_utf8proc_* type names. */
utf8proc_int32_t nc_utf8proc_toupper(utf8proc_int32_t c)
nc_utf8proc_int32_t nc_utf8proc_toupper(nc_utf8proc_int32_t c)
{
utf8proc_int32_t cu = nc_utf8proc_get_property(c)->uppercase_seqindex;
nc_utf8proc_int32_t cu = nc_utf8proc_get_property(c)->uppercase_seqindex;
return cu != UINT16_MAX ? nc_seqindex_decode_index(cu) : c;
}
/* Returns the title-case mapping of c. The titlecase_seqindex field of c's
   property record is read; the value UINT16_MAX is treated as "no mapping"
   and c is returned unchanged, otherwise the index is decoded with
   nc_seqindex_decode_index.
   The signature and the local declaration appear twice: once with the
   utf8proc_* type names and once with the nc_utf8proc_* type names. */
utf8proc_int32_t nc_utf8proc_totitle(utf8proc_int32_t c)
nc_utf8proc_int32_t nc_utf8proc_totitle(nc_utf8proc_int32_t c)
{
utf8proc_int32_t cu = nc_utf8proc_get_property(c)->titlecase_seqindex;
nc_utf8proc_int32_t cu = nc_utf8proc_get_property(c)->titlecase_seqindex;
return cu != UINT16_MAX ? nc_seqindex_decode_index(cu) : c;
}
/* return a character width analogous to wcwidth (except portable and
hopefully less buggy than most system wcwidth functions).
The value returned is the charwidth field of the property record that
nc_utf8proc_get_property yields for c.
The signature appears twice: once with the utf8proc_* type names and once
with the nc_utf8proc_* type names. */
int nc_utf8proc_charwidth(utf8proc_int32_t c) {
int nc_utf8proc_charwidth(nc_utf8proc_int32_t c) {
return nc_utf8proc_get_property(c)->charwidth;
}
/* Returns the category field of the property record that
   nc_utf8proc_get_property yields for c.
   The signature appears twice: once with the utf8proc_* type names and once
   with the nc_utf8proc_* type names. */
utf8proc_category_t nc_utf8proc_category(utf8proc_int32_t c) {
nc_utf8proc_category_t nc_utf8proc_category(nc_utf8proc_int32_t c) {
return nc_utf8proc_get_property(c)->category;
}
/* Returns the two-letter abbreviation ("Lu", "Nd", ...) for the category of
   c. The result points into a static table of 30 NUL-terminated entries that
   is indexed directly by the value nc_utf8proc_category returns, so index 0
   is "Cn" and index 29 is "Co".
   The signature appears twice: once with the utf8proc_* type names and once
   with the nc_utf8proc_* type names. */
const char *nc_utf8proc_category_string(utf8proc_int32_t c) {
const char *nc_utf8proc_category_string(nc_utf8proc_int32_t c) {
static const char s[][3] = {"Cn","Lu","Ll","Lt","Lm","Lo","Mn","Mc","Me","Nd","Nl","No","Pc","Pd","Ps","Pe","Pi","Pf","Po","Sm","Sc","Sk","So","Zs","Zl","Zp","Cc","Cf","Cs","Co"};
return s[nc_utf8proc_category(c)];
}
@ -395,17 +395,17 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
return nc_utf8proc_decompose_char((replacement_uc), dst, bufsize, \
options & ~UTF8PROC_LUMP, last_boundclass)
utf8proc_ssize_t nc_utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
const utf8proc_property_t *property;
utf8proc_propval_t category;
utf8proc_int32_t hangul_sindex;
nc_utf8proc_ssize_t nc_utf8proc_decompose_char(nc_utf8proc_int32_t uc, nc_utf8proc_int32_t *dst, nc_utf8proc_ssize_t bufsize, nc_utf8proc_option_t options, int *last_boundclass) {
const nc_utf8proc_property_t *property;
nc_utf8proc_propval_t category;
nc_utf8proc_int32_t hangul_sindex;
if (uc < 0 || uc >= 0x110000) return UTF8PROC_ERROR_NOTASSIGNED;
property = nc_unsafe_get_property(uc);
category = property->category;
hangul_sindex = uc - UTF8PROC_HANGUL_SBASE;
if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) {
if (hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT) {
utf8proc_int32_t hangul_tindex;
nc_utf8proc_int32_t hangul_tindex;
if (bufsize >= 1) {
dst[0] = UTF8PROC_HANGUL_LBASE +
hangul_sindex / UTF8PROC_HANGUL_NCOUNT;
@ -467,7 +467,7 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
}
}
if (options & UTF8PROC_CHARBOUND) {
utf8proc_bool boundary;
nc_utf8proc_bool boundary;
int tbc = property->boundclass;
boundary = nc_grapheme_break_extended(*last_boundclass, tbc, last_boundclass);
if (boundary) {
@ -480,29 +480,29 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
return 1;
}
/* Convenience wrapper around nc_utf8proc_decompose_custom: passes every
   argument through unchanged and supplies NULL for both custom_func and
   custom_data. The callee's return value is returned as-is.
   The parameter list appears twice: once with the utf8proc_* type names and
   once with the nc_utf8proc_* type names. */
utf8proc_ssize_t nc_utf8proc_decompose(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
nc_utf8proc_ssize_t nc_utf8proc_decompose(
const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen,
nc_utf8proc_int32_t *buffer, nc_utf8proc_ssize_t bufsize, nc_utf8proc_option_t options
) {
return nc_utf8proc_decompose_custom(str, strlen, buffer, bufsize, options, NULL, NULL);
}
utf8proc_ssize_t nc_utf8proc_decompose_custom(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options,
utf8proc_custom_func custom_func, void *custom_data
nc_utf8proc_ssize_t nc_utf8proc_decompose_custom(
const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen,
nc_utf8proc_int32_t *buffer, nc_utf8proc_ssize_t bufsize, nc_utf8proc_option_t options,
nc_utf8proc_custom_func custom_func, void *custom_data
) {
/* strlen will be ignored, if UTF8PROC_NULLTERM is set in options */
utf8proc_ssize_t wpos = 0;
nc_utf8proc_ssize_t wpos = 0;
if ((options & UTF8PROC_COMPOSE) && (options & UTF8PROC_DECOMPOSE))
return UTF8PROC_ERROR_INVALIDOPTS;
if ((options & UTF8PROC_STRIPMARK) &&
!(options & UTF8PROC_COMPOSE) && !(options & UTF8PROC_DECOMPOSE))
return UTF8PROC_ERROR_INVALIDOPTS;
{
utf8proc_int32_t uc;
utf8proc_ssize_t rpos = 0;
utf8proc_ssize_t decomp_result;
nc_utf8proc_int32_t uc;
nc_utf8proc_ssize_t rpos = 0;
nc_utf8proc_ssize_t decomp_result;
int boundclass = UTF8PROC_BOUNDCLASS_START;
while (1) {
if (options & UTF8PROC_NULLTERM) {
@ -528,15 +528,15 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
wpos += decomp_result;
/* prohibiting integer overflows due to too long strings: */
if (wpos < 0 ||
wpos > (utf8proc_ssize_t)(SSIZE_MAX/sizeof(utf8proc_int32_t)/2))
wpos > (nc_utf8proc_ssize_t)(SSIZE_MAX/sizeof(nc_utf8proc_int32_t)/2))
return UTF8PROC_ERROR_OVERFLOW;
}
}
if ((options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) && bufsize >= wpos) {
utf8proc_ssize_t pos = 0;
nc_utf8proc_ssize_t pos = 0;
while (pos < wpos-1) {
utf8proc_int32_t uc1, uc2;
const utf8proc_property_t *property1, *property2;
nc_utf8proc_int32_t uc1, uc2;
const nc_utf8proc_property_t *property1, *property2;
uc1 = buffer[pos];
uc2 = buffer[pos+1];
property1 = nc_unsafe_get_property(uc1);
@ -554,12 +554,12 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
return wpos;
}
utf8proc_ssize_t nc_utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
nc_utf8proc_ssize_t nc_utf8proc_normalize_utf32(nc_utf8proc_int32_t *buffer, nc_utf8proc_ssize_t length, nc_utf8proc_option_t options) {
/* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored */
if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) {
utf8proc_ssize_t rpos;
utf8proc_ssize_t wpos = 0;
utf8proc_int32_t uc;
nc_utf8proc_ssize_t rpos;
nc_utf8proc_ssize_t wpos = 0;
nc_utf8proc_int32_t uc;
for (rpos = 0; rpos < length; rpos++) {
uc = buffer[rpos];
if (uc == 0x000D && rpos < length-1 && buffer[rpos+1] == 0x000A) rpos++;
@ -588,23 +588,23 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
length = wpos;
}
if (options & UTF8PROC_COMPOSE) {
utf8proc_int32_t *starter = NULL;
utf8proc_int32_t current_char;
const utf8proc_property_t *starter_property = NULL, *current_property;
utf8proc_propval_t max_combining_class = -1;
utf8proc_ssize_t rpos;
utf8proc_ssize_t wpos = 0;
utf8proc_int32_t composition;
nc_utf8proc_int32_t *starter = NULL;
nc_utf8proc_int32_t current_char;
const nc_utf8proc_property_t *starter_property = NULL, *current_property;
nc_utf8proc_propval_t max_combining_class = -1;
nc_utf8proc_ssize_t rpos;
nc_utf8proc_ssize_t wpos = 0;
nc_utf8proc_int32_t composition;
for (rpos = 0; rpos < length; rpos++) {
current_char = buffer[rpos];
current_property = nc_unsafe_get_property(current_char);
if (starter && current_property->combining_class > max_combining_class) {
/* combination perhaps possible */
utf8proc_int32_t hangul_lindex;
utf8proc_int32_t hangul_sindex;
nc_utf8proc_int32_t hangul_lindex;
nc_utf8proc_int32_t hangul_sindex;
hangul_lindex = *starter - UTF8PROC_HANGUL_LBASE;
if (hangul_lindex >= 0 && hangul_lindex < UTF8PROC_HANGUL_LCOUNT) {
utf8proc_int32_t hangul_vindex;
nc_utf8proc_int32_t hangul_vindex;
hangul_vindex = current_char - UTF8PROC_HANGUL_VBASE;
if (hangul_vindex >= 0 && hangul_vindex < UTF8PROC_HANGUL_VCOUNT) {
*starter = UTF8PROC_HANGUL_SBASE +
@ -617,7 +617,7 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
hangul_sindex = *starter - UTF8PROC_HANGUL_SBASE;
if (hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT &&
(hangul_sindex % UTF8PROC_HANGUL_TCOUNT) == 0) {
utf8proc_int32_t hangul_tindex;
nc_utf8proc_int32_t hangul_tindex;
hangul_tindex = current_char - UTF8PROC_HANGUL_TBASE;
if (hangul_tindex >= 0 && hangul_tindex < UTF8PROC_HANGUL_TCOUNT) {
*starter += hangul_tindex;
@ -666,46 +666,46 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
return length;
}
/* Converts the `length` codepoints in `buffer` to a NUL-terminated UTF-8 byte
   string, in place. The buffer is first passed through
   nc_utf8proc_normalize_utf32; a negative result from that call is returned
   immediately. Otherwise each codepoint is encoded into the same memory,
   reinterpreted as bytes, and the number of bytes written (excluding the
   terminating 0) is returned.
   When UTF8PROC_CHARBOUND is set, nc_unsafe_encode_char is used in place of
   nc_utf8proc_encode_char.
   Each statement that mentions a type appears twice: once with the
   utf8proc_* type names and once with the nc_utf8proc_* type names. */
utf8proc_ssize_t nc_utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
nc_utf8proc_ssize_t nc_utf8proc_reencode(nc_utf8proc_int32_t *buffer, nc_utf8proc_ssize_t length, nc_utf8proc_option_t options) {
/* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
ASSERT: 'buffer' has one spare byte of free space at the end! */
length = nc_utf8proc_normalize_utf32(buffer, length, options);
if (length < 0) return length;
{
/* rpos indexes 32-bit codepoints being read; wpos counts bytes written.
   Each codepoint is copied into uc before its bytes are stored, and an
   encoding is at most 4 bytes (the size of one slot), so the write position
   never passes a slot that has not been read yet. */
utf8proc_ssize_t rpos, wpos = 0;
utf8proc_int32_t uc;
nc_utf8proc_ssize_t rpos, wpos = 0;
nc_utf8proc_int32_t uc;
if (options & UTF8PROC_CHARBOUND) {
for (rpos = 0; rpos < length; rpos++) {
uc = buffer[rpos];
wpos += nc_unsafe_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
wpos += nc_unsafe_encode_char(uc, ((nc_utf8proc_uint8_t *)buffer) + wpos);
}
} else {
for (rpos = 0; rpos < length; rpos++) {
uc = buffer[rpos];
wpos += nc_utf8proc_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
wpos += nc_utf8proc_encode_char(uc, ((nc_utf8proc_uint8_t *)buffer) + wpos);
}
}
/* terminator; if every codepoint took 4 bytes this lands in the spare byte
   required by the ASSERT above */
((utf8proc_uint8_t *)buffer)[wpos] = 0;
((nc_utf8proc_uint8_t *)buffer)[wpos] = 0;
return wpos;
}
}
/* Convenience wrapper around nc_utf8proc_map_custom: passes str, strlen,
   dstptr and options through unchanged and supplies NULL for both custom_func
   and custom_data. The callee's return value is returned as-is.
   The parameter list appears twice: once with the utf8proc_* type names and
   once with the nc_utf8proc_* type names. */
utf8proc_ssize_t nc_utf8proc_map(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options
nc_utf8proc_ssize_t nc_utf8proc_map(
const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen, nc_utf8proc_uint8_t **dstptr, nc_utf8proc_option_t options
) {
return nc_utf8proc_map_custom(str, strlen, dstptr, options, NULL, NULL);
}
utf8proc_ssize_t nc_utf8proc_map_custom(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options,
utf8proc_custom_func custom_func, void *custom_data
nc_utf8proc_ssize_t nc_utf8proc_map_custom(
const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen, nc_utf8proc_uint8_t **dstptr, nc_utf8proc_option_t options,
nc_utf8proc_custom_func custom_func, void *custom_data
) {
utf8proc_int32_t *buffer;
utf8proc_ssize_t result;
nc_utf8proc_int32_t *buffer;
nc_utf8proc_ssize_t result;
*dstptr = NULL;
result = nc_utf8proc_decompose_custom(str, strlen, NULL, 0, options, custom_func, custom_data);
if (result < 0) return result;
buffer = (utf8proc_int32_t *) malloc(result * sizeof(utf8proc_int32_t) + 1);
buffer = (nc_utf8proc_int32_t *) malloc(result * sizeof(nc_utf8proc_int32_t) + 1);
if (!buffer) return UTF8PROC_ERROR_NOMEM;
result = nc_utf8proc_decompose_custom(str, strlen, buffer, result, options, custom_func, custom_data);
if (result < 0) {
@ -718,37 +718,37 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
return result;
}
{
utf8proc_int32_t *newptr;
newptr = (utf8proc_int32_t *) realloc(buffer, (size_t)result+1);
nc_utf8proc_int32_t *newptr;
newptr = (nc_utf8proc_int32_t *) realloc(buffer, (size_t)result+1);
if (newptr) buffer = newptr;
}
*dstptr = (utf8proc_uint8_t *)buffer;
*dstptr = (nc_utf8proc_uint8_t *)buffer;
return result;
}
/* Runs the NUL-terminated string str through nc_utf8proc_map with the options
   UTF8PROC_NULLTERM | UTF8PROC_STABLE | UTF8PROC_DECOMPOSE and returns the
   buffer stored in retval. The status code returned by nc_utf8proc_map is
   discarded.
   NOTE(review): the visible part of nc_utf8proc_map_custom sets *dstptr to
   NULL before doing any work, so a failure presumably shows up as a NULL
   return - confirm. The buffer is obtained with malloc/realloc there, so the
   caller presumably has to free() it - confirm.
   The signature and the local declaration appear twice: once with the
   utf8proc_* type names and once with the nc_utf8proc_* type names. */
utf8proc_uint8_t *nc_utf8proc_NFD(const utf8proc_uint8_t *str) {
utf8proc_uint8_t *retval;
nc_utf8proc_uint8_t *nc_utf8proc_NFD(const nc_utf8proc_uint8_t *str) {
nc_utf8proc_uint8_t *retval;
nc_utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
UTF8PROC_DECOMPOSE);
return retval;
}
/* Runs the NUL-terminated string str through nc_utf8proc_map with the options
   UTF8PROC_NULLTERM | UTF8PROC_STABLE | UTF8PROC_COMPOSE and returns the
   buffer stored in retval. The status code returned by nc_utf8proc_map is
   discarded.
   NOTE(review): the visible part of nc_utf8proc_map_custom sets *dstptr to
   NULL before doing any work, so a failure presumably shows up as a NULL
   return - confirm. The buffer is obtained with malloc/realloc there, so the
   caller presumably has to free() it - confirm.
   The signature and the local declaration appear twice: once with the
   utf8proc_* type names and once with the nc_utf8proc_* type names. */
utf8proc_uint8_t *nc_utf8proc_NFC(const utf8proc_uint8_t *str) {
utf8proc_uint8_t *retval;
nc_utf8proc_uint8_t *nc_utf8proc_NFC(const nc_utf8proc_uint8_t *str) {
nc_utf8proc_uint8_t *retval;
nc_utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
UTF8PROC_COMPOSE);
return retval;
}
/* Runs the NUL-terminated string str through nc_utf8proc_map with the options
   UTF8PROC_NULLTERM | UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT
   and returns the buffer stored in retval. The status code returned by
   nc_utf8proc_map is discarded.
   NOTE(review): the visible part of nc_utf8proc_map_custom sets *dstptr to
   NULL before doing any work, so a failure presumably shows up as a NULL
   return - confirm. The buffer is obtained with malloc/realloc there, so the
   caller presumably has to free() it - confirm.
   The signature and the local declaration appear twice: once with the
   utf8proc_* type names and once with the nc_utf8proc_* type names. */
utf8proc_uint8_t *nc_utf8proc_NFKD(const utf8proc_uint8_t *str) {
utf8proc_uint8_t *retval;
nc_utf8proc_uint8_t *nc_utf8proc_NFKD(const nc_utf8proc_uint8_t *str) {
nc_utf8proc_uint8_t *retval;
nc_utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
return retval;
}
utf8proc_uint8_t *nc_utf8proc_NFKC(const utf8proc_uint8_t *str) {
utf8proc_uint8_t *retval;
nc_utf8proc_uint8_t *nc_utf8proc_NFKC(const nc_utf8proc_uint8_t *str) {
nc_utf8proc_uint8_t *retval;
nc_utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
return retval;

View File

@ -108,15 +108,15 @@ typedef bool utf8proc_bool;
# include <stddef.h>
# include <stdbool.h>
# include <inttypes.h>
/* Library-local aliases for the standard fixed-width integer types, size_t,
   ptrdiff_t (used as the signed size type) and bool.
   The first nine typedefs use the utf8proc_* names; the nine that follow
   define the same aliases under nc_utf8proc_* names. */
typedef int8_t utf8proc_int8_t;
typedef uint8_t utf8proc_uint8_t;
typedef int16_t utf8proc_int16_t;
typedef uint16_t utf8proc_uint16_t;
typedef int32_t utf8proc_int32_t;
typedef uint32_t utf8proc_uint32_t;
typedef size_t utf8proc_size_t;
typedef ptrdiff_t utf8proc_ssize_t;
typedef bool utf8proc_bool;
typedef int8_t nc_utf8proc_int8_t;
typedef uint8_t nc_utf8proc_uint8_t;
typedef int16_t nc_utf8proc_int16_t;
typedef uint16_t nc_utf8proc_uint16_t;
typedef int32_t nc_utf8proc_int32_t;
typedef uint32_t nc_utf8proc_uint32_t;
typedef size_t nc_utf8proc_size_t;
typedef ptrdiff_t nc_utf8proc_ssize_t;
typedef bool nc_utf8proc_bool;
#endif
#include <limits.h>
@ -209,7 +209,7 @@ typedef enum {
* @ref UTF8PROC_DECOMPOSE
*/
UTF8PROC_STRIPMARK = (1<<13),
} utf8proc_option_t;
} nc_utf8proc_option_t;
/** @name Error codes
* Error codes being returned by almost all functions.
@ -230,32 +230,32 @@ typedef enum {
/* @name Types */
/** Holds the value of a property. */
typedef utf8proc_int16_t utf8proc_propval_t;
typedef nc_utf8proc_int16_t nc_utf8proc_propval_t;
/** Struct containing information about a codepoint. */
typedef struct utf8proc_property_struct {
typedef struct nc_utf8proc_property_struct {
/**
* Unicode category.
* @see utf8proc_category_t.
*/
utf8proc_propval_t category;
utf8proc_propval_t combining_class;
nc_utf8proc_propval_t category;
nc_utf8proc_propval_t combining_class;
/**
* Bidirectional class.
* @see utf8proc_bidi_class_t.
*/
utf8proc_propval_t bidi_class;
nc_utf8proc_propval_t bidi_class;
/**
* @anchor Decomposition type.
* @see utf8proc_decomp_type_t.
*/
utf8proc_propval_t decomp_type;
utf8proc_uint16_t decomp_seqindex;
utf8proc_uint16_t casefold_seqindex;
utf8proc_uint16_t uppercase_seqindex;
utf8proc_uint16_t lowercase_seqindex;
utf8proc_uint16_t titlecase_seqindex;
utf8proc_uint16_t comb_index;
nc_utf8proc_propval_t decomp_type;
nc_utf8proc_uint16_t decomp_seqindex;
nc_utf8proc_uint16_t casefold_seqindex;
nc_utf8proc_uint16_t uppercase_seqindex;
nc_utf8proc_uint16_t lowercase_seqindex;
nc_utf8proc_uint16_t titlecase_seqindex;
nc_utf8proc_uint16_t comb_index;
unsigned bidi_mirrored:1;
unsigned comp_exclusion:1;
/**
@ -274,7 +274,7 @@ typedef struct utf8proc_property_struct {
* @see utf8proc_boundclass_t.
*/
unsigned boundclass:8;
} utf8proc_property_t;
} nc_utf8proc_property_t;
/** Unicode categories. */
typedef enum {
@ -308,7 +308,7 @@ typedef enum {
UTF8PROC_CATEGORY_CF = 27, /**< Other, format */
UTF8PROC_CATEGORY_CS = 28, /**< Other, surrogate */
UTF8PROC_CATEGORY_CO = 29, /**< Other, private use */
} utf8proc_category_t;
} nc_utf8proc_category_t;
/** Bidirectional character classes. */
typedef enum {
@ -335,7 +335,7 @@ typedef enum {
UTF8PROC_BIDI_CLASS_RLI = 21, /**< Right-to-Left Isolate */
UTF8PROC_BIDI_CLASS_FSI = 22, /**< First Strong Isolate */
UTF8PROC_BIDI_CLASS_PDI = 23, /**< Pop Directional Isolate */
} utf8proc_bidi_class_t;
} nc_utf8proc_bidi_class_t;
/** Decomposition type. */
typedef enum {
@ -355,7 +355,7 @@ typedef enum {
UTF8PROC_DECOMP_TYPE_SQUARE = 14, /**< Square */
UTF8PROC_DECOMP_TYPE_FRACTION = 15, /**< Fraction */
UTF8PROC_DECOMP_TYPE_COMPAT = 16, /**< Compat */
} utf8proc_decomp_type_t;
} nc_utf8proc_decomp_type_t;
/** Boundclass property. (TR29) */
typedef enum {
@ -378,20 +378,20 @@ typedef enum {
UTF8PROC_BOUNDCLASS_E_MODIFIER = 16, /**< Emoji Modifier */
UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ = 17, /**< Glue_After_ZWJ */
UTF8PROC_BOUNDCLASS_E_BASE_GAZ = 18, /**< E_BASE + GLUE_AFTER_ZJW */
} utf8proc_boundclass_t;
} nc_utf8proc_boundclass_t;
/**
* Function pointer type passed to @ref nc_utf8proc_map_custom and
* @ref nc_utf8proc_decompose_custom, which is used to specify a user-defined
* mapping of codepoints to be applied in conjunction with other mappings.
*/
typedef utf8proc_int32_t (*utf8proc_custom_func)(utf8proc_int32_t codepoint, void *data);
typedef nc_utf8proc_int32_t (*nc_utf8proc_custom_func)(nc_utf8proc_int32_t codepoint, void *data);
/**
* Array containing the byte lengths of a UTF-8 encoded codepoint based
* on the first byte.
*
* The table has 256 entries, one per possible first-byte value.
* The declaration appears twice: once under the utf8proc_* name and once
* under the nc_utf8proc_* name.
*
* NOTE(review): neither declaration is marked `extern`, so every translation
* unit that includes this declaration gets its own tentative definition of
* the array in addition to the initialized definition elsewhere in this
* source. That can fail at link time when common symbols are disabled -
* confirm against the build and consider adding `extern`.
*/
const utf8proc_int8_t utf8proc_utf8class[256];
const nc_utf8proc_int8_t nc_utf8proc_utf8class[256];
/**
* Returns the utf8proc API version as a string MAJOR.MINOR.PATCH
@ -404,7 +404,7 @@ const char *nc_utf8proc_version(void);
* Returns an informative error string for the given utf8proc error code
* (e.g. the error codes returned by @ref nc_utf8proc_map).
*/
const char *nc_utf8proc_errmsg(utf8proc_ssize_t errcode);
const char *nc_utf8proc_errmsg(nc_utf8proc_ssize_t errcode);
/**
* Reads a single codepoint from the UTF-8 sequence being pointed to by `str`.
@ -416,7 +416,7 @@ const char *nc_utf8proc_errmsg(utf8proc_ssize_t errcode);
* In case of success, the number of bytes read is returned; otherwise, a
* negative error code is returned.
*/
UTF8PROC_DLLEXPORT utf8proc_ssize_t nc_utf8proc_iterate(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *codepoint_ref);
UTF8PROC_DLLEXPORT nc_utf8proc_ssize_t nc_utf8proc_iterate(const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen, nc_utf8proc_int32_t *codepoint_ref);
/**
* Check if a codepoint is valid (regardless of whether it has been
@ -424,7 +424,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t nc_utf8proc_iterate(const utf8proc_uint8_t *
*
* @return 1 if the given `codepoint` is valid and otherwise return 0.
*/
utf8proc_bool nc_utf8proc_codepoint_valid(utf8proc_int32_t codepoint);
nc_utf8proc_bool nc_utf8proc_codepoint_valid(nc_utf8proc_int32_t codepoint);
/**
* Encodes the codepoint as an UTF-8 string in the byte array pointed
@ -435,7 +435,7 @@ utf8proc_bool nc_utf8proc_codepoint_valid(utf8proc_int32_t codepoint);
*
* This function does not check whether `codepoint` is valid Unicode.
*/
utf8proc_ssize_t nc_utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_uint8_t *dst);
nc_utf8proc_ssize_t nc_utf8proc_encode_char(nc_utf8proc_int32_t codepoint, nc_utf8proc_uint8_t *dst);
/**
* Look up the properties for a given codepoint.
@ -449,7 +449,7 @@ utf8proc_ssize_t nc_utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_ui
* If the codepoint is unassigned or invalid, a pointer to a special struct is
* returned in which `category` is 0 (@ref UTF8PROC_CATEGORY_CN).
*/
const utf8proc_property_t *nc_utf8proc_get_property(utf8proc_int32_t codepoint);
const nc_utf8proc_property_t *nc_utf8proc_get_property(nc_utf8proc_int32_t codepoint);
/** Decompose a codepoint into an array of codepoints.
*
@ -478,9 +478,9 @@ const utf8proc_property_t *nc_utf8proc_get_property(utf8proc_int32_t codepoint);
* required buffer size is returned, while the buffer will be overwritten with
* undefined data.
*/
utf8proc_ssize_t nc_utf8proc_decompose_char(
utf8proc_int32_t codepoint, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize,
utf8proc_option_t options, int *last_boundclass
nc_utf8proc_ssize_t nc_utf8proc_decompose_char(
nc_utf8proc_int32_t codepoint, nc_utf8proc_int32_t *dst, nc_utf8proc_ssize_t bufsize,
nc_utf8proc_option_t options, int *last_boundclass
);
/**
@ -500,9 +500,9 @@ utf8proc_ssize_t nc_utf8proc_decompose_char(
* required buffer size is returned, while the buffer will be overwritten with
* undefined data.
*/
utf8proc_ssize_t nc_utf8proc_decompose(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
nc_utf8proc_ssize_t nc_utf8proc_decompose(
const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen,
nc_utf8proc_int32_t *buffer, nc_utf8proc_ssize_t bufsize, nc_utf8proc_option_t options
);
/**
@ -511,10 +511,10 @@ utf8proc_ssize_t nc_utf8proc_decompose(
* (along with a `custom_data` pointer that is passed through to `custom_func`).
* The `custom_func` argument is ignored if it is `NULL`. See also @ref nc_utf8proc_map_custom.
*/
utf8proc_ssize_t nc_utf8proc_decompose_custom(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options,
utf8proc_custom_func custom_func, void *custom_data
nc_utf8proc_ssize_t nc_utf8proc_decompose_custom(
const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen,
nc_utf8proc_int32_t *buffer, nc_utf8proc_ssize_t bufsize, nc_utf8proc_option_t options,
nc_utf8proc_custom_func custom_func, void *custom_data
);
/**
@ -540,7 +540,7 @@ utf8proc_ssize_t nc_utf8proc_decompose_custom(
* @warning The entries of the array pointed to by `str` have to be in the
* range `0x0000` to `0x10FFFF`. Otherwise, the program might crash!
*/
utf8proc_ssize_t nc_utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options);
nc_utf8proc_ssize_t nc_utf8proc_normalize_utf32(nc_utf8proc_int32_t *buffer, nc_utf8proc_ssize_t length, nc_utf8proc_option_t options);
/**
* Reencodes the sequence of `length` codepoints pointed to by `buffer`
@ -570,7 +570,7 @@ utf8proc_ssize_t nc_utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_
* entries of the array pointed to by `str` have to be in the
* range `0x0000` to `0x10FFFF`. Otherwise, the program might crash!
*/
utf8proc_ssize_t nc_utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options);
nc_utf8proc_ssize_t nc_utf8proc_reencode(nc_utf8proc_int32_t *buffer, nc_utf8proc_ssize_t length, nc_utf8proc_option_t options);
/**
* Given a pair of consecutive codepoints, return whether a grapheme break is
@ -586,15 +586,15 @@ utf8proc_ssize_t nc_utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t
* @warning If the state parameter is used, `nc_utf8proc_grapheme_break_stateful` must
* be called IN ORDER on ALL potential breaks in a string.
*/
utf8proc_bool nc_utf8proc_grapheme_break_stateful(
utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2, utf8proc_int32_t *state);
nc_utf8proc_bool nc_utf8proc_grapheme_break_stateful(
nc_utf8proc_int32_t codepoint1, nc_utf8proc_int32_t codepoint2, nc_utf8proc_int32_t *state);
/**
* Same as @ref nc_utf8proc_grapheme_break_stateful, except without support for the
* Unicode 9 additions to the algorithm. Supported for legacy reasons.
*/
utf8proc_bool nc_utf8proc_grapheme_break(
utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2);
nc_utf8proc_bool nc_utf8proc_grapheme_break(
nc_utf8proc_int32_t codepoint1, nc_utf8proc_int32_t codepoint2);
/**
@ -602,21 +602,21 @@ utf8proc_bool nc_utf8proc_grapheme_break(
* lower-case character, if any; otherwise (if there is no lower-case
* variant, or if `c` is not a valid codepoint) return `c`.
*/
utf8proc_int32_t nc_utf8proc_tolower(utf8proc_int32_t c);
nc_utf8proc_int32_t nc_utf8proc_tolower(nc_utf8proc_int32_t c);
/**
* Given a codepoint `c`, return the codepoint of the corresponding
* upper-case character, if any; otherwise (if there is no upper-case
* variant, or if `c` is not a valid codepoint) return `c`.
*/
utf8proc_int32_t nc_utf8proc_toupper(utf8proc_int32_t c);
nc_utf8proc_int32_t nc_utf8proc_toupper(nc_utf8proc_int32_t c);
/**
* Given a codepoint `c`, return the codepoint of the corresponding
* title-case character, if any; otherwise (if there is no title-case
* variant, or if `c` is not a valid codepoint) return `c`.
*/
utf8proc_int32_t nc_utf8proc_totitle(utf8proc_int32_t c);
nc_utf8proc_int32_t nc_utf8proc_totitle(nc_utf8proc_int32_t c);
/**
* Given a codepoint, return a character width analogous to `wcwidth(codepoint)`,
@ -626,19 +626,19 @@ utf8proc_int32_t nc_utf8proc_totitle(utf8proc_int32_t c);
* @note
* If you want to check for particular types of non-printable characters,
* (analogous to `isprint` or `iscntrl`), use @ref nc_utf8proc_category. */
int nc_utf8proc_charwidth(utf8proc_int32_t codepoint);
int nc_utf8proc_charwidth(nc_utf8proc_int32_t codepoint);
/**
* Return the Unicode category for the codepoint (one of the
* @ref nc_utf8proc_category_t constants).
*/
utf8proc_category_t nc_utf8proc_category(utf8proc_int32_t codepoint);
nc_utf8proc_category_t nc_utf8proc_category(nc_utf8proc_int32_t codepoint);
/**
* Return the two-letter (nul-terminated) Unicode category string for
* the codepoint (e.g. `"Lu"` or `"Co"`).
*/
const char *nc_utf8proc_category_string(utf8proc_int32_t codepoint);
const char *nc_utf8proc_category_string(nc_utf8proc_int32_t codepoint);
/**
* Maps the given UTF-8 string pointed to by `str` to a new UTF-8
@ -659,8 +659,8 @@ const char *nc_utf8proc_category_string(utf8proc_int32_t codepoint);
* @note The memory of the new UTF-8 string will have been allocated
* with `malloc`, and should therefore be deallocated with `free`.
*/
utf8proc_ssize_t nc_utf8proc_map(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options
nc_utf8proc_ssize_t nc_utf8proc_map(
const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen, nc_utf8proc_uint8_t **dstptr, nc_utf8proc_option_t options
);
/**
@ -669,9 +669,9 @@ utf8proc_ssize_t nc_utf8proc_map(
* (along with a `custom_data` pointer that is passed through to `custom_func`).
* The `custom_func` argument is ignored if it is `NULL`.
*/
utf8proc_ssize_t nc_utf8proc_map_custom(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options,
utf8proc_custom_func custom_func, void *custom_data
nc_utf8proc_ssize_t nc_utf8proc_map_custom(
const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen, nc_utf8proc_uint8_t **dstptr, nc_utf8proc_option_t options,
nc_utf8proc_custom_func custom_func, void *custom_data
);
/** @name Unicode normalization
@ -683,13 +683,13 @@ utf8proc_ssize_t nc_utf8proc_map_custom(
*/
/** @{ */
/** NFD normalization (@ref UTF8PROC_DECOMPOSE). */
UTF8PROC_DLLEXPORT utf8proc_uint8_t *nc_utf8proc_NFD(const utf8proc_uint8_t *str);
UTF8PROC_DLLEXPORT nc_utf8proc_uint8_t *nc_utf8proc_NFD(const nc_utf8proc_uint8_t *str);
/** NFC normalization (@ref UTF8PROC_COMPOSE). */
UTF8PROC_DLLEXPORT utf8proc_uint8_t *nc_utf8proc_NFC(const utf8proc_uint8_t *str);
UTF8PROC_DLLEXPORT nc_utf8proc_uint8_t *nc_utf8proc_NFC(const nc_utf8proc_uint8_t *str);
/** NFKD normalization (@ref UTF8PROC_DECOMPOSE and @ref UTF8PROC_COMPAT). */
UTF8PROC_DLLEXPORT utf8proc_uint8_t *nc_utf8proc_NFKD(const utf8proc_uint8_t *str);
UTF8PROC_DLLEXPORT nc_utf8proc_uint8_t *nc_utf8proc_NFKD(const nc_utf8proc_uint8_t *str);
/** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */
UTF8PROC_DLLEXPORT utf8proc_uint8_t *nc_utf8proc_NFKC(const utf8proc_uint8_t *str);
UTF8PROC_DLLEXPORT nc_utf8proc_uint8_t *nc_utf8proc_NFKC(const nc_utf8proc_uint8_t *str);
/** @} */
#ifdef __cplusplus