mirror of
https://github.com/Unidata/netcdf-c.git
synced 2024-11-21 03:13:42 +08:00
Refactored some data types due to collision.
This commit is contained in:
parent
5883b6587f
commit
773fe80727
@ -23,12 +23,12 @@ int
|
||||
nc_utf8_validate(const unsigned char* name)
|
||||
{
|
||||
int ncstat = NC_NOERR;
|
||||
const utf8proc_uint8_t *str;
|
||||
utf8proc_ssize_t strlen = -1;
|
||||
utf8proc_int32_t codepoint;
|
||||
utf8proc_ssize_t count;
|
||||
const nc_utf8proc_uint8_t *str;
|
||||
nc_utf8proc_ssize_t strlen = -1;
|
||||
nc_utf8proc_int32_t codepoint;
|
||||
nc_utf8proc_ssize_t count;
|
||||
|
||||
str = (const utf8proc_uint8_t*)name;
|
||||
str = (const nc_utf8proc_uint8_t*)name;
|
||||
while(*str) {
|
||||
count = nc_utf8proc_iterate(str,strlen,&codepoint);
|
||||
if(count < 0) {
|
||||
@ -70,9 +70,9 @@ int
|
||||
nc_utf8_normalize(const unsigned char* utf8, unsigned char** normalp)
|
||||
{
|
||||
int ncstat = NC_NOERR;
|
||||
const utf8proc_uint8_t* str = (const utf8proc_uint8_t*)utf8;
|
||||
utf8proc_uint8_t* retval = NULL;
|
||||
utf8proc_ssize_t count;
|
||||
const nc_utf8proc_uint8_t* str = (const nc_utf8proc_uint8_t*)utf8;
|
||||
nc_utf8proc_uint8_t* retval = NULL;
|
||||
nc_utf8proc_ssize_t count;
|
||||
count = nc_utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE | UTF8PROC_COMPOSE);
|
||||
if(count < 0) {/* error */
|
||||
switch (count) {
|
||||
|
@ -45,7 +45,7 @@
|
||||
#include "utf8proc_data.c"
|
||||
|
||||
|
||||
const utf8proc_int8_t utf8proc_utf8class[256] = {
|
||||
const nc_utf8proc_int8_t nc_utf8proc_utf8class[256] = {
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
@ -92,7 +92,7 @@ const char *nc_utf8proc_version(void) {
|
||||
return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) "";
|
||||
}
|
||||
|
||||
const char *nc_utf8proc_errmsg(utf8proc_ssize_t errcode) {
|
||||
const char *nc_utf8proc_errmsg(nc_utf8proc_ssize_t errcode) {
|
||||
switch (errcode) {
|
||||
case UTF8PROC_ERROR_NOMEM:
|
||||
return "Memory for processing UTF-8 data could not be allocated.";
|
||||
@ -110,11 +110,11 @@ const char *nc_utf8proc_version(void) {
|
||||
}
|
||||
|
||||
#define utf_cont(ch) (((ch) & 0xc0) == 0x80)
|
||||
utf8proc_ssize_t nc_utf8proc_iterate(
|
||||
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
|
||||
nc_utf8proc_ssize_t nc_utf8proc_iterate(
|
||||
const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen, nc_utf8proc_int32_t *dst
|
||||
) {
|
||||
utf8proc_uint32_t uc;
|
||||
const utf8proc_uint8_t *end;
|
||||
nc_utf8proc_uint32_t uc;
|
||||
const nc_utf8proc_uint8_t *end;
|
||||
|
||||
*dst = -1;
|
||||
if (!strlen) return 0;
|
||||
@ -158,69 +158,69 @@ utf8proc_ssize_t nc_utf8proc_iterate(
|
||||
return 4;
|
||||
}
|
||||
|
||||
utf8proc_bool nc_utf8proc_codepoint_valid(utf8proc_int32_t uc) {
|
||||
return (((utf8proc_uint32_t)uc)-0xd800 > 0x07ff) && ((utf8proc_uint32_t)uc < 0x110000);
|
||||
nc_utf8proc_bool nc_utf8proc_codepoint_valid(nc_utf8proc_int32_t uc) {
|
||||
return (((nc_utf8proc_uint32_t)uc)-0xd800 > 0x07ff) && ((nc_utf8proc_uint32_t)uc < 0x110000);
|
||||
}
|
||||
|
||||
utf8proc_ssize_t nc_utf8proc_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
|
||||
nc_utf8proc_ssize_t nc_utf8proc_encode_char(nc_utf8proc_int32_t uc, nc_utf8proc_uint8_t *dst) {
|
||||
if (uc < 0x00) {
|
||||
return 0;
|
||||
} else if (uc < 0x80) {
|
||||
dst[0] = (utf8proc_uint8_t) uc;
|
||||
dst[0] = (nc_utf8proc_uint8_t) uc;
|
||||
return 1;
|
||||
} else if (uc < 0x800) {
|
||||
dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6));
|
||||
dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
|
||||
dst[0] = (nc_utf8proc_uint8_t)(0xC0 + (uc >> 6));
|
||||
dst[1] = (nc_utf8proc_uint8_t)(0x80 + (uc & 0x3F));
|
||||
return 2;
|
||||
// Note: we allow encoding 0xd800-0xdfff here, so as not to change
|
||||
// the API, however, these are actually invalid in UTF-8
|
||||
} else if (uc < 0x10000) {
|
||||
dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12));
|
||||
dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
|
||||
dst[2] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
|
||||
dst[0] = (nc_utf8proc_uint8_t)(0xE0 + (uc >> 12));
|
||||
dst[1] = (nc_utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
|
||||
dst[2] = (nc_utf8proc_uint8_t)(0x80 + (uc & 0x3F));
|
||||
return 3;
|
||||
} else if (uc < 0x110000) {
|
||||
dst[0] = (utf8proc_uint8_t)(0xF0 + (uc >> 18));
|
||||
dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 12) & 0x3F));
|
||||
dst[2] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
|
||||
dst[3] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
|
||||
dst[0] = (nc_utf8proc_uint8_t)(0xF0 + (uc >> 18));
|
||||
dst[1] = (nc_utf8proc_uint8_t)(0x80 + ((uc >> 12) & 0x3F));
|
||||
dst[2] = (nc_utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
|
||||
dst[3] = (nc_utf8proc_uint8_t)(0x80 + (uc & 0x3F));
|
||||
return 4;
|
||||
} else return 0;
|
||||
}
|
||||
|
||||
/* internal "unsafe" version that does not check whether uc is in range */
|
||||
static utf8proc_ssize_t nc_unsafe_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
|
||||
static nc_utf8proc_ssize_t nc_unsafe_encode_char(nc_utf8proc_int32_t uc, nc_utf8proc_uint8_t *dst) {
|
||||
if (uc < 0x00) {
|
||||
return 0;
|
||||
} else if (uc < 0x80) {
|
||||
dst[0] = (utf8proc_uint8_t)uc;
|
||||
dst[0] = (nc_utf8proc_uint8_t)uc;
|
||||
return 1;
|
||||
} else if (uc < 0x800) {
|
||||
dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6));
|
||||
dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
|
||||
dst[0] = (nc_utf8proc_uint8_t)(0xC0 + (uc >> 6));
|
||||
dst[1] = (nc_utf8proc_uint8_t)(0x80 + (uc & 0x3F));
|
||||
return 2;
|
||||
} else if (uc == 0xFFFF) {
|
||||
dst[0] = (utf8proc_uint8_t)0xFF;
|
||||
dst[0] = (nc_utf8proc_uint8_t)0xFF;
|
||||
return 1;
|
||||
} else if (uc == 0xFFFE) {
|
||||
dst[0] = (utf8proc_uint8_t)0xFE;
|
||||
dst[0] = (nc_utf8proc_uint8_t)0xFE;
|
||||
return 1;
|
||||
} else if (uc < 0x10000) {
|
||||
dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12));
|
||||
dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
|
||||
dst[2] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
|
||||
dst[0] = (nc_utf8proc_uint8_t)(0xE0 + (uc >> 12));
|
||||
dst[1] = (nc_utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
|
||||
dst[2] = (nc_utf8proc_uint8_t)(0x80 + (uc & 0x3F));
|
||||
return 3;
|
||||
} else if (uc < 0x110000) {
|
||||
dst[0] = (utf8proc_uint8_t)(0xF0 + (uc >> 18));
|
||||
dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 12) & 0x3F));
|
||||
dst[2] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
|
||||
dst[3] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
|
||||
dst[0] = (nc_utf8proc_uint8_t)(0xF0 + (uc >> 18));
|
||||
dst[1] = (nc_utf8proc_uint8_t)(0x80 + ((uc >> 12) & 0x3F));
|
||||
dst[2] = (nc_utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
|
||||
dst[3] = (nc_utf8proc_uint8_t)(0x80 + (uc & 0x3F));
|
||||
return 4;
|
||||
} else return 0;
|
||||
}
|
||||
|
||||
/* internal "unsafe" version that does not check whether uc is in range */
|
||||
static const utf8proc_property_t *nc_unsafe_get_property(utf8proc_int32_t uc) {
|
||||
static const nc_utf8proc_property_t *nc_unsafe_get_property(nc_utf8proc_int32_t uc) {
|
||||
/* ASSERT: uc >= 0 && uc < 0x110000 */
|
||||
return utf8proc_properties + (
|
||||
utf8proc_stage2table[
|
||||
@ -229,7 +229,7 @@ static const utf8proc_property_t *nc_unsafe_get_property(utf8proc_int32_t uc) {
|
||||
);
|
||||
}
|
||||
|
||||
const utf8proc_property_t *nc_utf8proc_get_property(utf8proc_int32_t uc) {
|
||||
const nc_utf8proc_property_t *nc_utf8proc_get_property(nc_utf8proc_int32_t uc) {
|
||||
return uc < 0 || uc >= 0x110000 ? utf8proc_properties : nc_unsafe_get_property(uc);
|
||||
}
|
||||
|
||||
@ -248,7 +248,7 @@ static const utf8proc_property_t *nc_unsafe_get_property(utf8proc_int32_t uc) {
|
||||
|
||||
See the special support in nc_grapheme_break_extended for the bookkeeping required of the caller.
|
||||
*/
|
||||
static utf8proc_bool nc_grapheme_break_simple(int lbc, int tbc) {
|
||||
static nc_utf8proc_bool nc_grapheme_break_simple(int lbc, int tbc) {
|
||||
return
|
||||
(lbc == UTF8PROC_BOUNDCLASS_START) ? true : // GB1
|
||||
(lbc == UTF8PROC_BOUNDCLASS_CR && // GB3
|
||||
@ -282,12 +282,12 @@ static utf8proc_bool nc_grapheme_break_simple(int lbc, int tbc) {
|
||||
true; // GB999
|
||||
}
|
||||
|
||||
static utf8proc_bool nc_grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t *state)
|
||||
static nc_utf8proc_bool nc_grapheme_break_extended(int lbc, int tbc, nc_utf8proc_int32_t *state)
|
||||
{
|
||||
int lbc_override = lbc;
|
||||
if (state && *state != UTF8PROC_BOUNDCLASS_START)
|
||||
lbc_override = *state;
|
||||
utf8proc_bool break_permitted = nc_grapheme_break_simple(lbc_override, tbc);
|
||||
nc_utf8proc_bool break_permitted = nc_grapheme_break_simple(lbc_override, tbc);
|
||||
if (state) {
|
||||
// Special support for GB 12/13 made possible by GB999. After two RI
|
||||
// class codepoints we want to force a break. Do this by resetting the
|
||||
@ -308,8 +308,8 @@ static utf8proc_bool nc_grapheme_break_extended(int lbc, int tbc, utf8proc_int32
|
||||
return break_permitted;
|
||||
}
|
||||
|
||||
utf8proc_bool nc_utf8proc_grapheme_break_stateful(
|
||||
utf8proc_int32_t c1, utf8proc_int32_t c2, utf8proc_int32_t *state) {
|
||||
nc_utf8proc_bool nc_utf8proc_grapheme_break_stateful(
|
||||
nc_utf8proc_int32_t c1, nc_utf8proc_int32_t c2, nc_utf8proc_int32_t *state) {
|
||||
|
||||
return nc_grapheme_break_extended(nc_utf8proc_get_property(c1)->boundclass,
|
||||
nc_utf8proc_get_property(c2)->boundclass,
|
||||
@ -317,14 +317,14 @@ static utf8proc_bool nc_grapheme_break_extended(int lbc, int tbc, utf8proc_int32
|
||||
}
|
||||
|
||||
|
||||
utf8proc_bool nc_utf8proc_grapheme_break(
|
||||
utf8proc_int32_t c1, utf8proc_int32_t c2) {
|
||||
nc_utf8proc_bool nc_utf8proc_grapheme_break(
|
||||
nc_utf8proc_int32_t c1, nc_utf8proc_int32_t c2) {
|
||||
return nc_utf8proc_grapheme_break_stateful(c1, c2, NULL);
|
||||
}
|
||||
|
||||
static utf8proc_int32_t nc_seqindex_decode_entry(const utf8proc_uint16_t **entry)
|
||||
static nc_utf8proc_int32_t nc_seqindex_decode_entry(const nc_utf8proc_uint16_t **entry)
|
||||
{
|
||||
utf8proc_int32_t entry_cp = **entry;
|
||||
nc_utf8proc_int32_t entry_cp = **entry;
|
||||
if ((entry_cp & 0xF800) == 0xD800) {
|
||||
*entry = *entry + 1;
|
||||
entry_cp = ((entry_cp & 0x03FF) << 10) | (**entry & 0x03FF);
|
||||
@ -333,22 +333,22 @@ static utf8proc_int32_t nc_seqindex_decode_entry(const utf8proc_uint16_t **entry
|
||||
return entry_cp;
|
||||
}
|
||||
|
||||
static utf8proc_int32_t nc_seqindex_decode_index(const utf8proc_uint32_t seqindex)
|
||||
static nc_utf8proc_int32_t nc_seqindex_decode_index(const nc_utf8proc_uint32_t seqindex)
|
||||
{
|
||||
const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex];
|
||||
const nc_utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex];
|
||||
return nc_seqindex_decode_entry(&entry);
|
||||
}
|
||||
|
||||
static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqindex, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
|
||||
utf8proc_ssize_t written = 0;
|
||||
const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex & 0x1FFF];
|
||||
static nc_utf8proc_ssize_t nc_seqindex_write_char_decomposed(nc_utf8proc_uint16_t seqindex, nc_utf8proc_int32_t *dst, nc_utf8proc_ssize_t bufsize, nc_utf8proc_option_t options, int *last_boundclass) {
|
||||
nc_utf8proc_ssize_t written = 0;
|
||||
const nc_utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex & 0x1FFF];
|
||||
int len = seqindex >> 13;
|
||||
if (len >= 7) {
|
||||
len = *entry;
|
||||
entry++;
|
||||
}
|
||||
for (; len >= 0; entry++, len--) {
|
||||
utf8proc_int32_t entry_cp = nc_seqindex_decode_entry(&entry);
|
||||
nc_utf8proc_int32_t entry_cp = nc_seqindex_decode_entry(&entry);
|
||||
|
||||
written += nc_utf8proc_decompose_char(entry_cp, dst+written,
|
||||
(bufsize > written) ? (bufsize - written) : 0, options,
|
||||
@ -358,35 +358,35 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
|
||||
return written;
|
||||
}
|
||||
|
||||
utf8proc_int32_t nc_utf8proc_tolower(utf8proc_int32_t c)
|
||||
nc_utf8proc_int32_t nc_utf8proc_tolower(nc_utf8proc_int32_t c)
|
||||
{
|
||||
utf8proc_int32_t cl = nc_utf8proc_get_property(c)->lowercase_seqindex;
|
||||
nc_utf8proc_int32_t cl = nc_utf8proc_get_property(c)->lowercase_seqindex;
|
||||
return cl != UINT16_MAX ? nc_seqindex_decode_index(cl) : c;
|
||||
}
|
||||
|
||||
utf8proc_int32_t nc_utf8proc_toupper(utf8proc_int32_t c)
|
||||
nc_utf8proc_int32_t nc_utf8proc_toupper(nc_utf8proc_int32_t c)
|
||||
{
|
||||
utf8proc_int32_t cu = nc_utf8proc_get_property(c)->uppercase_seqindex;
|
||||
nc_utf8proc_int32_t cu = nc_utf8proc_get_property(c)->uppercase_seqindex;
|
||||
return cu != UINT16_MAX ? nc_seqindex_decode_index(cu) : c;
|
||||
}
|
||||
|
||||
utf8proc_int32_t nc_utf8proc_totitle(utf8proc_int32_t c)
|
||||
nc_utf8proc_int32_t nc_utf8proc_totitle(nc_utf8proc_int32_t c)
|
||||
{
|
||||
utf8proc_int32_t cu = nc_utf8proc_get_property(c)->titlecase_seqindex;
|
||||
nc_utf8proc_int32_t cu = nc_utf8proc_get_property(c)->titlecase_seqindex;
|
||||
return cu != UINT16_MAX ? nc_seqindex_decode_index(cu) : c;
|
||||
}
|
||||
|
||||
/* return a character width analogous to wcwidth (except portable and
|
||||
hopefully less buggy than most system wcwidth functions). */
|
||||
int nc_utf8proc_charwidth(utf8proc_int32_t c) {
|
||||
int nc_utf8proc_charwidth(nc_utf8proc_int32_t c) {
|
||||
return nc_utf8proc_get_property(c)->charwidth;
|
||||
}
|
||||
|
||||
utf8proc_category_t nc_utf8proc_category(utf8proc_int32_t c) {
|
||||
nc_utf8proc_category_t nc_utf8proc_category(nc_utf8proc_int32_t c) {
|
||||
return nc_utf8proc_get_property(c)->category;
|
||||
}
|
||||
|
||||
const char *nc_utf8proc_category_string(utf8proc_int32_t c) {
|
||||
const char *nc_utf8proc_category_string(nc_utf8proc_int32_t c) {
|
||||
static const char s[][3] = {"Cn","Lu","Ll","Lt","Lm","Lo","Mn","Mc","Me","Nd","Nl","No","Pc","Pd","Ps","Pe","Pi","Pf","Po","Sm","Sc","Sk","So","Zs","Zl","Zp","Cc","Cf","Cs","Co"};
|
||||
return s[nc_utf8proc_category(c)];
|
||||
}
|
||||
@ -395,17 +395,17 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
|
||||
return nc_utf8proc_decompose_char((replacement_uc), dst, bufsize, \
|
||||
options & ~UTF8PROC_LUMP, last_boundclass)
|
||||
|
||||
utf8proc_ssize_t nc_utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
|
||||
const utf8proc_property_t *property;
|
||||
utf8proc_propval_t category;
|
||||
utf8proc_int32_t hangul_sindex;
|
||||
nc_utf8proc_ssize_t nc_utf8proc_decompose_char(nc_utf8proc_int32_t uc, nc_utf8proc_int32_t *dst, nc_utf8proc_ssize_t bufsize, nc_utf8proc_option_t options, int *last_boundclass) {
|
||||
const nc_utf8proc_property_t *property;
|
||||
nc_utf8proc_propval_t category;
|
||||
nc_utf8proc_int32_t hangul_sindex;
|
||||
if (uc < 0 || uc >= 0x110000) return UTF8PROC_ERROR_NOTASSIGNED;
|
||||
property = nc_unsafe_get_property(uc);
|
||||
category = property->category;
|
||||
hangul_sindex = uc - UTF8PROC_HANGUL_SBASE;
|
||||
if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) {
|
||||
if (hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT) {
|
||||
utf8proc_int32_t hangul_tindex;
|
||||
nc_utf8proc_int32_t hangul_tindex;
|
||||
if (bufsize >= 1) {
|
||||
dst[0] = UTF8PROC_HANGUL_LBASE +
|
||||
hangul_sindex / UTF8PROC_HANGUL_NCOUNT;
|
||||
@ -467,7 +467,7 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
|
||||
}
|
||||
}
|
||||
if (options & UTF8PROC_CHARBOUND) {
|
||||
utf8proc_bool boundary;
|
||||
nc_utf8proc_bool boundary;
|
||||
int tbc = property->boundclass;
|
||||
boundary = nc_grapheme_break_extended(*last_boundclass, tbc, last_boundclass);
|
||||
if (boundary) {
|
||||
@ -480,29 +480,29 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
|
||||
return 1;
|
||||
}
|
||||
|
||||
utf8proc_ssize_t nc_utf8proc_decompose(
|
||||
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
|
||||
utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
|
||||
nc_utf8proc_ssize_t nc_utf8proc_decompose(
|
||||
const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen,
|
||||
nc_utf8proc_int32_t *buffer, nc_utf8proc_ssize_t bufsize, nc_utf8proc_option_t options
|
||||
) {
|
||||
return nc_utf8proc_decompose_custom(str, strlen, buffer, bufsize, options, NULL, NULL);
|
||||
}
|
||||
|
||||
utf8proc_ssize_t nc_utf8proc_decompose_custom(
|
||||
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
|
||||
utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options,
|
||||
utf8proc_custom_func custom_func, void *custom_data
|
||||
nc_utf8proc_ssize_t nc_utf8proc_decompose_custom(
|
||||
const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen,
|
||||
nc_utf8proc_int32_t *buffer, nc_utf8proc_ssize_t bufsize, nc_utf8proc_option_t options,
|
||||
nc_utf8proc_custom_func custom_func, void *custom_data
|
||||
) {
|
||||
/* strlen will be ignored, if UTF8PROC_NULLTERM is set in options */
|
||||
utf8proc_ssize_t wpos = 0;
|
||||
nc_utf8proc_ssize_t wpos = 0;
|
||||
if ((options & UTF8PROC_COMPOSE) && (options & UTF8PROC_DECOMPOSE))
|
||||
return UTF8PROC_ERROR_INVALIDOPTS;
|
||||
if ((options & UTF8PROC_STRIPMARK) &&
|
||||
!(options & UTF8PROC_COMPOSE) && !(options & UTF8PROC_DECOMPOSE))
|
||||
return UTF8PROC_ERROR_INVALIDOPTS;
|
||||
{
|
||||
utf8proc_int32_t uc;
|
||||
utf8proc_ssize_t rpos = 0;
|
||||
utf8proc_ssize_t decomp_result;
|
||||
nc_utf8proc_int32_t uc;
|
||||
nc_utf8proc_ssize_t rpos = 0;
|
||||
nc_utf8proc_ssize_t decomp_result;
|
||||
int boundclass = UTF8PROC_BOUNDCLASS_START;
|
||||
while (1) {
|
||||
if (options & UTF8PROC_NULLTERM) {
|
||||
@ -528,15 +528,15 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
|
||||
wpos += decomp_result;
|
||||
/* prohibiting integer overflows due to too long strings: */
|
||||
if (wpos < 0 ||
|
||||
wpos > (utf8proc_ssize_t)(SSIZE_MAX/sizeof(utf8proc_int32_t)/2))
|
||||
wpos > (nc_utf8proc_ssize_t)(SSIZE_MAX/sizeof(nc_utf8proc_int32_t)/2))
|
||||
return UTF8PROC_ERROR_OVERFLOW;
|
||||
}
|
||||
}
|
||||
if ((options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) && bufsize >= wpos) {
|
||||
utf8proc_ssize_t pos = 0;
|
||||
nc_utf8proc_ssize_t pos = 0;
|
||||
while (pos < wpos-1) {
|
||||
utf8proc_int32_t uc1, uc2;
|
||||
const utf8proc_property_t *property1, *property2;
|
||||
nc_utf8proc_int32_t uc1, uc2;
|
||||
const nc_utf8proc_property_t *property1, *property2;
|
||||
uc1 = buffer[pos];
|
||||
uc2 = buffer[pos+1];
|
||||
property1 = nc_unsafe_get_property(uc1);
|
||||
@ -554,12 +554,12 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
|
||||
return wpos;
|
||||
}
|
||||
|
||||
utf8proc_ssize_t nc_utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
|
||||
nc_utf8proc_ssize_t nc_utf8proc_normalize_utf32(nc_utf8proc_int32_t *buffer, nc_utf8proc_ssize_t length, nc_utf8proc_option_t options) {
|
||||
/* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored */
|
||||
if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) {
|
||||
utf8proc_ssize_t rpos;
|
||||
utf8proc_ssize_t wpos = 0;
|
||||
utf8proc_int32_t uc;
|
||||
nc_utf8proc_ssize_t rpos;
|
||||
nc_utf8proc_ssize_t wpos = 0;
|
||||
nc_utf8proc_int32_t uc;
|
||||
for (rpos = 0; rpos < length; rpos++) {
|
||||
uc = buffer[rpos];
|
||||
if (uc == 0x000D && rpos < length-1 && buffer[rpos+1] == 0x000A) rpos++;
|
||||
@ -588,23 +588,23 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
|
||||
length = wpos;
|
||||
}
|
||||
if (options & UTF8PROC_COMPOSE) {
|
||||
utf8proc_int32_t *starter = NULL;
|
||||
utf8proc_int32_t current_char;
|
||||
const utf8proc_property_t *starter_property = NULL, *current_property;
|
||||
utf8proc_propval_t max_combining_class = -1;
|
||||
utf8proc_ssize_t rpos;
|
||||
utf8proc_ssize_t wpos = 0;
|
||||
utf8proc_int32_t composition;
|
||||
nc_utf8proc_int32_t *starter = NULL;
|
||||
nc_utf8proc_int32_t current_char;
|
||||
const nc_utf8proc_property_t *starter_property = NULL, *current_property;
|
||||
nc_utf8proc_propval_t max_combining_class = -1;
|
||||
nc_utf8proc_ssize_t rpos;
|
||||
nc_utf8proc_ssize_t wpos = 0;
|
||||
nc_utf8proc_int32_t composition;
|
||||
for (rpos = 0; rpos < length; rpos++) {
|
||||
current_char = buffer[rpos];
|
||||
current_property = nc_unsafe_get_property(current_char);
|
||||
if (starter && current_property->combining_class > max_combining_class) {
|
||||
/* combination perhaps possible */
|
||||
utf8proc_int32_t hangul_lindex;
|
||||
utf8proc_int32_t hangul_sindex;
|
||||
nc_utf8proc_int32_t hangul_lindex;
|
||||
nc_utf8proc_int32_t hangul_sindex;
|
||||
hangul_lindex = *starter - UTF8PROC_HANGUL_LBASE;
|
||||
if (hangul_lindex >= 0 && hangul_lindex < UTF8PROC_HANGUL_LCOUNT) {
|
||||
utf8proc_int32_t hangul_vindex;
|
||||
nc_utf8proc_int32_t hangul_vindex;
|
||||
hangul_vindex = current_char - UTF8PROC_HANGUL_VBASE;
|
||||
if (hangul_vindex >= 0 && hangul_vindex < UTF8PROC_HANGUL_VCOUNT) {
|
||||
*starter = UTF8PROC_HANGUL_SBASE +
|
||||
@ -617,7 +617,7 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
|
||||
hangul_sindex = *starter - UTF8PROC_HANGUL_SBASE;
|
||||
if (hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT &&
|
||||
(hangul_sindex % UTF8PROC_HANGUL_TCOUNT) == 0) {
|
||||
utf8proc_int32_t hangul_tindex;
|
||||
nc_utf8proc_int32_t hangul_tindex;
|
||||
hangul_tindex = current_char - UTF8PROC_HANGUL_TBASE;
|
||||
if (hangul_tindex >= 0 && hangul_tindex < UTF8PROC_HANGUL_TCOUNT) {
|
||||
*starter += hangul_tindex;
|
||||
@ -666,46 +666,46 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
|
||||
return length;
|
||||
}
|
||||
|
||||
utf8proc_ssize_t nc_utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
|
||||
nc_utf8proc_ssize_t nc_utf8proc_reencode(nc_utf8proc_int32_t *buffer, nc_utf8proc_ssize_t length, nc_utf8proc_option_t options) {
|
||||
/* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
|
||||
ASSERT: 'buffer' has one spare byte of free space at the end! */
|
||||
length = nc_utf8proc_normalize_utf32(buffer, length, options);
|
||||
if (length < 0) return length;
|
||||
{
|
||||
utf8proc_ssize_t rpos, wpos = 0;
|
||||
utf8proc_int32_t uc;
|
||||
nc_utf8proc_ssize_t rpos, wpos = 0;
|
||||
nc_utf8proc_int32_t uc;
|
||||
if (options & UTF8PROC_CHARBOUND) {
|
||||
for (rpos = 0; rpos < length; rpos++) {
|
||||
uc = buffer[rpos];
|
||||
wpos += nc_unsafe_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
|
||||
wpos += nc_unsafe_encode_char(uc, ((nc_utf8proc_uint8_t *)buffer) + wpos);
|
||||
}
|
||||
} else {
|
||||
for (rpos = 0; rpos < length; rpos++) {
|
||||
uc = buffer[rpos];
|
||||
wpos += nc_utf8proc_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
|
||||
wpos += nc_utf8proc_encode_char(uc, ((nc_utf8proc_uint8_t *)buffer) + wpos);
|
||||
}
|
||||
}
|
||||
((utf8proc_uint8_t *)buffer)[wpos] = 0;
|
||||
((nc_utf8proc_uint8_t *)buffer)[wpos] = 0;
|
||||
return wpos;
|
||||
}
|
||||
}
|
||||
|
||||
utf8proc_ssize_t nc_utf8proc_map(
|
||||
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options
|
||||
nc_utf8proc_ssize_t nc_utf8proc_map(
|
||||
const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen, nc_utf8proc_uint8_t **dstptr, nc_utf8proc_option_t options
|
||||
) {
|
||||
return nc_utf8proc_map_custom(str, strlen, dstptr, options, NULL, NULL);
|
||||
}
|
||||
|
||||
utf8proc_ssize_t nc_utf8proc_map_custom(
|
||||
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options,
|
||||
utf8proc_custom_func custom_func, void *custom_data
|
||||
nc_utf8proc_ssize_t nc_utf8proc_map_custom(
|
||||
const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen, nc_utf8proc_uint8_t **dstptr, nc_utf8proc_option_t options,
|
||||
nc_utf8proc_custom_func custom_func, void *custom_data
|
||||
) {
|
||||
utf8proc_int32_t *buffer;
|
||||
utf8proc_ssize_t result;
|
||||
nc_utf8proc_int32_t *buffer;
|
||||
nc_utf8proc_ssize_t result;
|
||||
*dstptr = NULL;
|
||||
result = nc_utf8proc_decompose_custom(str, strlen, NULL, 0, options, custom_func, custom_data);
|
||||
if (result < 0) return result;
|
||||
buffer = (utf8proc_int32_t *) malloc(result * sizeof(utf8proc_int32_t) + 1);
|
||||
buffer = (nc_utf8proc_int32_t *) malloc(result * sizeof(nc_utf8proc_int32_t) + 1);
|
||||
if (!buffer) return UTF8PROC_ERROR_NOMEM;
|
||||
result = nc_utf8proc_decompose_custom(str, strlen, buffer, result, options, custom_func, custom_data);
|
||||
if (result < 0) {
|
||||
@ -718,37 +718,37 @@ static utf8proc_ssize_t nc_seqindex_write_char_decomposed(utf8proc_uint16_t seqi
|
||||
return result;
|
||||
}
|
||||
{
|
||||
utf8proc_int32_t *newptr;
|
||||
newptr = (utf8proc_int32_t *) realloc(buffer, (size_t)result+1);
|
||||
nc_utf8proc_int32_t *newptr;
|
||||
newptr = (nc_utf8proc_int32_t *) realloc(buffer, (size_t)result+1);
|
||||
if (newptr) buffer = newptr;
|
||||
}
|
||||
*dstptr = (utf8proc_uint8_t *)buffer;
|
||||
*dstptr = (nc_utf8proc_uint8_t *)buffer;
|
||||
return result;
|
||||
}
|
||||
|
||||
utf8proc_uint8_t *nc_utf8proc_NFD(const utf8proc_uint8_t *str) {
|
||||
utf8proc_uint8_t *retval;
|
||||
nc_utf8proc_uint8_t *nc_utf8proc_NFD(const nc_utf8proc_uint8_t *str) {
|
||||
nc_utf8proc_uint8_t *retval;
|
||||
nc_utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
|
||||
UTF8PROC_DECOMPOSE);
|
||||
return retval;
|
||||
}
|
||||
|
||||
utf8proc_uint8_t *nc_utf8proc_NFC(const utf8proc_uint8_t *str) {
|
||||
utf8proc_uint8_t *retval;
|
||||
nc_utf8proc_uint8_t *nc_utf8proc_NFC(const nc_utf8proc_uint8_t *str) {
|
||||
nc_utf8proc_uint8_t *retval;
|
||||
nc_utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
|
||||
UTF8PROC_COMPOSE);
|
||||
return retval;
|
||||
}
|
||||
|
||||
utf8proc_uint8_t *nc_utf8proc_NFKD(const utf8proc_uint8_t *str) {
|
||||
utf8proc_uint8_t *retval;
|
||||
nc_utf8proc_uint8_t *nc_utf8proc_NFKD(const nc_utf8proc_uint8_t *str) {
|
||||
nc_utf8proc_uint8_t *retval;
|
||||
nc_utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
|
||||
UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
|
||||
return retval;
|
||||
}
|
||||
|
||||
utf8proc_uint8_t *nc_utf8proc_NFKC(const utf8proc_uint8_t *str) {
|
||||
utf8proc_uint8_t *retval;
|
||||
nc_utf8proc_uint8_t *nc_utf8proc_NFKC(const nc_utf8proc_uint8_t *str) {
|
||||
nc_utf8proc_uint8_t *retval;
|
||||
nc_utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
|
||||
UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
|
||||
return retval;
|
||||
|
@ -108,15 +108,15 @@ typedef bool utf8proc_bool;
|
||||
# include <stddef.h>
|
||||
# include <stdbool.h>
|
||||
# include <inttypes.h>
|
||||
typedef int8_t utf8proc_int8_t;
|
||||
typedef uint8_t utf8proc_uint8_t;
|
||||
typedef int16_t utf8proc_int16_t;
|
||||
typedef uint16_t utf8proc_uint16_t;
|
||||
typedef int32_t utf8proc_int32_t;
|
||||
typedef uint32_t utf8proc_uint32_t;
|
||||
typedef size_t utf8proc_size_t;
|
||||
typedef ptrdiff_t utf8proc_ssize_t;
|
||||
typedef bool utf8proc_bool;
|
||||
typedef int8_t nc_utf8proc_int8_t;
|
||||
typedef uint8_t nc_utf8proc_uint8_t;
|
||||
typedef int16_t nc_utf8proc_int16_t;
|
||||
typedef uint16_t nc_utf8proc_uint16_t;
|
||||
typedef int32_t nc_utf8proc_int32_t;
|
||||
typedef uint32_t nc_utf8proc_uint32_t;
|
||||
typedef size_t nc_utf8proc_size_t;
|
||||
typedef ptrdiff_t nc_utf8proc_ssize_t;
|
||||
typedef bool nc_utf8proc_bool;
|
||||
#endif
|
||||
#include <limits.h>
|
||||
|
||||
@ -209,7 +209,7 @@ typedef enum {
|
||||
* @ref UTF8PROC_DECOMPOSE
|
||||
*/
|
||||
UTF8PROC_STRIPMARK = (1<<13),
|
||||
} utf8proc_option_t;
|
||||
} nc_utf8proc_option_t;
|
||||
|
||||
/** @name Error codes
|
||||
* Error codes being returned by almost all functions.
|
||||
@ -230,32 +230,32 @@ typedef enum {
|
||||
/* @name Types */
|
||||
|
||||
/** Holds the value of a property. */
|
||||
typedef utf8proc_int16_t utf8proc_propval_t;
|
||||
typedef nc_utf8proc_int16_t nc_utf8proc_propval_t;
|
||||
|
||||
/** Struct containing information about a codepoint. */
|
||||
typedef struct utf8proc_property_struct {
|
||||
typedef struct nc_utf8proc_property_struct {
|
||||
/**
|
||||
* Unicode category.
|
||||
* @see nc_utf8proc_category_t.
|
||||
*/
|
||||
utf8proc_propval_t category;
|
||||
utf8proc_propval_t combining_class;
|
||||
nc_utf8proc_propval_t category;
|
||||
nc_utf8proc_propval_t combining_class;
|
||||
/**
|
||||
* Bidirectional class.
|
||||
* @see nc_utf8proc_bidi_class_t.
|
||||
*/
|
||||
utf8proc_propval_t bidi_class;
|
||||
nc_utf8proc_propval_t bidi_class;
|
||||
/**
|
||||
* @anchor Decomposition type.
|
||||
* @see nc_utf8proc_decomp_type_t.
|
||||
*/
|
||||
utf8proc_propval_t decomp_type;
|
||||
utf8proc_uint16_t decomp_seqindex;
|
||||
utf8proc_uint16_t casefold_seqindex;
|
||||
utf8proc_uint16_t uppercase_seqindex;
|
||||
utf8proc_uint16_t lowercase_seqindex;
|
||||
utf8proc_uint16_t titlecase_seqindex;
|
||||
utf8proc_uint16_t comb_index;
|
||||
nc_utf8proc_propval_t decomp_type;
|
||||
nc_utf8proc_uint16_t decomp_seqindex;
|
||||
nc_utf8proc_uint16_t casefold_seqindex;
|
||||
nc_utf8proc_uint16_t uppercase_seqindex;
|
||||
nc_utf8proc_uint16_t lowercase_seqindex;
|
||||
nc_utf8proc_uint16_t titlecase_seqindex;
|
||||
nc_utf8proc_uint16_t comb_index;
|
||||
unsigned bidi_mirrored:1;
|
||||
unsigned comp_exclusion:1;
|
||||
/**
|
||||
@ -274,7 +274,7 @@ typedef struct utf8proc_property_struct {
|
||||
* @see nc_utf8proc_boundclass_t.
|
||||
*/
|
||||
unsigned boundclass:8;
|
||||
} utf8proc_property_t;
|
||||
} nc_utf8proc_property_t;
|
||||
|
||||
/** Unicode categories. */
|
||||
typedef enum {
|
||||
@ -308,7 +308,7 @@ typedef enum {
|
||||
UTF8PROC_CATEGORY_CF = 27, /**< Other, format */
|
||||
UTF8PROC_CATEGORY_CS = 28, /**< Other, surrogate */
|
||||
UTF8PROC_CATEGORY_CO = 29, /**< Other, private use */
|
||||
} utf8proc_category_t;
|
||||
} nc_utf8proc_category_t;
|
||||
|
||||
/** Bidirectional character classes. */
|
||||
typedef enum {
|
||||
@ -335,7 +335,7 @@ typedef enum {
|
||||
UTF8PROC_BIDI_CLASS_RLI = 21, /**< Right-to-Left Isolate */
|
||||
UTF8PROC_BIDI_CLASS_FSI = 22, /**< First Strong Isolate */
|
||||
UTF8PROC_BIDI_CLASS_PDI = 23, /**< Pop Directional Isolate */
|
||||
} utf8proc_bidi_class_t;
|
||||
} nc_utf8proc_bidi_class_t;
|
||||
|
||||
/** Decomposition type. */
|
||||
typedef enum {
|
||||
@ -355,7 +355,7 @@ typedef enum {
|
||||
UTF8PROC_DECOMP_TYPE_SQUARE = 14, /**< Square */
|
||||
UTF8PROC_DECOMP_TYPE_FRACTION = 15, /**< Fraction */
|
||||
UTF8PROC_DECOMP_TYPE_COMPAT = 16, /**< Compat */
|
||||
} utf8proc_decomp_type_t;
|
||||
} nc_utf8proc_decomp_type_t;
|
||||
|
||||
/** Boundclass property. (TR29) */
|
||||
typedef enum {
|
||||
@ -378,20 +378,20 @@ typedef enum {
|
||||
UTF8PROC_BOUNDCLASS_E_MODIFIER = 16, /**< Emoji Modifier */
|
||||
UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ = 17, /**< Glue_After_ZWJ */
|
||||
UTF8PROC_BOUNDCLASS_E_BASE_GAZ = 18, /**< E_BASE + GLUE_AFTER_ZWJ */
|
||||
} utf8proc_boundclass_t;
|
||||
} nc_utf8proc_boundclass_t;
|
||||
|
||||
/**
|
||||
* Function pointer type passed to @ref nc_utf8proc_map_custom and
|
||||
* @ref nc_utf8proc_decompose_custom, which is used to specify a user-defined
|
||||
* mapping of codepoints to be applied in conjunction with other mappings.
|
||||
*/
|
||||
typedef utf8proc_int32_t (*utf8proc_custom_func)(utf8proc_int32_t codepoint, void *data);
|
||||
typedef nc_utf8proc_int32_t (*nc_utf8proc_custom_func)(nc_utf8proc_int32_t codepoint, void *data);
|
||||
|
||||
/**
|
||||
* Array containing the byte lengths of a UTF-8 encoded codepoint based
|
||||
* on the first byte.
|
||||
*/
|
||||
const utf8proc_int8_t utf8proc_utf8class[256];
|
||||
/* Declaration only: the table is defined once, with its initializer, in the
 * implementation file. Without `extern` every translation unit that includes
 * this header would emit its own tentative definition of the array. */
extern const nc_utf8proc_int8_t nc_utf8proc_utf8class[256];
|
||||
|
||||
/**
|
||||
* Returns the utf8proc API version as a string MAJOR.MINOR.PATCH
|
||||
@ -404,7 +404,7 @@ const char *nc_utf8proc_version(void);
|
||||
* Returns an informative error string for the given utf8proc error code
|
||||
* (e.g. the error codes returned by @ref nc_utf8proc_map).
|
||||
*/
|
||||
const char *nc_utf8proc_errmsg(utf8proc_ssize_t errcode);
|
||||
const char *nc_utf8proc_errmsg(nc_utf8proc_ssize_t errcode);
|
||||
|
||||
/**
|
||||
* Reads a single codepoint from the UTF-8 sequence being pointed to by `str`.
|
||||
@ -416,7 +416,7 @@ const char *nc_utf8proc_errmsg(utf8proc_ssize_t errcode);
|
||||
* In case of success, the number of bytes read is returned; otherwise, a
|
||||
* negative error code is returned.
|
||||
*/
|
||||
UTF8PROC_DLLEXPORT utf8proc_ssize_t nc_utf8proc_iterate(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *codepoint_ref);
|
||||
UTF8PROC_DLLEXPORT nc_utf8proc_ssize_t nc_utf8proc_iterate(const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen, nc_utf8proc_int32_t *codepoint_ref);
|
||||
|
||||
/**
|
||||
* Check if a codepoint is valid (regardless of whether it has been
|
||||
@ -424,7 +424,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t nc_utf8proc_iterate(const utf8proc_uint8_t *
|
||||
*
|
||||
* @return 1 if the given `codepoint` is valid and otherwise return 0.
|
||||
*/
|
||||
utf8proc_bool nc_utf8proc_codepoint_valid(utf8proc_int32_t codepoint);
|
||||
nc_utf8proc_bool nc_utf8proc_codepoint_valid(nc_utf8proc_int32_t codepoint);
|
||||
|
||||
/**
|
||||
* Encodes the codepoint as a UTF-8 string in the byte array pointed
|
||||
@ -435,7 +435,7 @@ utf8proc_bool nc_utf8proc_codepoint_valid(utf8proc_int32_t codepoint);
|
||||
*
|
||||
* This function does not check whether `codepoint` is valid Unicode.
|
||||
*/
|
||||
utf8proc_ssize_t nc_utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_uint8_t *dst);
|
||||
nc_utf8proc_ssize_t nc_utf8proc_encode_char(nc_utf8proc_int32_t codepoint, nc_utf8proc_uint8_t *dst);
|
||||
|
||||
/**
|
||||
* Look up the properties for a given codepoint.
|
||||
@ -449,7 +449,7 @@ utf8proc_ssize_t nc_utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_ui
|
||||
* If the codepoint is unassigned or invalid, a pointer to a special struct is
|
||||
* returned in which `category` is 0 (@ref UTF8PROC_CATEGORY_CN).
|
||||
*/
|
||||
const utf8proc_property_t *nc_utf8proc_get_property(utf8proc_int32_t codepoint);
|
||||
const nc_utf8proc_property_t *nc_utf8proc_get_property(nc_utf8proc_int32_t codepoint);
|
||||
|
||||
/** Decompose a codepoint into an array of codepoints.
|
||||
*
|
||||
@ -478,9 +478,9 @@ const utf8proc_property_t *nc_utf8proc_get_property(utf8proc_int32_t codepoint);
|
||||
* required buffer size is returned, while the buffer will be overwritten with
|
||||
* undefined data.
|
||||
*/
|
||||
utf8proc_ssize_t nc_utf8proc_decompose_char(
|
||||
utf8proc_int32_t codepoint, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize,
|
||||
utf8proc_option_t options, int *last_boundclass
|
||||
nc_utf8proc_ssize_t nc_utf8proc_decompose_char(
|
||||
nc_utf8proc_int32_t codepoint, nc_utf8proc_int32_t *dst, nc_utf8proc_ssize_t bufsize,
|
||||
nc_utf8proc_option_t options, int *last_boundclass
|
||||
);
|
||||
|
||||
/**
|
||||
@ -500,9 +500,9 @@ utf8proc_ssize_t nc_utf8proc_decompose_char(
|
||||
* required buffer size is returned, while the buffer will be overwritten with
|
||||
* undefined data.
|
||||
*/
|
||||
utf8proc_ssize_t nc_utf8proc_decompose(
|
||||
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
|
||||
utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
|
||||
nc_utf8proc_ssize_t nc_utf8proc_decompose(
|
||||
const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen,
|
||||
nc_utf8proc_int32_t *buffer, nc_utf8proc_ssize_t bufsize, nc_utf8proc_option_t options
|
||||
);
|
||||
|
||||
/**
|
||||
@ -511,10 +511,10 @@ utf8proc_ssize_t nc_utf8proc_decompose(
|
||||
* (along with a `custom_data` pointer that is passed through to `custom_func`).
|
||||
* The `custom_func` argument is ignored if it is `NULL`. See also @ref nc_utf8proc_map_custom.
|
||||
*/
|
||||
utf8proc_ssize_t nc_utf8proc_decompose_custom(
|
||||
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
|
||||
utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options,
|
||||
utf8proc_custom_func custom_func, void *custom_data
|
||||
nc_utf8proc_ssize_t nc_utf8proc_decompose_custom(
|
||||
const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen,
|
||||
nc_utf8proc_int32_t *buffer, nc_utf8proc_ssize_t bufsize, nc_utf8proc_option_t options,
|
||||
nc_utf8proc_custom_func custom_func, void *custom_data
|
||||
);
|
||||
|
||||
/**
|
||||
@ -540,7 +540,7 @@ utf8proc_ssize_t nc_utf8proc_decompose_custom(
|
||||
* @warning The entries of the array pointed to by `str` have to be in the
|
||||
* range `0x0000` to `0x10FFFF`. Otherwise, the program might crash!
|
||||
*/
|
||||
utf8proc_ssize_t nc_utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options);
|
||||
nc_utf8proc_ssize_t nc_utf8proc_normalize_utf32(nc_utf8proc_int32_t *buffer, nc_utf8proc_ssize_t length, nc_utf8proc_option_t options);
|
||||
|
||||
/**
|
||||
* Reencodes the sequence of `length` codepoints pointed to by `buffer`
|
||||
@ -570,7 +570,7 @@ utf8proc_ssize_t nc_utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_
|
||||
* entries of the array pointed to by `str` have to be in the
|
||||
* range `0x0000` to `0x10FFFF`. Otherwise, the program might crash!
|
||||
*/
|
||||
utf8proc_ssize_t nc_utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options);
|
||||
nc_utf8proc_ssize_t nc_utf8proc_reencode(nc_utf8proc_int32_t *buffer, nc_utf8proc_ssize_t length, nc_utf8proc_option_t options);
|
||||
|
||||
/**
|
||||
* Given a pair of consecutive codepoints, return whether a grapheme break is
|
||||
@ -586,15 +586,15 @@ utf8proc_ssize_t nc_utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t
|
||||
* @warning If the state parameter is used, `nc_utf8proc_grapheme_break_stateful` must
|
||||
* be called IN ORDER on ALL potential breaks in a string.
|
||||
*/
|
||||
utf8proc_bool nc_utf8proc_grapheme_break_stateful(
|
||||
utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2, utf8proc_int32_t *state);
|
||||
nc_utf8proc_bool nc_utf8proc_grapheme_break_stateful(
|
||||
nc_utf8proc_int32_t codepoint1, nc_utf8proc_int32_t codepoint2, nc_utf8proc_int32_t *state);
|
||||
|
||||
/**
|
||||
* Same as @ref nc_utf8proc_grapheme_break_stateful, except without support for the
|
||||
* Unicode 9 additions to the algorithm. Supported for legacy reasons.
|
||||
*/
|
||||
utf8proc_bool nc_utf8proc_grapheme_break(
|
||||
utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2);
|
||||
nc_utf8proc_bool nc_utf8proc_grapheme_break(
|
||||
nc_utf8proc_int32_t codepoint1, nc_utf8proc_int32_t codepoint2);
|
||||
|
||||
|
||||
/**
|
||||
@ -602,21 +602,21 @@ utf8proc_bool nc_utf8proc_grapheme_break(
|
||||
* lower-case character, if any; otherwise (if there is no lower-case
|
||||
* variant, or if `c` is not a valid codepoint) return `c`.
|
||||
*/
|
||||
utf8proc_int32_t nc_utf8proc_tolower(utf8proc_int32_t c);
|
||||
nc_utf8proc_int32_t nc_utf8proc_tolower(nc_utf8proc_int32_t c);
|
||||
|
||||
/**
|
||||
* Given a codepoint `c`, return the codepoint of the corresponding
|
||||
* upper-case character, if any; otherwise (if there is no upper-case
|
||||
* variant, or if `c` is not a valid codepoint) return `c`.
|
||||
*/
|
||||
utf8proc_int32_t nc_utf8proc_toupper(utf8proc_int32_t c);
|
||||
nc_utf8proc_int32_t nc_utf8proc_toupper(nc_utf8proc_int32_t c);
|
||||
|
||||
/**
|
||||
* Given a codepoint `c`, return the codepoint of the corresponding
|
||||
* title-case character, if any; otherwise (if there is no title-case
|
||||
* variant, or if `c` is not a valid codepoint) return `c`.
|
||||
*/
|
||||
utf8proc_int32_t nc_utf8proc_totitle(utf8proc_int32_t c);
|
||||
nc_utf8proc_int32_t nc_utf8proc_totitle(nc_utf8proc_int32_t c);
|
||||
|
||||
/**
|
||||
* Given a codepoint, return a character width analogous to `wcwidth(codepoint)`,
|
||||
@ -626,19 +626,19 @@ utf8proc_int32_t nc_utf8proc_totitle(utf8proc_int32_t c);
|
||||
* @note
|
||||
* If you want to check for particular types of non-printable characters,
|
||||
* (analogous to `isprint` or `iscntrl`), use @ref nc_utf8proc_category. */
|
||||
int nc_utf8proc_charwidth(utf8proc_int32_t codepoint);
|
||||
int nc_utf8proc_charwidth(nc_utf8proc_int32_t codepoint);
|
||||
|
||||
/**
|
||||
* Return the Unicode category for the codepoint (one of the
|
||||
* @ref nc_utf8proc_category_t constants.)
|
||||
*/
|
||||
utf8proc_category_t nc_utf8proc_category(utf8proc_int32_t codepoint);
|
||||
nc_utf8proc_category_t nc_utf8proc_category(nc_utf8proc_int32_t codepoint);
|
||||
|
||||
/**
|
||||
* Return the two-letter (nul-terminated) Unicode category string for
|
||||
* the codepoint (e.g. `"Lu"` or `"Co"`).
|
||||
*/
|
||||
const char *nc_utf8proc_category_string(utf8proc_int32_t codepoint);
|
||||
const char *nc_utf8proc_category_string(nc_utf8proc_int32_t codepoint);
|
||||
|
||||
/**
|
||||
* Maps the given UTF-8 string pointed to by `str` to a new UTF-8
|
||||
@ -659,8 +659,8 @@ const char *nc_utf8proc_category_string(utf8proc_int32_t codepoint);
|
||||
* @note The memory of the new UTF-8 string will have been allocated
|
||||
* with `malloc`, and should therefore be deallocated with `free`.
|
||||
*/
|
||||
utf8proc_ssize_t nc_utf8proc_map(
|
||||
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options
|
||||
nc_utf8proc_ssize_t nc_utf8proc_map(
|
||||
const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen, nc_utf8proc_uint8_t **dstptr, nc_utf8proc_option_t options
|
||||
);
|
||||
|
||||
/**
|
||||
@ -669,9 +669,9 @@ utf8proc_ssize_t nc_utf8proc_map(
|
||||
* (along with a `custom_data` pointer that is passed through to `custom_func`).
|
||||
* The `custom_func` argument is ignored if it is `NULL`.
|
||||
*/
|
||||
utf8proc_ssize_t nc_utf8proc_map_custom(
|
||||
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options,
|
||||
utf8proc_custom_func custom_func, void *custom_data
|
||||
nc_utf8proc_ssize_t nc_utf8proc_map_custom(
|
||||
const nc_utf8proc_uint8_t *str, nc_utf8proc_ssize_t strlen, nc_utf8proc_uint8_t **dstptr, nc_utf8proc_option_t options,
|
||||
nc_utf8proc_custom_func custom_func, void *custom_data
|
||||
);
|
||||
|
||||
/** @name Unicode normalization
|
||||
@ -683,13 +683,13 @@ utf8proc_ssize_t nc_utf8proc_map_custom(
|
||||
*/
|
||||
/** @{ */
|
||||
/** NFD normalization (@ref UTF8PROC_DECOMPOSE). */
|
||||
UTF8PROC_DLLEXPORT utf8proc_uint8_t *nc_utf8proc_NFD(const utf8proc_uint8_t *str);
|
||||
UTF8PROC_DLLEXPORT nc_utf8proc_uint8_t *nc_utf8proc_NFD(const nc_utf8proc_uint8_t *str);
|
||||
/** NFC normalization (@ref UTF8PROC_COMPOSE). */
|
||||
UTF8PROC_DLLEXPORT utf8proc_uint8_t *nc_utf8proc_NFC(const utf8proc_uint8_t *str);
|
||||
UTF8PROC_DLLEXPORT nc_utf8proc_uint8_t *nc_utf8proc_NFC(const nc_utf8proc_uint8_t *str);
|
||||
/** NFKD normalization (@ref UTF8PROC_DECOMPOSE and @ref UTF8PROC_COMPAT). */
|
||||
UTF8PROC_DLLEXPORT utf8proc_uint8_t *nc_utf8proc_NFKD(const utf8proc_uint8_t *str);
|
||||
UTF8PROC_DLLEXPORT nc_utf8proc_uint8_t *nc_utf8proc_NFKD(const nc_utf8proc_uint8_t *str);
|
||||
/** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */
|
||||
UTF8PROC_DLLEXPORT utf8proc_uint8_t *nc_utf8proc_NFKC(const utf8proc_uint8_t *str);
|
||||
UTF8PROC_DLLEXPORT nc_utf8proc_uint8_t *nc_utf8proc_NFKC(const nc_utf8proc_uint8_t *str);
|
||||
/** @} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
Loading…
Reference in New Issue
Block a user