/*
 * Library version, split into three integer components
 * (major.minor.patch). With the values below this reads as 2.10.0.
 */
72#define UTF8PROC_VERSION_MAJOR 2
74#define UTF8PROC_VERSION_MINOR 10
76#define UTF8PROC_VERSION_PATCH 0
81#if defined(_MSC_VER) && _MSC_VER < 1800
83typedef signed char utf8proc_int8_t;
84typedef unsigned char utf8proc_uint8_t;
85typedef short utf8proc_int16_t;
86typedef unsigned short utf8proc_uint16_t;
87typedef int utf8proc_int32_t;
88typedef unsigned int utf8proc_uint32_t;
90typedef __int64 utf8proc_ssize_t;
91typedef unsigned __int64 utf8proc_size_t;
93typedef int utf8proc_ssize_t;
94typedef unsigned int utf8proc_size_t;
98typedef unsigned char utf8proc_bool;
99# ifndef __bool_true_false_are_defined
102# define __bool_true_false_are_defined 1
105typedef bool utf8proc_bool;
111typedef int8_t utf8proc_int8_t;
112typedef uint8_t utf8proc_uint8_t;
113typedef int16_t utf8proc_int16_t;
114typedef uint16_t utf8proc_uint16_t;
115typedef int32_t utf8proc_int32_t;
116typedef uint32_t utf8proc_uint32_t;
117typedef size_t utf8proc_size_t;
118typedef ptrdiff_t utf8proc_ssize_t;
119typedef bool utf8proc_bool;
123#ifdef UTF8PROC_STATIC
124# define UTF8PROC_DLLEXPORT
127# ifdef UTF8PROC_EXPORTS
128# define UTF8PROC_DLLEXPORT __declspec(dllexport)
130# define UTF8PROC_DLLEXPORT __declspec(dllimport)
133# define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default")))
135# define UTF8PROC_DLLEXPORT
/*
 * Status codes. Every value is negative and distinct (-1 .. -5).
 * NOTE(review): the meanings given below are inferred from the macro names
 * only -- confirm each against the library documentation.
 */
/* Presumably: a memory allocation failed. */
219#define UTF8PROC_ERROR_NOMEM -1
/* Presumably: a size or length exceeded what can be processed. */
221#define UTF8PROC_ERROR_OVERFLOW -2
/* Presumably: the input is not valid UTF-8. */
223#define UTF8PROC_ERROR_INVALIDUTF8 -3
/* Presumably: an unassigned code point was encountered and rejected. */
225#define UTF8PROC_ERROR_NOTASSIGNED -4
/* Presumably: an invalid combination of option flags was passed. */
227#define UTF8PROC_ERROR_INVALIDOPTS -5
253 utf8proc_uint16_t decomp_seqindex;
254 utf8proc_uint16_t casefold_seqindex;
255 utf8proc_uint16_t uppercase_seqindex;
256 utf8proc_uint16_t lowercase_seqindex;
257 utf8proc_uint16_t titlecase_seqindex;
293 utf8proc_uint16_t comb_length:5;
294 utf8proc_uint16_t comb_issecond:1;
295 unsigned bidi_mirrored:1;
296 unsigned comp_exclusion:1;
304 unsigned control_boundary:1;
315 unsigned indic_conjunct_break:2;
426 UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC = 19,
427 UTF8PROC_BOUNDCLASS_E_ZWG = 20,
432 UTF8PROC_INDIC_CONJUNCT_BREAK_NONE = 0,
433 UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER = 1,
434 UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT = 2,
435 UTF8PROC_INDIC_CONJUNCT_BREAK_EXTEND = 3,
/*
 * Exported, read-only table of 256 signed 8-bit entries, i.e. one entry
 * per possible byte value. This is a declaration only (extern); the
 * table's contents are not visible here.
 * NOTE(review): presumably indexed by the first byte of a UTF-8 sequence
 * to classify it -- confirm against the definition.
 */
449UTF8PROC_DLLEXPORT
extern const utf8proc_int8_t utf8proc_utf8class[256];
/*
 * Judging by its name and signature, returns a textual description of a
 * status code.
 *
 * errcode: the code to describe; presumably one of the negative
 *          UTF8PROC_ERROR_* values. Behaviour for other values is not
 *          visible here -- confirm against the implementation.
 *
 * Returns a pointer to constant characters. NOTE(review): the const
 * return type suggests the caller must not modify or free the string --
 * confirm.
 */
467UTF8PROC_DLLEXPORT
const char *
utf8proc_errmsg(utf8proc_ssize_t errcode);
/*
 * Judging by its name and signature, decodes a code point from a byte
 * buffer.
 *
 * str:           input bytes; not modified through this pointer (const).
 * strlen:        signed length associated with str. NOTE(review): because
 *                the type is signed, a negative value may carry a special
 *                meaning -- confirm against the implementation.
 * codepoint_ref: out-parameter that receives a 32-bit code point value.
 *
 * Returns a signed size. NOTE(review): presumably the number of bytes
 * consumed on success and a negative UTF8PROC_ERROR_* code on failure --
 * confirm.
 */
479UTF8PROC_DLLEXPORT utf8proc_ssize_t
utf8proc_iterate(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *codepoint_ref);
/*
 * Judging by its name and signature, encodes a single code point into a
 * byte buffer.
 *
 * codepoint: the 32-bit code point value to encode.
 * dst:       output buffer. No buffer size is passed, so the caller is
 *            responsible for providing enough room for the longest
 *            possible encoding. NOTE(review): presumably 4 bytes for
 *            UTF-8 -- confirm.
 *
 * Returns a signed size. NOTE(review): presumably the number of bytes
 * written to dst -- confirm, including what is returned for an invalid
 * code point.
 */
498UTF8PROC_DLLEXPORT utf8proc_ssize_t
utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_uint8_t *dst);
544 utf8proc_int32_t codepoint, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize,
566 const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
577 const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
655 utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2, utf8proc_int32_t *state);
662 utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2);
748 const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr,
utf8proc_option_t options
758 const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr,
utf8proc_option_t options,
/*
 * Takes a read-only byte string and returns a pointer to mutable bytes.
 * The name refers to Unicode Normalization Form D.
 * No length is passed. NOTE(review): str is presumably NUL-terminated,
 * and the result is presumably a newly allocated buffer owned by the
 * caller -- confirm both, and the failure return value, against the
 * implementation.
 */
771UTF8PROC_DLLEXPORT utf8proc_uint8_t *
utf8proc_NFD(
const utf8proc_uint8_t *str);
/*
 * Takes a read-only byte string and returns a pointer to mutable bytes.
 * The name refers to Unicode Normalization Form C.
 * No length is passed. NOTE(review): str is presumably NUL-terminated,
 * and the result is presumably a newly allocated buffer owned by the
 * caller -- confirm both, and the failure return value, against the
 * implementation.
 */
773UTF8PROC_DLLEXPORT utf8proc_uint8_t *
utf8proc_NFC(
const utf8proc_uint8_t *str);
/*
 * Takes a read-only byte string and returns a pointer to mutable bytes.
 * The name refers to Unicode Normalization Form KD.
 * No length is passed. NOTE(review): str is presumably NUL-terminated,
 * and the result is presumably a newly allocated buffer owned by the
 * caller -- confirm both, and the failure return value, against the
 * implementation.
 */
775UTF8PROC_DLLEXPORT utf8proc_uint8_t *
utf8proc_NFKD(
const utf8proc_uint8_t *str);
/*
 * Takes a read-only byte string and returns a pointer to mutable bytes.
 * The name refers to Unicode Normalization Form KC.
 * No length is passed. NOTE(review): str is presumably NUL-terminated,
 * and the result is presumably a newly allocated buffer owned by the
 * caller -- confirm both, and the failure return value, against the
 * implementation.
 */
777UTF8PROC_DLLEXPORT utf8proc_uint8_t *
utf8proc_NFKC(
const utf8proc_uint8_t *str);
Definition utf8proc.h:236
unsigned boundclass
Definition utf8proc.h:314
unsigned charwidth
Definition utf8proc.h:306
unsigned ignorable
Definition utf8proc.h:303
utf8proc_propval_t category
Definition utf8proc.h:241
unsigned ambiguous_width
Definition utf8proc.h:308
utf8proc_propval_t bidi_class
Definition utf8proc.h:247
utf8proc_propval_t decomp_type
Definition utf8proc.h:252
utf8proc_uint16_t comb_index
Definition utf8proc.h:292
utf8proc_ssize_t utf8proc_map(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options)
Definition utf8proc.c:754
utf8proc_int32_t(* utf8proc_custom_func)(utf8proc_int32_t codepoint, void *data)
Definition utf8proc.h:443
utf8proc_uint8_t * utf8proc_NFC(const utf8proc_uint8_t *str)
Definition utf8proc.c:797
utf8proc_option_t
Definition utf8proc.h:146
@ UTF8PROC_NULLTERM
Definition utf8proc.h:148
@ UTF8PROC_LUMP
Definition utf8proc.h:200
@ UTF8PROC_REJECTNA
Definition utf8proc.h:160
@ UTF8PROC_NLF2LF
Definition utf8proc.h:174
@ UTF8PROC_STRIPCC
Definition utf8proc.h:182
@ UTF8PROC_NLF2LS
Definition utf8proc.h:166
@ UTF8PROC_COMPOSE
Definition utf8proc.h:154
@ UTF8PROC_STRIPNA
Definition utf8proc.h:211
@ UTF8PROC_CASEFOLD
Definition utf8proc.h:187
@ UTF8PROC_STABLE
Definition utf8proc.h:150
@ UTF8PROC_DECOMPOSE
Definition utf8proc.h:156
@ UTF8PROC_IGNORE
Definition utf8proc.h:158
@ UTF8PROC_NLF2PS
Definition utf8proc.h:172
@ UTF8PROC_CHARBOUND
Definition utf8proc.h:192
@ UTF8PROC_COMPAT
Definition utf8proc.h:152
@ UTF8PROC_STRIPMARK
Definition utf8proc.h:207
utf8proc_uint8_t * utf8proc_NFKD(const utf8proc_uint8_t *str)
Definition utf8proc.c:804
const char * utf8proc_version(void)
Definition utf8proc.c:99
utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t codepoint, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass)
Definition utf8proc.c:452
int utf8proc_charwidth(utf8proc_int32_t codepoint)
Definition utf8proc.c:431
utf8proc_indic_conjunct_break_t
Definition utf8proc.h:431
int utf8proc_islower(utf8proc_int32_t c)
Definition utf8proc.c:417
utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *codepoint_ref)
Definition utf8proc.c:125
const char * utf8proc_unicode_version(void)
Definition utf8proc.c:103
const char * utf8proc_errmsg(utf8proc_ssize_t errcode)
Definition utf8proc.c:107
const utf8proc_property_t * utf8proc_get_property(utf8proc_int32_t codepoint)
Definition utf8proc.c:242
utf8proc_uint8_t * utf8proc_NFKC(const utf8proc_uint8_t *str)
Definition utf8proc.c:811
utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options)
Definition utf8proc.c:730
utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c)
Definition utf8proc.c:399
utf8proc_int16_t utf8proc_propval_t
Definition utf8proc.h:233
struct utf8proc_property_struct utf8proc_property_t
utf8proc_ssize_t utf8proc_decompose_custom(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options, utf8proc_custom_func custom_func, void *custom_data)
Definition utf8proc.c:547
utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_uint8_t *dst)
Definition utf8proc.c:177
utf8proc_ssize_t utf8proc_map_custom(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options, utf8proc_custom_func custom_func, void *custom_data)
Definition utf8proc.c:760
utf8proc_decomp_type_t
Definition utf8proc.h:380
@ UTF8PROC_DECOMP_TYPE_NOBREAK
Definition utf8proc.h:382
@ UTF8PROC_DECOMP_TYPE_SUB
Definition utf8proc.h:389
@ UTF8PROC_DECOMP_TYPE_INITIAL
Definition utf8proc.h:383
@ UTF8PROC_DECOMP_TYPE_WIDE
Definition utf8proc.h:391
@ UTF8PROC_DECOMP_TYPE_SMALL
Definition utf8proc.h:393
@ UTF8PROC_DECOMP_TYPE_FONT
Definition utf8proc.h:381
@ UTF8PROC_DECOMP_TYPE_CIRCLE
Definition utf8proc.h:387
@ UTF8PROC_DECOMP_TYPE_ISOLATED
Definition utf8proc.h:386
@ UTF8PROC_DECOMP_TYPE_NARROW
Definition utf8proc.h:392
@ UTF8PROC_DECOMP_TYPE_SUPER
Definition utf8proc.h:388
@ UTF8PROC_DECOMP_TYPE_FRACTION
Definition utf8proc.h:395
@ UTF8PROC_DECOMP_TYPE_FINAL
Definition utf8proc.h:385
@ UTF8PROC_DECOMP_TYPE_VERTICAL
Definition utf8proc.h:390
@ UTF8PROC_DECOMP_TYPE_COMPAT
Definition utf8proc.h:396
@ UTF8PROC_DECOMP_TYPE_SQUARE
Definition utf8proc.h:394
@ UTF8PROC_DECOMP_TYPE_MEDIAL
Definition utf8proc.h:384
utf8proc_boundclass_t
Definition utf8proc.h:400
@ UTF8PROC_BOUNDCLASS_V
Definition utf8proc.h:408
@ UTF8PROC_BOUNDCLASS_OTHER
Definition utf8proc.h:402
@ UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ
Definition utf8proc.h:421
@ UTF8PROC_BOUNDCLASS_START
Definition utf8proc.h:401
@ UTF8PROC_BOUNDCLASS_E_BASE_GAZ
Definition utf8proc.h:422
@ UTF8PROC_BOUNDCLASS_EXTEND
Definition utf8proc.h:406
@ UTF8PROC_BOUNDCLASS_CONTROL
Definition utf8proc.h:405
@ UTF8PROC_BOUNDCLASS_SPACINGMARK
Definition utf8proc.h:413
@ UTF8PROC_BOUNDCLASS_L
Definition utf8proc.h:407
@ UTF8PROC_BOUNDCLASS_ZWJ
Definition utf8proc.h:415
@ UTF8PROC_BOUNDCLASS_T
Definition utf8proc.h:409
@ UTF8PROC_BOUNDCLASS_LV
Definition utf8proc.h:410
@ UTF8PROC_BOUNDCLASS_LF
Definition utf8proc.h:404
@ UTF8PROC_BOUNDCLASS_CR
Definition utf8proc.h:403
@ UTF8PROC_BOUNDCLASS_E_BASE
Definition utf8proc.h:419
@ UTF8PROC_BOUNDCLASS_E_MODIFIER
Definition utf8proc.h:420
@ UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR
Definition utf8proc.h:412
@ UTF8PROC_BOUNDCLASS_LVT
Definition utf8proc.h:411
@ UTF8PROC_BOUNDCLASS_PREPEND
Definition utf8proc.h:414
utf8proc_ssize_t utf8proc_decompose(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options)
Definition utf8proc.c:540
utf8proc_uint8_t * utf8proc_NFD(const utf8proc_uint8_t *str)
Definition utf8proc.c:790
utf8proc_bool utf8proc_grapheme_break_stateful(utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2, utf8proc_int32_t *state)
Definition utf8proc.c:345
utf8proc_uint8_t * utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str)
Definition utf8proc.c:818
utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t codepoint)
Definition utf8proc.c:173
const char * utf8proc_category_string(utf8proc_int32_t codepoint)
Definition utf8proc.c:443
utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c)
Definition utf8proc.c:411
utf8proc_category_t utf8proc_category(utf8proc_int32_t codepoint)
Definition utf8proc.c:439
utf8proc_bool utf8proc_charwidth_ambiguous(utf8proc_int32_t codepoint)
Definition utf8proc.c:435
utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2)
Definition utf8proc.c:358
utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
Definition utf8proc.c:405
utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options)
Definition utf8proc.c:614
int utf8proc_isupper(utf8proc_int32_t c)
Definition utf8proc.c:423
utf8proc_category_t
Definition utf8proc.h:319
@ UTF8PROC_CATEGORY_SO
Definition utf8proc.h:342
@ UTF8PROC_CATEGORY_ME
Definition utf8proc.h:328
@ UTF8PROC_CATEGORY_SK
Definition utf8proc.h:341
@ UTF8PROC_CATEGORY_PF
Definition utf8proc.h:337
@ UTF8PROC_CATEGORY_MN
Definition utf8proc.h:326
@ UTF8PROC_CATEGORY_CN
Definition utf8proc.h:320
@ UTF8PROC_CATEGORY_SC
Definition utf8proc.h:340
@ UTF8PROC_CATEGORY_ND
Definition utf8proc.h:329
@ UTF8PROC_CATEGORY_LT
Definition utf8proc.h:323
@ UTF8PROC_CATEGORY_PC
Definition utf8proc.h:332
@ UTF8PROC_CATEGORY_NO
Definition utf8proc.h:331
@ UTF8PROC_CATEGORY_MC
Definition utf8proc.h:327
@ UTF8PROC_CATEGORY_ZS
Definition utf8proc.h:343
@ UTF8PROC_CATEGORY_CF
Definition utf8proc.h:347
@ UTF8PROC_CATEGORY_LU
Definition utf8proc.h:321
@ UTF8PROC_CATEGORY_ZL
Definition utf8proc.h:344
@ UTF8PROC_CATEGORY_NL
Definition utf8proc.h:330
@ UTF8PROC_CATEGORY_SM
Definition utf8proc.h:339
@ UTF8PROC_CATEGORY_ZP
Definition utf8proc.h:345
@ UTF8PROC_CATEGORY_PI
Definition utf8proc.h:336
@ UTF8PROC_CATEGORY_PO
Definition utf8proc.h:338
@ UTF8PROC_CATEGORY_PS
Definition utf8proc.h:334
@ UTF8PROC_CATEGORY_CS
Definition utf8proc.h:348
@ UTF8PROC_CATEGORY_PD
Definition utf8proc.h:333
@ UTF8PROC_CATEGORY_LM
Definition utf8proc.h:324
@ UTF8PROC_CATEGORY_PE
Definition utf8proc.h:335
@ UTF8PROC_CATEGORY_LL
Definition utf8proc.h:322
@ UTF8PROC_CATEGORY_LO
Definition utf8proc.h:325
@ UTF8PROC_CATEGORY_CO
Definition utf8proc.h:349
@ UTF8PROC_CATEGORY_CC
Definition utf8proc.h:346
utf8proc_bidi_class_t
Definition utf8proc.h:353
@ UTF8PROC_BIDI_CLASS_ES
Definition utf8proc.h:363
@ UTF8PROC_BIDI_CLASS_RLE
Definition utf8proc.h:359
@ UTF8PROC_BIDI_CLASS_L
Definition utf8proc.h:354
@ UTF8PROC_BIDI_CLASS_AN
Definition utf8proc.h:365
@ UTF8PROC_BIDI_CLASS_CS
Definition utf8proc.h:366
@ UTF8PROC_BIDI_CLASS_B
Definition utf8proc.h:369
@ UTF8PROC_BIDI_CLASS_WS
Definition utf8proc.h:371
@ UTF8PROC_BIDI_CLASS_EN
Definition utf8proc.h:362
@ UTF8PROC_BIDI_CLASS_LRI
Definition utf8proc.h:373
@ UTF8PROC_BIDI_CLASS_ON
Definition utf8proc.h:372
@ UTF8PROC_BIDI_CLASS_FSI
Definition utf8proc.h:375
@ UTF8PROC_BIDI_CLASS_PDI
Definition utf8proc.h:376
@ UTF8PROC_BIDI_CLASS_RLO
Definition utf8proc.h:360
@ UTF8PROC_BIDI_CLASS_LRO
Definition utf8proc.h:356
@ UTF8PROC_BIDI_CLASS_ET
Definition utf8proc.h:364
@ UTF8PROC_BIDI_CLASS_NSM
Definition utf8proc.h:367
@ UTF8PROC_BIDI_CLASS_LRE
Definition utf8proc.h:355
@ UTF8PROC_BIDI_CLASS_RLI
Definition utf8proc.h:374
@ UTF8PROC_BIDI_CLASS_S
Definition utf8proc.h:370
@ UTF8PROC_BIDI_CLASS_R
Definition utf8proc.h:357
@ UTF8PROC_BIDI_CLASS_BN
Definition utf8proc.h:368
@ UTF8PROC_BIDI_CLASS_AL
Definition utf8proc.h:358
@ UTF8PROC_BIDI_CLASS_PDF
Definition utf8proc.h:361