/*
 * Library API version, split into its three numeric components.
 * Kept as object-like macros so they can be tested in #if directives.
 */
/** Major component of the API version. */
#define UTF8PROC_VERSION_MAJOR 2
/** Minor component of the API version. */
#define UTF8PROC_VERSION_MINOR 9
/** Patch component of the API version. */
#define UTF8PROC_VERSION_PATCH 0
81#if defined(_MSC_VER) && _MSC_VER < 1800
83typedef signed char utf8proc_int8_t;
84typedef unsigned char utf8proc_uint8_t;
85typedef short utf8proc_int16_t;
86typedef unsigned short utf8proc_uint16_t;
87typedef int utf8proc_int32_t;
88typedef unsigned int utf8proc_uint32_t;
90typedef __int64 utf8proc_ssize_t;
91typedef unsigned __int64 utf8proc_size_t;
93typedef int utf8proc_ssize_t;
94typedef unsigned int utf8proc_size_t;
98typedef unsigned char utf8proc_bool;
99# ifndef __bool_true_false_are_defined
102# define __bool_true_false_are_defined 1
105typedef bool utf8proc_bool;
110# include <inttypes.h>
111typedef int8_t utf8proc_int8_t;
112typedef uint8_t utf8proc_uint8_t;
113typedef int16_t utf8proc_int16_t;
114typedef uint16_t utf8proc_uint16_t;
115typedef int32_t utf8proc_int32_t;
116typedef uint32_t utf8proc_uint32_t;
117typedef size_t utf8proc_size_t;
118typedef ptrdiff_t utf8proc_ssize_t;
119typedef bool utf8proc_bool;
123#ifdef UTF8PROC_STATIC
124# define UTF8PROC_DLLEXPORT
127# ifdef UTF8PROC_EXPORTS
128# define UTF8PROC_DLLEXPORT __declspec(dllexport)
130# define UTF8PROC_DLLEXPORT __declspec(dllimport)
133# define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default")))
135# define UTF8PROC_DLLEXPORT
/*
 * Error codes. All are negative, with distinct values from -1 to -5.
 * The one-line meanings below are read off the macro names only;
 * NOTE(review): confirm exact semantics against the library documentation.
 */
/** Out-of-memory condition (presumably an allocation failure). */
#define UTF8PROC_ERROR_NOMEM -1
/** Overflow condition (presumably the input/result is too large to process). */
#define UTF8PROC_ERROR_OVERFLOW -2
/** The input is not valid UTF-8. */
#define UTF8PROC_ERROR_INVALIDUTF8 -3
/** A code point is not assigned (presumably only reported when rejection is requested). */
#define UTF8PROC_ERROR_NOTASSIGNED -4
/** The supplied options are invalid. */
#define UTF8PROC_ERROR_INVALIDOPTS -5
253 utf8proc_uint16_t decomp_seqindex;
254 utf8proc_uint16_t casefold_seqindex;
255 utf8proc_uint16_t uppercase_seqindex;
256 utf8proc_uint16_t lowercase_seqindex;
257 utf8proc_uint16_t titlecase_seqindex;
258 utf8proc_uint16_t comb_index;
259 unsigned bidi_mirrored:1;
260 unsigned comp_exclusion:1;
268 unsigned control_boundary:1;
277 unsigned indic_conjunct_break:2;
388 UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC = 19,
389 UTF8PROC_BOUNDCLASS_E_ZWG = 20,
394 UTF8PROC_INDIC_CONJUNCT_BREAK_NONE = 0,
395 UTF8PROC_INDIC_CONJUNCT_BREAK_LINKER = 1,
396 UTF8PROC_INDIC_CONJUNCT_BREAK_CONSONANT = 2,
397 UTF8PROC_INDIC_CONJUNCT_BREAK_EXTEND = 3,
429UTF8PROC_DLLEXPORT
const char *
utf8proc_errmsg(utf8proc_ssize_t errcode);
441UTF8PROC_DLLEXPORT utf8proc_ssize_t
utf8proc_iterate(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *codepoint_ref);
460UTF8PROC_DLLEXPORT utf8proc_ssize_t
utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_uint8_t *dst);
506 utf8proc_int32_t codepoint, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize,
528 const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
539 const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
617 utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2, utf8proc_int32_t *state);
624 utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2);
702 const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr,
utf8proc_option_t options
712 const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr,
utf8proc_option_t options,
725UTF8PROC_DLLEXPORT utf8proc_uint8_t *
utf8proc_NFD(
const utf8proc_uint8_t *str);
727UTF8PROC_DLLEXPORT utf8proc_uint8_t *
utf8proc_NFC(
const utf8proc_uint8_t *str);
729UTF8PROC_DLLEXPORT utf8proc_uint8_t *
utf8proc_NFKD(
const utf8proc_uint8_t *str);
731UTF8PROC_DLLEXPORT utf8proc_uint8_t *
utf8proc_NFKC(
const utf8proc_uint8_t *str);
Definition utf8proc.h:236
unsigned boundclass
Definition utf8proc.h:276
unsigned charwidth
Definition utf8proc.h:270
unsigned ignorable
Definition utf8proc.h:267
utf8proc_propval_t category
Definition utf8proc.h:241
utf8proc_propval_t bidi_class
Definition utf8proc.h:247
utf8proc_propval_t decomp_type
Definition utf8proc.h:252
utf8proc_ssize_t utf8proc_map(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options)
Definition utf8proc.c:746
utf8proc_int32_t(* utf8proc_custom_func)(utf8proc_int32_t codepoint, void *data)
Definition utf8proc.h:405
utf8proc_uint8_t * utf8proc_NFC(const utf8proc_uint8_t *str)
Definition utf8proc.c:789
utf8proc_option_t
Definition utf8proc.h:146
@ UTF8PROC_NULLTERM
Definition utf8proc.h:148
@ UTF8PROC_LUMP
Definition utf8proc.h:200
@ UTF8PROC_REJECTNA
Definition utf8proc.h:160
@ UTF8PROC_NLF2LF
Definition utf8proc.h:174
@ UTF8PROC_STRIPCC
Definition utf8proc.h:182
@ UTF8PROC_NLF2LS
Definition utf8proc.h:166
@ UTF8PROC_COMPOSE
Definition utf8proc.h:154
@ UTF8PROC_STRIPNA
Definition utf8proc.h:211
@ UTF8PROC_CASEFOLD
Definition utf8proc.h:187
@ UTF8PROC_STABLE
Definition utf8proc.h:150
@ UTF8PROC_DECOMPOSE
Definition utf8proc.h:156
@ UTF8PROC_IGNORE
Definition utf8proc.h:158
@ UTF8PROC_NLF2PS
Definition utf8proc.h:172
@ UTF8PROC_CHARBOUND
Definition utf8proc.h:192
@ UTF8PROC_COMPAT
Definition utf8proc.h:152
@ UTF8PROC_STRIPMARK
Definition utf8proc.h:207
utf8proc_uint8_t * utf8proc_NFKD(const utf8proc_uint8_t *str)
Definition utf8proc.c:796
const char * utf8proc_version(void)
Definition utf8proc.c:99
utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t codepoint, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass)
Definition utf8proc.c:448
int utf8proc_charwidth(utf8proc_int32_t codepoint)
Definition utf8proc.c:431
utf8proc_indic_conjunct_break_t
Definition utf8proc.h:393
int utf8proc_islower(utf8proc_int32_t c)
Definition utf8proc.c:417
utf8proc_ssize_t utf8proc_iterate(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *codepoint_ref)
Definition utf8proc.c:125
const utf8proc_int8_t utf8proc_utf8class[256]
Definition utf8proc.c:56
const char * utf8proc_unicode_version(void)
Definition utf8proc.c:103
const char * utf8proc_errmsg(utf8proc_ssize_t errcode)
Definition utf8proc.c:107
const utf8proc_property_t * utf8proc_get_property(utf8proc_int32_t codepoint)
Definition utf8proc.c:242
utf8proc_uint8_t * utf8proc_NFKC(const utf8proc_uint8_t *str)
Definition utf8proc.c:803
utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options)
Definition utf8proc.c:722
utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c)
Definition utf8proc.c:399
utf8proc_int16_t utf8proc_propval_t
Definition utf8proc.h:233
struct utf8proc_property_struct utf8proc_property_t
utf8proc_ssize_t utf8proc_decompose_custom(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options, utf8proc_custom_func custom_func, void *custom_data)
Definition utf8proc.c:543
utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t codepoint, utf8proc_uint8_t *dst)
Definition utf8proc.c:177
utf8proc_ssize_t utf8proc_map_custom(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options, utf8proc_custom_func custom_func, void *custom_data)
Definition utf8proc.c:752
utf8proc_decomp_type_t
Definition utf8proc.h:342
@ UTF8PROC_DECOMP_TYPE_NOBREAK
Definition utf8proc.h:344
@ UTF8PROC_DECOMP_TYPE_SUB
Definition utf8proc.h:351
@ UTF8PROC_DECOMP_TYPE_INITIAL
Definition utf8proc.h:345
@ UTF8PROC_DECOMP_TYPE_WIDE
Definition utf8proc.h:353
@ UTF8PROC_DECOMP_TYPE_SMALL
Definition utf8proc.h:355
@ UTF8PROC_DECOMP_TYPE_FONT
Definition utf8proc.h:343
@ UTF8PROC_DECOMP_TYPE_CIRCLE
Definition utf8proc.h:349
@ UTF8PROC_DECOMP_TYPE_ISOLATED
Definition utf8proc.h:348
@ UTF8PROC_DECOMP_TYPE_NARROW
Definition utf8proc.h:354
@ UTF8PROC_DECOMP_TYPE_SUPER
Definition utf8proc.h:350
@ UTF8PROC_DECOMP_TYPE_FRACTION
Definition utf8proc.h:357
@ UTF8PROC_DECOMP_TYPE_FINAL
Definition utf8proc.h:347
@ UTF8PROC_DECOMP_TYPE_VERTICAL
Definition utf8proc.h:352
@ UTF8PROC_DECOMP_TYPE_COMPAT
Definition utf8proc.h:358
@ UTF8PROC_DECOMP_TYPE_SQUARE
Definition utf8proc.h:356
@ UTF8PROC_DECOMP_TYPE_MEDIAL
Definition utf8proc.h:346
utf8proc_boundclass_t
Definition utf8proc.h:362
@ UTF8PROC_BOUNDCLASS_V
Definition utf8proc.h:370
@ UTF8PROC_BOUNDCLASS_OTHER
Definition utf8proc.h:364
@ UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ
Definition utf8proc.h:383
@ UTF8PROC_BOUNDCLASS_START
Definition utf8proc.h:363
@ UTF8PROC_BOUNDCLASS_E_BASE_GAZ
Definition utf8proc.h:384
@ UTF8PROC_BOUNDCLASS_EXTEND
Definition utf8proc.h:368
@ UTF8PROC_BOUNDCLASS_CONTROL
Definition utf8proc.h:367
@ UTF8PROC_BOUNDCLASS_SPACINGMARK
Definition utf8proc.h:375
@ UTF8PROC_BOUNDCLASS_L
Definition utf8proc.h:369
@ UTF8PROC_BOUNDCLASS_ZWJ
Definition utf8proc.h:377
@ UTF8PROC_BOUNDCLASS_T
Definition utf8proc.h:371
@ UTF8PROC_BOUNDCLASS_LV
Definition utf8proc.h:372
@ UTF8PROC_BOUNDCLASS_LF
Definition utf8proc.h:366
@ UTF8PROC_BOUNDCLASS_CR
Definition utf8proc.h:365
@ UTF8PROC_BOUNDCLASS_E_BASE
Definition utf8proc.h:381
@ UTF8PROC_BOUNDCLASS_E_MODIFIER
Definition utf8proc.h:382
@ UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR
Definition utf8proc.h:374
@ UTF8PROC_BOUNDCLASS_LVT
Definition utf8proc.h:373
@ UTF8PROC_BOUNDCLASS_PREPEND
Definition utf8proc.h:376
utf8proc_ssize_t utf8proc_decompose(const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options)
Definition utf8proc.c:536
utf8proc_uint8_t * utf8proc_NFD(const utf8proc_uint8_t *str)
Definition utf8proc.c:782
utf8proc_bool utf8proc_grapheme_break_stateful(utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2, utf8proc_int32_t *state)
Definition utf8proc.c:345
utf8proc_uint8_t * utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str)
Definition utf8proc.c:810
utf8proc_bool utf8proc_codepoint_valid(utf8proc_int32_t codepoint)
Definition utf8proc.c:173
const char * utf8proc_category_string(utf8proc_int32_t codepoint)
Definition utf8proc.c:439
utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c)
Definition utf8proc.c:411
utf8proc_category_t utf8proc_category(utf8proc_int32_t codepoint)
Definition utf8proc.c:435
utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2)
Definition utf8proc.c:358
utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
Definition utf8proc.c:405
utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options)
Definition utf8proc.c:610
int utf8proc_isupper(utf8proc_int32_t c)
Definition utf8proc.c:423
utf8proc_category_t
Definition utf8proc.h:281
@ UTF8PROC_CATEGORY_SO
Definition utf8proc.h:304
@ UTF8PROC_CATEGORY_ME
Definition utf8proc.h:290
@ UTF8PROC_CATEGORY_SK
Definition utf8proc.h:303
@ UTF8PROC_CATEGORY_PF
Definition utf8proc.h:299
@ UTF8PROC_CATEGORY_MN
Definition utf8proc.h:288
@ UTF8PROC_CATEGORY_CN
Definition utf8proc.h:282
@ UTF8PROC_CATEGORY_SC
Definition utf8proc.h:302
@ UTF8PROC_CATEGORY_ND
Definition utf8proc.h:291
@ UTF8PROC_CATEGORY_LT
Definition utf8proc.h:285
@ UTF8PROC_CATEGORY_PC
Definition utf8proc.h:294
@ UTF8PROC_CATEGORY_NO
Definition utf8proc.h:293
@ UTF8PROC_CATEGORY_MC
Definition utf8proc.h:289
@ UTF8PROC_CATEGORY_ZS
Definition utf8proc.h:305
@ UTF8PROC_CATEGORY_CF
Definition utf8proc.h:309
@ UTF8PROC_CATEGORY_LU
Definition utf8proc.h:283
@ UTF8PROC_CATEGORY_ZL
Definition utf8proc.h:306
@ UTF8PROC_CATEGORY_NL
Definition utf8proc.h:292
@ UTF8PROC_CATEGORY_SM
Definition utf8proc.h:301
@ UTF8PROC_CATEGORY_ZP
Definition utf8proc.h:307
@ UTF8PROC_CATEGORY_PI
Definition utf8proc.h:298
@ UTF8PROC_CATEGORY_PO
Definition utf8proc.h:300
@ UTF8PROC_CATEGORY_PS
Definition utf8proc.h:296
@ UTF8PROC_CATEGORY_CS
Definition utf8proc.h:310
@ UTF8PROC_CATEGORY_PD
Definition utf8proc.h:295
@ UTF8PROC_CATEGORY_LM
Definition utf8proc.h:286
@ UTF8PROC_CATEGORY_PE
Definition utf8proc.h:297
@ UTF8PROC_CATEGORY_LL
Definition utf8proc.h:284
@ UTF8PROC_CATEGORY_LO
Definition utf8proc.h:287
@ UTF8PROC_CATEGORY_CO
Definition utf8proc.h:311
@ UTF8PROC_CATEGORY_CC
Definition utf8proc.h:308
utf8proc_bidi_class_t
Definition utf8proc.h:315
@ UTF8PROC_BIDI_CLASS_ES
Definition utf8proc.h:325
@ UTF8PROC_BIDI_CLASS_RLE
Definition utf8proc.h:321
@ UTF8PROC_BIDI_CLASS_L
Definition utf8proc.h:316
@ UTF8PROC_BIDI_CLASS_AN
Definition utf8proc.h:327
@ UTF8PROC_BIDI_CLASS_CS
Definition utf8proc.h:328
@ UTF8PROC_BIDI_CLASS_B
Definition utf8proc.h:331
@ UTF8PROC_BIDI_CLASS_WS
Definition utf8proc.h:333
@ UTF8PROC_BIDI_CLASS_EN
Definition utf8proc.h:324
@ UTF8PROC_BIDI_CLASS_LRI
Definition utf8proc.h:335
@ UTF8PROC_BIDI_CLASS_ON
Definition utf8proc.h:334
@ UTF8PROC_BIDI_CLASS_FSI
Definition utf8proc.h:337
@ UTF8PROC_BIDI_CLASS_PDI
Definition utf8proc.h:338
@ UTF8PROC_BIDI_CLASS_RLO
Definition utf8proc.h:322
@ UTF8PROC_BIDI_CLASS_LRO
Definition utf8proc.h:318
@ UTF8PROC_BIDI_CLASS_ET
Definition utf8proc.h:326
@ UTF8PROC_BIDI_CLASS_NSM
Definition utf8proc.h:329
@ UTF8PROC_BIDI_CLASS_LRE
Definition utf8proc.h:317
@ UTF8PROC_BIDI_CLASS_RLI
Definition utf8proc.h:336
@ UTF8PROC_BIDI_CLASS_S
Definition utf8proc.h:332
@ UTF8PROC_BIDI_CLASS_R
Definition utf8proc.h:319
@ UTF8PROC_BIDI_CLASS_BN
Definition utf8proc.h:330
@ UTF8PROC_BIDI_CLASS_AL
Definition utf8proc.h:320
@ UTF8PROC_BIDI_CLASS_PDF
Definition utf8proc.h:323