ICU 4.8.1.1  4.8.1.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
uchar.h
Go to the documentation of this file.
1 /*
2 **********************************************************************
3 * Copyright (C) 1997-2010, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * File UCHAR.H
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 04/02/97 aliu Creation.
13 * 03/29/99 helena Updated for C APIs.
14 * 4/15/99 Madhu Updated for C Implementation and Javadoc
15 * 5/20/99 Madhu Added the function u_getVersion()
16 * 8/19/1999 srl Upgraded scripts to Unicode 3.0
17 * 8/27/1999 schererm UCharDirection constants: U_...
18 * 11/11/1999 weiv added u_isalnum(), cleaned comments
19 * 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion().
20 ******************************************************************************
21 */
22 
23 #ifndef UCHAR_H
24 #define UCHAR_H
25 
26 #include "unicode/utypes.h"
27 
29 
30 /*==========================================================================*/
31 /* Unicode version number */
32 /*==========================================================================*/
42 #define U_UNICODE_VERSION "6.0"
43 
124 #define UCHAR_MIN_VALUE 0
125 
134 #define UCHAR_MAX_VALUE 0x10ffff
135 
/*
 * U_MASK(x): expands to a uint32_t with only bit number x set
 * (the value 1, cast to uint32_t, shifted left by x).
 * The macro performs no range checking; x is expected to be in 0..31,
 * since a larger shift count is not well defined in C.
 */
140 #define U_MASK(x) ((uint32_t)1<<(x))
141 
142 /*
143  * !! Note: Several comments in this file are machine-read by the
144  * genpname tool. These comments describe the correspondence between
145  * icu enum constants and UCD entities. Do not delete them. Update
146  * these comments as needed.
147  *
148  * Any comment of the form "/ *[name]* /" (spaces added) is such
149  * a comment.
150  *
151  * The U_JG_* and U_GC_*_MASK constants are matched by their symbolic
152  * name, which must match PropertyValueAliases.txt.
153  */
154 
174 typedef enum UProperty {
175  /* See note !!. Comments of the form "Binary property Dash",
176  "Enumerated property Script", "Double property Numeric_Value",
177  and "String property Age" are read by genpname. */
178 
179  /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
180  debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
181  rather than UCHAR_BINARY_START. Likewise for other *_START
182  identifiers. */
183 
413 
421  UCHAR_BLOCK=0x1001,
449  UCHAR_SCRIPT=0x100A,
496 
510 
518 
521  UCHAR_AGE=0x4000,
538  UCHAR_NAME=0x4005,
562 
576 
579 } UProperty;
580 
586 typedef enum UCharCategory
587 {
654 } UCharCategory;
655 
/*
 * Single-bit masks, one per U_GC_xx_MASK name: each is U_MASK() applied
 * to the enum constant named in its expansion (presumably a UCharCategory
 * value; that enum's members are not visible in this listing).
 * Per the genpname note earlier in this file, the U_GC_*_MASK constants are
 * matched by their symbolic name, which must match PropertyValueAliases.txt,
 * so these macros must not be renamed.
 */
670 #define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES)
671 
673 #define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER)
674 
675 #define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER)
676 
677 #define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER)
678 
679 #define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER)
680 
681 #define U_GC_LO_MASK U_MASK(U_OTHER_LETTER)
682 
684 #define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK)
685 
686 #define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK)
687 
688 #define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK)
689 
691 #define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER)
692 
693 #define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER)
694 
695 #define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER)
696 
698 #define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR)
699 
700 #define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR)
701 
702 #define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR)
703 
705 #define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR)
706 
707 #define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR)
708 
709 #define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR)
710 
711 #define U_GC_CS_MASK U_MASK(U_SURROGATE)
712 
714 #define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION)
715 
716 #define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION)
717 
718 #define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION)
719 
720 #define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION)
721 
722 #define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION)
723 
725 #define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL)
726 
727 #define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL)
728 
729 #define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL)
730 
731 #define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL)
732 
734 #define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION)
735 
736 #define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION)
737 
738 
/*
 * Combined masks: each U_GC_x_MASK below is the bitwise OR of the
 * single-category U_GC_xx_MASK values listed in its expansion.
 * U_GC_LC_MASK is the LU|LL|LT subset of U_GC_L_MASK (which also
 * includes LM and LO). Every expansion is fully parenthesized, so the
 * macros can be combined with other operators safely.
 */
740 #define U_GC_L_MASK \
741  (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)
742 
744 #define U_GC_LC_MASK \
745  (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)
746 
748 #define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)
749 
751 #define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)
752 
754 #define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)
755 
757 #define U_GC_C_MASK \
758  (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)
759 
761 #define U_GC_P_MASK \
762  (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \
763  U_GC_PI_MASK|U_GC_PF_MASK)
764 
766 #define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)
767 
772 typedef enum UCharDirection {
816 
822 
824  UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */
825 
827  UBLOCK_BASIC_LATIN = 1, /*[0000]*/ /*See note !!*/
828 
831 
833  UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/
834 
836  UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/
837 
839  UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/
840 
843 
846 
851  UBLOCK_GREEK =8, /*[0370]*/
852 
854  UBLOCK_CYRILLIC =9, /*[0400]*/
855 
857  UBLOCK_ARMENIAN =10, /*[0530]*/
858 
860  UBLOCK_HEBREW =11, /*[0590]*/
861 
863  UBLOCK_ARABIC =12, /*[0600]*/
864 
866  UBLOCK_SYRIAC =13, /*[0700]*/
867 
869  UBLOCK_THAANA =14, /*[0780]*/
870 
872  UBLOCK_DEVANAGARI =15, /*[0900]*/
873 
875  UBLOCK_BENGALI =16, /*[0980]*/
876 
878  UBLOCK_GURMUKHI =17, /*[0A00]*/
879 
881  UBLOCK_GUJARATI =18, /*[0A80]*/
882 
884  UBLOCK_ORIYA =19, /*[0B00]*/
885 
887  UBLOCK_TAMIL =20, /*[0B80]*/
888 
890  UBLOCK_TELUGU =21, /*[0C00]*/
891 
893  UBLOCK_KANNADA =22, /*[0C80]*/
894 
896  UBLOCK_MALAYALAM =23, /*[0D00]*/
897 
899  UBLOCK_SINHALA =24, /*[0D80]*/
900 
902  UBLOCK_THAI =25, /*[0E00]*/
903 
905  UBLOCK_LAO =26, /*[0E80]*/
906 
908  UBLOCK_TIBETAN =27, /*[0F00]*/
909 
911  UBLOCK_MYANMAR =28, /*[1000]*/
912 
914  UBLOCK_GEORGIAN =29, /*[10A0]*/
915 
917  UBLOCK_HANGUL_JAMO =30, /*[1100]*/
918 
920  UBLOCK_ETHIOPIC =31, /*[1200]*/
921 
923  UBLOCK_CHEROKEE =32, /*[13A0]*/
924 
927 
929  UBLOCK_OGHAM =34, /*[1680]*/
930 
932  UBLOCK_RUNIC =35, /*[16A0]*/
933 
935  UBLOCK_KHMER =36, /*[1780]*/
936 
938  UBLOCK_MONGOLIAN =37, /*[1800]*/
939 
942 
944  UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/
945 
948 
951 
953  UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/
954 
960 
963 
965  UBLOCK_NUMBER_FORMS =45, /*[2150]*/
966 
968  UBLOCK_ARROWS =46, /*[2190]*/
969 
972 
975 
977  UBLOCK_CONTROL_PICTURES =49, /*[2400]*/
978 
981 
984 
986  UBLOCK_BOX_DRAWING =52, /*[2500]*/
987 
989  UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/
990 
992  UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/
993 
996 
998  UBLOCK_DINGBATS =56, /*[2700]*/
999 
1001  UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/
1002 
1005 
1007  UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/
1008 
1011 
1014 
1016  UBLOCK_HIRAGANA =62, /*[3040]*/
1017 
1019  UBLOCK_KATAKANA =63, /*[30A0]*/
1020 
1022  UBLOCK_BOPOMOFO =64, /*[3100]*/
1023 
1026 
1028  UBLOCK_KANBUN =66, /*[3190]*/
1029 
1032 
1035 
1038 
1041 
1044 
1046  UBLOCK_YI_SYLLABLES =72, /*[A000]*/
1047 
1049  UBLOCK_YI_RADICALS =73, /*[A490]*/
1050 
1052  UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/
1053 
1055  UBLOCK_HIGH_SURROGATES =75, /*[D800]*/
1056 
1059 
1061  UBLOCK_LOW_SURROGATES =77, /*[DC00]*/
1062 
1083 
1086 
1089 
1092 
1095 
1098 
1101 
1104 
1106  UBLOCK_SPECIALS =86, /*[FFF0]*/
1107 
1110 
1111  /* New blocks in Unicode 3.1 */
1112 
1114  UBLOCK_OLD_ITALIC = 88 , /*[10300]*/
1116  UBLOCK_GOTHIC = 89 , /*[10330]*/
1118  UBLOCK_DESERET = 90 , /*[10400]*/
1122  UBLOCK_MUSICAL_SYMBOLS = 92 , /*[1D100]*/
1130  UBLOCK_TAGS = 96, /*[E0000]*/
1131 
1132  /* New blocks in Unicode 3.2 */
1133 
1142  UBLOCK_TAGALOG = 98, /*[1700]*/
1144  UBLOCK_HANUNOO = 99, /*[1720]*/
1146  UBLOCK_BUHID = 100, /*[1740]*/
1148  UBLOCK_TAGBANWA = 101, /*[1760]*/
1162  UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/
1167 
1168  /* New blocks in Unicode 4 */
1169 
1171  UBLOCK_LIMBU = 111, /*[1900]*/
1173  UBLOCK_TAI_LE = 112, /*[1950]*/
1175  UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/
1177  UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/
1183  UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/
1185  UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/
1187  UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/
1189  UBLOCK_UGARITIC = 120, /*[10380]*/
1191  UBLOCK_SHAVIAN = 121, /*[10450]*/
1193  UBLOCK_OSMANYA = 122, /*[10480]*/
1195  UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/
1197  UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/
1200 
1201  /* New blocks in Unicode 4.1 */
1202 
1206  UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/
1208  UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/
1210  UBLOCK_BUGINESE = 129, /*[1A00]*/
1212  UBLOCK_CJK_STROKES = 130, /*[31C0]*/
1216  UBLOCK_COPTIC = 132, /*[2C80]*/
1218  UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/
1220  UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/
1222  UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/
1224  UBLOCK_GLAGOLITIC = 136, /*[2C00]*/
1226  UBLOCK_KHAROSHTHI = 137, /*[10A00]*/
1230  UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/
1232  UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/
1238  UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/
1240  UBLOCK_TIFINAGH = 144, /*[2D30]*/
1242  UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/
1243 
1244  /* New blocks in Unicode 5.0 */
1245 
1247  UBLOCK_NKO = 146, /*[07C0]*/
1249  UBLOCK_BALINESE = 147, /*[1B00]*/
1251  UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/
1253  UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/
1255  UBLOCK_PHAGS_PA = 150, /*[A840]*/
1257  UBLOCK_PHOENICIAN = 151, /*[10900]*/
1259  UBLOCK_CUNEIFORM = 152, /*[12000]*/
1263  UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/
1264 
1265  /* New blocks in Unicode 5.1 */
1266 
1268  UBLOCK_SUNDANESE = 155, /*[1B80]*/
1270  UBLOCK_LEPCHA = 156, /*[1C00]*/
1272  UBLOCK_OL_CHIKI = 157, /*[1C50]*/
1274  UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/
1276  UBLOCK_VAI = 159, /*[A500]*/
1278  UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/
1280  UBLOCK_SAURASHTRA = 161, /*[A880]*/
1282  UBLOCK_KAYAH_LI = 162, /*[A900]*/
1284  UBLOCK_REJANG = 163, /*[A930]*/
1286  UBLOCK_CHAM = 164, /*[AA00]*/
1288  UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/
1290  UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/
1292  UBLOCK_LYCIAN = 167, /*[10280]*/
1294  UBLOCK_CARIAN = 168, /*[102A0]*/
1296  UBLOCK_LYDIAN = 169, /*[10920]*/
1298  UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/
1300  UBLOCK_DOMINO_TILES = 171, /*[1F030]*/
1301 
1302  /* New blocks in Unicode 5.2 */
1303 
1305  UBLOCK_SAMARITAN = 172, /*[0800]*/
1309  UBLOCK_TAI_THAM = 174, /*[1A20]*/
1311  UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/
1313  UBLOCK_LISU = 176, /*[A4D0]*/
1315  UBLOCK_BAMUM = 177, /*[A6A0]*/
1319  UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/
1323  UBLOCK_JAVANESE = 181, /*[A980]*/
1325  UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/
1327  UBLOCK_TAI_VIET = 183, /*[AA80]*/
1329  UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/
1333  UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/
1335  UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/
1337  UBLOCK_AVESTAN = 188, /*[10B00]*/
1341  UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/
1343  UBLOCK_OLD_TURKIC = 191, /*[10C00]*/
1345  UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/
1347  UBLOCK_KAITHI = 193, /*[11080]*/
1349  UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/
1356 
1357  /* New blocks in Unicode 6.0 */
1358 
1360  UBLOCK_MANDAIC = 198, /*[0840]*/
1362  UBLOCK_BATAK = 199, /*[1BC0]*/
1364  UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/
1366  UBLOCK_BRAHMI = 201, /*[11000]*/
1368  UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/
1370  UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/
1372  UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/
1376  UBLOCK_EMOTICONS = 206, /*[1F600]*/
1380  UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/
1383 
1386 
1389 };
1390 
1392 typedef enum UBlockCode UBlockCode;
1393 
/*
 * UEastAsianWidth: value constants for East Asian Width.
 * Enumerators take implicit values starting at 0 (U_EA_NEUTRAL), so
 * U_EA_COUNT, being last, equals the number of width values (6).
 * The bracketed comments carry the short value names and are machine-read
 * by the genpname tool; do not delete them.
 */
1401 typedef enum UEastAsianWidth {
1402  U_EA_NEUTRAL, /*[N]*/ /*See note !!*/
1403  U_EA_AMBIGUOUS, /*[A]*/
1404  U_EA_HALFWIDTH, /*[H]*/
1405  U_EA_FULLWIDTH, /*[F]*/
1406  U_EA_NARROW, /*[Na]*/
1407  U_EA_WIDE, /*[W]*/
1408  U_EA_COUNT
1409 } UEastAsianWidth;
1410 /*
1411  * Implementation note:
1412  * Keep UEastAsianWidth constant values in sync with names list in genprops/props2.c.
1413  */
1414 
/*
 * UCharNameChoice: selector constants for u_charName().
 * Enumerators take implicit values starting at 0.
 * NOTE(review): this listing skips source line 1430, so an enumerator may
 * sit between U_EXTENDED_CHAR_NAME and U_CHAR_NAME_CHOICE_COUNT in the real
 * header; do not rely on the numeric value of the count from this view.
 */
1426 typedef enum UCharNameChoice {
1427  U_UNICODE_CHAR_NAME,
1428  U_UNICODE_10_CHAR_NAME,
1429  U_EXTENDED_CHAR_NAME,
1431  U_CHAR_NAME_CHOICE_COUNT
1432 } UCharNameChoice;
1433 
1447 typedef enum UPropertyNameChoice {
1448  U_SHORT_PROPERTY_NAME,
1449  U_LONG_PROPERTY_NAME,
1450  U_PROPERTY_NAME_CHOICE_COUNT
1452 
1459 typedef enum UDecompositionType {
1460  U_DT_NONE, /*[none]*/ /*See note !!*/
1461  U_DT_CANONICAL, /*[can]*/
1462  U_DT_COMPAT, /*[com]*/
1463  U_DT_CIRCLE, /*[enc]*/
1464  U_DT_FINAL, /*[fin]*/
1465  U_DT_FONT, /*[font]*/
1466  U_DT_FRACTION, /*[fra]*/
1467  U_DT_INITIAL, /*[init]*/
1468  U_DT_ISOLATED, /*[iso]*/
1469  U_DT_MEDIAL, /*[med]*/
1470  U_DT_NARROW, /*[nar]*/
1471  U_DT_NOBREAK, /*[nb]*/
1472  U_DT_SMALL, /*[sml]*/
1473  U_DT_SQUARE, /*[sqr]*/
1474  U_DT_SUB, /*[sub]*/
1475  U_DT_SUPER, /*[sup]*/
1476  U_DT_VERTICAL, /*[vert]*/
1477  U_DT_WIDE, /*[wide]*/
1478  U_DT_COUNT /* 18 */
1480 
/*
 * UJoiningType: value constants for joining type.
 * Enumerators take implicit values starting at 0 (U_JT_NON_JOINING);
 * U_JT_COUNT is last and therefore equals 6, as its trailing comment says.
 * The bracketed comments carry the short value names and are machine-read
 * by the genpname tool; do not delete them.
 */
1487 typedef enum UJoiningType {
1488  U_JT_NON_JOINING, /*[U]*/ /*See note !!*/
1489  U_JT_JOIN_CAUSING, /*[C]*/
1490  U_JT_DUAL_JOINING, /*[D]*/
1491  U_JT_LEFT_JOINING, /*[L]*/
1492  U_JT_RIGHT_JOINING, /*[R]*/
1493  U_JT_TRANSPARENT, /*[T]*/
1494  U_JT_COUNT /* 6 */
1495 } UJoiningType;
1496 
/*
 * UJoiningGroup: value constants for joining group.
 * Per the genpname note earlier in this file, the U_JG_* constants are
 * matched by their symbolic name, which must match PropertyValueAliases.txt,
 * so enumerators must not be renamed.
 * Enumerators take implicit sequential values starting at 0
 * (U_JG_NO_JOINING_GROUP), except where an explicit value is given.
 * NOTE(review): this listing omits some source lines (1518 and 1556-1561),
 * so enumerators are missing from this view and U_JG_COUNT cannot be
 * computed from it.
 */
1503 typedef enum UJoiningGroup {
1504  U_JG_NO_JOINING_GROUP,
1505  U_JG_AIN,
1506  U_JG_ALAPH,
1507  U_JG_ALEF,
1508  U_JG_BEH,
1509  U_JG_BETH,
1510  U_JG_DAL,
1511  U_JG_DALATH_RISH,
1512  U_JG_E,
1513  U_JG_FEH,
1514  U_JG_FINAL_SEMKATH,
1515  U_JG_GAF,
1516  U_JG_GAMAL,
1517  U_JG_HAH,
 /* Alias: same value as U_JG_TEH_MARBUTA_GOAL, whose own definition is
    presumably on the source line (1518) omitted from this listing. */
1519  U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL,
1520  U_JG_HE,
1521  U_JG_HEH,
1522  U_JG_HEH_GOAL,
1523  U_JG_HETH,
1524  U_JG_KAF,
1525  U_JG_KAPH,
1526  U_JG_KNOTTED_HEH,
1527  U_JG_LAM,
1528  U_JG_LAMADH,
1529  U_JG_MEEM,
1530  U_JG_MIM,
1531  U_JG_NOON,
1532  U_JG_NUN,
1533  U_JG_PE,
1534  U_JG_QAF,
1535  U_JG_QAPH,
1536  U_JG_REH,
1537  U_JG_REVERSED_PE,
1538  U_JG_SAD,
1539  U_JG_SADHE,
1540  U_JG_SEEN,
1541  U_JG_SEMKATH,
1542  U_JG_SHIN,
1543  U_JG_SWASH_KAF,
1544  U_JG_SYRIAC_WAW,
1545  U_JG_TAH,
1546  U_JG_TAW,
1547  U_JG_TEH_MARBUTA,
1548  U_JG_TETH,
1549  U_JG_WAW,
1550  U_JG_YEH,
1551  U_JG_YEH_BARREE,
1552  U_JG_YEH_WITH_TAIL,
1553  U_JG_YUDH,
1554  U_JG_YUDH_HE,
1555  U_JG_ZAIN,
1562  U_JG_COUNT
1563 } UJoiningGroup;
1564 
1572  U_GCB_OTHER = 0, /*[XX]*/ /*See note !!*/
1573  U_GCB_CONTROL = 1, /*[CN]*/
1574  U_GCB_CR = 2, /*[CR]*/
1575  U_GCB_EXTEND = 3, /*[EX]*/
1576  U_GCB_L = 4, /*[L]*/
1577  U_GCB_LF = 5, /*[LF]*/
1578  U_GCB_LV = 6, /*[LV]*/
1579  U_GCB_LVT = 7, /*[LVT]*/
1580  U_GCB_T = 8, /*[T]*/
1581  U_GCB_V = 9, /*[V]*/
1582  U_GCB_SPACING_MARK = 10, /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
1583  U_GCB_PREPEND = 11, /*[PP]*/
1584  U_GCB_COUNT = 12
1586 
1594 typedef enum UWordBreakValues {
1595  U_WB_OTHER = 0, /*[XX]*/ /*See note !!*/
1596  U_WB_ALETTER = 1, /*[LE]*/
1597  U_WB_FORMAT = 2, /*[FO]*/
1598  U_WB_KATAKANA = 3, /*[KA]*/
1599  U_WB_MIDLETTER = 4, /*[ML]*/
1600  U_WB_MIDNUM = 5, /*[MN]*/
1601  U_WB_NUMERIC = 6, /*[NU]*/
1602  U_WB_EXTENDNUMLET = 7, /*[EX]*/
1603  U_WB_CR = 8, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
1604  U_WB_EXTEND = 9, /*[Extend]*/
1605  U_WB_LF = 10, /*[LF]*/
1606  U_WB_MIDNUMLET =11, /*[MB]*/
1607  U_WB_NEWLINE =12, /*[NL]*/
1608  U_WB_COUNT = 13
1610 
/*
 * USentenceBreak: Sentence Break constants.
 * Every enumerator has an explicit value; U_SB_COUNT (15) is one more than
 * the largest value (U_SB_SCONTINUE = 14).
 * The bracketed comments carry the short value names and are machine-read
 * by the genpname tool; do not delete them.
 */
1617 typedef enum USentenceBreak {
1618  U_SB_OTHER = 0, /*[XX]*/ /*See note !!*/
1619  U_SB_ATERM = 1, /*[AT]*/
1620  U_SB_CLOSE = 2, /*[CL]*/
1621  U_SB_FORMAT = 3, /*[FO]*/
1622  U_SB_LOWER = 4, /*[LO]*/
1623  U_SB_NUMERIC = 5, /*[NU]*/
1624  U_SB_OLETTER = 6, /*[LE]*/
1625  U_SB_SEP = 7, /*[SE]*/
1626  U_SB_SP = 8, /*[SP]*/
1627  U_SB_STERM = 9, /*[ST]*/
1628  U_SB_UPPER = 10, /*[UP]*/
1629  U_SB_CR = 11, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
1630  U_SB_EXTEND = 12, /*[EX]*/
1631  U_SB_LF = 13, /*[LF]*/
1632  U_SB_SCONTINUE = 14, /*[SC]*/
1633  U_SB_COUNT = 15
1634 } USentenceBreak;
1635 
/*
 * ULineBreak: Line Break constants.
 * Every enumerator has an explicit value; U_LB_COUNT (37) is one more than
 * the largest value (U_LB_CLOSE_PARENTHESIS = 36).
 * The bracketed comments carry the short value names and are machine-read
 * by the genpname tool; do not delete them.
 */
1642 typedef enum ULineBreak {
1643  U_LB_UNKNOWN = 0, /*[XX]*/ /*See note !!*/
1644  U_LB_AMBIGUOUS = 1, /*[AI]*/
1645  U_LB_ALPHABETIC = 2, /*[AL]*/
1646  U_LB_BREAK_BOTH = 3, /*[B2]*/
1647  U_LB_BREAK_AFTER = 4, /*[BA]*/
1648  U_LB_BREAK_BEFORE = 5, /*[BB]*/
1649  U_LB_MANDATORY_BREAK = 6, /*[BK]*/
1650  U_LB_CONTINGENT_BREAK = 7, /*[CB]*/
1651  U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/
1652  U_LB_COMBINING_MARK = 9, /*[CM]*/
1653  U_LB_CARRIAGE_RETURN = 10, /*[CR]*/
1654  U_LB_EXCLAMATION = 11, /*[EX]*/
1655  U_LB_GLUE = 12, /*[GL]*/
1656  U_LB_HYPHEN = 13, /*[HY]*/
1657  U_LB_IDEOGRAPHIC = 14, /*[ID]*/
 /* U_LB_INSEPERABLE is the original, misspelled name; U_LB_INSEPARABLE is
    the corrected spelling and is defined as an alias with the same value
    (15), so both names remain usable. */
1658  U_LB_INSEPERABLE = 15,
1660  U_LB_INSEPARABLE=U_LB_INSEPERABLE,/*[IN]*/
1661  U_LB_INFIX_NUMERIC = 16, /*[IS]*/
1662  U_LB_LINE_FEED = 17, /*[LF]*/
1663  U_LB_NONSTARTER = 18, /*[NS]*/
1664  U_LB_NUMERIC = 19, /*[NU]*/
1665  U_LB_OPEN_PUNCTUATION = 20, /*[OP]*/
1666  U_LB_POSTFIX_NUMERIC = 21, /*[PO]*/
1667  U_LB_PREFIX_NUMERIC = 22, /*[PR]*/
1668  U_LB_QUOTATION = 23, /*[QU]*/
1669  U_LB_COMPLEX_CONTEXT = 24, /*[SA]*/
1670  U_LB_SURROGATE = 25, /*[SG]*/
1671  U_LB_SPACE = 26, /*[SP]*/
1672  U_LB_BREAK_SYMBOLS = 27, /*[SY]*/
1673  U_LB_ZWSPACE = 28, /*[ZW]*/
1674  U_LB_NEXT_LINE = 29, /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
1675  U_LB_WORD_JOINER = 30, /*[WJ]*/
1676  U_LB_H2 = 31, /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */
1677  U_LB_H3 = 32, /*[H3]*/
1678  U_LB_JL = 33, /*[JL]*/
1679  U_LB_JT = 34, /*[JT]*/
1680  U_LB_JV = 35, /*[JV]*/
1681  U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
1682  U_LB_COUNT = 37
1683 } ULineBreak;
1684 
/*
 * UNumericType: Numeric Type constants.
 * Enumerators take implicit values starting at 0 (U_NT_NONE), so
 * U_NT_COUNT, being last, equals the number of numeric types (4).
 * The bracketed comments carry the short value names and are machine-read
 * by the genpname tool; do not delete them.
 */
1691 typedef enum UNumericType {
1692  U_NT_NONE, /*[None]*/ /*See note !!*/
1693  U_NT_DECIMAL, /*[de]*/
1694  U_NT_DIGIT, /*[di]*/
1695  U_NT_NUMERIC, /*[nu]*/
1696  U_NT_COUNT
1697 } UNumericType;
1698 
1705 typedef enum UHangulSyllableType {
1706  U_HST_NOT_APPLICABLE, /*[NA]*/ /*See note !!*/
1707  U_HST_LEADING_JAMO, /*[L]*/
1708  U_HST_VOWEL_JAMO, /*[V]*/
1709  U_HST_TRAILING_JAMO, /*[T]*/
1710  U_HST_LV_SYLLABLE, /*[LV]*/
1711  U_HST_LVT_SYLLABLE, /*[LVT]*/
1712  U_HST_COUNT
1714 
1743 
1758 
1773 
1788 
1809 
1847 U_STABLE int32_t U_EXPORT2
1849 
1868 U_STABLE int32_t U_EXPORT2
1870 
1897 U_STABLE int32_t U_EXPORT2
1899 
1920 U_STABLE double U_EXPORT2
1922 
1930 #define U_NO_NUMERIC_VALUE ((double)-123456789.)
1931 
1956 u_islower(UChar32 c);
1957 
1983 u_isupper(UChar32 c);
1984 
2000 u_istitle(UChar32 c);
2001 
2021 u_isdigit(UChar32 c);
2022 
2042 u_isalpha(UChar32 c);
2043 
2063 u_isalnum(UChar32 c);
2064 
2086 u_isxdigit(UChar32 c);
2087 
2102 u_ispunct(UChar32 c);
2103 
2121 u_isgraph(UChar32 c);
2122 
2150 u_isblank(UChar32 c);
2151 
2175 u_isdefined(UChar32 c);
2176 
2196 u_isspace(UChar32 c);
2197 
2218 
2258 
2281 u_iscntrl(UChar32 c);
2282 
2297 
2314 u_isprint(UChar32 c);
2315 
2335 u_isbase(UChar32 c);
2336 
2355 
2373 
2395 
/*
 * u_charType: takes a code point (UChar32) and returns an int8_t.
 * NOTE(review): presumably the result is a UCharCategory value -- the
 * U_GET_GC_MASK macro in this file passes it to U_MASK() -- confirm
 * against the full header documentation, which this listing omits.
 */
2407 U_STABLE int8_t U_EXPORT2
2408 u_charType(UChar32 c);
2409 
/*
 * U_GET_GC_MASK(c): expands to U_MASK(u_charType(c)), i.e. the single-bit
 * uint32_t mask whose bit index is the value u_charType() returns for c.
 * The argument c appears once in the expansion.
 */
2423 #define U_GET_GC_MASK(c) U_MASK(u_charType(c))
2424 
/*
 * UCharEnumTypeRange: function type of the callback used by
 * u_enumCharTypes(). It is called for each contiguous range of code
 * points, receiving the range bounds (start, limit) and a UCharCategory
 * value for that range, plus a caller-supplied context pointer.
 * NOTE(review): whether limit is exclusive and what the UBool return value
 * controls are not shown in this listing -- confirm in the full header.
 */
2442 typedef UBool U_CALLCONV
2443 UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);
2444 
/*
 * u_enumCharTypes: enumerate efficiently all code points with their
 * Unicode general categories.
 * enumRange is a pointer to a UCharEnumTypeRange callback; context is an
 * opaque pointer of the same type as the callback's first parameter
 * (presumably passed through to it unchanged -- confirm).
 */
2464 U_STABLE void U_EXPORT2
2465 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);
2466 
2467 #if !UCONFIG_NO_NORMALIZATION
2468 
2476 U_STABLE uint8_t U_EXPORT2
2478 
2479 #endif
2480 
2504 U_STABLE int32_t U_EXPORT2
2506 
2518 
/*
 * u_charName: takes a code point, a UCharNameChoice selector, a caller
 * char buffer with its length, and a UErrorCode pointer; returns int32_t.
 * NOTE(review): presumably writes the selected name of the code point into
 * buffer and returns its length, reporting failures through pErrorCode --
 * the documentation is omitted from this listing; confirm before relying
 * on termination or truncation behavior.
 */
2551 U_STABLE int32_t U_EXPORT2
2552 u_charName(UChar32 code, UCharNameChoice nameChoice,
2553  char *buffer, int32_t bufferLength,
2554  UErrorCode *pErrorCode);
2555 
2581 U_STABLE int32_t U_EXPORT2
2583  char *dest, int32_t destCapacity,
2584  UErrorCode *pErrorCode);
2585 
2607 u_charFromName(UCharNameChoice nameChoice,
2608  const char *name,
2609  UErrorCode *pErrorCode);
2610 
/*
 * UEnumCharNamesFn: type of the callback function for u_enumCharNames();
 * it gets called for each Unicode character and receives the caller's
 * context pointer, the code point, the name choice, and a name with its
 * length.
 * NOTE(review): the meaning of the UBool return value and whether name is
 * NUL-terminated are not shown in this listing -- confirm in the full
 * header.
 */
2628 typedef UBool U_CALLCONV UEnumCharNamesFn(void *context,
2629  UChar32 code,
2630  UCharNameChoice nameChoice,
2631  const char *name,
2632  int32_t length);
2633 
/*
 * u_enumCharNames: takes a code point range (start, limit), a pointer to
 * a UEnumCharNamesFn callback, an opaque context pointer, a
 * UCharNameChoice selector, and a UErrorCode pointer; returns nothing.
 * NOTE(review): presumably invokes fn for the characters in the range
 * with names of the selected kind; whether limit is exclusive is not
 * shown in this listing -- confirm in the full header.
 */
2655 U_STABLE void U_EXPORT2
2656 u_enumCharNames(UChar32 start, UChar32 limit,
2657  UEnumCharNamesFn *fn,
2658  void *context,
2659  UCharNameChoice nameChoice,
2660  UErrorCode *pErrorCode);
2661 
/*
 * u_getPropertyName: return the Unicode name for a given property, as
 * given in the Unicode database property-aliases file.
 * property selects the UProperty; nameChoice is a UPropertyNameChoice
 * selector (short or long name). Returns a const char pointer.
 * NOTE(review): the result for an unknown property or name choice, and
 * the lifetime of the returned string, are not shown in this listing.
 */
2693 U_STABLE const char* U_EXPORT2
2694 u_getPropertyName(UProperty property,
2695  UPropertyNameChoice nameChoice);
2696 
2717 u_getPropertyEnum(const char* alias);
2718 
2766 U_STABLE const char* U_EXPORT2
2768  int32_t value,
2769  UPropertyNameChoice nameChoice);
2770 
2802 U_STABLE int32_t U_EXPORT2
2804  const char* alias);
2805 
2824 u_isIDStart(UChar32 c);
2825 
2848 u_isIDPart(UChar32 c);
2849 
2872 
2891 
2912 
2936 u_tolower(UChar32 c);
2937 
2961 u_toupper(UChar32 c);
2962 
2986 u_totitle(UChar32 c);
2987 
2989 #define U_FOLD_CASE_DEFAULT 0
2990 
3007 #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
3008 
3032 u_foldCase(UChar32 c, uint32_t options);
3033 
/*
 * u_digit: returns the decimal digit value of the code point ch in the
 * specified radix, as an int32_t.
 * NOTE(review): the accepted radix range and the value returned when ch
 * is not a digit in that radix are not shown in this listing -- confirm.
 */
3072 U_STABLE int32_t U_EXPORT2
3073 u_digit(UChar32 ch, int8_t radix);
3074 
3104 u_forDigit(int32_t digit, int8_t radix);
3105 
/*
 * u_charAge: get the "age" of the code point c.
 * The result is delivered through versionArray, a UVersionInfo (the
 * binary form of a version: an array of 4 uint8_t); nothing is returned.
 */
3120 U_STABLE void U_EXPORT2
3121 u_charAge(UChar32 c, UVersionInfo versionArray);
3122 
/*
 * u_getUnicodeVersion: takes a UVersionInfo (array of 4 uint8_t) and
 * returns nothing.
 * NOTE(review): presumably fills versionArray with the Unicode version
 * the library implements (cf. U_UNICODE_VERSION in this file) -- the
 * documentation is omitted from this listing; confirm.
 */
3134 U_STABLE void U_EXPORT2
3135 u_getUnicodeVersion(UVersionInfo versionArray);
3136 
/*
 * u_getFC_NFKC_Closure: get the FC_NFKC_Closure property string for the
 * character c. The string is written to the caller's UChar buffer dest of
 * capacity destCapacity; errors are reported through pErrorCode.
 * Declared only when UCONFIG_NO_NORMALIZATION is not set.
 * NOTE(review): presumably the int32_t result is the string length --
 * confirm in the full header.
 */
3137 #if !UCONFIG_NO_NORMALIZATION
3138 
3159 U_STABLE int32_t U_EXPORT2
3160 u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);
3161 
3162 #endif
3163 
3164 
3166 
3167 #endif /* UCHAR_H */
3168 /*eof*/
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.
Definition: uversion.h:57
Binary property Ideographic.
Definition: uchar.h:248
Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
Definition: uchar.h:1138
Binary property Changes_When_Lowercased.
Definition: uchar.h:400
Binary property IDS_Binary_Operator (new in Unicode 3.2).
Definition: uchar.h:252
Binary property Case_Ignorable.
Definition: uchar.h:398
UBool u_isUWhiteSpace(UChar32 c)
Check if a code point has the White_Space Unicode property.
UBool u_istitle(UChar32 c)
Determines whether the specified code point is a titlecase letter.
Enumerated property NFC_Quick_Check.
Definition: uchar.h:461
See note !!.
Definition: uchar.h:776
UChar32 u_totitle(UChar32 c)
The given character is mapped to its titlecase equivalent according to UnicodeData.txt; if none is defined, the character itself is returned.
Provisional property Script_Extensions (new in Unicode 6.0).
Definition: uchar.h:570
const char * u_getPropertyName(UProperty property, UPropertyNameChoice nameChoice)
Return the Unicode name for a given property, as given in the Unicode database file PropertyAliases...
Same as UBLOCK_PRIVATE_USE_AREA.
Definition: uchar.h:1072
UChar32 u_foldCase(UChar32 c, uint32_t options)
The given character is mapped to its case folding equivalent according to UnicodeData.txt and CaseFolding.txt; if the character has no case folding equivalent, the character itself is returned.
First constant for enumerated/integer Unicode properties.
Definition: uchar.h:418
Binary property XID_Start.
Definition: uchar.h:305
Binary property Join_Control.
Definition: uchar.h:259
Binary property Logical_Order_Exception (new in Unicode 3.2).
Definition: uchar.h:263
Binary property White_Space.
Definition: uchar.h:298
String property Titlecase_Mapping.
Definition: uchar.h:553
One more than the last constant for enumerated/integer Unicode properties.
Definition: uchar.h:495
Enumerated property Numeric_Type.
Definition: uchar.h:446
Binary property xdigit (a C/POSIX character class).
Definition: uchar.h:394
UBlockCode ublock_getCode(UChar32 c)
Returns the Unicode allocation block that contains the character.
Binary property Alphabetic.
Definition: uchar.h:186
First constant for double Unicode properties.
Definition: uchar.h:515
UBool UEnumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length)
Type of a callback function for u_enumCharNames() that gets called for each Unicode character with th...
Definition: uchar.h:2628
UBool u_isgraph(UChar32 c)
Determines whether the specified code point is a "graphic" character (printable, excluding spaces)...
Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNE...
Definition: uchar.h:593
String property Simple_Case_Folding.
Definition: uchar.h:541
Binary property NFC_Inert.
Definition: uchar.h:341
Binary property graph (a C/POSIX character class).
Definition: uchar.h:384
String property Bidi_Mirroring_Glyph.
Definition: uchar.h:526
One more than the last constant for bit-mask Unicode properties.
Definition: uchar.h:509
UBool u_isdefined(UChar32 c)
Determines whether the specified code point is "defined", which usually means that it is assigned a c...
Enumerated property Block.
Definition: uchar.h:421
Represents a nonexistent or invalid property or property value.
Definition: uchar.h:578
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: utypes.h:287
Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0.
Definition: uchar.h:1660
Binary property Radical (new in Unicode 3.2).
Definition: uchar.h:278
UCharDirection
This specifies the language directional property of a character set.
Definition: uchar.h:772
Binary property IDS_Trinary_Operator (new in Unicode 3.2).
Definition: uchar.h:256
Binary property Grapheme_Link (new in Unicode 3.2).
Definition: uchar.h:230
Enumerated property Decomposition_Type.
Definition: uchar.h:427
String property Case_Folding.
Definition: uchar.h:529
String property Name.
Definition: uchar.h:538
String property Simple_Uppercase_Mapping.
Definition: uchar.h:550
UChar32 u_forDigit(int32_t digit, int8_t radix)
Determines the character representation for a specific digit in the specified radix.
Enumerated property Bidi_Class.
Definition: uchar.h:416
int32_t u_charDigitValue(UChar32 c)
Returns the decimal digit value of a decimal digit character.
Enumerated property General_Category.
Definition: uchar.h:434
Sm.
Definition: uchar.h:641
String property Unicode_1_Name.
Definition: uchar.h:556
int32_t u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Get the FC_NFKC_Closure property string for a character.
UNumericType
Numeric Type constants.
Definition: uchar.h:1691
Binary property Pattern_White_Space (new in Unicode 4.1).
Definition: uchar.h:369
UBool u_iscntrl(UChar32 c)
Determines whether the specified code point is a control character (as defined by this function)...
UBool UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type)
Callback from u_enumCharTypes(), is called for each contiguous range of code points c (where start<=c...
Definition: uchar.h:2443
Binary property Changes_When_Casefolded.
Definition: uchar.h:406
Binary property NFD_Inert.
Definition: uchar.h:327
Binary property Diacritic.
Definition: uchar.h:211
Binary property Terminal_Punctuation.
Definition: uchar.h:287
UChar32 u_charFromName(UCharNameChoice nameChoice, const char *name, UErrorCode *pErrorCode)
Find a Unicode character by its name and return its code point value.
UBool u_isUAlphabetic(UChar32 c)
Check if a code point has the Alphabetic Unicode property.
Enumerated property NFD_Quick_Check.
Definition: uchar.h:455
void u_charAge(UChar32 c, UVersionInfo versionArray)
Get the "age" of the code point.
int32_t u_getPropertyValueEnum(UProperty property, const char *alias)
Return the property value integer for a given value name, as specified in the Unicode database file P...
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:110
Binary property STerm (new in Unicode 4.0.1).
Definition: uchar.h:314
Enumerated property Joining_Group.
Definition: uchar.h:437
Binary property ID_Continue.
Definition: uchar.h:241
Binary property blank (a C/POSIX character class).
Definition: uchar.h:379
Binary property Quotation_Mark.
Definition: uchar.h:274
Binary property Changes_When_NFKC_Casefolded.
Definition: uchar.h:410
First constant for binary Unicode properties.
Definition: uchar.h:188
Binary property Noncharacter_Code_Point.
Definition: uchar.h:272
Binary property Hyphen.
Definition: uchar.h:236
Enumerated property East_Asian_Width.
Definition: uchar.h:431
ULineBreak
Line Break constants.
Definition: uchar.h:1642
Binary property Full_Composition_Exclusion.
Definition: uchar.h:219
Bitmask property General_Category_Mask.
Definition: uchar.h:505
String property Simple_Titlecase_Mapping.
Definition: uchar.h:547
Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
Definition: uchar.h:959
int32_t u_digit(UChar32 ch, int8_t radix)
Returns the decimal digit value of the code point in the specified radix.
UDecompositionType
Decomposition Type constants.
Definition: uchar.h:1459
UBool u_isprint(UChar32 c)
Determines whether the specified code point is a printable character.
UBool u_isxdigit(UChar32 c)
Determines whether the specified code point is a hexadecimal digit.
UHangulSyllableType
Hangul Syllable Type constants.
Definition: uchar.h:1705
String property Simple_Lowercase_Mapping.
Definition: uchar.h:544
Binary property print (a C/POSIX character class).
Definition: uchar.h:389
Binary property Case_Sensitive.
Definition: uchar.h:309
Binary property Bidi_Mirrored.
Definition: uchar.h:199
Binary property NFKC_Inert.
Definition: uchar.h:348
int32_t u_getIntPropertyValue(UChar32 c, UProperty which)
Get the property value for an enumerated or integer Unicode property for a code point.
Binary property Changes_When_Casemapped.
Definition: uchar.h:408
First constant for string Unicode properties.
Definition: uchar.h:523
Binary property Grapheme_Extend (new in Unicode 3.2).
Definition: uchar.h:227
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:345
UGraphemeClusterBreak
Grapheme Cluster Break constants.
Definition: uchar.h:1571
New No_Block value in Unicode 4.
Definition: uchar.h:824
Binary property Extender.
Definition: uchar.h:215
Double property Numeric_Value.
Definition: uchar.h:513
Binary property Math.
Definition: uchar.h:268
Binary property Grapheme_Base (new in Unicode 3.2).
Definition: uchar.h:223
Binary property NFKD_Inert.
Definition: uchar.h:334
uint8_t u_getCombiningClass(UChar32 c)
Returns the combining class of the code point as specified in UnicodeData.txt.
UBool u_isbase(UChar32 c)
Determines whether the specified code point is a base character.
UCharCategory
Data for enumerated Unicode general category types.
Definition: uchar.h:586
const char * u_getPropertyValueName(UProperty property, int32_t value, UPropertyNameChoice nameChoice)
Return the Unicode name for a given property value, as given in the Unicode database file PropertyVal...
Enumerated property Sentence_Break (new in Unicode 4.1).
Definition: uchar.h:488
double u_getNumericValue(UChar32 c)
Get the numeric value for a Unicode code point as defined in the Unicode Character Database...
Binary property Lowercase.
Definition: uchar.h:266
UBool u_isJavaIDStart(UChar32 c)
Determines if the specified character is permissible as the first character in a Java identifier...
First constant for bit-mask Unicode properties.
Definition: uchar.h:507
#define U_EXPORT2
Definition: platform.h:314
UBool u_isspace(UChar32 c)
Determines if the specified character is a space character or not.
USentenceBreak
Sentence Break constants.
Definition: uchar.h:1617
Binary property Unified_Ideograph (new in Unicode 3.2).
Definition: uchar.h:291
Enumerated property Canonical_Combining_Class.
Definition: uchar.h:424
UCharNameChoice
Selector constants for u_charName().
Definition: uchar.h:1426
One more than the last constant for binary Unicode properties.
Definition: uchar.h:412
UBool u_isJavaIDPart(UChar32 c)
Determines if the specified character is permissible in a Java identifier.
Enumerated property Script.
Definition: uchar.h:449
Unicode 3.2 renames this block to "Greek and Coptic".
Definition: uchar.h:851
Binary property Hex_Digit.
Definition: uchar.h:233
String property Uppercase_Mapping.
Definition: uchar.h:559
UPropertyNameChoice
Selector constants for u_getPropertyName() and u_getPropertyValueName().
Definition: uchar.h:1447
String property Lowercase_Mapping.
Definition: uchar.h:535
UCharDirection u_charDirection(UChar32 c)
Returns the bidirectional category value for the code point, which is used in the Unicode bidirection...
UBool u_islower(UChar32 c)
Determines whether the specified code point has the general category "Ll" (lowercase letter)...
uint16_t UChar
Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:325
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:111
Enumerated property NFKC_Quick_Check.
Definition: uchar.h:464
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:174
void u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context)
Enumerate efficiently all code points with their Unicode general categories.
Enumerated property Hangul_Syllable_Type, new in Unicode 4.
Definition: uchar.h:452
Binary property Dash.
Definition: uchar.h:201
Binary property alnum (a C/POSIX character class).
Definition: uchar.h:374
Cf.
Definition: uchar.h:625
Binary property Variation_Selector (new in Unicode 4.0.1).
Definition: uchar.h:320
UBool u_isUUppercase(UChar32 c)
Check if a code point has the Uppercase Unicode property.
UBlockCode
Constants for Unicode blocks, see the Unicode Data file Blocks.txt.
Definition: uchar.h:821
Enumerated property Word_Break (new in Unicode 4.1).
Definition: uchar.h:493
Binary property Deprecated (new in Unicode 3.2).
Definition: uchar.h:208
Binary property Bidi_Control.
Definition: uchar.h:194
Binary property XID_Continue.
Definition: uchar.h:302
Same as UBLOCK_PRIVATE_USE.
Definition: uchar.h:1082
UBool u_hasBinaryProperty(UChar32 c, UProperty which)
Check a binary Unicode property for a code point.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:639
Binary property Uppercase.
Definition: uchar.h:294
void u_getUnicodeVersion(UVersionInfo versionArray)
Gets the Unicode version information.
Binary property Changes_When_Uppercased.
Definition: uchar.h:402
UJoiningGroup
Joining Group constants.
Definition: uchar.h:1503
Binary property Cased.
Definition: uchar.h:396
Cs.
Definition: uchar.h:629
UEastAsianWidth
East Asian Width constants.
Definition: uchar.h:1401
UBool u_isupper(UChar32 c)
Determines whether the specified code point has the general category "Lu" (uppercase letter)...
Enumerated property Trail_Canonical_Combining_Class.
Definition: uchar.h:478
UBool u_isULowercase(UChar32 c)
Check if a code point has the Lowercase Unicode property.
See note !!.
Definition: uchar.h:591
UBool u_ispunct(UChar32 c)
Determines whether the specified code point is a punctuation character.
First constant for Unicode properties with unusual value types.
Definition: uchar.h:572
UWordBreakValues
Word Break constants.
Definition: uchar.h:1594
void u_enumCharNames(UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice, UErrorCode *pErrorCode)
Enumerate all assigned Unicode characters between the start and limit code points (start inclusive...
int32_t u_getIntPropertyMaxValue(UProperty which)
Get the maximum value for an enumerated/integer/binary Unicode property.
Enumerated property Joining_Type.
Definition: uchar.h:440
One more than the last constant for double Unicode properties.
Definition: uchar.h:517
Basic definitions for ICU, for both C and C++ APIs.
UBool u_isIDPart(UChar32 c)
Determines if the specified character is permissible in an identifier according to Java...
Enumerated property Lead_Canonical_Combining_Class.
Definition: uchar.h:471
Binary property ASCII_Hex_Digit.
Definition: uchar.h:190
UJoiningType
Joining Type constants.
Definition: uchar.h:1487
UBool u_isIDStart(UChar32 c)
Determines if the specified character is permissible as the first character in an identifier accordin...
Binary property Soft_Dotted (new in Unicode 3.2).
Definition: uchar.h:283
One more than the last constant for string Unicode properties.
Definition: uchar.h:561
Binary property Segment_Starter.
Definition: uchar.h:359
UChar32 u_toupper(UChar32 c)
The given character is mapped to its uppercase equivalent according to UnicodeData.txt; if the character has no uppercase equivalent, the character itself is returned.
Binary property ID_Start.
Definition: uchar.h:245
int32_t u_getIntPropertyMinValue(UProperty which)
Get the minimum value for an enumerated/integer/binary Unicode property.
UBool u_isJavaSpaceChar(UChar32 c)
Determine if the specified code point is a space character according to Java.
UBool u_isMirrored(UChar32 c)
Determines whether the code point has the Bidi_Mirrored property.
Corrected name from NameAliases.txt.
Definition: uchar.h:1430
Binary property Changes_When_Titlecased.
Definition: uchar.h:404
UChar32 u_tolower(UChar32 c)
The given character is mapped to its lowercase equivalent according to UnicodeData.txt; if the character has no lowercase equivalent, the character itself is returned.
Enumerated property Line_Break.
Definition: uchar.h:443
UBool u_isIDIgnorable(UChar32 c)
Determines if the specified character should be regarded as an ignorable character in an identifier...
Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
Definition: uchar.h:483
UBool u_isdigit(UChar32 c)
Determines whether the specified code point is a digit character according to Java.
UProperty u_getPropertyEnum(const char *alias)
Return the UProperty enum for a given property name, as specified in the Unicode database file Proper...
UBool u_isWhitespace(UChar32 c)
Determines if the specified code point is a whitespace character according to Java/ICU.
String property ISO_Comment.
Definition: uchar.h:532
UBool u_isblank(UChar32 c)
Determines whether the specified code point is a "blank" or "horizontal space", a character that visi...
UBool u_isalpha(UChar32 c)
Determines whether the specified code point is a letter character.
Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
Definition: uchar.h:205
int32_t u_getISOComment(UChar32 c, char *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Get the ISO 10646 comment for a character.
One higher than the last enum UCharCategory constant.
Definition: uchar.h:653
UBool u_isalnum(UChar32 c)
Determines whether the specified code point is an alphanumeric character (letter or digit) according ...
Binary property Pattern_Syntax (new in Unicode 4.1).
Definition: uchar.h:364
One more than the last constant for Unicode properties with unusual value types.
Definition: uchar.h:575
UChar32 u_charMirror(UChar32 c)
Maps the specified character to a "mirror-image" character.
UBool u_isISOControl(UChar32 c)
Determines whether the specified code point is an ISO control code.
int8_t u_charType(UChar32 c)
Returns the general category value for the code point.
int32_t u_charName(UChar32 code, UCharNameChoice nameChoice, char *buffer, int32_t bufferLength, UErrorCode *pErrorCode)
Retrieve the name of a Unicode character.
String property Age.
Definition: uchar.h:521
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:137
int8_t UBool
The ICU boolean type.
Definition: umachine.h:228
Enumerated property NFKD_Quick_Check.
Definition: uchar.h:458