ICU 4.8.1.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ustring.h
Go to the documentation of this file.
1 /*
2 **********************************************************************
3 * Copyright (C) 1998-2010, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * File ustring.h
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 12/07/98 bertrand Creation.
13 ******************************************************************************
14 */
15 
16 #ifndef USTRING_H
17 #define USTRING_H
18 
19 #include "unicode/utypes.h"
20 #include "unicode/putil.h"
21 #include "unicode/uiter.h"
22 
/*
 * Forward declaration of UBreakIterator, the opaque type representing an ICU
 * Break iterator object. The guard macro keeps the typedef from being
 * emitted a second time when it has already been defined elsewhere.
 */
24 #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
25 # define UBRK_TYPEDEF_UBREAK_ITERATOR
26  typedef struct UBreakIterator UBreakIterator;
27 #endif
28 
/* Determine the length of an array of UChar. */
85 U_STABLE int32_t U_EXPORT2
86 u_strlen(const UChar *s);
/* Count Unicode code points in the length UChar code units of the string. */
102 U_STABLE int32_t U_EXPORT2
103 u_countChar32(const UChar *s, int32_t length);
104 
/* Check if the string contains more Unicode code points than a certain number. */
123 U_STABLE UBool U_EXPORT2
124 u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
125 
/* Concatenate two ustrings. */
136 U_STABLE UChar* U_EXPORT2
137 u_strcat(UChar *dst,
138  const UChar *src);
139 
/* Concatenate two ustrings; n is the additional length argument. */
154 U_STABLE UChar* U_EXPORT2
155 u_strncat(UChar *dst,
156  const UChar *src,
157  int32_t n);
158 
/* Find the first occurrence of a substring in a string. */
179 U_STABLE UChar* U_EXPORT2
180 u_strstr(const UChar *s, const UChar *substring);
181 
/* Find the first occurrence of a substring in a string. */
203 U_STABLE UChar* U_EXPORT2
204 u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
205 
/* Find the first occurrence of a BMP code point in a string. */
223 U_STABLE UChar* U_EXPORT2
224 u_strchr(const UChar *s, UChar c);
225 
/* Find the first occurrence of a code point in a string. */
243 U_STABLE UChar* U_EXPORT2
244 u_strchr32(const UChar *s, UChar32 c);
245 
/* Find the last occurrence of a substring in a string. */
266 U_STABLE UChar* U_EXPORT2
267 u_strrstr(const UChar *s, const UChar *substring);
268 
/* Find the last occurrence of a substring in a string. */
290 U_STABLE UChar* U_EXPORT2
291 u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
292 
/* Find the last occurrence of a BMP code point in a string. */
310 U_STABLE UChar* U_EXPORT2
311 u_strrchr(const UChar *s, UChar c);
312 
/* Find the last occurrence of a code point in a string. */
330 U_STABLE UChar* U_EXPORT2
331 u_strrchr32(const UChar *s, UChar32 c);
332 
/*
 * Locate the first occurrence in the string `string` of any of the
 * characters in the string `matchSet`.
 */
345 U_STABLE UChar* U_EXPORT2
346 u_strpbrk(const UChar *string, const UChar *matchSet);
347 
/*
 * Return the number of consecutive characters in `string`, beginning with
 * the first, that do not occur in `matchSet`.
 * NOTE(review): the summary in this page's index is truncated; "in matchSet"
 * is assumed from the parameter name -- confirm against the full ICU docs.
 */
361 U_STABLE int32_t U_EXPORT2
362 u_strcspn(const UChar *string, const UChar *matchSet);
363 
/*
 * Return the number of consecutive characters in `string`, beginning with
 * the first, that occur in `matchSet`.
 * NOTE(review): the summary in this page's index is truncated; "in matchSet"
 * is assumed from the parameter name -- confirm against the full ICU docs.
 */
377 U_STABLE int32_t U_EXPORT2
378 u_strspn(const UChar *string, const UChar *matchSet);
379 
/*
 * String tokenizer API: allows an application to break a string into
 * tokens. Takes the source string, the delimiter string, and a
 * caller-supplied UChar* slot (saveState).
 */
405 U_STABLE UChar* U_EXPORT2
406 u_strtok_r(UChar *src,
407  const UChar *delim,
408  UChar **saveState);
409 
/* Compare two Unicode strings for bitwise equality (code unit order). */
420 U_STABLE int32_t U_EXPORT2
421 u_strcmp(const UChar *s1,
422  const UChar *s2);
423 
/* Compare two Unicode strings in code point order. */
435 U_STABLE int32_t U_EXPORT2
436 u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
437 
/*
 * Compare two Unicode strings (binary order). Each string is passed with an
 * explicit length; codePointOrder is a UBool flag.
 */
465 U_STABLE int32_t U_EXPORT2
466 u_strCompare(const UChar *s1, int32_t length1,
467  const UChar *s2, int32_t length2,
468  UBool codePointOrder);
469 
/*
 * Compare two Unicode strings (binary order) as presented by UCharIterator
 * objects.
 */
490 U_STABLE int32_t U_EXPORT2
491 u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
492 
/*
 * Define U_COMPARE_CODE_POINT_ORDER as 0x8000 unless it has already been
 * defined; the existing comment below points at unistr.h and unorm.h.
 * NOTE(review): presumably an option bit for the `options` parameters of the
 * case-insensitive comparison functions -- confirm against the ICU docs.
 */
493 #ifndef U_COMPARE_CODE_POINT_ORDER
494 /* see also unistr.h and unorm.h */
500 #define U_COMPARE_CODE_POINT_ORDER 0x8000
501 #endif
502 
/*
 * Compare two strings case-insensitively using full case folding.
 * Each string is passed with an explicit length; an error code is reported
 * through pErrorCode.
 */
543 U_STABLE int32_t U_EXPORT2
544 u_strCaseCompare(const UChar *s1, int32_t length1,
545  const UChar *s2, int32_t length2,
546  uint32_t options,
547  UErrorCode *pErrorCode);
548 
/* Compare two ustrings for bitwise equality; n is the length argument. */
561 U_STABLE int32_t U_EXPORT2
562 u_strncmp(const UChar *ucs1,
563  const UChar *ucs2,
564  int32_t n);
565 
/* Compare two Unicode strings in code point order; n is the length argument. */
579 U_STABLE int32_t U_EXPORT2
580 u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
581 
/* Compare two strings case-insensitively using full case folding. */
601 U_STABLE int32_t U_EXPORT2
602 u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
603 
/*
 * Compare two strings case-insensitively using full case folding;
 * n is the length argument.
 */
625 U_STABLE int32_t U_EXPORT2
626 u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
627 
/*
 * Compare two strings case-insensitively using full case folding;
 * both strings share the single `length` argument.
 */
649 U_STABLE int32_t U_EXPORT2
650 u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
651 
/* Copy a ustring. */
660 U_STABLE UChar* U_EXPORT2
661 u_strcpy(UChar *dst,
662  const UChar *src);
663 
/* Copy a ustring; n is the length argument. */
675 U_STABLE UChar* U_EXPORT2
676 u_strncpy(UChar *dst,
677  const UChar *src,
678  int32_t n);
679 
680 #if !UCONFIG_NO_CONVERSION
681 
/* Copy a byte string encoded in the default codepage to a ustring. */
692 U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
693  const char *src );
694 
/*
 * Copy a byte string encoded in the default codepage to a ustring;
 * n is the length argument.
 */
707 U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
708  const char *src,
709  int32_t n);
710 
/* Copy ustring to a byte string encoded in the default codepage. */
721 U_STABLE char* U_EXPORT2 u_austrcpy(char *dst,
722  const UChar *src );
723 
/*
 * Copy ustring to a byte string encoded in the default codepage;
 * n is the length argument.
 */
736 U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,
737  const UChar *src,
738  int32_t n );
739 
740 #endif
741 
/* Synonym for memcpy(), but with UChars only. */
750 U_STABLE UChar* U_EXPORT2
751 u_memcpy(UChar *dest, const UChar *src, int32_t count);
752 
/* Synonym for memmove(), but with UChars only. */
761 U_STABLE UChar* U_EXPORT2
762 u_memmove(UChar *dest, const UChar *src, int32_t count);
763 
/* Initialize count characters of dest to c. */
773 U_STABLE UChar* U_EXPORT2
774 u_memset(UChar *dest, UChar c, int32_t count);
775 
/* Compare the first count UChars of each buffer. */
787 U_STABLE int32_t U_EXPORT2
788 u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);
789 
/* Compare two Unicode strings in code point order; count is the length argument. */
803 U_STABLE int32_t U_EXPORT2
804 u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);
805 
/* Find the first occurrence of a BMP code point in a string. */
823 U_STABLE UChar* U_EXPORT2
824 u_memchr(const UChar *s, UChar c, int32_t count);
825 
/* Find the first occurrence of a code point in a string. */
843 U_STABLE UChar* U_EXPORT2
844 u_memchr32(const UChar *s, UChar32 c, int32_t count);
845 
/* Find the last occurrence of a BMP code point in a string. */
863 U_STABLE UChar* U_EXPORT2
864 u_memrchr(const UChar *s, UChar c, int32_t count);
865 
/* Find the last occurrence of a code point in a string. */
883 U_STABLE UChar* U_EXPORT2
884 u_memrchr32(const UChar *s, UChar32 c, int32_t count);
885 
/*
 * U_STRING_DECL(var, cs, length) declares a static UChar array `var` of
 * (length)+1 elements for the string literal `cs`; U_STRING_INIT(var, cs,
 * length) performs whatever runtime initialization that array still needs.
 *
 * Four configurations are handled:
 *  - U_DECLARE_UTF16 is defined: the array is const and initialized with
 *    U_DECLARE_UTF16(cs); U_STRING_INIT expands to nothing.
 *  - wchar_t has the same size as UChar, and either the charset family is
 *    ASCII or UChar is 2 bytes with U_WCHAR_IS_UTF16 defined: the array is
 *    const and initialized with the wide literal L ## cs; U_STRING_INIT
 *    expands to nothing.
 *  - UChar is 1 byte and the charset family is ASCII: the array is const and
 *    initialized with cs itself; U_STRING_INIT expands to nothing.
 *  - otherwise: the array is non-const and uninitialized, and U_STRING_INIT
 *    fills it by calling u_charsToUChars(cs, var, (length)+1).
 *
 * `length` is parenthesized in every expansion so that an expression
 * argument (e.g. `n & 3`) is not regrouped by the trailing `+1`.
 */
936 #if defined(U_DECLARE_UTF16)
937 # define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=U_DECLARE_UTF16(cs)
938 
939 # define U_STRING_INIT(var, cs, length)
940 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
941 # define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs
942 
943 # define U_STRING_INIT(var, cs, length)
944 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
945 # define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
946 
947 # define U_STRING_INIT(var, cs, length)
948 #else
949 # define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
950 
951 # define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, (length)+1)
952 #endif
953 
/*
 * Unescape a string of characters and write the resulting Unicode characters
 * to the destination buffer.
 */
1001 U_STABLE int32_t U_EXPORT2
1002 u_unescape(const char *src,
1003  UChar *dest, int32_t destCapacity);
1004 
1005 U_CDECL_BEGIN
/*
 * Callback function type for u_unescapeAt(): returns a character of the
 * source text given an offset and a context pointer. U_CALLCONV is the
 * qualifier callback typedefs need, and the typedef is wrapped in
 * U_CDECL_BEGIN / U_CDECL_END.
 * NOTE(review): the U_CDECL_BEGIN / U_CDECL_END lines were missing from this
 * listing and are restored from the page's index; their exact listing line
 * numbers (1005 / 1019) are inferred -- confirm against the original header.
 */
1018 typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
1019 U_CDECL_END
1020 
/*
 * Unescape a single sequence. Source characters are obtained through the
 * charAt callback, which receives an offset and the caller's context pointer.
 */
1049 U_STABLE UChar32 U_EXPORT2
1050 u_unescapeAt(UNESCAPE_CHAR_AT charAt,
1051  int32_t *offset,
1052  int32_t length,
1053  void *context);
1054 
/*
 * Uppercase the characters in a string. Takes a destination buffer with its
 * capacity, the source with its length, a locale ID string, and an error
 * code pointer.
 */
1075 U_STABLE int32_t U_EXPORT2
1076 u_strToUpper(UChar *dest, int32_t destCapacity,
1077  const UChar *src, int32_t srcLength,
1078  const char *locale,
1079  UErrorCode *pErrorCode);
1080 
/*
 * Lowercase the characters in a string. Takes a destination buffer with its
 * capacity, the source with its length, a locale ID string, and an error
 * code pointer.
 */
1101 U_STABLE int32_t U_EXPORT2
1102 u_strToLower(UChar *dest, int32_t destCapacity,
1103  const UChar *src, int32_t srcLength,
1104  const char *locale,
1105  UErrorCode *pErrorCode);
1106 
1107 #if !UCONFIG_NO_BREAK_ITERATION
1108 
/*
 * Titlecase a string. In addition to the destination/source buffers, locale
 * and error code, it takes a UBreakIterator (titleIter). The declaration is
 * only compiled when UCONFIG_NO_BREAK_ITERATION is off.
 */
1147 U_STABLE int32_t U_EXPORT2
1148 u_strToTitle(UChar *dest, int32_t destCapacity,
1149  const UChar *src, int32_t srcLength,
1150  UBreakIterator *titleIter,
1151  const char *locale,
1152  UErrorCode *pErrorCode);
1153 
1154 #endif
1155 
/*
 * Case-fold the characters in a string. Takes a destination buffer with its
 * capacity, the source with its length, an options word, and an error code
 * pointer.
 */
1178 U_STABLE int32_t U_EXPORT2
1179 u_strFoldCase(UChar *dest, int32_t destCapacity,
1180  const UChar *src, int32_t srcLength,
1181  uint32_t options,
1182  UErrorCode *pErrorCode);
1183 
1184 #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
1185 
/*
 * Convert a UTF-16 string to a wchar_t string. Only declared when
 * U_WCHAR_IS_UTF16 or U_WCHAR_IS_UTF32 is defined, or conversion is enabled
 * (!UCONFIG_NO_CONVERSION).
 */
1207 U_STABLE wchar_t* U_EXPORT2
1208 u_strToWCS(wchar_t *dest,
1209  int32_t destCapacity,
1210  int32_t *pDestLength,
1211  const UChar *src,
1212  int32_t srcLength,
1213  UErrorCode *pErrorCode);
/*
 * Convert a wchar_t string to UTF-16. Only declared when U_WCHAR_IS_UTF16 or
 * U_WCHAR_IS_UTF32 is defined, or conversion is enabled
 * (!UCONFIG_NO_CONVERSION).
 */
1236 U_STABLE UChar* U_EXPORT2
1237 u_strFromWCS(UChar *dest,
1238  int32_t destCapacity,
1239  int32_t *pDestLength,
1240  const wchar_t *src,
1241  int32_t srcLength,
1242  UErrorCode *pErrorCode);
1243 #endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
1244 
/* Convert a UTF-16 string to UTF-8. */
1267 U_STABLE char* U_EXPORT2
1268 u_strToUTF8(char *dest,
1269  int32_t destCapacity,
1270  int32_t *pDestLength,
1271  const UChar *src,
1272  int32_t srcLength,
1273  UErrorCode *pErrorCode);
1274 
/* Convert a UTF-8 string to UTF-16. */
1297 U_STABLE UChar* U_EXPORT2
1298 u_strFromUTF8(UChar *dest,
1299  int32_t destCapacity,
1300  int32_t *pDestLength,
1301  const char *src,
1302  int32_t srcLength,
1303  UErrorCode *pErrorCode);
1304 
/*
 * Convert a UTF-16 string to UTF-8. This variant additionally takes a
 * substitution code point (subchar) and an optional output counter
 * (pNumSubstitutions).
 */
1341 U_STABLE char* U_EXPORT2
1342 u_strToUTF8WithSub(char *dest,
1343  int32_t destCapacity,
1344  int32_t *pDestLength,
1345  const UChar *src,
1346  int32_t srcLength,
1347  UChar32 subchar, int32_t *pNumSubstitutions,
1348  UErrorCode *pErrorCode);
1349 
/*
 * Convert a UTF-8 string to UTF-16. This variant additionally takes a
 * substitution code point (subchar) and an output counter
 * (pNumSubstitutions).
 */
1387 U_STABLE UChar* U_EXPORT2
1388 u_strFromUTF8WithSub(UChar *dest,
1389  int32_t destCapacity,
1390  int32_t *pDestLength,
1391  const char *src,
1392  int32_t srcLength,
1393  UChar32 subchar, int32_t *pNumSubstitutions,
1394  UErrorCode *pErrorCode);
1395 
/* Convert a UTF-8 string to UTF-16 (the "lenient" variant). */
1447 U_STABLE UChar* U_EXPORT2
1448 u_strFromUTF8Lenient(UChar *dest,
1449  int32_t destCapacity,
1450  int32_t *pDestLength,
1451  const char *src,
1452  int32_t srcLength,
1453  UErrorCode *pErrorCode);
1454 
/* Convert a UTF-16 string to UTF-32. */
1477 U_STABLE UChar32* U_EXPORT2
1478 u_strToUTF32(UChar32 *dest,
1479  int32_t destCapacity,
1480  int32_t *pDestLength,
1481  const UChar *src,
1482  int32_t srcLength,
1483  UErrorCode *pErrorCode);
1484 
/* Convert a UTF-32 string to UTF-16. */
1507 U_STABLE UChar* U_EXPORT2
1508 u_strFromUTF32(UChar *dest,
1509  int32_t destCapacity,
1510  int32_t *pDestLength,
1511  const UChar32 *src,
1512  int32_t srcLength,
1513  UErrorCode *pErrorCode);
1514 
/*
 * Convert a UTF-16 string to UTF-32. This variant additionally takes a
 * substitution code point (subchar) and an output counter
 * (pNumSubstitutions).
 */
1551 U_STABLE UChar32* U_EXPORT2
1552 u_strToUTF32WithSub(UChar32 *dest,
1553  int32_t destCapacity,
1554  int32_t *pDestLength,
1555  const UChar *src,
1556  int32_t srcLength,
1557  UChar32 subchar, int32_t *pNumSubstitutions,
1558  UErrorCode *pErrorCode);
1559 
/*
 * Convert a UTF-32 string to UTF-16. This variant additionally takes a
 * substitution code point (subchar) and an output counter
 * (pNumSubstitutions).
 */
1596 U_STABLE UChar* U_EXPORT2
1597 u_strFromUTF32WithSub(UChar *dest,
1598  int32_t destCapacity,
1599  int32_t *pDestLength,
1600  const UChar32 *src,
1601  int32_t srcLength,
1602  UChar32 subchar, int32_t *pNumSubstitutions,
1603  UErrorCode *pErrorCode);
1604 
/* Convert a 16-bit Unicode string to Java Modified UTF-8. */
1637 U_STABLE char* U_EXPORT2
1638 u_strToJavaModifiedUTF8(
1639  char *dest,
1640  int32_t destCapacity,
1641  int32_t *pDestLength,
1642  const UChar *src,
1643  int32_t srcLength,
1644  UErrorCode *pErrorCode);
1645 
/*
 * Convert a Java Modified UTF-8 string to a 16-bit Unicode string. Takes a
 * substitution code point (subchar) and an output counter
 * (pNumSubstitutions).
 */
1686 U_STABLE UChar* U_EXPORT2
1687 u_strFromJavaModifiedUTF8WithSub(
1688  UChar *dest,
1689  int32_t destCapacity,
1690  int32_t *pDestLength,
1691  const char *src,
1692  int32_t srcLength,
1693  UChar32 subchar, int32_t *pNumSubstitutions,
1694  UErrorCode *pErrorCode);
1695 
1696 #endif
UChar * u_strtok_r(UChar *src, const UChar *delim, UChar **saveState)
The string tokenizer API allows an application to break a string into tokens.
struct UBreakIterator UBreakIterator
Opaque type representing an ICU Break iterator object.
Definition: ubrk.h:26
UChar * u_strFromJavaModifiedUTF8WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a Java Modified UTF-8 string to a 16-bit Unicode string.
UChar * u_memchr(const UChar *s, UChar c, int32_t count)
Find the first occurrence of a BMP code point in a string.
int32_t u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n)
Compare two Unicode strings in code point order.
UChar * u_strFromWCS(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const wchar_t *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a wchar_t string to UTF-16.
UChar * u_strFromUTF8Lenient(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-8 string to UTF-16.
UChar * u_strFromUTF32WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a UTF-32 string to UTF-16.
UChar * u_strrchr(const UChar *s, UChar c)
Find the last occurrence of a BMP code point in a string.
UChar32 * u_strToUTF32(UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-16 string to UTF-32.
int32_t u_strToTitle(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UBreakIterator *titleIter, const char *locale, UErrorCode *pErrorCode)
Titlecase a string.
UChar * u_strncpy(UChar *dst, const UChar *src, int32_t n)
Copy a ustring.
int32_t u_countChar32(const UChar *s, int32_t length)
Count Unicode code points in the length UChar code units of the string.
UChar * u_memcpy(UChar *dest, const UChar *src, int32_t count)
Synonym for memcpy(), but with UChars only.
char * u_strToUTF8(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-16 string to UTF-8.
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: utypes.h:287
int32_t u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count)
Compare two Unicode strings in code point order.
int32_t u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options)
Compare two strings case-insensitively using full case folding.
UCharIterator
C API for code unit iteration.
Definition: uiter.h:339
int32_t u_strcmpCodePointOrder(const UChar *s1, const UChar *s2)
Compare two Unicode strings in code point order.
UChar * u_strchr32(const UChar *s, UChar32 c)
Find the first occurrence of a code point in a string.
int32_t u_strFoldCase(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, uint32_t options, UErrorCode *pErrorCode)
Case-fold the characters in a string.
UChar32 * u_strToUTF32WithSub(UChar32 *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a UTF-16 string to UTF-32.
UBool u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number)
Check if the string contains more Unicode code points than a certain number.
int32_t u_strcspn(const UChar *string, const UChar *matchSet)
Returns the number of consecutive characters in string, beginning with the first, that do not occur s...
char * u_austrncpy(char *dst, const UChar *src, int32_t n)
Copy ustring to a byte string encoded in the default codepage.
wchar_t * u_strToWCS(wchar_t *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-16 string to a wchar_t string.
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:110
int32_t u_strCaseCompare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
Compare two strings case-insensitively using full case folding.
UChar * u_memset(UChar *dest, UChar c, int32_t count)
Initialize count characters of dest to c.
UChar * u_uastrcpy(UChar *dst, const char *src)
Copy a byte string encoded in the default codepage to a ustring.
int32_t u_strcmp(const UChar *s1, const UChar *s2)
Compare two Unicode strings for bitwise equality (code unit order).
int32_t u_strCompare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, UBool codePointOrder)
Compare two Unicode strings (binary order).
UChar * u_strncat(UChar *dst, const UChar *src, int32_t n)
Concatenate two ustrings.
UChar(* UNESCAPE_CHAR_AT)(int32_t offset, void *context)
Callback function for u_unescapeAt() that returns a character of the source text given an offset and ...
Definition: ustring.h:1018
char * u_strToUTF8WithSub(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a UTF-16 string to UTF-8.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:345
UChar * u_strchr(const UChar *s, UChar c)
Find the first occurrence of a BMP code point in a string.
UChar * u_memrchr(const UChar *s, UChar c, int32_t count)
Find the last occurrence of a BMP code point in a string.
putil.h
C API: Platform Utilities.
int32_t u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count)
Compare the first count UChars of each buffer.
UChar * u_strrchr32(const UChar *s, UChar32 c)
Find the last occurrence of a code point in a string.
#define U_EXPORT2
Definition: platform.h:314
UChar * u_strcpy(UChar *dst, const UChar *src)
Copy a ustring.
uiter.h
C API: Unicode Character Iteration.
UChar * u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength)
Find the last occurrence of a substring in a string.
UChar * u_memmove(UChar *dest, const UChar *src, int32_t count)
Synonym for memmove(), but with UChars only.
int32_t u_unescape(const char *src, UChar *dest, int32_t destCapacity)
Unescape a string of characters and write the resulting Unicode characters to the destination buffer...
int32_t u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder)
Compare two Unicode strings (binary order) as presented by UCharIterator objects. ...
int32_t u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options)
Compare two strings case-insensitively using full case folding.
UChar * u_strpbrk(const UChar *string, const UChar *matchSet)
Locates the first occurrence in the string string of any of the characters in the string matchSet...
UChar * u_strcat(UChar *dst, const UChar *src)
Concatenate two ustrings.
uint16_t UChar
Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:325
UChar * u_strstr(const UChar *s, const UChar *substring)
Find the first occurrence of a substring in a string.
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:111
int32_t u_strToLower(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)
Lowercase the characters in a string.
int32_t u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options)
Compare two strings case-insensitively using full case folding.
int32_t u_strncmp(const UChar *ucs1, const UChar *ucs2, int32_t n)
Compare two ustrings for bitwise equality.
UChar * u_strFromUTF32(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const UChar32 *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-32 string to UTF-16.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:639
char * u_strToJavaModifiedUTF8(char *dest, int32_t destCapacity, int32_t *pDestLength, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a 16-bit Unicode string to Java Modified UTF-8.
char * u_austrcpy(char *dst, const UChar *src)
Copy ustring to a byte string encoded in the default codepage.
UChar32 u_unescapeAt(UNESCAPE_CHAR_AT charAt, int32_t *offset, int32_t length, void *context)
Unescape a single sequence.
int32_t u_strspn(const UChar *string, const UChar *matchSet)
Returns the number of consecutive characters in string, beginning with the first, that occur somewher...
UChar * u_strFromUTF8(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UErrorCode *pErrorCode)
Convert a UTF-8 string to UTF-16.
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
int32_t u_strlen(const UChar *s)
Determine the length of an array of UChar.
UChar * u_memchr32(const UChar *s, UChar32 c, int32_t count)
Find the first occurrence of a code point in a string.
UChar * u_memrchr32(const UChar *s, UChar32 c, int32_t count)
Find the last occurrence of a code point in a string.
UChar * u_strrstr(const UChar *s, const UChar *substring)
Find the last occurrence of a substring in a string.
UChar * u_uastrncpy(UChar *dst, const char *src, int32_t n)
Copy a byte string encoded in the default codepage to a ustring.
UChar * u_strFromUTF8WithSub(UChar *dest, int32_t destCapacity, int32_t *pDestLength, const char *src, int32_t srcLength, UChar32 subchar, int32_t *pNumSubstitutions, UErrorCode *pErrorCode)
Convert a UTF-8 string to UTF-16.
int32_t u_strToUpper(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode)
Uppercase the characters in a string.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:137
int8_t UBool
The ICU boolean type.
Definition: umachine.h:228
UChar * u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength)
Find the first occurrence of a substring in a string.