ICU 4.8.1.1  4.8.1.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
utext.h
Go to the documentation of this file.
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2004-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: utext.h
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2004oct06
14 * created by: Markus W. Scherer
15 */
16 
17 #ifndef __UTEXT_H__
18 #define __UTEXT_H__
19 
138 #include "unicode/utypes.h"
139 #include "unicode/uchar.h"
140 #if U_SHOW_CPLUSPLUS_API
141 #include "unicode/localpointer.h"
142 #include "unicode/rep.h"
143 #include "unicode/unistr.h"
144 #include "unicode/chariter.h"
145 #endif
146 
147 
149 
/* Forward declaration of struct UText (its members are laid out later in this header)
 * together with the typedef that lets the rest of the API refer to it simply as UText. */
150 struct UText;
151 typedef struct UText UText;
154 /***************************************************************************************
155  *
156  * C Functions for creating UText wrappers around various kinds of text strings.
157  *
158  ****************************************************************************************/
159 
160 
182 utext_close(UText *ut);
183 
184 #if U_SHOW_CPLUSPLUS_API
185 
187 
197 U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close);
198 
200 
201 #endif
202 
225 utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
226 
227 
243 utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
244 
245 
246 #if U_SHOW_CPLUSPLUS_API
247 
260 utext_openUnicodeString(UText *ut, U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status);
261 
262 
276 utext_openConstUnicodeString(UText *ut, const U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status);
277 
278 
292 utext_openReplaceable(UText *ut, U_NAMESPACE_QUALIFIER Replaceable *rep, UErrorCode *status);
293 
307 utext_openCharacterIterator(UText *ut, U_NAMESPACE_QUALIFIER CharacterIterator *ic, UErrorCode *status);
308 
309 #endif
310 
311 
370 utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
371 
372 
385 utext_equals(const UText *a, const UText *b);
386 
387 
388 /*****************************************************************************
389  *
390  * Functions to work with the text represented by a UText wrapper
391  *
392  *****************************************************************************/
393 
405 U_STABLE int64_t U_EXPORT2
407 
422 utext_isLengthExpensive(const UText *ut);
423 
450 utext_char32At(UText *ut, int64_t nativeIndex);
451 
452 
465 
466 
486 utext_next32(UText *ut);
487 
488 
508 
509 
529 utext_next32From(UText *ut, int64_t nativeIndex);
530 
531 
532 
549 utext_previous32From(UText *ut, int64_t nativeIndex);
550 
/* Get the current iterator position, which can range from 0 to the length of the text.
 * The UText is taken as const; the position is returned as an int64_t. */
563 U_STABLE int64_t U_EXPORT2
564 utext_getNativeIndex(const UText *ut);
565 
/* Set the current iteration position to the nearest code point boundary at or
 * preceding the specified native index.
 * This is also the fallback called by the UTEXT_SETNATIVEINDEX macro when the
 * requested index cannot be handled inside the current chunk. */
589 U_STABLE void U_EXPORT2
590 utext_setNativeIndex(UText *ut, int64_t nativeIndex);
591 
609 utext_moveIndex32(UText *ut, int32_t delta);
610 
633 U_STABLE int64_t U_EXPORT2
635 
636 
/* Extract text from a UText into a UChar buffer.
 *   nativeStart, nativeLimit - range of the source text, given as int64_t indexes.
 *   dest, destCapacity       - caller-supplied UChar buffer and its capacity.
 *   status                   - in/out ICU error code.
 * Returns an int32_t.
 * NOTE(review): presumably the return value is the extracted length in UChars -- confirm. */
671 U_STABLE int32_t U_EXPORT2
672 utext_extract(UText *ut,
673  int64_t nativeStart, int64_t nativeLimit,
674  UChar *dest, int32_t destCapacity,
675  UErrorCode *status);
676 
677 
/* Compare two UTexts (binary order).
 * Declared U_INTERNAL, i.e. an internal ICU C API rather than a stable public one.
 * Each text is paired with an int32_t length argument. */
704 U_INTERNAL int32_t U_EXPORT2
705 utext_compare(UText *s1, int32_t length1,
706  UText *s2, int32_t length2);
707 
/* Compare two UTexts (binary order).
 * Declared U_INTERNAL (internal ICU C API). Unlike utext_compare(), each text is
 * paired with an int64_t limit argument instead of an int32_t length. */
735 U_INTERNAL int32_t U_EXPORT2
736 utext_compareNativeLimit(UText *s1, int64_t limit1,
737  UText *s2, int64_t limit2);
738 
/* Compare two UTexts case-insensitively using full case folding.
 * Declared U_INTERNAL (internal ICU C API). Each text is paired with an int32_t
 * length; options is a uint32_t option word and pErrorCode the in/out ICU error code. */
773 U_INTERNAL int32_t U_EXPORT2
774 utext_caseCompare(UText *s1, int32_t length1,
775  UText *s2, int32_t length2,
776  uint32_t options, UErrorCode *pErrorCode);
777 
/* Compare two UTexts case-insensitively using full case folding.
 * Declared U_INTERNAL (internal ICU C API). Unlike utext_caseCompare(), each text
 * is paired with an int64_t limit argument instead of an int32_t length. */
814 U_INTERNAL int32_t U_EXPORT2
815 utext_caseCompareNativeLimit(UText *s1, int64_t limit1,
816  UText *s2, int64_t limit2,
817  uint32_t options, UErrorCode *pErrorCode);
818 
819 
820 /************************************************************************************
821  *
822  * #define inline versions of selected performance-critical text access functions
823  * Caution: do not use auto increment++ or decrement-- expressions
824  * as parameters to these macros.
825  *
826  * For most use, where there is no extreme performance constraint, the
827  * normal, non-inline functions are a better choice. The resulting code
828  * will be smaller, and, if the need ever arises, easier to debug.
829  *
830  * These are implemented as #defines rather than real functions
831  * because there is no fully portable way to do inline functions in plain C.
832  *
833  ************************************************************************************/
834 
/* Inline version of utext_current32().
 * Fast path: if chunkOffset is inside the current chunk and the UTF-16 code unit
 * there is below 0xd800 (below the surrogate range, so it is a whole code point),
 * the macro yields that code unit directly. The position is not changed.
 * Otherwise it calls utext_current32(ut).
 * The argument ut is expanded several times, so it must have no side effects. */
844 #define UTEXT_CURRENT32(ut) \
845  ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
846  ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut))
847 
/* Inline version of utext_next32().
 * Fast path: if chunkOffset is inside the current chunk and the UTF-16 code unit
 * there is below 0xd800 (below the surrogate range), the macro yields that code
 * unit and post-increments chunkOffset.
 * Otherwise it calls utext_next32(ut).
 * The argument ut is expanded several times, so it must have no side effects. */
859 #define UTEXT_NEXT32(ut) \
860  ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
861  ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
862 
/* Inline version of utext_previous32().
 * Fast path: if chunkOffset is greater than 0 and the UTF-16 code unit just before
 * it is below 0xd800 (below the surrogate range), the macro pre-decrements
 * chunkOffset and yields that code unit.
 * Otherwise it calls utext_previous32(ut).
 * The argument ut is expanded several times, so it must have no side effects. */
873 #define UTEXT_PREVIOUS32(ut) \
874  ((ut)->chunkOffset > 0 && \
875  (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \
876  (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut))
877 
/* Inline version of utext_getNativeIndex().
 * While chunkOffset is at or below nativeIndexingLimit, chunk (UTF-16) offsets and
 * native indexes correspond, so the result is simply chunkNativeStart + chunkOffset.
 * Past that limit the provider's mapOffsetToNative function is called through the
 * pFuncs dispatch table.
 * The argument ut is expanded several times, so it must have no side effects. */
890 #define UTEXT_GETNATIVEINDEX(ut) \
891  ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \
892  (ut)->chunkNativeStart+(ut)->chunkOffset : \
893  (ut)->pFuncs->mapOffsetToNative(ut))
894 
/* Inline version of utext_setNativeIndex().
 *
 * Fast path: ix is converted to an offset relative to chunkNativeStart. When that
 * offset lies inside the natively indexed part of the current chunk
 * (0 <= offset < nativeIndexingLimit) AND the UTF-16 code unit at that offset is
 * below 0xdc00 (i.e. it is not a trail surrogate), chunkOffset is set directly.
 *
 * Every other case is delegated to utext_setNativeIndex(), which moves to the
 * nearest code point boundary at or preceding the index. The trail-surrogate test
 * is what keeps the fast path from leaving the position in the middle of a
 * surrogate pair; the limit test is strict so that chunkContents[offset] is only
 * read inside the natively indexed part of the chunk.
 *
 * Expands to a brace-enclosed block, not an expression. ut and ix are expanded
 * more than once, so neither may have side effects. */
906 #define UTEXT_SETNATIVEINDEX(ut, ix) \
907  { int64_t __offset = (ix) - (ut)->chunkNativeStart; \
908  if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \
909  (ut)->chunkOffset=(int32_t)__offset; \
910  } else { \
911  utext_setNativeIndex((ut), (ix)); } }
912 
913 
914 
915 /************************************************************************************
916  *
917  * Functions related to writing or modifying the text.
918  * These will work only with modifiable UTexts. Attempting to
919  * modify a read-only UText will return an error status.
920  *
921  ************************************************************************************/
922 
923 
943 utext_isWritable(const UText *ut);
944 
945 
955 utext_hasMetaData(const UText *ut);
956 
957 
/* Replace a range of the original text with a replacement text.
 *   nativeStart, nativeLimit            - range to replace, given as int64_t indexes.
 *   replacementText, replacementLength  - the UChar text to put in its place.
 *   status                              - in/out ICU error code; per the section
 *                                         comment above, attempting to modify a
 *                                         read-only UText returns an error status.
 * Returns an int32_t.
 * NOTE(review): presumably the change in native length of the text -- confirm. */
985 U_STABLE int32_t U_EXPORT2
986 utext_replace(UText *ut,
987  int64_t nativeStart, int64_t nativeLimit,
988  const UChar *replacementText, int32_t replacementLength,
989  UErrorCode *status);
990 
991 
992 
/* Copy or move a substring from one position to another within the text, while
 * retaining any metadata associated with the text.
 *   nativeStart, nativeLimit - source range, given as int64_t indexes.
 *   destIndex                - int64_t index of the destination position.
 *   move                     - UBool selecting move rather than copy
 *                              (NOTE(review): assumed TRUE = move -- confirm).
 *   status                   - in/out ICU error code; attempting to modify a
 *                              read-only UText returns an error status. */
1025 U_STABLE void U_EXPORT2
1026 utext_copy(UText *ut,
1027  int64_t nativeStart, int64_t nativeLimit,
1028  int64_t destIndex,
1029  UBool move,
1030  UErrorCode *status);
1031 
1032 
/* NOTE(review): no description of this function is visible in this listing. Its name
 * and its placement in the "writing or modifying the text" section suggest that it
 * makes the UText read-only -- confirm against the ICU API documentation. */
1054 U_STABLE void U_EXPORT2
1055 utext_freeze(UText *ut);
1056 
1057 
1064 enum {
1098 };
1099 
/* Function type declaration for UText.clone(), the provider callback stored in the
 * clone member of the UTextFuncs dispatch table.
 * Note that, unlike the public utext_clone(), this callback type has no readOnly
 * parameter. */
1137 typedef UText * U_CALLCONV
1138 UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
1139 
1140 
1149 typedef int64_t U_CALLCONV
1151 
/* Function type declaration for UText.access(), the provider callback stored in the
 * access member of the UTextFuncs dispatch table.
 * Takes the UText, an int64_t native index and a UBool direction flag (forward),
 * and returns a UBool. */
1177 typedef UBool U_CALLCONV
1178 UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
1179 
1207 typedef int32_t U_CALLCONV
1209  int64_t nativeStart, int64_t nativeLimit,
1210  UChar *dest, int32_t destCapacity,
1211  UErrorCode *status);
1212 
1242 typedef int32_t U_CALLCONV
1244  int64_t nativeStart, int64_t nativeLimit,
1245  const UChar *replacementText, int32_t replacmentLength,
1246  UErrorCode *status);
1247 
1276 typedef void U_CALLCONV
1278  int64_t nativeStart, int64_t nativeLimit,
1279  int64_t nativeDest,
1280  UBool move,
1281  UErrorCode *status);
1282 
1296 typedef int64_t U_CALLCONV
1298 
/* Function type declaration for the provider callback stored in the
 * mapNativeIndexToUTF16 member of the UTextFuncs dispatch table.
 * Takes a const UText and an int64_t native index and returns an int32_t. */
1314 typedef int32_t U_CALLCONV
1315 UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
1316 
1317 
1335 typedef void U_CALLCONV
1337 
1338 
1348 struct UTextFuncs {
1363  int32_t tableSize;
1364 
1371 
1372 
1380 
1389 
1397 
1405 
1413 
1421 
1429 
1437 
1445 
1451 
1457 
1463 
1464 };
1469 typedef struct UTextFuncs UTextFuncs;
1470 
1482 struct UText {
1495  uint32_t magic;
1496 
1497 
1503  int32_t flags;
1504 
1505 
1512 
1519  int32_t sizeOfStruct;
1520 
1521  /* ------ 16 byte alignment boundary ----------- */
1522 
1523 
1530 
1535  int32_t extraSize;
1536 
1545 
1546  /* ---- 16 byte alignment boundary------ */
1547 
1553 
1559  int32_t chunkOffset;
1560 
1565  int32_t chunkLength;
1566 
1567  /* ---- 16 byte alignment boundary-- */
1568 
1569 
1577 
1583 
1589  void *pExtra;
1590 
1597  const void *context;
1598 
1599  /* --- 16 byte alignment boundary--- */
1600 
1606  const void *p;
1612  const void *q;
1618  const void *r;
1619 
1625  void *privP;
1626 
1627 
1628  /* --- 16 byte alignment boundary--- */
1629 
1630 
1636  int64_t a;
1637 
1643  int32_t b;
1644 
1650  int32_t c;
1651 
1652  /* ---- 16 byte alignment boundary---- */
1653 
1654 
1660  int64_t privA;
1666  int32_t privB;
1672  int32_t privC;
1673 };
1674 
1675 
1693 utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
1694 
/* Constant stored in the (private) magic member of a UText; UTEXT_INITIALIZER uses
 * it as the first initializer value. */
1700 enum {
1701  UTEXT_MAGIC = 0x345ad82c
1702 };
1703 
/* Brace-enclosed initializer for a UText object.
 * magic is set to UTEXT_MAGIC and sizeOfStruct to sizeof(UText); every other member
 * is initialized to 0 or NULL.
 * The initializer is positional: the values must stay in the same order as the
 * members of struct UText, as named in the per-line comments below. */
1711 #define UTEXT_INITIALIZER { \
1712  UTEXT_MAGIC, /* magic */ \
1713  0, /* flags */ \
1714  0, /* providerProperties */ \
1715  sizeof(UText), /* sizeOfStruct */ \
1716  0, /* chunkNativeLimit */ \
1717  0, /* extraSize */ \
1718  0, /* nativeIndexingLimit */ \
1719  0, /* chunkNativeStart */ \
1720  0, /* chunkOffset */ \
1721  0, /* chunkLength */ \
1722  NULL, /* chunkContents */ \
1723  NULL, /* pFuncs */ \
1724  NULL, /* pExtra */ \
1725  NULL, /* context */ \
1726  NULL, NULL, NULL, /* p, q, r */ \
1727  NULL, /* privP */ \
1728  0, 0, 0, /* a, b, c */ \
1729  0, 0, 0 /* privA, privB, privC */ \
1730  }
1731 
1732 
1734 
1735 
1736 
1737 #endif
int32_t UTextExtract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
Function type declaration for UText.extract().
Definition: utext.h:1208
int32_t c
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1650
int64_t utext_nativeLength(UText *ut)
Get the length of the text.
UChar32 utext_previous32(UText *ut)
Move the iterator position to the character (code point) whose index precedes the current position...
UTextClose * spare3
(private) Spare function pointer
Definition: utext.h:1462
int32_t nativeIndexingLimit
(protected) The highest chunk offset where native indexing and chunk (UTF-16) indexing correspond...
Definition: utext.h:1544
int64_t chunkNativeStart
(protected) Native index of the first character in the text chunk.
Definition: utext.h:1552
UBool utext_isWritable(const UText *ut)
Return TRUE if the text can be written (modified) with utext_replace() or utext_copy().
void UTextClose(UText *ut)
Function type declaration for UText.close().
Definition: utext.h:1336
int32_t providerProperties
Text provider properties.
Definition: utext.h:1511
The provider supports modifying the text via the replace() and copy() functions.
Definition: utext.h:1083
void * pExtra
(protected) Pointer to additional space requested by the text provider during the utext_open operatio...
Definition: utext.h:1589
int64_t a
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1636
UChar32 utext_previous32From(UText *ut, int64_t nativeIndex)
Set the iteration index, and return the code point preceding the one specified by the initial index...
int32_t chunkLength
(protected) Length of the text chunk (UTF-16 buffer), in UChars.
Definition: utext.h:1565
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: utypes.h:287
C++ API: Unicode String.
void UTextCopy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t nativeDest, UBool move, UErrorCode *status)
Function type declaration for UText.copy().
Definition: utext.h:1277
UTextMapNativeIndexToUTF16 * mapNativeIndexToUTF16
(public) Function pointer for UTextMapNativeIndexToUTF16.
Definition: utext.h:1436
UText * utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status)
Open a read-only UText implementation for UTF-8 strings.
UText * UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status)
Function type declaration for UText.clone().
Definition: utext.h:1138
int32_t utext_caseCompare(UText *s1, int32_t length1, UText *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
Compare two UTexts case-insensitively using full case folding.
int32_t reserved1
(private) Alignment padding.
Definition: utext.h:1370
void utext_freeze(UText *ut)
#define U_INTERNAL
This is used to declare a function as an internal ICU C API.
Definition: umachine.h:145
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:71
It is potentially time consuming for the provider to determine the length of the text.
Definition: utext.h:1069
UTextExtract * extract
(public) Function pointer for UTextExtract.
Definition: utext.h:1404
int64_t UTextNativeLength(UText *ut)
Function type declaration for UText.nativeLength().
Definition: utext.h:1150
void * privP
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1625
UTextClose * close
(public) Function pointer for UTextClose.
Definition: utext.h:1444
int32_t flags
(private) Flags for managing the allocation and freeing of memory associated with this UText...
Definition: utext.h:1503
int32_t privC
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1672
UTextClone * clone
(public) Function pointer for UTextClone
Definition: utext.h:1379
Text chunks remain valid and usable until the text object is modified or deleted, not just until the ...
Definition: utext.h:1076
UTextNativeLength * nativeLength
(public) Function pointer for UTextNativeLength. May be expensive to compute!
Definition: utext.h:1388
const void * q
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1612
(public) Function dispatch table for UText.
Definition: utext.h:1348
UChar32 utext_next32(UText *ut)
Get the code point at the current iteration position of the UText, and advance the position to the fi...
There is meta data associated with the text.
Definition: utext.h:1089
UTextAccess * access
(public) Function pointer for UTextAccess.
Definition: utext.h:1396
UChar32 utext_char32At(UText *ut, int64_t nativeIndex)
Returns the code point at the requested index, or U_SENTINEL (-1) if it is out of bounds...
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:110
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:356
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:131
const void * p
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1606
void utext_copy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t destIndex, UBool move, UErrorCode *status)
Copy or move a substring from one position to another within the text, while retaining any metadata a...
#define U_NAMESPACE_QUALIFIER
This is used to qualify that a function or class is part of the public ICU C++ API namespace...
Definition: uversion.h:134
UTextCopy * copy
(public) Function pointer for UTextCopy.
Definition: utext.h:1420
UBool utext_equals(const UText *a, const UText *b)
Compare two UText objects for equality.
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
UChar32 utext_current32(UText *ut)
Get the code point at the current iteration position, or U_SENTINEL (-1) if the iteration has reached...
int32_t utext_caseCompareNativeLimit(UText *s1, int64_t limit1, UText *s2, int64_t limit2, uint32_t options, UErrorCode *pErrorCode)
Compare two UTexts case-insensitively using full case folding.
const UChar * chunkContents
(protected) pointer to a chunk of text in UTF-16 format.
Definition: utext.h:1576
int32_t reserved3
Definition: utext.h:1370
int32_t tableSize
(public) Function table size, sizeof(UTextFuncs). Intended for use should the table grow to accommodate...
Definition: utext.h:1363
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:345
int32_t privB
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1666
UBool UTextAccess(UText *ut, int64_t nativeIndex, UBool forward)
Function type declaration for UText.access().
Definition: utext.h:1178
int32_t chunkOffset
(protected) Current iteration position within the text chunk (UTF-16 buffer).
Definition: utext.h:1559
int32_t extraSize
(protected) Size in bytes of the extra space (pExtra).
Definition: utext.h:1535
UChar32 utext_next32From(UText *ut, int64_t nativeIndex)
Set the iteration index and return the code point at that index.
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:188
int64_t UTextMapOffsetToNative(const UText *ut)
Function type declaration for UText.mapOffsetToNative().
Definition: utext.h:1297
#define U_EXPORT2
Definition: platform.h:314
int64_t utext_getPreviousNativeIndex(UText *ut)
Get the native index of the character preceding the current position.
int32_t utext_extract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
Extract text from a UText into a UChar buffer.
C API: Unicode Properties.
int32_t UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex)
Function type declaration for UText.mapNativeIndexToUTF16().
Definition: utext.h:1315
void utext_setNativeIndex(UText *ut, int64_t nativeIndex)
Set the current iteration position to the nearest code point boundary at or preceding the specified i...
uint16_t UChar
Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:325
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:111
int64_t privA
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1660
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:132
const void * r
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1618
UBool utext_moveIndex32(UText *ut, int32_t delta)
Move the iterator position by delta code points.
UText * utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status)
Clone a UText.
int64_t chunkNativeLimit
(protected) Native index of the first character position following the current chunk.
Definition: utext.h:1529
int32_t sizeOfStruct
(public) sizeOfStruct=sizeof(UText). Allows possible backward-compatible extension.
Definition: utext.h:1519
UTextClose * spare2
(private) Spare function pointer
Definition: utext.h:1456
int32_t utext_compareNativeLimit(UText *s1, int64_t limit1, UText *s2, int64_t limit2)
Compare two UTexts (binary order).
int32_t b
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1643
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:639
int64_t utext_getNativeIndex(const UText *ut)
Get the current iterator position, which can range from 0 to the length of the text.
const UTextFuncs * pFuncs
(public) Pointer to Dispatch table for accessing functions for this UText.
Definition: utext.h:1582
UTextClose * spare1
(private) Spare function pointer
Definition: utext.h:1450
uint32_t magic
(private) Magic.
Definition: utext.h:1495
int32_t UTextReplace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacmentLength, UErrorCode *status)
Function type declaration for UText.replace().
Definition: utext.h:1243
UText struct.
Definition: utext.h:1482
UTextReplace * replace
(public) Function pointer for UTextReplace.
Definition: utext.h:1412
Basic definitions for ICU, for both C and C++ APIs.
UBool utext_isLengthExpensive(const UText *ut)
Return TRUE if calculating the length of the text could be expensive.
Text provider owns the text storage.
Definition: utext.h:1097
int32_t utext_replace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacementLength, UErrorCode *status)
Replace a range of the original text with a replacement text.
const void * context
(protected) Pointer to string or text-containing object or similar.
Definition: utext.h:1597
UText * utext_close(UText *ut)
Close function for UText instances.
C++ API: Character Iterator.
UTextMapOffsetToNative * mapOffsetToNative
(public) Function pointer for UTextMapOffsetToNative.
Definition: utext.h:1428
int32_t reserved2
Definition: utext.h:1370
UText * utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status)
Common function for use by Text Provider implementations to allocate and/or initialize a new UText st...
UBool utext_hasMetaData(const UText *ut)
Test whether there is meta data associated with the text.
int32_t utext_compare(UText *s1, int32_t length1, UText *s2, int32_t length2)
Compare two UTexts (binary order).
UText * utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status)
Open a read-only UText for UChar * string.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:137
int8_t UBool
The ICU boolean type.
Definition: umachine.h:228
C++ API: Replaceable String.