ICU 4.8.1.1  4.8.1.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
uset.h
Go to the documentation of this file.
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2002-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: uset.h
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2002mar07
14 * created by: Markus W. Scherer
15 *
16 * C version of UnicodeSet.
17 */
18 
19 
27 #ifndef __USET_H__
28 #define __USET_H__
29 
30 #include "unicode/utypes.h"
31 #include "unicode/uchar.h"
32 #include "unicode/localpointer.h"
33 
34 #ifndef UCNV_H
35 struct USet;
41 typedef struct USet USet;
42 #endif
43 
49 enum {
55 
83 
93 
99 };
100 
156 typedef enum USetSpanCondition {
209 
215 typedef struct USerializedSet {
220  const uint16_t *array;
225  int32_t bmpLength;
230  int32_t length;
237 
238 /*********************************************************************
239  * USet API
240  *********************************************************************/
241 
251 
263 uset_open(UChar32 start, UChar32 end);
264 
275 uset_openPattern(const UChar* pattern, int32_t patternLength,
276  UErrorCode* ec);
277 
290 uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
291  uint32_t options,
292  UErrorCode* ec);
293 
300 U_STABLE void U_EXPORT2
301 uset_close(USet* set);
302 
303 #if U_SHOW_CPLUSPLUS_API
304 
306 
316 U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close);
317 
319 
320 #endif
321 
332 uset_clone(const USet *set);
333 
344 uset_isFrozen(const USet *set);
345 
360 U_STABLE void U_EXPORT2
361 uset_freeze(USet *set);
362 
374 uset_cloneAsThawed(const USet *set);
375 
385 U_STABLE void U_EXPORT2
386 uset_set(USet* set,
387  UChar32 start, UChar32 end);
388 
410 U_STABLE int32_t U_EXPORT2
412  const UChar *pattern, int32_t patternLength,
413  uint32_t options,
414  UErrorCode *status);
415 
438 U_STABLE void U_EXPORT2
440  UProperty prop, int32_t value, UErrorCode* ec);
441 
477 U_STABLE void U_EXPORT2
479  const UChar *prop, int32_t propLength,
480  const UChar *value, int32_t valueLength,
481  UErrorCode* ec);
482 
493 uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
494  int32_t pos);
495 
511 U_STABLE int32_t U_EXPORT2
512 uset_toPattern(const USet* set,
513  UChar* result, int32_t resultCapacity,
514  UBool escapeUnprintable,
515  UErrorCode* ec);
516 
525 U_STABLE void U_EXPORT2
526 uset_add(USet* set, UChar32 c);
527 
540 U_STABLE void U_EXPORT2
541 uset_addAll(USet* set, const USet *additionalSet);
542 
552 U_STABLE void U_EXPORT2
553 uset_addRange(USet* set, UChar32 start, UChar32 end);
554 
564 U_STABLE void U_EXPORT2
565 uset_addString(USet* set, const UChar* str, int32_t strLen);
566 
576 U_STABLE void U_EXPORT2
577 uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
578 
587 U_STABLE void U_EXPORT2
588 uset_remove(USet* set, UChar32 c);
589 
599 U_STABLE void U_EXPORT2
600 uset_removeRange(USet* set, UChar32 start, UChar32 end);
601 
611 U_STABLE void U_EXPORT2
612 uset_removeString(USet* set, const UChar* str, int32_t strLen);
613 
625 U_STABLE void U_EXPORT2
626 uset_removeAll(USet* set, const USet* removeSet);
627 
642 U_STABLE void U_EXPORT2
643 uset_retain(USet* set, UChar32 start, UChar32 end);
644 
657 U_STABLE void U_EXPORT2
658 uset_retainAll(USet* set, const USet* retain);
659 
668 U_STABLE void U_EXPORT2
669 uset_compact(USet* set);
670 
679 U_STABLE void U_EXPORT2
680 uset_complement(USet* set);
681 
693 U_STABLE void U_EXPORT2
694 uset_complementAll(USet* set, const USet* complement);
695 
703 U_STABLE void U_EXPORT2
704 uset_clear(USet* set);
705 
732 U_STABLE void U_EXPORT2
733 uset_closeOver(USet* set, int32_t attributes);
734 
741 U_STABLE void U_EXPORT2
743 
752 uset_isEmpty(const USet* set);
753 
763 uset_contains(const USet* set, UChar32 c);
764 
775 uset_containsRange(const USet* set, UChar32 start, UChar32 end);
776 
786 uset_containsString(const USet* set, const UChar* str, int32_t strLen);
787 
798 U_STABLE int32_t U_EXPORT2
799 uset_indexOf(const USet* set, UChar32 c);
800 
812 uset_charAt(const USet* set, int32_t charIndex);
813 
822 U_STABLE int32_t U_EXPORT2
823 uset_size(const USet* set);
824 
833 U_STABLE int32_t U_EXPORT2
834 uset_getItemCount(const USet* set);
835 
854 U_STABLE int32_t U_EXPORT2
855 uset_getItem(const USet* set, int32_t itemIndex,
856  UChar32* start, UChar32* end,
857  UChar* str, int32_t strCapacity,
858  UErrorCode* ec);
859 
869 uset_containsAll(const USet* set1, const USet* set2);
870 
882 uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
883 
893 uset_containsNone(const USet* set1, const USet* set2);
894 
904 uset_containsSome(const USet* set1, const USet* set2);
905 
925 U_STABLE int32_t U_EXPORT2
926 uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
927 
946 U_STABLE int32_t U_EXPORT2
947 uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
948 
968 U_STABLE int32_t U_EXPORT2
969 uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
970 
989 U_STABLE int32_t U_EXPORT2
990 uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
991 
1001 uset_equals(const USet* set1, const USet* set2);
1002 
1003 /*********************************************************************
1004  * Serialized set API
1005  *********************************************************************/
1006 
1056 U_STABLE int32_t U_EXPORT2
1057 uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1058 
1068 uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1069 
1077 U_STABLE void U_EXPORT2
1079 
1090 
1100 U_STABLE int32_t U_EXPORT2
1102 
1117 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1118  UChar32* pStart, UChar32* pEnd);
1119 
1120 #endif
UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns TRUE if the given USet contains all characters c where start <= c && c <= end...
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition: uset.h:235
void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
void uset_clear(USet *set)
Removes all of the elements from this set.
UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
USet * uset_clone(const USet *set)
Returns a copy of this object.
int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
Continue a span() while there is a set element at the current position.
Definition: uset.h:183
UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet patt...
One more than the last span condition.
Definition: uset.h:207
void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
const uint16_t * array
The serialized Unicode Set.
Definition: uset.h:220
int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in the given USet.
USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code ...
UBool uset_contains(const USet *set, UChar32 c)
Returns TRUE if the given USet contains the given character.
Enough for any single-code point set.
Definition: uset.h:98
UBool uset_isEmpty(const USet *set)
Returns TRUE if the given USet contains no characters and no strings.
int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
void uset_removeAllStrings(USet *set)
Remove all strings from this set.
void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing thi...
void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property...
void uset_freeze(USet *set)
Freeze the set (make it immutable).
USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
Ignore white space within patterns unless quoted or escaped.
Definition: uset.h:54
struct USerializedSet USerializedSet
A serialized form of a Unicode set.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:131
UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:345
void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns TRUE if the given USet contains the given string.
int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
#define U_EXPORT2
Definition: platform.h:314
void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set...
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition: uset.h:156
C API: Unicode Properties.
int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
uint16_t UChar
Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:325
void uset_complement(USet *set)
Inverts this set.
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:132
UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
struct USet USet
Definition: ucnv.h:67
int32_t length
The total length of the array.
Definition: uset.h:230
void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:174
UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns TRUE if the given USerializedSet contains the given character.
int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:639
UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
Enable case insensitive matching.
Definition: uset.h:82
int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code ...
Enable case insensitive matching.
Definition: uset.h:92
Basic definitions for ICU, for both C and C++ APIs.
int32_t bmpLength
The length of the array that contains BMP characters.
Definition: uset.h:225
A serialized form of a Unicode set.
Definition: uset.h:215
Continue a span() while there is a set element at the current position.
Definition: uset.h:202
void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
Continue a span() while there is no set element at the current position.
Definition: uset.h:168
USet * uset_openEmpty()
Create an empty USet object.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:137
int8_t UBool
The ICU boolean type.
Definition: umachine.h:228
void uset_close(USet *set)
Disposes of the storage used by a USet object.