ICU 4.8.1.1  4.8.1.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
uniset.h
Go to the documentation of this file.
1 /*
2 ***************************************************************************
3 * Copyright (C) 1999-2011, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 ***************************************************************************
6 * Date Name Description
7 * 10/20/99 alan Creation.
8 ***************************************************************************
9 */
10 
11 #ifndef UNICODESET_H
12 #define UNICODESET_H
13 
14 #include "unicode/unifilt.h"
15 #include "unicode/unistr.h"
16 #include "unicode/uset.h"
17 
24 
25 class BMPSet;
26 class ParsePosition;
27 class SymbolTable;
28 class UnicodeSetStringSpan;
29 class UVector;
30 class RuleCharacterIterator;
31 
273 
274  int32_t len; // length of list used; 0 <= len <= capacity
275  int32_t capacity; // capacity of list
276  UChar32* list; // MUST be terminated with HIGH
277  BMPSet *bmpSet; // The set is frozen iff either bmpSet or stringSpan is not NULL.
278  UChar32* buffer; // internal buffer, may be NULL
279  int32_t bufferCapacity; // capacity of buffer
280  int32_t patLen;
281 
291  UChar *pat;
292  UVector* strings; // maintained in sorted order
293  UnicodeSetStringSpan *stringSpan;
294 
295 private:
296  enum { // constants
297  kIsBogus = 1 // This set is bogus (i.e. not valid)
298  };
299  uint8_t fFlags; // Bit flag (see constants above)
300 public:
310  inline UBool isBogus(void) const;
311 
328  void setToBogus();
329 
330 public:
331 
332  enum {
337  MIN_VALUE = 0,
338 
343  MAX_VALUE = 0x10ffff
344  };
345 
346  //----------------------------------------------------------------
347  // Constructors &c
348  //----------------------------------------------------------------
349 
350 public:
351 
356  UnicodeSet();
357 
366  UnicodeSet(UChar32 start, UChar32 end);
367 
376  UnicodeSet(const UnicodeString& pattern,
377  UErrorCode& status);
378 
391  UnicodeSet(const UnicodeString& pattern,
392  uint32_t options,
393  const SymbolTable* symbols,
394  UErrorCode& status);
395 
409  UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
410  uint32_t options,
411  const SymbolTable* symbols,
412  UErrorCode& status);
413 
418  UnicodeSet(const UnicodeSet& o);
419 
424  virtual ~UnicodeSet();
425 
431  UnicodeSet& operator=(const UnicodeSet& o);
432 
444  virtual UBool operator==(const UnicodeSet& o) const;
445 
451  UBool operator!=(const UnicodeSet& o) const;
452 
462  virtual UnicodeFunctor* clone() const;
463 
471  virtual int32_t hashCode(void) const;
472 
481  inline static UnicodeSet *fromUSet(USet *uset);
482 
491  inline static const UnicodeSet *fromUSet(const USet *uset);
492 
500  inline USet *toUSet();
501 
502 
510  inline const USet * toUSet() const;
511 
512 
513  //----------------------------------------------------------------
514  // Freezable API
515  //----------------------------------------------------------------
516 
525  inline UBool isFrozen() const;
526 
540  UnicodeFunctor *freeze();
541 
550  UnicodeFunctor *cloneAsThawed() const;
551 
552  //----------------------------------------------------------------
553  // Public API
554  //----------------------------------------------------------------
555 
566  UnicodeSet& set(UChar32 start, UChar32 end);
567 
573  static UBool resemblesPattern(const UnicodeString& pattern,
574  int32_t pos);
575 
588  UnicodeSet& applyPattern(const UnicodeString& pattern,
589  UErrorCode& status);
590 
607  UnicodeSet& applyPattern(const UnicodeString& pattern,
608  uint32_t options,
609  const SymbolTable* symbols,
610  UErrorCode& status);
611 
643  UnicodeSet& applyPattern(const UnicodeString& pattern,
644  ParsePosition& pos,
645  uint32_t options,
646  const SymbolTable* symbols,
647  UErrorCode& status);
648 
662  virtual UnicodeString& toPattern(UnicodeString& result,
663  UBool escapeUnprintable = FALSE) const;
664 
687  UnicodeSet& applyIntPropertyValue(UProperty prop,
688  int32_t value,
689  UErrorCode& ec);
690 
720  UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
721  const UnicodeString& value,
722  UErrorCode& ec);
723 
732  virtual int32_t size(void) const;
733 
740  virtual UBool isEmpty(void) const;
741 
749  virtual UBool contains(UChar32 c) const;
750 
759  virtual UBool contains(UChar32 start, UChar32 end) const;
760 
768  UBool contains(const UnicodeString& s) const;
769 
777  virtual UBool containsAll(const UnicodeSet& c) const;
778 
786  UBool containsAll(const UnicodeString& s) const;
787 
796  UBool containsNone(UChar32 start, UChar32 end) const;
797 
805  UBool containsNone(const UnicodeSet& c) const;
806 
814  UBool containsNone(const UnicodeString& s) const;
815 
824  inline UBool containsSome(UChar32 start, UChar32 end) const;
825 
833  inline UBool containsSome(const UnicodeSet& s) const;
834 
842  inline UBool containsSome(const UnicodeString& s) const;
843 
862  int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
863 
876  inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const;
877 
895  int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
896 
910  inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const;
911 
930  int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
931 
949  int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
950 
955  virtual UMatchDegree matches(const Replaceable& text,
956  int32_t& offset,
957  int32_t limit,
958  UBool incremental);
959 
960 private:
983  static int32_t matchRest(const Replaceable& text,
984  int32_t start, int32_t limit,
985  const UnicodeString& s);
986 
996  int32_t findCodePoint(UChar32 c) const;
997 
998 public:
999 
1007  virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
1008 
1017  int32_t indexOf(UChar32 c) const;
1018 
1028  UChar32 charAt(int32_t index) const;
1029 
1044  virtual UnicodeSet& add(UChar32 start, UChar32 end);
1045 
1053  UnicodeSet& add(UChar32 c);
1054 
1066  UnicodeSet& add(const UnicodeString& s);
1067 
1068  private:
1074  static int32_t getSingleCP(const UnicodeString& s);
1075 
1076  void _add(const UnicodeString& s);
1077 
1078  public:
1087  UnicodeSet& addAll(const UnicodeString& s);
1088 
1097  UnicodeSet& retainAll(const UnicodeString& s);
1098 
1107  UnicodeSet& complementAll(const UnicodeString& s);
1108 
1117  UnicodeSet& removeAll(const UnicodeString& s);
1118 
1127  static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
1128 
1129 
1137  static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
1138 
1152  virtual UnicodeSet& retain(UChar32 start, UChar32 end);
1153 
1154 
1160  UnicodeSet& retain(UChar32 c);
1161 
1175  virtual UnicodeSet& remove(UChar32 start, UChar32 end);
1176 
1184  UnicodeSet& remove(UChar32 c);
1185 
1195  UnicodeSet& remove(const UnicodeString& s);
1196 
1204  virtual UnicodeSet& complement(void);
1205 
1220  virtual UnicodeSet& complement(UChar32 start, UChar32 end);
1221 
1229  UnicodeSet& complement(UChar32 c);
1230 
1241  UnicodeSet& complement(const UnicodeString& s);
1242 
1255  virtual UnicodeSet& addAll(const UnicodeSet& c);
1256 
1268  virtual UnicodeSet& retainAll(const UnicodeSet& c);
1269 
1281  virtual UnicodeSet& removeAll(const UnicodeSet& c);
1282 
1293  virtual UnicodeSet& complementAll(const UnicodeSet& c);
1294 
1301  virtual UnicodeSet& clear(void);
1302 
1328  UnicodeSet& closeOver(int32_t attribute);
1329 
1336  virtual UnicodeSet &removeAllStrings();
1337 
1345  virtual int32_t getRangeCount(void) const;
1346 
1354  virtual UChar32 getRangeStart(int32_t index) const;
1355 
1363  virtual UChar32 getRangeEnd(int32_t index) const;
1364 
1413  int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
1414 
1421  virtual UnicodeSet& compact();
1422 
1434  static UClassID U_EXPORT2 getStaticClassID(void);
1435 
1444  virtual UClassID getDynamicClassID(void) const;
1445 
1446 private:
1447 
1448  // Private API for the USet API
1449 
1450  friend class USetAccess;
1451 
1452  int32_t getStringCount() const;
1453 
1454  const UnicodeString* getString(int32_t index) const;
1455 
1456  //----------------------------------------------------------------
1457  // RuleBasedTransliterator support
1458  //----------------------------------------------------------------
1459 
1460 private:
1461 
1467  virtual UBool matchesIndexValue(uint8_t v) const;
1468 
1469 private:
1470 
1471  //----------------------------------------------------------------
1472  // Implementation: Clone as thawed (see ICU4J Freezable)
1473  //----------------------------------------------------------------
1474 
1475  UnicodeSet(const UnicodeSet& o, UBool /* asThawed */);
1476 
1477  //----------------------------------------------------------------
1478  // Implementation: Pattern parsing
1479  //----------------------------------------------------------------
1480 
1481  void applyPattern(RuleCharacterIterator& chars,
1482  const SymbolTable* symbols,
1483  UnicodeString& rebuiltPat,
1484  uint32_t options,
1485  UErrorCode& ec);
1486 
1487  //----------------------------------------------------------------
1488  // Implementation: Utility methods
1489  //----------------------------------------------------------------
1490 
1491  void ensureCapacity(int32_t newLen, UErrorCode& ec);
1492 
1493  void ensureBufferCapacity(int32_t newLen, UErrorCode& ec);
1494 
1495  void swapBuffers(void);
1496 
1497  UBool allocateStrings(UErrorCode &status);
1498 
1499  UnicodeString& _toPattern(UnicodeString& result,
1500  UBool escapeUnprintable) const;
1501 
1502  UnicodeString& _generatePattern(UnicodeString& result,
1503  UBool escapeUnprintable) const;
1504 
1505  static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
1506 
1507  static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
1508 
1509  //----------------------------------------------------------------
1510  // Implementation: Fundamental operators
1511  //----------------------------------------------------------------
1512 
1513  void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
1514 
1515  void add(const UChar32* other, int32_t otherLen, int8_t polarity);
1516 
1517  void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
1518 
1524  static UBool resemblesPropertyPattern(const UnicodeString& pattern,
1525  int32_t pos);
1526 
1527  static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
1528  int32_t iterOpts);
1529 
1569  UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
1570  ParsePosition& ppos,
1571  UErrorCode &ec);
1572 
1573  void applyPropertyPattern(RuleCharacterIterator& chars,
1574  UnicodeString& rebuiltPat,
1575  UErrorCode& ec);
1576 
1577  static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
1578 
1583  typedef UBool (*Filter)(UChar32 codePoint, void* context);
1584 
1594  void applyFilter(Filter filter,
1595  void* context,
1596  int32_t src,
1597  UErrorCode &status);
1598 
1602  void setPattern(const UnicodeString& newPat);
1606  void releasePattern();
1607 
1608  friend class UnicodeSetIterator;
1609 };
1610 
1611 
1612 
// Inequality test: returns the logical negation of operator==(o) on this set.
1613 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
1614  return !operator==(o);
1615 }
1616 
// Reports whether the set has been frozen (made immutable).
// The frozen state is not stored as a separate flag: the set counts as frozen
// when either the bmpSet or the stringSpan pointer is non-NULL.
1617 inline UBool UnicodeSet::isFrozen() const {
1618  return (UBool)(bmpSet!=NULL || stringSpan!=NULL);
1619 }
1620 
// Returns true if this set contains one or more of the characters in the
// range given by start and end. Implemented as the negation of
// containsNone(start, end).
1621 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
1622  return !containsNone(start, end);
1623 }
1624 
1626  return !containsNone(s);
1627 }
1628 
1630  return !containsNone(s);
1631 }
1632 
// Reports whether this object is marked bogus (i.e. not a valid set).
// Tests the kIsBogus bit of fFlags; the masked value is cast to UBool.
1633 inline UBool UnicodeSet::isBogus() const {
1634  return (UBool)(fFlags & kIsBogus);
1635 }
1636 
1638  return reinterpret_cast<UnicodeSet *>(uset);
1639 }
1640 
// Const overload: gets a const UnicodeSet pointer from a const USet pointer.
// This is a pure pointer reinterpretation; nothing is copied or allocated,
// and the pointer value (including NULL) is passed through unchanged.
1641 inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) {
1642  return reinterpret_cast<const UnicodeSet *>(uset);
1643 }
1644 
1646  return reinterpret_cast<USet *>(this);
1647 }
1648 
// Const overload: produces a const USet pointer for this UnicodeSet.
// This is a pure pointer reinterpretation of `this`; nothing is copied.
1649 inline const USet *UnicodeSet::toUSet() const {
1650  return reinterpret_cast<const USet *>(this);
1651 }
1652 
// UnicodeString convenience overload of span().
// Clamps `start` into [0, s.length()], runs the UChar-pointer span() on the
// part of the string beginning at `start`, and returns the result offset by
// `start`, so the returned value is an index into `s` rather than a length
// relative to the sub-buffer.
1653 inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const {
1654  int32_t sLength=s.length();
 // Pin out-of-range start values to the nearest valid string index.
1655  if(start<0) {
1656  start=0;
1657  } else if(start>sLength) {
1658  start=sLength;
1659  }
 // Only the remaining sLength-start code units are handed to the pointer overload.
1660  return start+span(s.getBuffer()+start, sLength-start, spanCondition);
1661 }
1662 
// UnicodeString convenience overload of spanBack().
// Clamps `limit` into [0, s.length()] and runs the UChar-pointer spanBack()
// on the first `limit` code units of the string. The buffer is passed from
// its beginning, so the result needs no offset adjustment.
1663 inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const {
1664  int32_t sLength=s.length();
 // Pin out-of-range limit values to the nearest valid string index.
1665  if(limit<0) {
1666  limit=0;
1667  } else if(limit>sLength) {
1668  limit=sLength;
1669  }
1670  return spanBack(s.getBuffer(), limit, spanCondition);
1671 }
1672 
1674 
1675 #endif
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const
Returns the length of the initial substring of the input string which consists only of characters and...
virtual UBool matchesIndexValue(uint8_t v) const =0
Returns TRUE if this matcher will match a character c, where c & 0xFF == v, at offset, in the forward direction (with limit > offset).
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:272
UMatchDegree
Constants returned by UnicodeMatcher::matches() indicating the degree of match.
Definition: unimatch.h:30
UBool containsSome(UChar32 start, UChar32 end) const
Returns true if this set contains one or more of the characters in the given range.
Definition: uniset.h:1621
C++ API: Unicode String.
An interface that defines both lookup protocol and parsing of symbolic names.
Definition: symtable.h:54
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:71
virtual UBool operator==(const UnicodeSet &o) const
Compares the specified object with this set for equality.
virtual UMatchDegree matches(const Replaceable &text, int32_t &offset, int32_t limit, UBool incremental)
Implement UnicodeMatcher API.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
C API: Unicode Set.
virtual UnicodeString & toPattern(UnicodeString &result, UBool escapeUnprintable=FALSE) const =0
Returns a string representation of this matcher.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:131
virtual UClassID getDynamicClassID(void) const =0
Returns a unique class ID polymorphically.
virtual void addMatchSetTo(UnicodeSet &toUnionTo) const =0
Union the set of all characters that may be matched by this object into the given set...
static UnicodeSet * fromUSet(USet *uset)
Get a UnicodeSet pointer from a USet.
Definition: uniset.h:1637
ParsePosition is a simple class used by Format and its subclasses to keep track of the current positi...
Definition: parsepos.h:47
UBool isBogus(void) const
Determine if this object contains a valid set.
Definition: uniset.h:1633
UnicodeFilter defines a protocol for selecting a subset of the full range (U+0000 to U+10FFFF) of Uni...
Definition: unifilt.h:59
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:345
#define NULL
Define NULL if necessary, to 0 for C++ and to ((void *)0) for C.
Definition: utypes.h:299
int32_t length(void) const
Return the length of the UnicodeString object.
Definition: unistr.h:3511
USet * toUSet()
Produce a USet * pointer for this UnicodeSet.
Definition: uniset.h:1645
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:188
#define U_EXPORT2
Definition: platform.h:314
int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const
Returns the start of the trailing substring of the input string which consists only of characters and...
UBool isFrozen() const
Determines whether the set has been frozen (made immutable) or not.
Definition: uniset.h:1617
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition: uset.h:156
UBool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition: stringpiece.h:218
UnicodeFunctor is an abstract base class for objects that perform match and/or replace operations on ...
Definition: unifunct.h:33
uint16_t UChar
Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:325
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:132
struct USet USet
Definition: ucnv.h:67
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:174
UnicodeSetIterator iterates over the contents of a UnicodeSet.
Definition: usetiter.h:61
void * UClassID
UClassID is used to identify classes without using RTTI, since RTTI is not yet supported by all C++ c...
Definition: utypes.h:385
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:639
UBool containsNone(UChar32 start, UChar32 end) const
Returns true if this set contains none of the characters of the given range.
UChar * getBuffer(int32_t minCapacity)
Get a read/write pointer to the internal buffer.
#define FALSE
The FALSE value of a UBool.
Definition: umachine.h:236
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside...
Definition: utypes.h:520
UBool operator!=(const UnicodeSet &o) const
Compares the specified object with this set for inequality; returns true if the specified set is not equal to this set.
Definition: uniset.h:1613
C++ API: Unicode Filter.
virtual UnicodeFunctor * clone() const =0
Return a copy of this object.
int8_t UBool
The ICU boolean type.
Definition: umachine.h:228
virtual UBool contains(UChar32 c) const =0
Returns true for characters that are in the selected subset.