ICU 4.8.1.1  4.8.1.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
caniter.h
Go to the documentation of this file.
1 /*
2  *******************************************************************************
3  * Copyright (C) 1996-2010, International Business Machines Corporation and *
4  * others. All Rights Reserved. *
5  *******************************************************************************
6  */
7 
8 #ifndef CANITER_H
9 #define CANITER_H
10 
11 #include "unicode/utypes.h"
12 
13 #if !UCONFIG_NO_NORMALIZATION
14 
15 #include "unicode/uobject.h"
16 #include "unicode/unistr.h"
17 
// Default value for CANITER_SKIP_ZEROES. The #ifndef guard lets a build
// override it by defining the macro before this header is included.
// NOTE(review): presumably supplies the skipZeros argument of permute() -- confirm in caniter.cpp.
27 #ifndef CANITER_SKIP_ZEROES
28 #define CANITER_SKIP_ZEROES TRUE
29 #endif
30 
32 
33 class Hashtable;
34 class Normalizer2;
35 class Normalizer2Impl;
36 
73 public:
// Constructs an iterator from `source`; `status` is the ICU error code
// used in place of exceptions.
// NOTE(review): presumably iterates over the canonical equivalents of `source` -- confirm in caniter.cpp.
80  CanonicalIterator(const UnicodeString &source, UErrorCode &status);
81 
// Virtual destructor.
// NOTE(review): presumably frees the pieces/current arrays -- confirm in caniter.cpp.
86  virtual ~CanonicalIterator();
87 
// Returns a UnicodeString by value.
// NOTE(review): presumably a copy of the `source` member -- confirm in caniter.cpp.
93  UnicodeString getSource();
94 
// Takes no arguments and returns nothing.
// NOTE(review): presumably rewinds the iteration so next() starts over -- confirm in caniter.cpp.
99  void reset();
100 
// Returns the next string of the iteration by value.
// NOTE(review): how exhaustion is signalled is not visible in this header
// (the `done` member suggests a sentinel result) -- confirm in caniter.cpp.
108  UnicodeString next();
109 
// Takes a new source string; `status` is the ICU error code used in place
// of exceptions.
// NOTE(review): presumably rebuilds the pieces tables and resets iteration -- confirm in caniter.cpp.
117  void setSource(const UnicodeString &newSource, UErrorCode &status);
118 
// Static helper; usable without a CanonicalIterator instance.
// `source` is taken by non-const reference, so the call may modify it.
// NOTE(review): presumably adds the permutations of `source` to `result`,
// with `skipZeros` controlling a filtering step -- confirm in caniter.cpp.
128  static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);
129 
// Returns a UClassID, ICU's mechanism for identifying classes without RTTI.
// NOTE(review): presumably the ID for CanonicalIterator itself -- confirm in caniter.cpp.
135  static UClassID U_EXPORT2 getStaticClassID();
136 
// ICU4C "poor man's RTTI": returns a UClassID for the actual class of the object.
// NOTE(review): presumably overrides the pure-virtual UObject::getDynamicClassID();
// the base-class list is not visible in this excerpt.
142  virtual UClassID getDynamicClassID() const;
143 
144 private:
145  // ===================== PRIVATES ==============================
146  // private default constructor
148 
149 
// Copy constructor, declared in the private section so outside code cannot copy-construct.
// NOTE(review): presumably declared but never defined -- confirm in caniter.cpp.
154  CanonicalIterator(const CanonicalIterator& other);
155 
// Copy assignment, declared in the private section so outside code cannot assign.
// NOTE(review): presumably declared but never defined -- confirm in caniter.cpp.
160  CanonicalIterator& operator=(const CanonicalIterator& other);
161 
162  // fields
163  UnicodeString source;
// NOTE(review): presumably set once the iteration is exhausted -- confirm in caniter.cpp.
164  UBool done;
165 
166  // 2 dimensional array holds the pieces of the string with
167  // their different canonically equivalent representations
168  UnicodeString **pieces;
// NOTE(review): presumably pieces_length is the number of rows in `pieces` and
// pieces_lengths[i] is the number of entries in row i -- confirm in caniter.cpp.
169  int32_t pieces_length;
170  int32_t *pieces_lengths;
171 
172  // current is used in iterating to combine pieces
173  int32_t *current;
174  int32_t current_length;
175 
176  // transient fields
177  UnicodeString buffer;
178 
// Reference members: they must be bound in the constructor's initializer list
// and cannot be reseated afterwards.
179  const Normalizer2 &nfd;
180  const Normalizer2Impl &nfcImpl;
181 
182  // we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
// `result_len` is an output parameter passed by reference.
// NOTE(review): presumably receives the element count of the returned array;
// ownership of the returned array is not visible here -- confirm in caniter.cpp.
183  UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment)
184 
185  //Set getEquivalents2(String segment);
// Takes the segment as a UChar buffer plus length; the commented-out
// declarations around it are earlier signatures kept for reference.
// NOTE(review): presumably fills `fillinResult` and returns it -- confirm in caniter.cpp.
186  Hashtable *getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status);
187  //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status);
188 
194  //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer);
// Takes a code point `comp`, a UChar segment with its length, and a position
// within that segment; the commented-out declarations are earlier signatures.
// NOTE(review): presumably fills `fillinResult` and returns it (or NULL on
// failure) -- confirm in caniter.cpp.
195  Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
196  //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
197 
// NOTE(review): presumably releases the pieces, pieces_lengths and current
// arrays -- confirm in caniter.cpp.
198  void cleanPieces();
199 
200 };
201 
203 
204 #endif /* #if !UCONFIG_NO_NORMALIZATION */
205 
206 #endif
C++ API: Unicode String.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:131
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:345
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer classes.
Definition: unistr.h:188
#define U_EXPORT2
Definition: platform.h:314
C++ API: Common ICU base class UObject.
uint16_t UChar
Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:325
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:132
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:215
This class allows one to iterate through all the strings that are canonically equivalent to a given string.
Definition: caniter.h:72
void * UClassID
UClassID is used to identify classes without using RTTI, since RTTI is not yet supported by all C++ compilers.
Definition: utypes.h:385
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers, and to use the same mechanism for C and C++.
Definition: utypes.h:639
Basic definitions for ICU, for both C and C++ APIs.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:520
Unicode normalization functionality for standard Unicode normalization or for using custom mapping tables.
Definition: normalizer2.h:78
virtual UClassID getDynamicClassID() const =0
ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
int8_t UBool
The ICU boolean type.
Definition: umachine.h:228