ICU 4.8.1.1  4.8.1.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
normalizer2.h
Go to the documentation of this file.
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2009-2011, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: normalizer2.h
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2009nov22
14 * created by: Markus W. Scherer
15 */
16 
17 #ifndef __NORMALIZER2_H__
18 #define __NORMALIZER2_H__
19 
25 #include "unicode/utypes.h"
26 
27 #if !UCONFIG_NO_NORMALIZATION
28 
29 #include "unicode/uniset.h"
30 #include "unicode/unistr.h"
31 #include "unicode/unorm2.h"
32 
33 U_NAMESPACE_BEGIN
34 
// Unicode normalization functionality for standard Unicode normalization
// or for using custom mapping tables.
// (Listing lines 78, 117 and 225 were lost in extraction and are restored here.)
78 class U_COMMON_API Normalizer2 : public UObject {
79 public:
    // Factory lookup keyed by data package name, data name and normalization mode.
    // NOTE(review): ownership of the returned pointer and the error codes that can be
    // set are not visible in this listing -- confirm against the ICU API docs.
101  static const Normalizer2 *
102  getInstance(const char *packageName,
103  const char *name,
104  UNormalization2Mode mode,
105  UErrorCode &errorCode);
106 
    // Returns the normalized form of the source string.
    // Convenience overload: forwards to the three-argument normalize() with a
    // local result string and returns that string by value.
117  UnicodeString
118  normalize(const UnicodeString &src, UErrorCode &errorCode) const {
119  UnicodeString result;
120  normalize(src, result, errorCode);
121  return result;
122  }
    // Normalizes src, taking the destination string as an explicit argument.
    // NOTE(review): the returned reference is presumably dest -- confirm.
136  virtual UnicodeString &
137  normalize(const UnicodeString &src,
138  UnicodeString &dest,
139  UErrorCode &errorCode) const = 0;
    // Appends the normalized form of the second string to the first string
    // (merging them at the boundary).
154  virtual UnicodeString &
155  normalizeSecondAndAppend(UnicodeString &first,
156  const UnicodeString &second,
157  UErrorCode &errorCode) const = 0;
    // Appends the second string to the first string (merging them at the boundary).
172  virtual UnicodeString &
173  append(UnicodeString &first,
174  const UnicodeString &second,
175  UErrorCode &errorCode) const = 0;
176 
    // Gets the decomposition mapping of c.
190  virtual UBool
191  getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
192 
    // Tests if the string is normalized.
207  virtual UBool
208  isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
209 
    // Tests if the string is normalized; the answer is a UNormalizationCheckResult
    // (a quick-check result value) rather than a plain UBool.
225  virtual UNormalizationCheckResult
226  quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
227 
    // Returns the end of the normalized substring of the input string.
250  virtual int32_t
251  spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
252 
    // Tests if the character always has a normalization boundary before it,
    // regardless of context.
266  virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
267 
    // Tests if the character always has a normalization boundary after it,
    // regardless of context.
282  virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
283 
    // Tests if the character is normalization-inert.
297  virtual UBool isInert(UChar32 c) const = 0;
298 
299 private:
300  // No ICU "poor man's RTTI" for this class nor its subclasses.
301  virtual UClassID getDynamicClassID() const;
302 };
303 
// Normalization filtered by a UnicodeSet.
// (Listing lines 315, 362, 421 and 473 were lost in extraction and are restored here.)
315 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
316 public:
    // Constructs a filtered normalizer wrapping any Normalizer2 instance and a filter set.
    // Both arguments are stored as references (members norm2 and set below), not copied,
    // so the caller must keep them alive for as long as this object is used.
327  FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
328  norm2(n2), set(filterSet) {}
329 
    // Normalizes src, taking the destination string as an explicit argument.
    // NOTE(review): how the filter set restricts the result lives in the
    // implementation file, which is not shown here.
343  virtual UnicodeString &
344  normalize(const UnicodeString &src,
345  UnicodeString &dest,
346  UErrorCode &errorCode) const;
    // Appends the normalized form of the second string to the first string
    // (merging them at the boundary).
361  virtual UnicodeString &
362  normalizeSecondAndAppend(UnicodeString &first,
363  const UnicodeString &second,
364  UErrorCode &errorCode) const;
    // Appends the second string to the first string (merging them at the boundary).
379  virtual UnicodeString &
380  append(UnicodeString &first,
381  const UnicodeString &second,
382  UErrorCode &errorCode) const;
383 
    // Gets the decomposition mapping of c.
394  virtual UBool
395  getDecomposition(UChar32 c, UnicodeString &decomposition) const;
396 
    // Tests if the string is normalized.
408  virtual UBool
409  isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
    // Tests if the string is normalized; the answer is a UNormalizationCheckResult.
421  virtual UNormalizationCheckResult
422  quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
    // Returns the end of the normalized substring of the input string.
434  virtual int32_t
435  spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
436 
    // Tests if the character always has a normalization boundary before it,
    // regardless of context.
445  virtual UBool hasBoundaryBefore(UChar32 c) const;
446 
    // Tests if the character always has a normalization boundary after it,
    // regardless of context.
455  virtual UBool hasBoundaryAfter(UChar32 c) const;
456 
    // Tests if the character is normalization-inert.
464  virtual UBool isInert(UChar32 c) const;
465 private:
    // Private normalize() overload that additionally takes a USetSpanCondition.
    // NOTE(review): presumably the shared worker behind the public normalize() -- confirm
    // in the implementation file.
466  UnicodeString &
467  normalize(const UnicodeString &src,
468  UnicodeString &dest,
469  USetSpanCondition spanCondition,
470  UErrorCode &errorCode) const;
471 
    // Private overload with a doNormalize flag.
    // NOTE(review): presumably backs both the public normalizeSecondAndAppend()
    // (doNormalize true) and append() (doNormalize false) -- confirm.
472  UnicodeString &
473  normalizeSecondAndAppend(UnicodeString &first,
474  const UnicodeString &second,
475  UBool doNormalize,
476  UErrorCode &errorCode) const;
477 
    // Non-owning references bound by the constructor.
478  const Normalizer2 &norm2;
479  const UnicodeSet &set;
480 };
481 
482 U_NAMESPACE_END
483 
484 #endif // !UCONFIG_NO_NORMALIZATION
485 #endif // __NORMALIZER2_H__
FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet)
Constructs a filtered normalizer wrapping any Normalizer2 instance and a filter set.
Definition: normalizer2.h:327
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:272
C++ API: Unicode String.
virtual UBool hasBoundaryAfter(UChar32 c) const =0
Tests if the character always has a normalization boundary after it, regardless of context...
virtual UNormalizationCheckResult quickCheck(const UnicodeString &s, UErrorCode &errorCode) const =0
Tests if the string is normalized.
virtual UBool isNormalized(const UnicodeString &s, UErrorCode &errorCode) const =0
Tests if the string is normalized.
Normalization filtered by a UnicodeSet.
Definition: normalizer2.h:315
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:131
virtual UBool getDecomposition(UChar32 c, UnicodeString &decomposition) const =0
Gets the decomposition mapping of c.
C API: New API for Unicode Normalization.
virtual UBool hasBoundaryBefore(UChar32 c) const =0
Tests if the character always has a normalization boundary before it, regardless of context...
UnicodeString normalize(const UnicodeString &src, UErrorCode &errorCode) const
Returns the normalized form of the source string.
Definition: normalizer2.h:118
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:345
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:188
virtual UnicodeString & append(UnicodeString &first, const UnicodeString &second, UErrorCode &errorCode) const =0
Appends the second string to the first string (merging them at the boundary) and returns the first string.
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition: uset.h:156
virtual UnicodeString & normalizeSecondAndAppend(UnicodeString &first, const UnicodeString &second, UErrorCode &errorCode) const =0
Appends the normalized form of the second string to the first string (merging them at the boundary) and returns the first string.
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:132
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:215
UNormalization2Mode
Constants for normalization modes.
Definition: unorm2.h:42
void * UClassID
UClassID is used to identify classes without using RTTI, since RTTI is not yet supported by all C++ c...
Definition: utypes.h:385
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:639
Basic definitions for ICU, for both C and C++ APIs.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside...
Definition: utypes.h:520
Unicode normalization functionality for standard Unicode normalization or for using custom mapping tables.
Definition: normalizer2.h:78
virtual UBool isInert(UChar32 c) const =0
Tests if the character is normalization-inert.
virtual UClassID getDynamicClassID() const =0
ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
UNormalizationCheckResult
Result values for normalization quick check functions.
Definition: unorm2.h:91
virtual int32_t spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const =0
Returns the end of the normalized substring of the input string.
int8_t UBool
The ICU boolean type.
Definition: umachine.h:228
C++ API: Unicode Set.