15 #ifndef __UCHARSTRIE_H__
16 #define __UCHARSTRIE_H__
65 : ownedArray_(
NULL), uchars_(trieUChars),
66 pos_(uchars_), remainingMatchLength_(-1) {}
81 : ownedArray_(
NULL), uchars_(other.uchars_),
82 pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
91 remainingMatchLength_=-1;
112 int32_t remainingMatchLength;
123 state.uchars=uchars_;
125 state.remainingMatchLength=remainingMatchLength_;
140 if(uchars_==state.uchars && uchars_!=
NULL) {
142 remainingMatchLength_=state.remainingMatchLength;
163 remainingMatchLength_=-1;
164 return nextImpl(uchars_, uchar);
233 const UChar *pos=pos_;
234 int32_t leadUnit=*pos++;
236 return leadUnit&kValueIsFinal ?
237 readValue(pos, leadUnit&0x7fff) : readNodeValue(pos, leadUnit);
250 const UChar *pos=pos_;
252 return pos!=
NULL && findUniqueValue(pos+remainingMatchLength_+1,
FALSE, uniqueValue);
313 UBool hasNext()
const;
343 UBool truncateAndStop() {
351 const UChar *uchars_;
353 const UChar *initialPos_;
354 int32_t remainingMatchLength_;
355 int32_t initialRemainingMatchLength_;
382 : ownedArray_(adoptUChars), uchars_(trieUChars),
383 pos_(uchars_), remainingMatchLength_(-1) {}
394 static inline int32_t readValue(
const UChar *pos, int32_t leadUnit) {
396 if(leadUnit<kMinTwoUnitValueLead) {
398 }
else if(leadUnit<kThreeUnitValueLead) {
399 value=((leadUnit-kMinTwoUnitValueLead)<<16)|*pos;
401 value=(pos[0]<<16)|pos[1];
405 static inline const UChar *skipValue(
const UChar *pos, int32_t leadUnit) {
406 if(leadUnit>=kMinTwoUnitValueLead) {
407 if(leadUnit<kThreeUnitValueLead) {
415 static inline const UChar *skipValue(
const UChar *pos) {
416 int32_t leadUnit=*pos++;
417 return skipValue(pos, leadUnit&0x7fff);
420 static inline int32_t readNodeValue(
const UChar *pos, int32_t leadUnit) {
423 if(leadUnit<kMinTwoUnitNodeValueLead) {
424 value=(leadUnit>>6)-1;
425 }
else if(leadUnit<kThreeUnitNodeValueLead) {
426 value=(((leadUnit&0x7fc0)-kMinTwoUnitNodeValueLead)<<10)|*pos;
428 value=(pos[0]<<16)|pos[1];
432 static inline const UChar *skipNodeValue(
const UChar *pos, int32_t leadUnit) {
434 if(leadUnit>=kMinTwoUnitNodeValueLead) {
435 if(leadUnit<kThreeUnitNodeValueLead) {
444 static inline const UChar *jumpByDelta(
const UChar *pos) {
445 int32_t delta=*pos++;
446 if(delta>=kMinTwoUnitDeltaLead) {
447 if(delta==kThreeUnitDeltaLead) {
448 delta=(pos[0]<<16)|pos[1];
451 delta=((delta-kMinTwoUnitDeltaLead)<<16)|*pos++;
457 static const UChar *skipDelta(
const UChar *pos) {
458 int32_t delta=*pos++;
459 if(delta>=kMinTwoUnitDeltaLead) {
460 if(delta==kThreeUnitDeltaLead) {
482 static const UChar *findUniqueValueFromBranch(
const UChar *pos, int32_t length,
483 UBool haveUniqueValue, int32_t &uniqueValue);
486 static UBool findUniqueValue(
const UChar *pos,
UBool haveUniqueValue, int32_t &uniqueValue);
490 static void getNextBranchUChars(
const UChar *pos, int32_t length,
Appendable &out);
538 static const int32_t kMinLinearMatch=0x30;
539 static const int32_t kMaxLinearMatchLength=0x10;
544 static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;
545 static const int32_t kNodeTypeMask=kMinValueLead-1;
548 static const int32_t kValueIsFinal=0x8000;
551 static const int32_t kMaxOneUnitValue=0x3fff;
553 static const int32_t kMinTwoUnitValueLead=kMaxOneUnitValue+1;
554 static const int32_t kThreeUnitValueLead=0x7fff;
556 static const int32_t kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1;
559 static const int32_t kMaxOneUnitNodeValue=0xff;
560 static const int32_t kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6);
561 static const int32_t kThreeUnitNodeValueLead=0x7fc0;
563 static const int32_t kMaxTwoUnitNodeValue=
564 ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1;
567 static const int32_t kMaxOneUnitDelta=0xfbff;
568 static const int32_t kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1;
569 static const int32_t kThreeUnitDeltaLead=0xffff;
571 static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1;
576 const UChar *uchars_;
583 int32_t remainingMatchLength_;
588 #endif // __UCHARSTRIE_H__
#define U16_TRAIL(supplementary)
Get the trail surrogate (0xdc00..0xdfff) for a supplementary code point (0x10000..0x10ffff).
Base class for objects to which Unicode characters and strings can be appended.
int32_t getValue() const
Returns a matching string's value if called immediately after current()/first()/next() returned USTRI...
int32_t getNextUChars(Appendable &out) const
Finds each UChar which continues the string from the current state.
UStringTrieResult next(int32_t uchar)
Traverses the trie from the current state for this input UChar.
Light-weight, non-const reader class for a UCharsTrie.
UCharsTrie & resetToState(const State &state)
Resets this trie to the saved state.
UBool hasUniqueValue(int32_t &uniqueValue) const
Determines whether all strings reachable from the current state map to the same value.
UStringTrieResult first(int32_t uchar)
Traverses the trie from the initial state for this input UChar.
static const int32_t kMaxBranchLinearSubNodeLength
const UnicodeString & getString() const
UStringTrieResult
Return values for BytesTrie::next(), UCharsTrie::next() and similar methods.
State()
Constructs an empty State.
The input unit(s) did not continue a matching string.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
UCharsTrie(const UChar *trieUChars)
Constructs a UCharsTrie reader instance.
UCharsTrie state object, for saving a trie's current state and resetting the trie back to this state ...
UStringTrieResult current() const
Determines whether the string so far matches, whether it has a value, and whether another input UChar...
#define USTRINGTRIE_HAS_NEXT(result)
Equivalent to (result==USTRINGTRIE_NO_VALUE || result==USTRINGTRIE_INTERMEDIATE_VALUE) but this macro...
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
#define NULL
Define NULL if necessary, to 0 for C++ and to ((void *)0) for C.
#define TRUE
The TRUE value of a UBool.
UMemory is the common ICU base class.
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
#define U16_LEAD(supplementary)
Get the lead surrogate (0xd800..0xdbff) for a supplementary code point (0x10000..0x10ffff).
C++ API: Common ICU base class UObject.
UStringTrieResult nextForCodePoint(UChar32 cp)
Traverses the trie from the current state for the one or two UTF-16 code units for this input code po...
uint16_t UChar
Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
const UCharsTrie & saveState(State &state) const
Saves the state of this trie.
Builder class for UCharsTrie.
Iterator for all of the (string, value) pairs in a UCharsTrie.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
C API: Helper definitions for dictionary trie APIs.
Basic definitions for ICU, for both C and C++ APIs.
UCharsTrie & reset()
Resets this trie to its initial state.
#define FALSE
The FALSE value of a UBool.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside...
UStringTrieResult firstForCodePoint(UChar32 cp)
Traverses the trie from the initial state for the one or two UTF-16 code units for this input code po...
The input unit(s) continued a matching string and there is a value for the string so far...
int8_t UBool
The ICU boolean type.
UCharsTrie(const UCharsTrie &other)
Copy constructor, copies the other trie reader object and its state, but not the UChar array which wi...