ICU 4.8.1.1  4.8.1.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
alphaindex.h
Go to the documentation of this file.
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2011 International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 */
9 
10 #ifndef INDEXCHARS_H
11 #define INDEXCHARS_H
12 
13 #include "unicode/utypes.h"
14 #include "unicode/uobject.h"
15 #include "unicode/locid.h"
16 
24 
38 
45 
55 
63 
64 
65 struct UHashtable;
67 
69 
70 // Forward Declarations
71 
72 class Collator;
73 class RuleBasedCollator;
74 class StringEnumeration;
75 class UnicodeSet;
76 class UVector;
77 
78 
79 
164 
165  public:
166 
179  AlphabeticIndex(const Locale &locale, UErrorCode &status);
180 
181 
182 
193  virtual AlphabeticIndex &addLabels(const UnicodeSet &additions, UErrorCode &status);
194 
208  virtual AlphabeticIndex &addLabels(const Locale &locale, UErrorCode &status);
209 
214  virtual ~AlphabeticIndex();
215 
216 
229  virtual const RuleBasedCollator &getCollator() const;
230 
231 
240  virtual const UnicodeString &getInflowLabel() const;
241 
253  virtual AlphabeticIndex &setInflowLabel(const UnicodeString &inflowLabel, UErrorCode &status);
254 
255 
256 
264  virtual const UnicodeString &getOverflowLabel() const;
265 
266 
276  virtual AlphabeticIndex &setOverflowLabel(const UnicodeString &overflowLabel, UErrorCode &status);
277 
285  virtual const UnicodeString &getUnderflowLabel() const;
286 
296  virtual AlphabeticIndex &setUnderflowLabel(const UnicodeString &underflowLabel, UErrorCode &status);
297 
298 
306  virtual int32_t getMaxLabelCount() const;
307 
320  virtual AlphabeticIndex &setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status);
321 
322 
335  virtual const UnicodeString &getOverflowComparisonString(const UnicodeString &lowerLimit,
336  UErrorCode &status);
337 
338 
355  virtual AlphabeticIndex &addRecord(const UnicodeString &name, const void *data, UErrorCode &status);
356 
365  virtual AlphabeticIndex &clearRecords(UErrorCode &status);
366 
367 
376  virtual int32_t getBucketCount(UErrorCode &status);
377 
378 
387  virtual int32_t getRecordCount(UErrorCode &status);
388 
389 
390 
403  virtual int32_t getBucketIndex(const UnicodeString &itemName, UErrorCode &status);
404 
405 
412  virtual int32_t getBucketIndex() const;
413 
414 
426  virtual UBool nextBucket(UErrorCode &status);
427 
436  virtual const UnicodeString &getBucketLabel() const;
437 
445  virtual UAlphabeticIndexLabelType getBucketLabelType() const;
446 
455  virtual int32_t getBucketRecordCount() const;
456 
457 
466  virtual AlphabeticIndex &resetBucketIterator(UErrorCode &status);
467 
479  virtual UBool nextRecord(UErrorCode &status);
480 
489  virtual const UnicodeString &getRecordName() const;
490 
491 
500  virtual const void *getRecordData() const;
501 
502 
509  virtual AlphabeticIndex &resetRecordIterator();
510 
511 private:
512  // No ICU "poor man's RTTI" for this class nor its subclasses.
513  virtual UClassID getDynamicClassID() const;
514 
519  AlphabeticIndex(const AlphabeticIndex &other);
520 
524  AlphabeticIndex &operator =(const AlphabeticIndex & /*other*/) { return *this;}; // Declared in the private section; copies nothing and just returns *this.
525 
530  virtual UBool operator==(const AlphabeticIndex& other) const;
531 
536  virtual UBool operator!=(const AlphabeticIndex& other) const;
537 
538  // Common initialization, for use from all constructors.
539  void init(UErrorCode &status);
540 
541  // Initialize & destruct static constants used by this class.
542  static void staticInit(UErrorCode &status);
543 
544  // Pinyin stuff. If the input name is Chinese, add the Pinyin prefix to the dest string.
545  void hackName(UnicodeString &dest, const UnicodeString &name, const Collator *coll);
546  void initPinyinBounds(const Collator *coll, UErrorCode &status);
547 
548  public:
554  static void staticCleanup();
555  private:
556 
557  // Add index characters from the specified locale to the dest set.
558  // Does not remove any previous contents from dest.
559  static void getIndexExemplars(UnicodeSet &dest, const Locale &locale, UErrorCode &status);
560 
561  UVector *firstStringsInScript(UErrorCode &status);
562 
563  static UnicodeString separated(const UnicodeString &item);
564 
565  static UnicodeSet *getScriptSet(UnicodeSet &dest, const UnicodeString &codePoint, UErrorCode &status);
566 
567  void buildIndex(UErrorCode &status);
568  void buildBucketList(UErrorCode &status);
569  void bucketRecords(UErrorCode &status);
570 
571 
572  public:
573 
574  // The following internal items are declared public only to allow access from
575  // implementation code written in plain C. They are not intended for
576  // public use.
577 
582  struct Record: public UMemory { // A record, or item, in the index.
583  AlphabeticIndex *alphaIndex_; // NOTE(review): presumably the index this record belongs to -- confirm.
584  const UnicodeString name_; // Declared const, so it cannot be changed after construction.
585  UnicodeString sortingName_; // Usually the same as name_; different for Pinyin.
586  const void *data_; // Opaque pointer-to-const payload. NOTE(review): presumably the data passed to addRecord() -- confirm.
587  int32_t serialNumber_; // Defines sorting order for names that compare equal.
588  Record(AlphabeticIndex *alphaIndex, const UnicodeString &name, const void *data);
589  ~Record();
590  };
591 
597  UVector *inputRecords_;
598 
604  struct Bucket: public UMemory { // Holds an index label and references to everything belonging to that label.
605  UnicodeString label_;
606  UnicodeString lowerBoundary_;
607  UAlphabeticIndexLabelType labelType_; // One of the UAlphabeticIndexLabelType constants (normal, underflow, inflow or overflow label).
608  UVector *records_; // Records are owned by inputRecords_ vector.
609 
610  Bucket(const UnicodeString &label, // Parameter strings are copied.
611  const UnicodeString &lowerBoundary,
612  UAlphabeticIndexLabelType type, UErrorCode &status);
613  ~Bucket();
614  };
615 
616  public:
617 
622  enum ELangType {
628  kTraditional
629  };
630 
635  static ELangType langTypeFromLocale(const Locale &loc);
636 
637 
638  private:
639 
640  // Holds the contents of this index, buckets of user items.
641  // UVector elements are of type (Bucket *)
642  UVector *bucketList_;
643 
644  int32_t labelsIterIndex_; // Index of next item to return.
645  int32_t itemsIterIndex_;
646  Bucket *currentBucket_; // While an iteration of the index is underway,
647  // points to the bucket for the current label.
648  // NULL when no iteration is underway.
649 
650  UBool indexBuildRequired_; // Caller has made changes to the index that
651  // require rebuilding & bucketing before the
652  // contents can be iterated.
653 
654  int32_t maxLabelCount_; // Limit on # of labels permitted in the index.
655 
656  UHashtable *alreadyIn_; // Key=UnicodeString, value=UnicodeSet
657 
658  UnicodeSet *initialLabels_; // Initial (unprocessed) set of Labels. Union
659  // of those explicitly set by the user plus
660  // those from locales. Raw values, before
661  // crunching into bucket labels.
662 
663  UVector *labels_; // List of Labels, after processing, sorting.
664  // Contents are (UnicodeString *)
665 
666  UnicodeSet *noDistinctSorting_; // As the set of labels is built, strings may
667  // be discarded from the exemplars. This contains
668  // some of the discards, and is
669  // intended for debugging.
670 
671  UnicodeSet *notAlphabetic_; // As the set of labels is built, strings may
672  // be discarded from the exemplars. This contains
673  // some of the discards, and is
674  // intended for debugging.
675 
676 
677  UVector *firstScriptCharacters_; // The first character from each script,
678  // in collation order.
679 
680  Locale locale_;
681  Collator *collator_;
682  Collator *collatorPrimaryOnly_;
683 
684  UnicodeString inflowLabel_;
685  UnicodeString overflowLabel_;
686  UnicodeString underflowLabel_;
687  UnicodeString overflowComparisonString_;
688 
689  ELangType langType_; // The language type: Simplified Chinese, Traditional Chinese,
690  // or not Chinese (Normal). Part of the Pinyin support.
691 
692  typedef const UChar PinyinLookup[24][3];
693  static PinyinLookup HACK_PINYIN_LOOKUP_SHORT;
694  static PinyinLookup HACK_PINYIN_LOOKUP_LONG;
695 
696  // These will be lazily set to the short or long tables based on which
697  // Chinese collation has been configured into the ICU library.
698  static PinyinLookup *HACK_PINYIN_LOOKUP;
699  static const UChar *PINYIN_LOWER_BOUNDS;
700 
701 
702 
703  int32_t recordCounter_; // Counts Records created. For minting record serial numbers.
704 
705 // Constants. Lazily initialized the first time an AlphabeticIndex object is created.
706 
707  static UnicodeSet *ALPHABETIC;
708  static UnicodeSet *CORE_LATIN;
709  static UnicodeSet *ETHIOPIC;
710  static UnicodeSet *HANGUL;
711  static UnicodeSet *IGNORE_SCRIPTS;
712  static UnicodeSet *TO_TRY;
713  static UnicodeSet *UNIHAN;
714  static const UnicodeString *EMPTY_STRING;
715 
716 };
717 
719 #endif
720 
The RuleBasedCollator class provides the simple implementation of Collator, using data-driven tables...
Definition: tblcoll.h:111
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:272
UVector * inputRecords_
Holds all user records before they are distributed into buckets.
Definition: alphaindex.h:597
A Bucket holds an index label and references to everything belonging to that label.
Definition: alphaindex.h:604
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
Inflow Label.
Definition: alphaindex.h:54
Normal Label, typically the starting letter of the names in the bucket with this label.
Definition: alphaindex.h:37
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:110
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside...
Definition: utypes.h:521
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:131
The Collator class performs locale-sensitive string comparison.
Definition: coll.h:177
UMemory is the common ICU base class.
Definition: uobject.h:101
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:188
UBool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition: stringpiece.h:218
C++ API: Common ICU base class UObject.
uint16_t UChar
Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:325
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:111
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:132
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:215
void * UClassID
UClassID is used to identify classes without using RTTI, since RTTI is not yet supported by all C++ c...
Definition: utypes.h:385
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:639
Base class for 'pure' C++ implementations of uenum api.
Definition: strenum.h:55
Overflow Label.
Definition: alphaindex.h:61
C++ API: Locale ID object.
Basic definitions for ICU, for both C and C++ APIs.
ELangType
Language Types.
Definition: alphaindex.h:622
class AlphabeticIndex supports the creation of a UI index appropriate for a given language...
Definition: alphaindex.h:163
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:181
Underflow Label.
Definition: alphaindex.h:44
virtual UClassID getDynamicClassID() const =0
ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
A record, or item, in the index.
Definition: alphaindex.h:582
int8_t UBool
The ICU boolean type.
Definition: umachine.h:228
UAlphabeticIndexLabelType
Constants for Alphabetic Index Label Types.
Definition: alphaindex.h:31