00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef __CS_CSUCTRANSFORM_H__
00020 #define __CS_CSUCTRANSFORM_H__
00021
00022 #include "csunicode.h"
00023
/// Largest number of utf8_char units EncodeUTF8() emits for one code point.
#define CS_UC_MAX_UTF8_ENCODED      4

/// Largest number of utf16_char units EncodeUTF16() emits for one code point.
#define CS_UC_MAX_UTF16_ENCODED     2

/// Largest number of utf32_char units EncodeUTF32() emits for one code point.
#define CS_UC_MAX_UTF32_ENCODED     1

/*
 * Largest number of wchar_t units needed for one code point. The limit is
 * taken from the encoding whose unit width equals CS_WCHAR_T_SIZE; any
 * width other than 1 or 2 uses the UTF-32 limit.
 */
#if CS_WCHAR_T_SIZE == 1
#  define CS_UC_MAX_WCHAR_T_ENCODED CS_UC_MAX_UTF8_ENCODED
#elif CS_WCHAR_T_SIZE == 2
#  define CS_UC_MAX_WCHAR_T_ENCODED CS_UC_MAX_UTF16_ENCODED
#else
#  define CS_UC_MAX_WCHAR_T_ENCODED CS_UC_MAX_UTF32_ENCODED
#endif

/*
 * NOTE(review): presumably the largest number of code points a single
 * MapToUpper()/MapToLower()/MapToFold() call can produce -- confirm against
 * the mapping implementation, which is not part of this header.
 */
#define CS_UC_MAX_MAPPED            3
00062
/**
 * Flag bits for the \c flags argument of the csUnicodeTransform mapping
 * functions.
 */
enum
{
  /// Bit 0. The single-character MapTo*() convenience overloads pass this.
  csUcMapSimple = 0x1
};
00075
00079 class CS_CRYSTALSPACE_EXPORT csUnicodeTransform
00080 {
00081 public:
/*
 * Helper macros shared by the decoders that follow (they are #undef'd after
 * the last decoder). They rely on names of the enclosing function:
 * 'str', 'strlen', 'ch', 'isValid' and, except for FAIL, 'chUsed'.
 *
 * Each macro is wrapped in do { } while (0) so that it behaves as exactly
 * one statement, also when used as the unbraced body of an 'if'.
 */

/// Report failure: clear *isValid, yield the replacement char, return \a ret.
#define FAIL(ret)                                   \
  do                                                \
  {                                                 \
    if (isValid) *isValid = false;                  \
    ch = CS_UC_CHAR_REPLACER;                       \
    return ret;                                     \
  } while (0)

/// Report success: set *isValid and return the number of units consumed.
#define SUCCEED                                     \
  do                                                \
  {                                                 \
    if (isValid) *isValid = true;                   \
    return chUsed;                                  \
  } while (0)

/*
 * Fetch the next code unit into 'next'. Fails (without counting the unit)
 * when the buffer is exhausted or the unit fetched is a 0 terminator.
 */
#define GET_NEXT(next)                              \
  do                                                \
  {                                                 \
    if ((size_t)chUsed == strlen)                   \
    {                                               \
      FAIL(chUsed);                                 \
    }                                               \
    next = *str++;                                  \
    if (next == 0)                                  \
    {                                               \
      FAIL(chUsed);                                 \
    }                                               \
    chUsed++;                                       \
  } while (0)
00104
00123 inline static int UTF8Decode (const utf8_char* str, size_t strlen,
00124 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00125 {
00126 if (str == 0)
00127 {
00128 FAIL(0);
00129 }
00130 int chUsed = 0;
00131
00132 utf8_char curCh;
00133 GET_NEXT(curCh);
00134 if ((curCh & 0x80) == 0)
00135 {
00136
00137 ch = curCh;
00138 SUCCEED;
00139 }
00140 else
00141 {
00142
00143 int n = 0;
00144 while ((n < 7) && ((curCh & (1 << (7 - n))) != 0)) { n++; }
00145
00146 if ((n < 2) || (n > 6))
00147 {
00148
00149
00150 FAIL(1);
00151 }
00152
00153 ch = (curCh & ((1 << (8 - n)) - 1));
00154
00155 for (int i = 1; i < n; i++)
00156 {
00157 GET_NEXT(curCh);
00158 if ((curCh & 0xc0) != 0x80)
00159 {
00160 FAIL(chUsed);
00161 }
00162 else
00163 {
00164 ch <<= 6;
00165 ch |= (curCh & 0x3f);
00166 }
00167 }
00168
00169
00170 if (ch > CS_UC_LAST_CHAR)
00171 {
00172 FAIL(chUsed);
00173 }
00174
00175
00176 if ((ch < 0x80) && (n > 0))
00177 {
00178 FAIL(chUsed);
00179 }
00180 else if ((ch < 0x800) && (n > 2))
00181 {
00182 FAIL(chUsed);
00183 }
00184 else if ((ch < 0x10000) && (n > 3))
00185 {
00186 FAIL(chUsed);
00187 }
00188 else if ((ch < 0x200000) && (n > 4))
00189 {
00190 FAIL(chUsed);
00191 }
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203 if (!returnNonChar && (CS_UC_IS_NONCHARACTER(ch)
00204 || CS_UC_IS_SURROGATE(ch)))
00205 FAIL(chUsed);
00206 SUCCEED;
00207 }
00208 }
00209
00214 inline static int UTF16Decode (const utf16_char* str, size_t strlen,
00215 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00216 {
00217 if (str == 0)
00218 {
00219 FAIL(0);
00220 }
00221 int chUsed = 0;
00222
00223 utf16_char curCh;
00224 GET_NEXT(curCh);
00225
00226 if (CS_UC_IS_SURROGATE (curCh))
00227 {
00228
00229 if (!CS_UC_IS_HIGH_SURROGATE (curCh))
00230 {
00231 FAIL(chUsed);
00232 }
00233 ch = 0x10000 + ((curCh & 0x03ff) << 10);
00234 GET_NEXT(curCh);
00235
00236 if (!CS_UC_IS_LOW_SURROGATE (curCh))
00237 {
00238
00239 FAIL(1);
00240 }
00241 ch |= (curCh & 0x3ff);
00242 }
00243 else
00244 {
00245 ch = curCh;
00246 }
00247 if (!returnNonChar && (CS_UC_IS_NONCHARACTER(ch)
00248 || CS_UC_IS_SURROGATE(ch)))
00249 FAIL(chUsed);
00250 SUCCEED;
00251 }
00252
00257 inline static int UTF32Decode (const utf32_char* str, size_t strlen,
00258 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00259 {
00260 if (str == 0)
00261 {
00262 FAIL(0);
00263 }
00264 int chUsed = 0;
00265
00266 GET_NEXT(ch);
00267 if ((!returnNonChar && (CS_UC_IS_NONCHARACTER(ch)
00268 || CS_UC_IS_SURROGATE(ch))) || (ch > CS_UC_LAST_CHAR))
00269 FAIL(chUsed);
00270 SUCCEED;
00271 }
00272
00277 inline static int Decode (const utf8_char* str, size_t strlen,
00278 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00279 {
00280 return UTF8Decode (str, strlen, ch, isValid, returnNonChar);
00281 }
00286 inline static int Decode (const utf16_char* str, size_t strlen,
00287 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00288 {
00289 return UTF16Decode (str, strlen, ch, isValid, returnNonChar);
00290 }
00295 inline static int Decode (const utf32_char* str, size_t strlen,
00296 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00297 {
00298 return UTF32Decode (str, strlen, ch, isValid, returnNonChar);
00299 }
00300
00302 #undef FAIL
00303 #undef SUCCEED
00304 #undef GET_NEXT
00305
/*
 * Emit one code unit. The unit is stored through 'buf' only while
 * 'bufRemaining' is non-zero and 'buf' is non-null; 'encodedLen' is
 * incremented in every case, so it ends up holding the number of units the
 * full encoding needs. Relies on 'bufRemaining' and 'encodedLen' of the
 * enclosing function.
 *
 * Wrapped in do { } while (0) so that it is a single statement.
 */
#define _OUTPUT_CHAR(buf, chr)                      \
  do                                                \
  {                                                 \
    if (bufRemaining > 0)                           \
    {                                               \
      if (buf) *buf++ = (chr);                      \
      bufRemaining--;                               \
    }                                               \
    encodedLen++;                                   \
  } while (0)

/// Emit one code unit into the encoder's 'buf' parameter.
#define OUTPUT_CHAR(chr) _OUTPUT_CHAR(buf, chr)
00317
00333 inline static int EncodeUTF8 (const utf32_char ch, utf8_char* buf,
00334 size_t bufsize, bool allowNonchars = false)
00335 {
00336 if ((!allowNonchars && ((CS_UC_IS_NONCHARACTER(ch))
00337 || (CS_UC_IS_SURROGATE(ch)))) || (ch > CS_UC_LAST_CHAR))
00338 return 0;
00339 size_t bufRemaining = bufsize;
00340 int encodedLen = 0;
00341
00342 if (ch < 0x80)
00343 {
00344 OUTPUT_CHAR ((utf8_char)ch);
00345 }
00346 else if (ch < 0x800)
00347 {
00348 OUTPUT_CHAR ((utf8_char)(0xc0 | (ch >> 6)));
00349 OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f)));
00350 }
00351 else if (ch < 0x10000)
00352 {
00353 OUTPUT_CHAR ((utf8_char)(0xe0 | (ch >> 12)));
00354 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f)));
00355 OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f)));
00356 }
00357 else if (ch < 0x200000)
00358 {
00359 OUTPUT_CHAR ((utf8_char)(0xf0 | (ch >> 18)));
00360 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 12) & 0x3f)));
00361 OUTPUT_CHAR ((utf8_char)(0x80 | ((ch >> 6) & 0x3f)));
00362 OUTPUT_CHAR ((utf8_char)(0x80 | (ch & 0x3f)));
00363 }
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383 return encodedLen;
00384 }
00385
00401 inline static int EncodeUTF16 (const utf32_char ch, utf16_char* buf,
00402 size_t bufsize, bool allowNonchars = false)
00403 {
00404 if ((!allowNonchars && ((CS_UC_IS_NONCHARACTER(ch))
00405 || (CS_UC_IS_SURROGATE(ch)))) || (ch > CS_UC_LAST_CHAR))
00406 return 0;
00407 size_t bufRemaining = bufsize;
00408 int encodedLen = 0;
00409
00410 if (ch < 0x10000)
00411 {
00412 OUTPUT_CHAR((utf16_char)ch);
00413 }
00414 else if (ch < 0x100000)
00415 {
00416 utf32_char ch_shifted = ch - 0x10000;
00417 OUTPUT_CHAR((utf16_char)((ch_shifted >> 10)
00418 | CS_UC_CHAR_HIGH_SURROGATE_FIRST));
00419 OUTPUT_CHAR((utf16_char)((ch_shifted & 0x3ff)
00420 | CS_UC_CHAR_LOW_SURROGATE_FIRST));
00421 }
00422 else
00423 return 0;
00424
00425 return encodedLen;
00426 }
00427
00443 inline static int EncodeUTF32 (const utf32_char ch, utf32_char* buf,
00444 size_t bufsize, bool allowNonchars = false)
00445 {
00446 if ((!allowNonchars && ((CS_UC_IS_NONCHARACTER(ch))
00447 || (CS_UC_IS_SURROGATE(ch)))) || (ch > CS_UC_LAST_CHAR))
00448 return 0;
00449 size_t bufRemaining = bufsize;
00450 int encodedLen = 0;
00451
00452 OUTPUT_CHAR(ch);
00453
00454 return encodedLen;
00455 }
00456
00461 inline static int Encode (const utf32_char ch, utf8_char* buf,
00462 size_t bufsize, bool allowNonchars = false)
00463 {
00464 return EncodeUTF8 (ch, buf, bufsize, allowNonchars);
00465 }
00470 inline static int Encode (const utf32_char ch, utf16_char* buf,
00471 size_t bufsize, bool allowNonchars = false)
00472 {
00473 return EncodeUTF16 (ch, buf, bufsize, allowNonchars);
00474 }
00479 inline static int Encode (const utf32_char ch, utf32_char* buf,
00480 size_t bufsize, bool allowNonchars = false)
00481 {
00482 return EncodeUTF32 (ch, buf, bufsize, allowNonchars);
00483 }
00485 #undef OUTPUT_CHAR
00486
00489 #define OUTPUT_CHAR(chr) _OUTPUT_CHAR(dest, chr)
00490
/*
 * Generates a static string conversion function 'funcName' that reads
 * 'fromType' units with 'decoder' and writes 'toType' units with 'encoder'.
 *
 * Generated function:
 *   size_t funcName (toType* dest, size_t destSize,
 *                    const fromType* source, size_t srcSize = (size_t)-1)
 *
 * - Returns 0 right away if 'source' is null or 'srcSize' is 0.
 * - A 'srcSize' of (size_t)-1 means the source is zero-terminated; its
 *   length is measured first.
 * - One unit of 'dest' is reserved for the terminating 0. Output that does
 *   not fit is dropped, but still counted.
 * - Code points the encoder refuses are replaced by CS_UC_CHAR_REPLACER.
 * - 'dest' may be null to only compute the required size.
 * - Returns the number of units the complete conversion needs, including
 *   the terminator.
 *
 * The terminator is only stored when destSize > 0: a zero-sized buffer has
 * no room for it, so nothing at all is written in that case.
 */
#define UCTF_CONVERTER(funcName, fromType, decoder, toType, encoder) \
  inline static size_t funcName (toType* dest, size_t destSize, \
    const fromType* source, size_t srcSize = (size_t)-1) \
  { \
    if ((srcSize == 0) || (source == 0)) \
      return 0; \
    \
    size_t bufRemaining = (destSize > 0) ? destSize - 1 : 0; \
    size_t encodedLen = 0; \
    \
    size_t srcChars = srcSize; \
    \
    if (srcSize == (size_t)-1) \
    { \
      srcChars = 0; \
      const fromType* sptr = source; \
      while (*sptr++ != 0) srcChars++; \
    } \
    \
    while (srcChars > 0) \
    { \
      utf32_char ch; \
      int scnt = decoder (source, srcChars, ch, 0); \
      if (scnt == 0) break; \
      int dcnt = encoder (ch, dest, bufRemaining); \
      if (dcnt == 0) \
      { \
        dcnt = encoder (CS_UC_CHAR_REPLACER, dest, bufRemaining); \
      } \
      \
      if ((size_t)dcnt >= bufRemaining) \
      { \
        if (dest && (destSize > 0)) dest += bufRemaining; \
        bufRemaining = 0; \
      } \
      else \
      { \
        bufRemaining -= dcnt; \
        if (dest && (destSize > 0)) dest += dcnt; \
      } \
      encodedLen += dcnt; \
      if ((size_t)scnt >= srcChars) break; \
      srcChars -= scnt; \
      source += scnt; \
    } \
    \
    if (dest && (destSize > 0)) *dest = 0; \
    \
    return encodedLen + 1; \
  }
00541
00557 UCTF_CONVERTER (UTF8to16, utf8_char, UTF8Decode, utf16_char, EncodeUTF16);
00562 UCTF_CONVERTER (UTF8to32, utf8_char, UTF8Decode, utf32_char, EncodeUTF32);
00563
00568 UCTF_CONVERTER (UTF16to8, utf16_char, UTF16Decode, utf8_char, EncodeUTF8);
00573 UCTF_CONVERTER (UTF16to32, utf16_char, UTF16Decode, utf32_char, EncodeUTF32);
00574
00579 UCTF_CONVERTER (UTF32to8, utf32_char, UTF32Decode, utf8_char, EncodeUTF8);
00584 UCTF_CONVERTER (UTF32to16, utf32_char, UTF32Decode, utf16_char, EncodeUTF16);
00587 #undef UCTF_CONVERTER
00588 #undef OUTPUT_CHAR
00589 #undef _OUTPUT_CHAR
00590
00591 #if (CS_WCHAR_T_SIZE == 1)
00592 inline static size_t UTF8toWC (wchar_t* dest, size_t destSize,
00593 const utf8_char* source, size_t srcSize)
00594 {
00595 size_t srcChars = srcSize;
00596 if (srcSize == (size_t)-1)
00597 {
00598 srcChars = 0;
00599 const utf8_char* sptr = source;
00600 while (*sptr++ != 0) srcChars++;
00601 }
00602 if ((dest != 0) && (destSize != 0))
00603 {
00604 size_t len = MIN (destSize - 1, srcChars);
00605 memcpy (dest, source, size * sizeof (wchar_t));
00606 *(dest + len) = 0;
00607 }
00608 return srcChars + 1;
00609 };
00610
00611 inline static size_t UTF16toWC (wchar_t* dest, size_t destSize,
00612 const utf16_char* source, size_t srcSize)
00613 {
00614 return UTF16to8 ((utf8_char*)dest, destSize, source, srcSize);
00615 };
00616
00617 inline static size_t UTF32toWC (wchar_t* dest, size_t destSize,
00618 const utf32_char* source, size_t srcSize)
00619 {
00620 return UTF32to8 ((utf8_char*)dest, destSize, source, srcSize);
00621 };
00622
00623 inline static size_t WCtoUTF8 (utf8_char* dest, size_t destSize,
00624 const wchar_t* source, size_t srcSize)
00625 {
00626 size_t srcChars = srcSize;
00627 if (srcSize == (size_t)-1)
00628 {
00629 srcChars = 0;
00630 const wchar_t* sptr = source;
00631 while (*sptr++ != 0) srcChars++;
00632 }
00633 if ((dest != 0) && (destSize != 0))
00634 {
00635 size_t len = MIN (destSize - 1, srcChars);
00636 memcpy (dest, source, len * sizeof (wchar_t));
00637 *(dest + len) = 0;
00638 }
00639 return srcChars + 1;
00640 };
00641
00642 inline static size_t WCtoUTF16 (utf16_char* dest, size_t destSize,
00643 const wchar_t* source, size_t srcSize)
00644 {
00645 return UTF8to16 (dest, destSize, source, srcSize);
00646 };
00647
00648 inline static size_t WCtoUTF32 (utf32_char* dest, size_t destSize,
00649 const wchar_t* source, size_t srcSize)
00650 {
00651 return UTF8to32 (dest, destSize, source, srcSize);
00652 };
00653
00654 inline static int Decode (const wchar_t* str, size_t strlen,
00655 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00656 {
00657 return UTF8Decode ((utf8_char*)str, strlen, ch, isValid, returnNonChar);
00658 }
00659 inline static int Encode (const utf32_char ch, wchar_t* buf,
00660 size_t bufsize, bool allowNonchars = false)
00661 {
00662 return EncodeUTF8 (ch, (utf8_char*)buf, bufsize, allowNonchars);
00663 }
00664 #elif (CS_WCHAR_T_SIZE == 2)
00665
00666
00667
00674 inline static size_t UTF8toWC (wchar_t* dest, size_t destSize,
00675 const utf8_char* source, size_t srcSize)
00676 {
00677 return UTF8to16 ((utf16_char*)dest, destSize, source, srcSize);
00678 };
00679
00684 inline static size_t UTF16toWC (wchar_t* dest, size_t destSize,
00685 const utf16_char* source, size_t srcSize)
00686 {
00687 size_t srcChars = srcSize;
00688 if (srcSize == (size_t)-1)
00689 {
00690 srcChars = 0;
00691 const utf16_char* sptr = source;
00692 while (*sptr++ != 0) srcChars++;
00693 }
00694 if ((dest != 0) && (destSize != 0))
00695 {
00696 size_t len = MIN (destSize - 1, srcChars);
00697 memcpy (dest, source, len * sizeof (wchar_t));
00698 *(dest + len) = 0;
00699 }
00700 return srcChars + 1;
00701 };
00702
00707 inline static size_t UTF32toWC (wchar_t* dest, size_t destSize,
00708 const utf32_char* source, size_t srcSize)
00709 {
00710 return UTF32to16 ((utf16_char*)dest, destSize, source, srcSize);
00711 };
00712
00717 inline static size_t WCtoUTF8 (utf8_char* dest, size_t destSize,
00718 const wchar_t* source, size_t srcSize)
00719 {
00720 return UTF16to8 (dest, destSize, (utf16_char*)source, srcSize);
00721 };
00722
00727 inline static size_t WCtoUTF16 (utf16_char* dest, size_t destSize,
00728 const wchar_t* source, size_t srcSize)
00729 {
00730 size_t srcChars = srcSize;
00731 if (srcSize == (size_t)-1)
00732 {
00733 srcChars = 0;
00734 const wchar_t* sptr = source;
00735 while (*sptr++ != 0) srcChars++;
00736 }
00737 if ((dest != 0) && (destSize != 0))
00738 {
00739 size_t len = MIN (destSize - 1, srcChars);
00740 memcpy (dest, source, len * sizeof (wchar_t));
00741 *(dest + len) = 0;
00742 }
00743 return srcChars + 1;
00744 };
00745
00750 inline static size_t WCtoUTF32 (utf32_char* dest, size_t destSize,
00751 const wchar_t* source, size_t srcSize)
00752 {
00753 return UTF16to32 (dest, destSize, (utf16_char*)source, srcSize);
00754 };
00755
00756
00757
00758
00759
00760
00761
00762
00763
00764
00765
00766 #if !defined(CS_COMPILER_MSVC) || (_MSC_VER > 1300)
00767 #if !defined(CS_COMPILER_MSVC)
00768 #define __wchar_t wchar_t
00769 #endif
00770
00774 inline static int Decode (const __wchar_t* str, size_t strlen,
00775 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00776 {
00777 return UTF16Decode ((utf16_char*)str, strlen, ch, isValid, returnNonChar);
00778 }
00783 inline static int Encode (const utf32_char ch, __wchar_t* buf,
00784 size_t bufsize, bool allowNonchars = false)
00785 {
00786 return EncodeUTF16 (ch, (utf16_char*)buf, bufsize, allowNonchars);
00787 }
00788 #ifdef __wchar_t
00789 #undef __wchar_t
00790 #endif
00791 #endif
00792
00793 #elif (CS_WCHAR_T_SIZE == 4)
00794 inline static size_t UTF8toWC (wchar_t* dest, size_t destSize,
00795 const utf8_char* source, size_t srcSize)
00796 {
00797 return UTF8to32 ((utf32_char*)dest, destSize, source, srcSize);
00798 };
00799
00800 inline static size_t UTF16toWC (wchar_t* dest, size_t destSize,
00801 const utf16_char* source, size_t srcSize)
00802 {
00803 return UTF16to32 ((utf32_char*)dest, destSize, source, srcSize);
00804 };
00805
00806 inline static size_t UTF32toWC (wchar_t* dest, size_t destSize,
00807 const utf32_char* source, size_t srcSize)
00808 {
00809 size_t srcChars = srcSize;
00810 if (srcSize == (size_t)-1)
00811 {
00812 srcChars = 0;
00813 const utf32_char* sptr = source;
00814 while (*sptr++ != 0) srcChars++;
00815 }
00816 if ((dest != 0) && (destSize != 0))
00817 {
00818 size_t len = MIN (destSize - 1, srcChars);
00819 memcpy (dest, source, len * sizeof (wchar_t));
00820 *(dest + len) = 0;
00821 }
00822 return srcChars + 1;
00823 };
00824
00825 inline static size_t WCtoUTF8 (utf8_char* dest, size_t destSize,
00826 const wchar_t* source, size_t srcSize)
00827 {
00828 return UTF32to8 (dest, destSize, (utf32_char*)source, srcSize);
00829 };
00830
00831 inline static size_t WCtoUTF16 (utf16_char* dest, size_t destSize,
00832 const wchar_t* source, size_t srcSize)
00833 {
00834 return UTF32to16 (dest, destSize, (utf32_char*)source, srcSize);
00835 };
00836
00837 inline static size_t WCtoUTF32 (utf32_char* dest, size_t destSize,
00838 const wchar_t* source, size_t srcSize)
00839 {
00840 size_t srcChars = srcSize;
00841 if (srcSize == (size_t)-1)
00842 {
00843 srcChars = 0;
00844 const wchar_t* sptr = source;
00845 while (*sptr++ != 0) srcChars++;
00846 }
00847 if ((dest != 0) && (destSize != 0))
00848 {
00849 size_t len = MIN (destSize - 1, srcChars);
00850 memcpy (dest, source, len * sizeof (wchar_t));
00851 *(dest + len) = 0;
00852 }
00853 return srcChars + 1;
00854 };
00855
00856 inline static int Decode (const wchar_t* str, size_t strlen,
00857 utf32_char& ch, bool* isValid = 0, bool returnNonChar = false)
00858 {
00859 return UTF32Decode ((utf32_char*)str, strlen, ch, isValid, returnNonChar);
00860 }
00861 inline static int Encode (const utf32_char ch, wchar_t* buf,
00862 size_t bufsize, bool allowNonchars = false)
00863 {
00864 return EncodeUTF32 (ch, (utf32_char*)buf, bufsize, allowNonchars);
00865 }
00866 #else
00867 #error Odd-sized, unsupported wchar_t!
00868 #endif
00869
00882 inline static int UTF8Skip (const utf8_char* str, size_t maxSkip)
00883 {
00884 if (maxSkip < 1) return 0;
00885
00886 if ((*str & 0x80) == 0)
00887 {
00888 return 1;
00889 }
00890 else
00891 {
00892 int n = 0;
00893 while ((n < 7) && ((*str & (1 << (7 - n))) != 0)) { n++; }
00894
00895 if ((n < 2) || (n > 6))
00896 {
00897 return 1;
00898 }
00899
00900 int skip = 1;
00901
00902 for (; skip < n; skip++)
00903 {
00904 if (((str[skip] & 0xc0) != 0x80) || ((size_t)skip > maxSkip))
00905 {
00906 break;
00907 }
00908 }
00909 return skip;
00910 }
00911 }
00912
00923 inline static int UTF8Rewind (const utf8_char* str, size_t maxRew)
00924 {
00925 if (maxRew < 1) return 0;
00926
00927 const utf8_char* pos = str - 1;
00928
00929 if ((*pos & 0x80) == 0)
00930 {
00931 return 1;
00932 }
00933
00934
00935 int skip = 1;
00936 while (((*pos & 0xc0) == 0x80) && ((size_t)skip < maxRew))
00937 {
00938 skip++;
00939 pos--;
00940 }
00941
00942 return skip;
00943 }
00944
00950 inline static int UTF16Skip (const utf16_char* str, size_t maxSkip)
00951 {
00952 if (CS_UC_IS_HIGH_SURROGATE (*str))
00953 return (int)(MIN(maxSkip, (size_t)2));
00954 else
00955 return (int)(MIN(maxSkip, (size_t)1));
00956 }
00957
00963 inline static int UTF16Rewind (const utf16_char* str, size_t maxRew)
00964 {
00965 if (maxRew < 1) return 0;
00966
00967 const utf16_char* pos = str - 1;
00968 if (!CS_UC_IS_SURROGATE(*pos))
00969 return 1;
00970 else
00971 {
00972 if ((maxRew > 1) && (CS_UC_IS_HIGH_SURROGATE(*(pos - 1))))
00973 return 2;
00974 else
00975 return 1;
00976 }
00977 }
00978
00984 inline static int UTF32Skip (const utf32_char* str, size_t maxSkip)
00985 {
00986 (void)str;
00987 return (int)(MIN(maxSkip, (size_t)1));
00988 }
00989
00995 inline static int UTF32Rewind (const utf32_char* str, size_t maxRew)
00996 {
00997 (void)str;
00998 if (maxRew < 1) return 0;
00999 return 1;
01000 }
01015 static size_t MapToUpper (const utf32_char ch, utf32_char* dest,
01016 size_t destSize, uint flags = 0);
01023 inline static utf32_char MapToUpper (const utf32_char ch)
01024 {
01025 utf32_char ret;
01026 MapToUpper (ch, &ret, 1, csUcMapSimple);
01027 return ret;
01028 }
01033 static size_t MapToLower (const utf32_char ch, utf32_char* dest,
01034 size_t destSize, uint flags = 0);
01035 inline static utf32_char MapToLower (const utf32_char ch)
01036 {
01037 utf32_char ret;
01038 MapToLower (ch, &ret, 1, csUcMapSimple);
01039 return ret;
01040 }
01046 static size_t MapToFold (const utf32_char ch, utf32_char* dest,
01047 size_t destSize, uint flags = 0);
01048 inline static utf32_char MapToFold (const utf32_char ch)
01049 {
01050 utf32_char ret;
01051 MapToFold (ch, &ret, 1, csUcMapSimple);
01052 return ret;
01053 }
01055 };
01056
01059 #endif
01060