![]() |
Public API Reference |
![]() |
Contains functions to convert between several UTF encodings. More...
#include <csutil/csuctransform.h>
Static Public Member Functions | |
UTF Decoders | |
static int | UTF8Decode (const utf8_char *str, size_t strlen, utf32_char &ch, bool *isValid=0, bool returnNonChar=false) |
Decode a Unicode code point encoded in UTF-8. | |
static int | UTF16Decode (const utf16_char *str, size_t strlen, utf32_char &ch, bool *isValid=0, bool returnNonChar=false) |
Decode a Unicode code point encoded in UTF-16. | |
static int | UTF32Decode (const utf32_char *str, size_t strlen, utf32_char &ch, bool *isValid=0, bool returnNonChar=false) |
Decode a Unicode code point encoded in UTF-32. | |
static int | Decode (const utf8_char *str, size_t strlen, utf32_char &ch, bool *isValid=0, bool returnNonChar=false) |
Decode a Unicode code point encoded in UTF-8. | |
static int | Decode (const utf16_char *str, size_t strlen, utf32_char &ch, bool *isValid=0, bool returnNonChar=false) |
Decode a Unicode code point encoded in UTF-16. | |
static int | Decode (const utf32_char *str, size_t strlen, utf32_char &ch, bool *isValid=0, bool returnNonChar=false) |
Decode a Unicode code point encoded in UTF-32. | |
UTF Encoders | |
static int | EncodeUTF8 (const utf32_char ch, utf8_char *buf, size_t bufsize, bool allowNonchars=false) |
Encode a Unicode code point to UTF-8. | |
static int | EncodeUTF16 (const utf32_char ch, utf16_char *buf, size_t bufsize, bool allowNonchars=false) |
Encode a Unicode code point to UTF-16. | |
static int | EncodeUTF32 (const utf32_char ch, utf32_char *buf, size_t bufsize, bool allowNonchars=false) |
Encode a Unicode code point to UTF-32. | |
static int | Encode (const utf32_char ch, utf8_char *buf, size_t bufsize, bool allowNonchars=false) |
Encode a Unicode code point to UTF-8. | |
static int | Encode (const utf32_char ch, utf16_char *buf, size_t bufsize, bool allowNonchars=false) |
Encode a Unicode code point to UTF-16. | |
static int | Encode (const utf32_char ch, utf32_char *buf, size_t bufsize, bool allowNonchars=false) |
Encode a Unicode code point to UTF-32. | |
Converters between strings in different UTF encodings | |
static size_t | UTF8to16 (utf16_char *dest, size_t destSize, const utf8_char *source, size_t srcSize=(size_t)-1) |
Convert UTF-8 to UTF-16. | |
static size_t | UTF8to32 (utf32_char *dest, size_t destSize, const utf8_char *source, size_t srcSize=(size_t)-1) |
Convert UTF-8 to UTF-32. | |
static size_t | UTF16to8 (utf8_char *dest, size_t destSize, const utf16_char *source, size_t srcSize=(size_t)-1) |
Convert UTF-16 to UTF-8. | |
static size_t | UTF16to32 (utf32_char *dest, size_t destSize, const utf16_char *source, size_t srcSize=(size_t)-1) |
Convert UTF-16 to UTF-32. | |
static size_t | UTF32to8 (utf8_char *dest, size_t destSize, const utf32_char *source, size_t srcSize=(size_t)-1) |
Convert UTF-32 to UTF-8. | |
static size_t | UTF32to16 (utf16_char *dest, size_t destSize, const utf32_char *source, size_t srcSize=(size_t)-1) |
Convert UTF-32 to UTF-16. | |
Helpers to skip encoded code units in different UTF encodings | |
static int | UTF8Skip (const utf8_char *str, size_t maxSkip) |
Determine how many code units in a UTF-8 buffer need to be skipped to get to the next encoded char. | |
static int | UTF8Rewind (const utf8_char *str, size_t maxRew) |
Determine how many code units in a UTF-8 buffer need to be skipped back to get to the start of the previous encoded code point. | |
static int | UTF16Skip (const utf16_char *str, size_t maxSkip) |
Determine how many code units in a UTF-16 buffer need to be skipped to get to the next encoded char. | |
static int | UTF16Rewind (const utf16_char *str, size_t maxRew) |
Determine how many code units in a UTF-16 buffer need to be skipped back to get to the start of the previous encoded code point. | |
static int | UTF32Skip (const utf32_char *str, size_t maxSkip) |
Determine how many code units in a UTF-32 buffer need to be skipped to get to the next encoded char. | |
static int | UTF32Rewind (const utf32_char *str, size_t maxRew) |
Determine how many code units in a UTF-32 buffer need to be skipped back to get to the start of the previous encoded code point. | |
Code point mappings | |
static size_t | MapToUpper (const utf32_char ch, utf32_char *dest, size_t destSize, uint flags=0) |
Map a code point to its upper case equivalent(s). | |
static utf32_char | MapToUpper (const utf32_char ch) |
Map a code point to its upper case equivalent(s). | |
static size_t | MapToLower (const utf32_char ch, utf32_char *dest, size_t destSize, uint flags=0) |
Map a code point to its lower case equivalent(s). | |
static utf32_char | MapToLower (const utf32_char ch) |
Map a code point to its lower case equivalent(s). | |
static size_t | MapToFold (const utf32_char ch, utf32_char *dest, size_t destSize, uint flags=0) |
Map a code point to its fold equivalent(s). | |
static utf32_char | MapToFold (const utf32_char ch) |
Map a code point to its fold equivalent(s). |
Contains functions to convert between several UTF encodings.
Definition at line 79 of file csuctransform.h.
static int csUnicodeTransform::Decode | ( | const utf8_char * | str, |
size_t | strlen, | ||
utf32_char & | ch, | ||
bool * | isValid = 0 , |
||
bool | returnNonChar = false |
||
) | [inline, static] |
Decode a Unicode code point encoded in UTF-8.
Decode a Unicode code point encoded in UTF-8.
str | Pointer to the encoded code point. |
strlen | Number of code units in the source string. |
ch | Decoded code point. |
isValid | When an error occurred during decoding, ch contains the replacement character (CS_UC_CHAR_REPLACER). In this case, the bool pointed to by isValid will be set to false. The parameter can be 0, but in this case the information whether the decoded char is the replacement character because the source data is erroneous is lost. |
returnNonChar | Whether decoded non-character or high and low surrogates are returned as such. Normally, those code points are replaced with CS_UC_CHAR_REPLACER to signal an invalid encoded code point. |
Definition at line 277 of file csuctransform.h.
static int csUnicodeTransform::Decode | ( | const utf16_char * | str, |
size_t | strlen, | ||
utf32_char & | ch, | ||
bool * | isValid = 0 , |
||
bool | returnNonChar = false |
||
) | [inline, static] |
Decode a Unicode code point encoded in UTF-16.
Decode a Unicode code point encoded in UTF-16.
str | Pointer to the encoded code point. |
strlen | Number of code units in the source string. |
ch | Decoded code point. |
isValid | When an error occurred during decoding, ch contains the replacement character (CS_UC_CHAR_REPLACER). In this case, the bool pointed to by isValid will be set to false. The parameter can be 0, but in this case the information whether the decoded char is the replacement character because the source data is erroneous is lost. |
returnNonChar | Whether decoded non-character or high and low surrogates are returned as such. Normally, those code points are replaced with CS_UC_CHAR_REPLACER to signal an invalid encoded code point. |
Definition at line 286 of file csuctransform.h.
static int csUnicodeTransform::Decode | ( | const utf32_char * | str, |
size_t | strlen, | ||
utf32_char & | ch, | ||
bool * | isValid = 0 , |
||
bool | returnNonChar = false |
||
) | [inline, static] |
Decode a Unicode code point encoded in UTF-32.
Decode a Unicode code point encoded in UTF-32.
str | Pointer to the encoded code point. |
strlen | Number of code units in the source string. |
ch | Decoded code point. |
isValid | When an error occurred during decoding, ch contains the replacement character (CS_UC_CHAR_REPLACER). In this case, the bool pointed to by isValid will be set to false. The parameter can be 0, but in this case the information whether the decoded char is the replacement character because the source data is erroneous is lost. |
returnNonChar | Whether decoded non-character or high and low surrogates are returned as such. Normally, those code points are replaced with CS_UC_CHAR_REPLACER to signal an invalid encoded code point. |
Definition at line 295 of file csuctransform.h.
static int csUnicodeTransform::Encode | ( | const utf32_char | ch, |
utf8_char * | buf, | ||
size_t | bufsize, | ||
bool | allowNonchars = false |
||
) | [inline, static] |
Encode a Unicode code point to UTF-8.
Encode a Unicode code point to UTF-8.
ch | Code point to encode. |
buf | Pointer to the buffer receiving the encoded code point. When a fixed-size buffer is used it's a good idea to make it CS_UC_MAX_UTF8_ENCODED utf8_chars large. |
bufsize | Number of code units that fit in buf. |
allowNonchars | Whether non-character or high and low surrogates are encoded. Normally, those code points are rejected to prevent the generation of invalid encoded strings. |
Definition at line 461 of file csuctransform.h.
static int csUnicodeTransform::Encode | ( | const utf32_char | ch, |
utf16_char * | buf, | ||
size_t | bufsize, | ||
bool | allowNonchars = false |
||
) | [inline, static] |
Encode a Unicode code point to UTF-16.
Encode a Unicode code point to UTF-16.
ch | Code point to encode. |
buf | Pointer to the buffer receiving the encoded code point. When a fixed-size buffer is used it's a good idea to make it CS_UC_MAX_UTF16_ENCODED utf16_chars large. |
bufsize | Number of code units that fit in buf. |
allowNonchars | Whether non-character or high and low surrogates are encoded. Normally, those code points are rejected to prevent the generation of invalid encoded strings. |
Definition at line 470 of file csuctransform.h.
static int csUnicodeTransform::Encode | ( | const utf32_char | ch, |
utf32_char * | buf, | ||
size_t | bufsize, | ||
bool | allowNonchars = false |
||
) | [inline, static] |
Encode a Unicode code point to UTF-32.
Encode a Unicode code point to UTF-32.
ch | Code point to encode. |
buf | Pointer to the buffer receiving the encoded code point. When a fixed-size buffer is used it's a good idea to make it CS_UC_MAX_UTF32_ENCODED utf32_chars large. |
bufsize | Number of code units that fit in buf. |
allowNonchars | Whether non-character or high and low surrogates are encoded. Normally, those code points are rejected to prevent the generation of invalid encoded strings. |
Definition at line 479 of file csuctransform.h.
static int csUnicodeTransform::EncodeUTF16 | ( | const utf32_char | ch, |
utf16_char * | buf, | ||
size_t | bufsize, | ||
bool | allowNonchars = false |
||
) | [inline, static] |
Encode a Unicode code point to UTF-16.
ch | Code point to encode. |
buf | Pointer to the buffer receiving the encoded code point. When a fixed-size buffer is used it's a good idea to make it CS_UC_MAX_UTF16_ENCODED utf16_chars large. |
bufsize | Number of code units that fit in buf. |
allowNonchars | Whether non-character or high and low surrogates are encoded. Normally, those code points are rejected to prevent the generation of invalid encoded strings. |
Definition at line 401 of file csuctransform.h.
static int csUnicodeTransform::EncodeUTF32 | ( | const utf32_char | ch, |
utf32_char * | buf, | ||
size_t | bufsize, | ||
bool | allowNonchars = false |
||
) | [inline, static] |
Encode a Unicode code point to UTF-32.
ch | Code point to encode. |
buf | Pointer to the buffer receiving the encoded code point. When a fixed-size buffer is used it's a good idea to make it CS_UC_MAX_UTF32_ENCODED utf32_chars large. |
bufsize | Number of code units that fit in buf. |
allowNonchars | Whether non-character or high and low surrogates are encoded. Normally, those code points are rejected to prevent the generation of invalid encoded strings. |
Definition at line 443 of file csuctransform.h.
static int csUnicodeTransform::EncodeUTF8 | ( | const utf32_char | ch, |
utf8_char * | buf, | ||
size_t | bufsize, | ||
bool | allowNonchars = false |
||
) | [inline, static] |
Encode a Unicode code point to UTF-8.
ch | Code point to encode. |
buf | Pointer to the buffer receiving the encoded code point. When a fixed-size buffer is used it's a good idea to make it CS_UC_MAX_UTF8_ENCODED utf8_chars large. |
bufsize | Number of code units that fit in buf. |
allowNonchars | Whether non-character or high and low surrogates are encoded. Normally, those code points are rejected to prevent the generation of invalid encoded strings. |
Definition at line 333 of file csuctransform.h.
static size_t csUnicodeTransform::MapToFold | ( | const utf32_char | ch, |
utf32_char * | dest, | ||
size_t | destSize, | ||
uint | flags = 0 |
||
) | [static] |
Map a code point to its fold equivalent(s).
Fold mapping is useful for binary comparison of two Unicode strings.
Map a code point to its fold equivalent(s).
ch | Code point to be mapped. |
dest | Destination buffer. |
destSize | Number of code units the destination buffer can hold. |
flags | Flags to control the result of the mapping. Currently supported is csUcMapSimple. |
static utf32_char csUnicodeTransform::MapToFold | ( | const utf32_char | ch | ) | [inline, static] |
Map a code point to its fold equivalent(s).
ch | Code point to be mapped. |
dest | Destination buffer. |
destSize | Number of code units the destination buffer can hold. |
flags | Flags to control the result of the mapping. Currently supported is csUcMapSimple. |
Definition at line 1048 of file csuctransform.h.
static size_t csUnicodeTransform::MapToLower | ( | const utf32_char | ch, |
utf32_char * | dest, | ||
size_t | destSize, | ||
uint | flags = 0 |
||
) | [static] |
Map a code point to its lower case equivalent(s).
Map a code point to its lower case equivalent(s).
ch | Code point to be mapped. |
dest | Destination buffer. |
destSize | Number of code units the destination buffer can hold. |
flags | Flags to control the result of the mapping. Currently supported is csUcMapSimple. |
static utf32_char csUnicodeTransform::MapToLower | ( | const utf32_char | ch | ) | [inline, static] |
Map a code point to its lower case equivalent(s).
ch | Code point to be mapped. |
dest | Destination buffer. |
destSize | Number of code units the destination buffer can hold. |
flags | Flags to control the result of the mapping. Currently supported is csUcMapSimple. |
Definition at line 1035 of file csuctransform.h.
static size_t csUnicodeTransform::MapToUpper | ( | const utf32_char | ch, |
utf32_char * | dest, | ||
size_t | destSize, | ||
uint | flags = 0 |
||
) | [static] |
Map a code point to its upper case equivalent(s).
ch | Code point to be mapped. |
dest | Destination buffer. |
destSize | Number of code units the destination buffer can hold. |
flags | Flags to control the result of the mapping. Currently supported is csUcMapSimple. |
static utf32_char csUnicodeTransform::MapToUpper | ( | const utf32_char | ch | ) | [inline, static] |
Map a code point to its upper case equivalent(s).
ch | Code point to be mapped. |
Definition at line 1023 of file csuctransform.h.
static int csUnicodeTransform::UTF16Decode | ( | const utf16_char * | str, |
size_t | strlen, | ||
utf32_char & | ch, | ||
bool * | isValid = 0 , |
||
bool | returnNonChar = false |
||
) | [inline, static] |
Decode a Unicode code point encoded in UTF-16.
Decode a Unicode code point encoded in UTF-16.
str | Pointer to the encoded code point. |
strlen | Number of code units in the source string. |
ch | Decoded code point. |
isValid | When an error occurred during decoding, ch contains the replacement character (CS_UC_CHAR_REPLACER). In this case, the bool pointed to by isValid will be set to false. The parameter can be 0, but in this case the information whether the decoded char is the replacement character because the source data is erroneous is lost. |
returnNonChar | Whether decoded non-character or high and low surrogates are returned as such. Normally, those code points are replaced with CS_UC_CHAR_REPLACER to signal an invalid encoded code point. |
Definition at line 214 of file csuctransform.h.
static int csUnicodeTransform::UTF16Rewind | ( | const utf16_char * | str, |
size_t | maxRew | ||
) | [inline, static] |
Determine how many code units in a UTF-16 buffer need to be skipped back to get to the start of the previous encoded code point.
Determine how many code units in a UTF-16 buffer need to be skipped back to get to the start of the previous encoded code point.
str | Pointer to the encoded code point after the code point that is actually to be skipped back. |
maxRew | The number of code units to go back at max. Typically, this is the number of chars from str to the start of the buffer. |
Definition at line 963 of file csuctransform.h.
static int csUnicodeTransform::UTF16Skip | ( | const utf16_char * | str, |
size_t | maxSkip | ||
) | [inline, static] |
Determine how many code units in a UTF-16 buffer need to be skipped to get to the next encoded char.
Determine how many code units in a UTF-16 buffer need to be skipped to get to the next encoded char.
str | Pointer to buffer with encoded code point. |
maxSkip | The number of code units to skip at max. Usually, this is the number of chars from str to the end of the buffer. |
Definition at line 950 of file csuctransform.h.
static size_t csUnicodeTransform::UTF16to32 | ( | utf32_char * | dest, |
size_t | destSize, | ||
const utf16_char * | source, | ||
size_t | srcSize = (size_t)-1 |
||
) | [inline, static] |
Convert UTF-16 to UTF-32.
Convert UTF-16 to UTF-32.
dest | Destination buffer. |
destSize | Number of code units the destination buffer can hold. |
source | Source buffer. |
srcSize | Number of code units contained in the source buffer. If this is -1, the length will be determined automatically. |
Definition at line 573 of file csuctransform.h.
static size_t csUnicodeTransform::UTF16to8 | ( | utf8_char * | dest, |
size_t | destSize, | ||
const utf16_char * | source, | ||
size_t | srcSize = (size_t)-1 |
||
) | [inline, static] |
Convert UTF-16 to UTF-8.
Convert UTF-16 to UTF-8.
dest | Destination buffer. |
destSize | Number of code units the destination buffer can hold. |
source | Source buffer. |
srcSize | Number of code units contained in the source buffer. If this is -1, the length will be determined automatically. |
Definition at line 568 of file csuctransform.h.
static int csUnicodeTransform::UTF32Decode | ( | const utf32_char * | str, |
size_t | strlen, | ||
utf32_char & | ch, | ||
bool * | isValid = 0 , |
||
bool | returnNonChar = false |
||
) | [inline, static] |
Decode a Unicode code point encoded in UTF-32.
Decode a Unicode code point encoded in UTF-32.
str | Pointer to the encoded code point. |
strlen | Number of code units in the source string. |
ch | Decoded code point. |
isValid | When an error occurred during decoding, ch contains the replacement character (CS_UC_CHAR_REPLACER). In this case, the bool pointed to by isValid will be set to false. The parameter can be 0, but in this case the information whether the decoded char is the replacement character because the source data is erroneous is lost. |
returnNonChar | Whether decoded non-character or high and low surrogates are returned as such. Normally, those code points are replaced with CS_UC_CHAR_REPLACER to signal an invalid encoded code point. |
Definition at line 257 of file csuctransform.h.
static int csUnicodeTransform::UTF32Rewind | ( | const utf32_char * | str, |
size_t | maxRew | ||
) | [inline, static] |
Determine how many code units in a UTF-32 buffer need to be skipped back to get to the start of the previous encoded code point.
Determine how many code units in a UTF-32 buffer need to be skipped back to get to the start of the previous encoded code point.
str | Pointer to the encoded code point after the code point that is actually to be skipped back. |
maxRew | The number of code units to go back at max. Typically, this is the number of chars from str to the start of the buffer. |
Definition at line 995 of file csuctransform.h.
static int csUnicodeTransform::UTF32Skip | ( | const utf32_char * | str, |
size_t | maxSkip | ||
) | [inline, static] |
Determine how many code units in a UTF-32 buffer need to be skipped to get to the next encoded char.
Determine how many code units in a UTF-32 buffer need to be skipped to get to the next encoded char.
str | Pointer to buffer with encoded code point. |
maxSkip | The number of code units to skip at max. Usually, this is the number of chars from str to the end of the buffer. |
Definition at line 984 of file csuctransform.h.
static size_t csUnicodeTransform::UTF32to16 | ( | utf16_char * | dest, |
size_t | destSize, | ||
const utf32_char * | source, | ||
size_t | srcSize = (size_t)-1 |
||
) | [inline, static] |
Convert UTF-32 to UTF-16.
Convert UTF-32 to UTF-16.
dest | Destination buffer. |
destSize | Number of code units the destination buffer can hold. |
source | Source buffer. |
srcSize | Number of code units contained in the source buffer. If this is -1, the length will be determined automatically. |
Definition at line 584 of file csuctransform.h.
static size_t csUnicodeTransform::UTF32to8 | ( | utf8_char * | dest, |
size_t | destSize, | ||
const utf32_char * | source, | ||
size_t | srcSize = (size_t)-1 |
||
) | [inline, static] |
Convert UTF-32 to UTF-8.
Convert UTF-32 to UTF-8.
dest | Destination buffer. |
destSize | Number of code units the destination buffer can hold. |
source | Source buffer. |
srcSize | Number of code units contained in the source buffer. If this is -1, the length will be determined automatically. |
Definition at line 579 of file csuctransform.h.
static int csUnicodeTransform::UTF8Decode | ( | const utf8_char * | str, |
size_t | strlen, | ||
utf32_char & | ch, | ||
bool * | isValid = 0 , |
||
bool | returnNonChar = false |
||
) | [inline, static] |
Decode a Unicode code point encoded in UTF-8.
str | Pointer to the encoded code point. |
strlen | Number of code units in the source string. |
ch | Decoded code point. |
isValid | When an error occurred during decoding, ch contains the replacement character (CS_UC_CHAR_REPLACER). In this case, the bool pointed to by isValid will be set to false. The parameter can be 0, but in this case the information whether the decoded char is the replacement character because the source data is erroneous is lost. |
returnNonChar | Whether decoded non-character or high and low surrogates are returned as such. Normally, those code points are replaced with CS_UC_CHAR_REPLACER to signal an invalid encoded code point. |
Definition at line 123 of file csuctransform.h.
static int csUnicodeTransform::UTF8Rewind | ( | const utf8_char * | str, |
size_t | maxRew | ||
) | [inline, static] |
Determine how many code units in a UTF-8 buffer need to be skipped back to get to the start of the previous encoded code point.
str | Pointer to the encoded code point after the code point that is actually to be skipped back. |
maxRew | The number of code units to go back at max. Typically, this is the number of chars from str to the start of the buffer. |
Definition at line 923 of file csuctransform.h.
static int csUnicodeTransform::UTF8Skip | ( | const utf8_char * | str, |
size_t | maxSkip | ||
) | [inline, static] |
Determine how many code units in a UTF-8 buffer need to be skipped to get to the next encoded char.
str | Pointer to buffer with encoded code point. |
maxSkip | The number of code units to skip at max. Usually, this is the number of chars from str to the end of the buffer. |
Definition at line 882 of file csuctransform.h.
static size_t csUnicodeTransform::UTF8to16 | ( | utf16_char * | dest, |
size_t | destSize, | ||
const utf8_char * | source, | ||
size_t | srcSize = (size_t)-1 |
||
) | [inline, static] |
Convert UTF-8 to UTF-16.
dest | Destination buffer. |
destSize | Number of code units the destination buffer can hold. |
source | Source buffer. |
srcSize | Number of code units contained in the source buffer. If this is -1, the length will be determined automatically. |
Definition at line 557 of file csuctransform.h.
static size_t csUnicodeTransform::UTF8to32 | ( | utf32_char * | dest, |
size_t | destSize, | ||
const utf8_char * | source, | ||
size_t | srcSize = (size_t)-1 |
||
) | [inline, static] |
Convert UTF-8 to UTF-32.
Convert UTF-8 to UTF-32.
dest | Destination buffer. |
destSize | Number of code units the destination buffer can hold. |
source | Source buffer. |
srcSize | Number of code units contained in the source buffer. If this is -1, the length will be determined automatically. |
Definition at line 562 of file csuctransform.h.