Drizzled Public API Documentation

charset.h
1 /* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
3  *
4  * Copyright (C) 2008 Sun Microsystems, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; version 2 of the License.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18  */
19 
20 /*
21  Header File that defines all the charset declarations being used in Drizzle source code
22 */
23 
24 #pragma once
25 
26 #include <sys/types.h>
27 #include <cstddef>
28 
29 #include <drizzled/visibility.h>
30 #include <drizzled/common_fwd.h>
31 #include <drizzled/definitions.h>
32 
33 namespace drizzled {
34 
35 #define MY_CS_NAME_SIZE 32
36 #define MY_CS_CTYPE_TABLE_SIZE 257
37 #define MY_CS_TO_LOWER_TABLE_SIZE 256
38 #define MY_CS_TO_UPPER_TABLE_SIZE 256
39 #define MY_CS_SORT_ORDER_TABLE_SIZE 256
40 #define MY_CS_TO_UNI_TABLE_SIZE 256
41 #define CHARSET_DIR "charsets/"
42 #define my_wc_t unsigned long
43 /* mb_wc and wc_mb return codes */
44 #define MY_CS_ILSEQ 0 /* Wrong byte sequence: mb_wc */
45 #define MY_CS_ILUNI 0 /* Cannot encode Unicode to charset: wc_mb */
46 #define MY_CS_TOOSMALL -101 /* Need at least one byte: wc_mb and mb_wc */
47 #define MY_CS_TOOSMALL2 -102 /* Need at least two bytes: wc_mb and mb_wc */
48 #define MY_CS_TOOSMALL3 -103 /* Need at least three bytes: wc_mb and mb_wc */
49 /* These following three are currently not really used */
50 #define MY_CS_TOOSMALL4 -104 /* Need at least 4 bytes: wc_mb and mb_wc */
51 #define MY_CS_TOOSMALL5 -105 /* Need at least 5 bytes: wc_mb and mb_wc */
52 #define MY_CS_TOOSMALL6 -106 /* Need at least 6 bytes: wc_mb and mb_wc */
53 #define MY_SEQ_INTTAIL 1
54 #define MY_SEQ_SPACES 2
55 #define MY_CS_COMPILED 1 /* compiled-in sets */
56 #define MY_CS_CONFIG 2 /* sets that have a *.conf file */
57 #define MY_CS_INDEX 4 /* sets listed in the Index file */
58 #define MY_CS_LOADED 8 /* sets that are currently loaded */
59 #define MY_CS_BINSORT 16 /* if binary sort order */
60 #define MY_CS_PRIMARY 32 /* if primary collation */
61 #define MY_CS_STRNXFRM 64 /* if strnxfrm is used for sort */
62 #define MY_CS_UNICODE 128 /* if a charset is full unicode */
63 #define MY_CS_READY 256 /* if a charset is initialized */
64 #define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/
65 #define MY_CS_CSSORT 1024 /* if case sensitive sort order */
66 #define MY_CS_HIDDEN 2048 /* don't display in SHOW */
67 #define MY_CS_NONASCII 8192 /* if not ASCII-compatible */
68 #define MY_CHARSET_UNDEFINED 0
69 /* Flags for strxfrm */
70 #define MY_STRXFRM_LEVEL1 0x00000001 /* for primary weights */
71 #define MY_STRXFRM_LEVEL2 0x00000002 /* for secondary weights */
72 #define MY_STRXFRM_LEVEL3 0x00000004 /* for tertiary weights */
73 #define MY_STRXFRM_LEVEL4 0x00000008 /* fourth level weights */
74 #define MY_STRXFRM_LEVEL5 0x00000010 /* fifth level weights */
75 #define MY_STRXFRM_LEVEL6 0x00000020 /* sixth level weights */
76 #define MY_STRXFRM_LEVEL_ALL 0x0000003F /* Bit OR for the above six */
77 #define MY_STRXFRM_NLEVELS 6 /* Number of possible levels*/
78 #define MY_STRXFRM_PAD_WITH_SPACE 0x00000040 /* if pad result with spaces */
79 #define MY_STRXFRM_UNUSED_00000080 0x00000080 /* for future extensions */
/*
  Descending-order flags, one distinct bit per weight level.
  The flag for level N is 0x1 << (MY_STRXFRM_DESC_SHIFT + N - 1), so the six
  flags occupy bits 8..13 and can be tested independently of each other.
*/
#define MY_STRXFRM_DESC_LEVEL1 0x00000100 /* if desc order for level1 */
#define MY_STRXFRM_DESC_LEVEL2 0x00000200 /* if desc order for level2 */
#define MY_STRXFRM_DESC_LEVEL3 0x00000400 /* if desc order for level3 */
#define MY_STRXFRM_DESC_LEVEL4 0x00000800 /* if desc order for level4 */
#define MY_STRXFRM_DESC_LEVEL5 0x00001000 /* if desc order for level5 */
#define MY_STRXFRM_DESC_LEVEL6 0x00002000 /* if desc order for level6 */
#define MY_STRXFRM_DESC_SHIFT 8 /* bit position of MY_STRXFRM_DESC_LEVEL1 */
87 #define MY_STRXFRM_UNUSED_00004000 0x00004000 /* for future extensions */
88 #define MY_STRXFRM_UNUSED_00008000 0x00008000 /* for future extensions */
89 #define MY_STRXFRM_REVERSE_LEVEL1 0x00010000 /* if reverse order for level1 */
90 #define MY_STRXFRM_REVERSE_LEVEL2 0x00020000 /* if reverse order for level2 */
91 #define MY_STRXFRM_REVERSE_LEVEL3 0x00040000 /* if reverse order for level3 */
92 #define MY_STRXFRM_REVERSE_LEVEL4 0x00080000 /* if reverse order for level4 */
93 #define MY_STRXFRM_REVERSE_LEVEL5 0x00100000 /* if reverse order for level5 */
94 #define MY_STRXFRM_REVERSE_LEVEL6 0x00200000 /* if reverse order for level6 */
95 #define MY_STRXFRM_REVERSE_SHIFT 16
96 #define ILLEGAL_CHARSET_INFO_NUMBER (UINT32_MAX)
97 #define MY_UTF8MB4 "utf8"
98 #define my_charset_utf8_general_ci ::drizzled::my_charset_utf8mb4_general_ci
99 #define my_charset_utf8_bin ::drizzled::my_charset_utf8mb4_bin
100 #define _MY_U 01 /* Upper case */
101 #define _MY_L 02 /* Lower case */
102 #define _MY_NMR 04 /* Numeral (digit) */
103 #define _MY_SPC 010 /* Spacing character */
104 #define _MY_PNT 020 /* Punctuation */
105 #define _MY_CTR 040 /* Control character */
106 #define _MY_B 0100 /* Blank */
107 #define _MY_X 0200 /* heXadecimal digit */
108 
109 /* Some typedef to make it easy for C++ to make function pointers */
110 typedef int (*my_charset_conv_mb_wc)(const charset_info_st*, my_wc_t *, const unsigned char*, const unsigned char *);
111 typedef int (*my_charset_conv_wc_mb)(const charset_info_st*, my_wc_t, unsigned char*, unsigned char *);
112 typedef size_t (*my_charset_conv_case)(const charset_info_st*, char*, size_t, char*, size_t);
113 
/*
  One entry of a Unicode case/sort table: three 16-bit values named toupper,
  tolower and sort.  Tables of pointers to these entries are declared in this
  header as my_unicase_default[] and my_unicase_turkish[], and
  charset_info_st::caseinfo points at such a table.
*/
struct MY_UNICASE_INFO
{
  uint16_t toupper;
  uint16_t tolower;
  uint16_t sort;
};
120 
/*
  Element type of the my_uni_ctype[256] table declared in this header.
  Holds a single ctype byte (pctype) and a pointer to a ctype byte array
  (ctype).  NOTE(review): presumably pctype applies when the whole page
  shares one type and ctype is the per-character table otherwise -- confirm
  against the table definition.
*/
struct MY_UNI_CTYPE
{
  unsigned char pctype;
  unsigned char* ctype;
};
126 
/*
  Index entry referenced by charset_info_st::tab_from_uni: a 16-bit
  [from, to] pair plus a pointer to a byte table.
  NOTE(review): presumably tab maps the code points from..to back to
  charset bytes -- confirm against the conversion code.
*/
struct MY_UNI_IDX
{
  uint16_t from;
  uint16_t to;
  unsigned char *tab;
};
133 
/*
  Match record filled in through the `match` out-parameter of the instr
  routines (MY_COLLATION_HANDLER::instr, my_instr_simple, my_instr_mb,
  my_instr_bin).  Carries three 32-bit values: beg, end and mb_len.
*/
struct my_match_t
{
  uint32_t beg;
  uint32_t end;
  uint32_t mb_len;
};
140 
/*
  Lexer state identifiers.  The enumerators carry no explicit values, so they
  number consecutively from MY_LEX_START == 0; the order below must not change.
*/
enum my_lex_states
{
  MY_LEX_START,
  MY_LEX_CHAR,
  MY_LEX_IDENT,
  MY_LEX_IDENT_SEP,
  MY_LEX_IDENT_START,
  MY_LEX_REAL,
  MY_LEX_HEX_NUMBER,
  MY_LEX_BIN_NUMBER,
  MY_LEX_CMP_OP,
  MY_LEX_LONG_CMP_OP,
  MY_LEX_STRING,
  MY_LEX_COMMENT,
  MY_LEX_END,
  MY_LEX_OPERATOR_OR_IDENT,
  MY_LEX_NUMBER_IDENT,
  MY_LEX_INT_OR_REAL,
  MY_LEX_REAL_OR_POINT,
  MY_LEX_BOOL,
  MY_LEX_EOL,
  MY_LEX_ESCAPE,
  MY_LEX_LONG_COMMENT,
  MY_LEX_END_LONG_COMMENT,
  MY_LEX_SEMICOLON,
  MY_LEX_SET_VAR,
  MY_LEX_USER_END,
  MY_LEX_HOSTNAME,
  MY_LEX_SKIP,
  MY_LEX_USER_VARIABLE_DELIMITER,
  MY_LEX_SYSTEM_VAR,
  MY_LEX_IDENT_OR_KEYWORD,
  MY_LEX_IDENT_OR_HEX,
  MY_LEX_IDENT_OR_BIN,
  MY_LEX_STRING_OR_DELIMITER
};
156 
157 /* See strings/charset_info_st.txt for information about this structure */
159 {
160  bool (*init)(charset_info_st&, unsigned char *(*alloc)(size_t));
161  /* Collation routines */
162  int (*strnncoll)(const charset_info_st*, const unsigned char*, size_t, const unsigned char*, size_t, bool);
163  int (*strnncollsp)(const charset_info_st*, const unsigned char*, size_t, const unsigned char*, size_t, bool diff_if_only_endspace_difference);
164  size_t (*strnxfrm)(const charset_info_st*, unsigned char *dst, size_t dstlen, uint32_t nweights, const unsigned char *src, size_t srclen, uint32_t flags);
165  size_t (*strnxfrmlen)(const charset_info_st*, size_t);
166  bool (*like_range)(const charset_info_st*, const char *s, size_t s_length, char escape, char w_one, char w_many,
167  size_t res_length, char *min_str, char *max_str, size_t *min_len, size_t *max_len);
168  int (*wildcmp)(const charset_info_st*, const char *str,const char *str_end, const char *wildstr, const char *wildend, int escape,int w_one, int w_many);
169 
170  int (*strcasecmp)(const charset_info_st*, const char*, const char *);
171 
172  uint32_t (*instr)(const charset_info_st*, const char *b, size_t b_length, const char *s, size_t s_length, my_match_t *match, uint32_t nmatch);
173 
174  /* Hash calculation */
175  void (*hash_sort)(const charset_info_st*, const unsigned char *key, size_t len, uint32_t *nr1, uint32_t *nr2);
176  bool (*propagate)();
177 };
178 
179 /* See strings/charset_info_st.txt about information on this structure */
181 {
182  /* Multibyte routines */
183  uint32_t (*ismbchar)(const charset_info_st*, const char*, const char *);
184  uint32_t (*mbcharlen)(const charset_info_st*, uint32_t c);
185  size_t (*numchars)(const charset_info_st*, const char *b, const char *e);
186  size_t (*charpos)(const charset_info_st*, const char *b, const char *e, size_t pos);
187  size_t (*well_formed_len)(const charset_info_st&, str_ref, size_t nchars, int *error);
188  size_t (*lengthsp)(const charset_info_st*, const char *ptr, size_t length);
189  size_t (*numcells)(const charset_info_st*, const char *b, const char *e);
190 
191  /* Unicode conversion */
192  my_charset_conv_mb_wc mb_wc;
193  my_charset_conv_wc_mb wc_mb;
194 
195  /* CTYPE scanner */
196  int (*ctype)(const charset_info_st *cs, int *ctype, const unsigned char *s, const unsigned char *e);
197 
198  /* Functions for case and sort conversion */
199  size_t (*caseup_str)(const charset_info_st*, char *);
200  size_t (*casedn_str)(const charset_info_st*, char *);
201 
202  my_charset_conv_case caseup;
203  my_charset_conv_case casedn;
204 
205  /* Charset dependant snprintf() */
206  size_t (*snprintf)(const charset_info_st*, char *to, size_t n, const char *fmt, ...)
207 #if defined(__GNUC__) && !defined(__INTEL_COMPILER)
208  __attribute__((format(printf, 4, 5)))
209 #endif
210  ;
211  size_t (*long10_to_str)(const charset_info_st*, char *to, size_t n, int radix, long int val);
212  size_t (*int64_t10_to_str)(const charset_info_st*, char *to, size_t n, int radix, int64_t val);
213 
214  void (*fill)(const charset_info_st*, char *to, size_t len, int fill);
215 
216  /* String-to-number conversion routines */
217  long (*strntol)(const charset_info_st*, const char *s, size_t l, int base, char **e, int *err);
218  unsigned long (*strntoul)(const charset_info_st*, const char *s, size_t l, int base, char **e, int *err);
219  int64_t (*strntoll)(const charset_info_st*, const char *s, size_t l, int base, char **e, int *err);
220  uint64_t (*strntoull)(const charset_info_st*, const char *s, size_t l, int base, char **e, int *err);
221  double (*strntod)(const charset_info_st*, char *s, size_t l, char **e, int *err);
222  int64_t (*strtoll10)(const charset_info_st*, const char *nptr, char **endptr, int *error);
223  uint64_t (*strntoull10rnd)(const charset_info_st*, const char *str, size_t length, int unsigned_fl, char **endptr, int *error);
224  size_t (*scan)(const charset_info_st*, const char *b, const char *e, int sq);
225 };
226 
227 /* See strings/charset_info_st.txt about information on this structure */
229 {
230  uint32_t number;
231  uint32_t primary_number;
232  uint32_t binary_number;
233  uint32_t state;
234  const char *csname;
235  const char *name;
236  const char *comment;
237  const char *tailoring;
238  unsigned char *ctype;
239  unsigned char *to_lower;
240  unsigned char *to_upper;
241  unsigned char *sort_order;
242  uint16_t *contractions;
243  uint16_t **sort_order_big;
244  uint16_t *tab_to_uni;
245  MY_UNI_IDX *tab_from_uni;
246  MY_UNICASE_INFO **caseinfo;
247  unsigned char *state_map;
248  unsigned char *ident_map;
249  uint32_t strxfrm_multiply;
250  unsigned char caseup_multiply;
251  unsigned char casedn_multiply;
252  uint32_t mbminlen;
253  uint32_t mbmaxlen;
254  uint16_t min_sort_char;
255  uint16_t max_sort_char; /* For LIKE optimization */
256  unsigned char pad_char;
257  unsigned char levels_for_compare;
258  unsigned char levels_for_order;
259 
260  MY_CHARSET_HANDLER *cset;
261  MY_COLLATION_HANDLER *coll;
262 
263  bool isalpha(unsigned char c) const
264  {
265  return ctype[c + 1] & (_MY_U | _MY_L);
266  }
267 
268  bool isupper(unsigned char c) const
269  {
270  return ctype[c + 1] & _MY_U;
271  }
272 
273  bool islower(unsigned char c) const
274  {
275  return ctype[c + 1] & _MY_L;
276  }
277 
278  bool isdigit(unsigned char c) const
279  {
280  return ctype[c + 1] & _MY_NMR;
281  }
282 
283  bool isxdigit(unsigned char c) const
284  {
285  return ctype[c + 1] & _MY_X;
286  }
287 
288  bool isalnum(unsigned char c) const
289  {
290  return ctype[c + 1] & (_MY_U | _MY_L | _MY_NMR);
291  }
292 
293  bool isspace(unsigned char c) const
294  {
295  return ctype[c + 1] & _MY_SPC;
296  }
297 
298  bool ispunct(unsigned char c) const
299  {
300  return ctype[c + 1] & _MY_PNT;
301  }
302 
303  bool isprint(unsigned char c) const
304  {
305  return ctype[c + 1] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B);
306  }
307 
308  bool isgraph(unsigned char c) const
309  {
310  return ctype[c + 1] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR);
311  }
312 
313  bool iscntrl(unsigned char c) const
314  {
315  return ctype[c + 1] & _MY_CTR;
316  }
317 
318  bool isvar(char c) const
319  {
320  return isalnum(c) || (c) == '_';
321  }
322 
323  char toupper(unsigned char c) const
324  {
325  return to_upper[c];
326  }
327 
328  char tolower(unsigned char c) const
329  {
330  return to_lower[c];
331  }
332 
333  bool binary_compare() const
334  {
335  return state & MY_CS_BINSORT;
336  }
337 
338  bool use_strnxfrm() const
339  {
340  return state & MY_CS_STRNXFRM;
341  }
342 
343  size_t strnxfrm(unsigned char *dst, const size_t dstlen, const unsigned char *src, const uint32_t srclen) const
344  {
345  return coll->strnxfrm(this, dst, dstlen, dstlen, src, srclen, MY_STRXFRM_PAD_WITH_SPACE);
346  }
347 
348  int strcasecmp(const char *s, const char *t) const
349  {
350  return coll->strcasecmp(this, s, t);
351  }
352 
353  size_t caseup_str(char* src) const
354  {
355  return cset->caseup_str(this, src);
356  }
357 
358  size_t casedn_str(char* src) const
359  {
360  return cset->casedn_str(this, src);
361  }
362 };
363 
364 extern DRIZZLED_API charset_info_st *all_charsets[256];
365 uint32_t get_charset_number(const char *cs_name, uint32_t cs_flags);
366 uint32_t get_collation_number(const char *name);
367 const char *get_charset_name(uint32_t cs_number);
368 void free_charsets();
369 bool my_charset_same(const charset_info_st*, const charset_info_st*);
370 size_t escape_string_for_drizzle(const charset_info_st *charset_info, char *to, size_t to_length, const char *from, size_t length);
371 size_t escape_quotes_for_drizzle(const charset_info_st *charset_info, char *to, size_t to_length, const char *from, size_t length);
372 extern DRIZZLED_API const charset_info_st *default_charset_info;
373 extern DRIZZLED_API const charset_info_st *system_charset_info;
374 extern const charset_info_st *files_charset_info;
375 extern const charset_info_st *table_alias_charset;
376 extern MY_UNICASE_INFO *my_unicase_default[256];
377 extern MY_UNICASE_INFO *my_unicase_turkish[256];
378 extern MY_UNI_CTYPE my_uni_ctype[256];
379 extern DRIZZLED_API charset_info_st my_charset_bin;
380 extern DRIZZLED_API charset_info_st my_charset_utf8mb4_bin;
381 extern DRIZZLED_API charset_info_st my_charset_utf8mb4_general_ci;
382 extern DRIZZLED_API charset_info_st my_charset_utf8mb4_unicode_ci;
383 size_t my_strnxfrmlen_simple(const charset_info_st*, size_t);
384 int my_strnncollsp_simple(const charset_info_st*, const unsigned char*, size_t, const unsigned char*, size_t, bool diff_if_only_endspace_difference);
385 size_t my_lengthsp_8bit(const charset_info_st*, const char *ptr, size_t length);
386 uint32_t my_instr_simple(const charset_info_st*, const char *b, size_t b_length, const char *s, size_t s_length, my_match_t *match, uint32_t nmatch);
387 int my_strcasecmp_mb(const charset_info_st*, const char *s, const char *t);
388 
389 DRIZZLED_API const charset_info_st *get_charset(uint32_t cs_number);
390 DRIZZLED_API const charset_info_st *get_charset_by_name(const char *cs_name);
391 DRIZZLED_API const charset_info_st *get_charset_by_csname(const char *cs_name, uint32_t cs_flags);
392 
393 /* Functions for 8bit */
394 int my_mb_ctype_8bit(const charset_info_st*, int*, const unsigned char*, const unsigned char *);
395 int my_mb_ctype_mb(const charset_info_st*, int*, const unsigned char*, const unsigned char *);
396 
397 size_t my_scan_8bit(const charset_info_st*, const char *b, const char *e, int sq);
398 size_t my_snprintf_8bit(const charset_info_st*, char *to, size_t n, const char *fmt, ...) __attribute__((format(printf, 4, 5)));
399 
400 long my_strntol_8bit(const charset_info_st*, const char *s, size_t l, int base, char **e, int *err);
401 unsigned long my_strntoul_8bit(const charset_info_st*, const char *s, size_t l, int base, char **e, int *err);
402 int64_t my_strntoll_8bit(const charset_info_st*, const char *s, size_t l, int base, char **e, int *err);
403 uint64_t my_strntoull_8bit(const charset_info_st*, const char *s, size_t l, int base, char **e, int *err);
404 double my_strntod_8bit(const charset_info_st*, char *s, size_t l,char **e, int *err);
405 size_t my_long10_to_str_8bit(const charset_info_st*, char *to, size_t l, int radix, long int val);
406 size_t my_int64_t10_to_str_8bit(const charset_info_st*, char *to, size_t l, int radix, int64_t val);
407 int64_t my_strtoll10_8bit(const charset_info_st*, const char *nptr, char **endptr, int *error);
408 
409 uint64_t my_strntoull10rnd_8bit(const charset_info_st*, const char *str, size_t length, int unsigned_fl, char **endptr, int *error);
410 
411 void my_fill_8bit(const charset_info_st*, char* to, size_t l, int fill);
412 
413 bool my_like_range_simple(const charset_info_st*,
414  const char *ptr, size_t ptr_length,
415  char escape, char w_one, char w_many,
416  size_t res_length,
417  char *min_str, char *max_str,
418  size_t *min_length, size_t *max_length);
419 
420 bool my_like_range_mb(const charset_info_st*,
421  const char *ptr, size_t ptr_length,
422  char escape, char w_one, char w_many,
423  size_t res_length,
424  char *min_str, char *max_str,
425  size_t *min_length, size_t *max_length);
426 
427 int my_wildcmp_8bit(const charset_info_st*,
428  const char *str,const char *str_end,
429  const char *wildstr,const char *wildend,
430  int escape, int w_one, int w_many);
431 
432 int my_wildcmp_bin(const charset_info_st*,
433  const char *str,const char *str_end,
434  const char *wildstr,const char *wildend,
435  int escape, int w_one, int w_many);
436 
437 size_t my_numchars_8bit(const charset_info_st*, const char *b, const char *e);
438 size_t my_numcells_8bit(const charset_info_st*, const char *b, const char *e);
439 size_t my_charpos_8bit(const charset_info_st*, const char *b, const char *e, size_t pos);
440 size_t my_well_formed_len_8bit(const charset_info_st&, str_ref, size_t pos, int *error);
441 typedef unsigned char *(*cs_alloc_func)(size_t);
442 bool my_coll_init_simple(charset_info_st *cs, cs_alloc_func alloc);
443 bool my_cset_init_8bit(charset_info_st *cs, cs_alloc_func alloc);
444 uint32_t my_mbcharlen_8bit(const charset_info_st*, uint32_t c);
445 
446 /* Functions for multibyte charsets */
447 int my_wildcmp_mb(const charset_info_st*,
448  const char *str,const char *str_end,
449  const char *wildstr,const char *wildend,
450  int escape, int w_one, int w_many);
451 size_t my_numchars_mb(const charset_info_st*, const char *b, const char *e);
452 size_t my_numcells_mb(const charset_info_st*, const char *b, const char *e);
453 size_t my_charpos_mb(const charset_info_st*, const char *b, const char *e, size_t pos);
454 size_t my_well_formed_len_mb(const charset_info_st&, str_ref, size_t pos, int *error);
455 uint32_t my_instr_mb(const charset_info_st*,
456  const char *b, size_t b_length,
457  const char *s, size_t s_length,
458  my_match_t *match, uint32_t nmatch);
459 
460 int my_strnncoll_mb_bin(const charset_info_st* cs,
461  const unsigned char *s, size_t slen,
462  const unsigned char *t, size_t tlen,
463  bool t_is_prefix);
464 
465 int my_strnncollsp_mb_bin(const charset_info_st*,
466  const unsigned char *a, size_t a_length,
467  const unsigned char *b, size_t b_length,
468  bool diff_if_only_endspace_difference);
469 
470 int my_wildcmp_mb_bin(const charset_info_st*,
471  const char *str,const char *str_end,
472  const char *wildstr,const char *wildend,
473  int escape, int w_one, int w_many);
474 
475 int my_strcasecmp_mb_bin(const charset_info_st*, const char *s, const char *t);
476 
477 void my_hash_sort_mb_bin(const charset_info_st*,
478  const unsigned char *key, size_t len, uint32_t *nr1, uint32_t *nr2);
479 
480 size_t my_strnxfrm_mb(const charset_info_st*,
481  unsigned char *dst, size_t dstlen, uint32_t nweights,
482  const unsigned char *src, size_t srclen, uint32_t flags);
483 
484 int my_wildcmp_unicode(const charset_info_st*,
485  const char *str, const char *str_end,
486  const char *wildstr, const char *wildend,
487  int escape, int w_one, int w_many,
488  MY_UNICASE_INFO **weights);
489 
490 bool my_propagate_simple();
491 bool my_propagate_complex();
492 
493 
494 uint32_t my_strxfrm_flag_normalize(uint32_t flags, uint32_t nlevels);
495 void my_strxfrm_desc_and_reverse(unsigned char *str, unsigned char *strend,
496  uint32_t flags, uint32_t level);
497 size_t my_strxfrm_pad_desc_and_reverse(const charset_info_st*,
498  unsigned char *str, unsigned char *frmend, unsigned char *strend,
499  uint32_t nweights, uint32_t flags, uint32_t level);
500 
501 bool my_charset_is_ascii_compatible(const charset_info_st*);
502 
503 /*
504  Compare 0-terminated UTF8 strings.
505 
506  SYNOPSIS
507  my_strcasecmp_utf8mb3()
508  cs character set handler
509  s First 0-terminated string to compare
510  t Second 0-terminated string to compare
511 
512  IMPLEMENTATION
513 
514  RETURN
515  - negative number if s < t
516  - positive number if s > t
517  - 0 if the strings are equal
518 */
519 int my_wc_mb_filename(const charset_info_st*, my_wc_t wc, unsigned char *s, unsigned char *e);
520 int my_mb_wc_filename(const charset_info_st*, my_wc_t *pwc, const unsigned char *s, const unsigned char *e);
521 
522 int my_strnncoll_8bit_bin(const charset_info_st*,
523  const unsigned char *s, size_t slen,
524  const unsigned char *t, size_t tlen,
525  bool t_is_prefix);
526 int my_strnncollsp_8bit_bin(const charset_info_st*,
527  const unsigned char *a, size_t a_length,
528  const unsigned char *b, size_t b_length,
529  bool diff_if_only_endspace_difference);
530 size_t my_case_str_bin(const charset_info_st*, char *);
531 size_t my_case_bin(const charset_info_st*, char*,
532  size_t srclen, char*, size_t);
533 int my_strcasecmp_bin(const charset_info_st*,
534  const char *s, const char *t);
535 size_t my_strnxfrm_8bit_bin(const charset_info_st*,
536  unsigned char * dst, size_t dstlen, uint32_t nweights,
537  const unsigned char *src, size_t srclen, uint32_t flags);
538 uint32_t my_instr_bin(const charset_info_st*,
539  const char *b, size_t b_length,
540  const char *s, size_t s_length,
541  my_match_t *match, uint32_t nmatch);
542 size_t my_lengthsp_binary(const charset_info_st*,
543  const char*, size_t length);
544 int my_mb_wc_bin(const charset_info_st*,
545  my_wc_t *wc, const unsigned char *str,
546  const unsigned char *end);
547 int my_wc_mb_bin(const charset_info_st*, my_wc_t wc,
548  unsigned char *str, unsigned char *end);
549 void my_hash_sort_8bit_bin(const charset_info_st*,
550  const unsigned char *key, size_t len,
551  uint32_t *nr1, uint32_t *nr2);
552 bool my_coll_init_8bit_bin(charset_info_st *cs,
553  cs_alloc_func);
554 int my_strnncoll_binary(const charset_info_st*,
555  const unsigned char *s, size_t slen,
556  const unsigned char *t, size_t tlen,
557  bool t_is_prefix);
558 int my_strnncollsp_binary(const charset_info_st*,
559  const unsigned char *s, size_t slen,
560  const unsigned char *t, size_t tlen,
561  bool);
562 
563 inline static int my_strnncoll(const charset_info_st *cs,
564  const unsigned char *s,
565  const size_t slen,
566  const unsigned char *t,
567  const size_t tlen)
568 {
569  return (cs->coll->strnncoll(cs, s, slen, t, tlen, 0));
570 }
571 
572 inline static bool my_like_range(const charset_info_st *cs,
573  const char *ptr, const size_t ptrlen,
574  const char escape,
575  const char w_one,
576  const char w_many,
577  const size_t reslen,
578  char *minstr, char *maxstr,
579  size_t *minlen, size_t *maxlen)
580 {
581  return (cs->coll->like_range(cs, ptr, ptrlen, escape, w_one, w_many, reslen,
582  minstr, maxstr, minlen, maxlen));
583 }
584 
585 inline static int my_wildcmp(const charset_info_st *cs,
586  const char *str, const char *strend,
587  const char *w_str, const char *w_strend,
588  const int escape,
589  const int w_one, const int w_many)
590 {
591  return (cs->coll->wildcmp(cs, str, strend, w_str, w_strend, escape, w_one, w_many));
592 }
593 
594 template <typename CHAR_T>
595 inline static size_t my_charpos(const charset_info_st *cs,
596  const CHAR_T *b, const CHAR_T* e, size_t num)
597 {
598  return cs->cset->charpos(cs, reinterpret_cast<const char*>(b), reinterpret_cast<const char*>(e), num);
599 }
600 
601 inline static bool use_mb(const charset_info_st *cs)
602 {
603  return cs->cset->ismbchar != NULL;
604 }
605 
606 inline static unsigned int my_ismbchar(const charset_info_st *cs, const char *a, const char *b)
607 {
608  return cs->cset->ismbchar(cs, a, b);
609 }
610 
611 inline static unsigned int my_mbcharlen(const charset_info_st *cs, uint32_t c)
612 {
613  return cs->cset->mbcharlen(cs, c);
614 }
615 
616 
617 inline static long my_strntol(const charset_info_st *cs,
618  const char* s, const size_t l, const int base, char **e, int *err)
619 {
620  return (cs->cset->strntol(cs, s, l, base, e, err));
621 }
622 
623 inline static unsigned long my_strntoul(const charset_info_st *cs,
624  const char* s, const size_t l, const int base,
625  char **e, int *err)
626 {
627  return (cs->cset->strntoul(cs, s, l, base, e, err));
628 }
629 
630 inline static int64_t my_strntoll(const charset_info_st *cs,
631  const char* s, const size_t l, const int base, char **e, int *err)
632 {
633  return (cs->cset->strntoll(cs, s, l, base, e, err));
634 }
635 
636 inline static int64_t my_strntoull(const charset_info_st *cs,
637  const char* s, const size_t l, const int base,
638  char **e, int *err)
639 {
640  return (cs->cset->strntoull(cs, s, l, base, e, err));
641 }
642 
643 
644 inline static double my_strntod(const charset_info_st *cs,
645  char* s, const size_t l, char **e, int *err)
646 {
647  return (cs->cset->strntod(cs, s, l, e, err));
648 }
649 
650 int make_escape_code(const charset_info_st*, const char *escape);
651 
652 } /* namespace drizzled */
int my_wildcmp_unicode(const charset_info_st *, const char *str, const char *str_end, const char *wildstr, const char *wildend, int escape, int w_one, int w_many, MY_UNICASE_INFO **weights)
Definition: ctype-utf8.cc:1728
#define DRIZZLED_API
Definition: visibility.h:62
Visibility Control Macros.