Drizzled Public API Documentation

ctype-simple.cc
1 /* Copyright (C) 2002 MySQL AB
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License as published by
5  the Free Software Foundation; version 2 of the License.
6 
7  This program is distributed in the hope that it will be useful,
8  but WITHOUT ANY WARRANTY; without even the implied warranty of
9  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  GNU General Public License for more details.
11 
12  You should have received a copy of the GNU General Public License
13  along with this program; if not, write to the Free Software
14  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15 
16 #include <config.h>
17 
18 #include <drizzled/internal/m_string.h>
19 #include <drizzled/charset.h>
20 #include <cerrno>
21 #include <cstdio>
22 #include <cstdlib>
23 
24 #include <stdarg.h>
25 
26 #include <algorithm>
27 
28 using namespace std;
29 
30 namespace drizzled {
31 
32 /*
33  Returns the number of bytes required for strnxfrm().
34 */
35 
36 size_t my_strnxfrmlen_simple(const charset_info_st * const cs, size_t len)
37 {
38  return len * (cs->strxfrm_multiply ? cs->strxfrm_multiply : 1);
39 }
40 
41 
42 /*
43  We can't use vsprintf here as it's not guaranteed to return
44  the length on all operating systems.
45  This function is also not called in a safe environment, so the
46  end buffer must be checked.
47 */
48 
49 size_t my_snprintf_8bit(const charset_info_st * const,
50  char* to, size_t n,
51  const char* fmt, ...)
52 {
53  va_list args;
54  int result;
55  va_start(args,fmt);
56  result= vsnprintf(to, n, fmt, args);
57  va_end(args);
58  return result;
59 }
60 
61 
62 long my_strntol_8bit(const charset_info_st * const cs,
63  const char *nptr, size_t l, int base,
64  char **endptr, int *err)
65 {
66  int negative;
67  uint32_t cutoff;
68  uint32_t cutlim;
69  uint32_t i;
70  const char *s;
71  unsigned char c;
72  const char *save, *e;
73  int overflow;
74 
75  *err= 0; /* Initialize error indicator */
76 #ifdef NOT_USED
77  if (base < 0 || base == 1 || base > 36)
78  base = 10;
79 #endif
80 
81  s = nptr;
82  e = nptr+l;
83 
84  for ( ; s<e && cs->isspace(*s) ; s++) {}
85 
86  if (s == e)
87  {
88  goto noconv;
89  }
90 
91  /* Check for a sign. */
92  if (*s == '-')
93  {
94  negative = 1;
95  ++s;
96  }
97  else if (*s == '+')
98  {
99  negative = 0;
100  ++s;
101  }
102  else
103  negative = 0;
104 
105 #ifdef NOT_USED
106  if (base == 16 && s[0] == '0' && (s[1]=='X' || s[1]=='x'))
107  s += 2;
108 #endif
109 
110 #ifdef NOT_USED
111  if (base == 0)
112  {
113  if (*s == '0')
114  {
115  if (s[1]=='X' || s[1]=='x')
116  {
117  s += 2;
118  base = 16;
119  }
120  else
121  base = 8;
122  }
123  else
124  base = 10;
125  }
126 #endif
127 
128  save = s;
129  cutoff = (UINT32_MAX) / (uint32_t) base;
130  cutlim = (uint32_t) ((UINT32_MAX) % (uint32_t) base);
131 
132  overflow = 0;
133  i = 0;
134  for (c = *s; s != e; c = *++s)
135  {
136  if (c>='0' && c<='9')
137  c -= '0';
138  else if (c>='A' && c<='Z')
139  c = c - 'A' + 10;
140  else if (c>='a' && c<='z')
141  c = c - 'a' + 10;
142  else
143  break;
144  if (c >= base)
145  break;
146  if (i > cutoff || (i == cutoff && c > cutlim))
147  overflow = 1;
148  else
149  {
150  i *= (uint32_t) base;
151  i += c;
152  }
153  }
154 
155  if (s == save)
156  goto noconv;
157 
158  if (endptr != NULL)
159  *endptr = (char *) s;
160 
161  if (negative)
162  {
163  if (i > (uint32_t) INT32_MIN)
164  overflow = 1;
165  }
166  else if (i > INT32_MAX)
167  overflow = 1;
168 
169  if (overflow)
170  {
171  err[0]= ERANGE;
172  return negative ? INT32_MIN : INT32_MAX;
173  }
174 
175  return (negative ? -((long) i) : (long) i);
176 
177 noconv:
178  err[0]= EDOM;
179  if (endptr != NULL)
180  *endptr = (char *) nptr;
181  return 0L;
182 }
183 
184 
185 ulong my_strntoul_8bit(const charset_info_st * const cs,
186  const char *nptr, size_t l, int base,
187  char **endptr, int *err)
188 {
189  int negative;
190  uint32_t cutoff;
191  uint32_t cutlim;
192  uint32_t i;
193  const char *s;
194  unsigned char c;
195  const char *save, *e;
196  int overflow;
197 
198  *err= 0; /* Initialize error indicator */
199 #ifdef NOT_USED
200  if (base < 0 || base == 1 || base > 36)
201  base = 10;
202 #endif
203 
204  s = nptr;
205  e = nptr+l;
206 
207  for( ; s<e && cs->isspace(*s); s++) {}
208 
209  if (s==e)
210  {
211  goto noconv;
212  }
213 
214  if (*s == '-')
215  {
216  negative = 1;
217  ++s;
218  }
219  else if (*s == '+')
220  {
221  negative = 0;
222  ++s;
223  }
224  else
225  negative = 0;
226 
227 #ifdef NOT_USED
228  if (base == 16 && s[0] == '0' && (s[1]=='X' || s[1]=='x'))
229  s += 2;
230 #endif
231 
232 #ifdef NOT_USED
233  if (base == 0)
234  {
235  if (*s == '0')
236  {
237  if (s[1]=='X' || s[1]=='x')
238  {
239  s += 2;
240  base = 16;
241  }
242  else
243  base = 8;
244  }
245  else
246  base = 10;
247  }
248 #endif
249 
250  save = s;
251  cutoff = (UINT32_MAX) / (uint32_t) base;
252  cutlim = (uint32_t) ((UINT32_MAX) % (uint32_t) base);
253  overflow = 0;
254  i = 0;
255 
256  for (c = *s; s != e; c = *++s)
257  {
258  if (c>='0' && c<='9')
259  c -= '0';
260  else if (c>='A' && c<='Z')
261  c = c - 'A' + 10;
262  else if (c>='a' && c<='z')
263  c = c - 'a' + 10;
264  else
265  break;
266  if (c >= base)
267  break;
268  if (i > cutoff || (i == cutoff && c > cutlim))
269  overflow = 1;
270  else
271  {
272  i *= (uint32_t) base;
273  i += c;
274  }
275  }
276 
277  if (s == save)
278  goto noconv;
279 
280  if (endptr != NULL)
281  *endptr = (char *) s;
282 
283  if (overflow)
284  {
285  err[0]= ERANGE;
286  return UINT32_MAX;
287  }
288 
289  return (negative ? -((long) i) : (long) i);
290 
291 noconv:
292  err[0]= EDOM;
293  if (endptr != NULL)
294  *endptr = (char *) nptr;
295  return 0L;
296 }
297 
298 
299 int64_t my_strntoll_8bit(const charset_info_st * const cs,
300  const char *nptr, size_t l, int base,
301  char **endptr,int *err)
302 {
303  int negative;
304  uint64_t cutoff;
305  uint32_t cutlim;
306  uint64_t i;
307  const char *s, *e;
308  const char *save;
309  int overflow;
310 
311  *err= 0; /* Initialize error indicator */
312 #ifdef NOT_USED
313  if (base < 0 || base == 1 || base > 36)
314  base = 10;
315 #endif
316 
317  s = nptr;
318  e = nptr+l;
319 
320  for(; s<e && cs->isspace(*s); s++) {}
321 
322  if (s == e)
323  {
324  goto noconv;
325  }
326 
327  if (*s == '-')
328  {
329  negative = 1;
330  ++s;
331  }
332  else if (*s == '+')
333  {
334  negative = 0;
335  ++s;
336  }
337  else
338  negative = 0;
339 
340 #ifdef NOT_USED
341  if (base == 16 && s[0] == '0' && (s[1]=='X'|| s[1]=='x'))
342  s += 2;
343 #endif
344 
345 #ifdef NOT_USED
346  if (base == 0)
347  {
348  if (*s == '0')
349  {
350  if (s[1]=='X' || s[1]=='x')
351  {
352  s += 2;
353  base = 16;
354  }
355  else
356  base = 8;
357  }
358  else
359  base = 10;
360  }
361 #endif
362 
363  save = s;
364 
365  cutoff = (~(uint64_t) 0) / (unsigned long int) base;
366  cutlim = (uint32_t) ((~(uint64_t) 0) % (unsigned long int) base);
367 
368  overflow = 0;
369  i = 0;
370  for ( ; s != e; s++)
371  {
372  unsigned char c= *s;
373  if (c>='0' && c<='9')
374  c -= '0';
375  else if (c>='A' && c<='Z')
376  c = c - 'A' + 10;
377  else if (c>='a' && c<='z')
378  c = c - 'a' + 10;
379  else
380  break;
381  if (c >= base)
382  break;
383  if (i > cutoff || (i == cutoff && c > cutlim))
384  overflow = 1;
385  else
386  {
387  i *= (uint64_t) base;
388  i += c;
389  }
390  }
391 
392  if (s == save)
393  goto noconv;
394 
395  if (endptr != NULL)
396  *endptr = (char *) s;
397 
398  if (negative)
399  {
400  if (i > (uint64_t) INT64_MIN)
401  overflow = 1;
402  }
403  else if (i > (uint64_t) INT64_MAX)
404  overflow = 1;
405 
406  if (overflow)
407  {
408  err[0]= ERANGE;
409  return negative ? INT64_MIN : INT64_MAX;
410  }
411 
412  return (negative ? -((int64_t) i) : (int64_t) i);
413 
414 noconv:
415  err[0]= EDOM;
416  if (endptr != NULL)
417  *endptr = (char *) nptr;
418  return 0L;
419 }
420 
421 
422 uint64_t my_strntoull_8bit(const charset_info_st * const cs,
423  const char *nptr, size_t l, int base,
424  char **endptr, int *err)
425 {
426  int negative;
427  uint64_t cutoff;
428  uint32_t cutlim;
429  uint64_t i;
430  const char *s, *e;
431  const char *save;
432  int overflow;
433 
434  *err= 0; /* Initialize error indicator */
435 #ifdef NOT_USED
436  if (base < 0 || base == 1 || base > 36)
437  base = 10;
438 #endif
439 
440  s = nptr;
441  e = nptr+l;
442 
443  for(; s<e && cs->isspace(*s); s++) {}
444 
445  if (s == e)
446  {
447  goto noconv;
448  }
449 
450  if (*s == '-')
451  {
452  negative = 1;
453  ++s;
454  }
455  else if (*s == '+')
456  {
457  negative = 0;
458  ++s;
459  }
460  else
461  negative = 0;
462 
463 #ifdef NOT_USED
464  if (base == 16 && s[0] == '0' && (s[1]=='X' || s[1]=='x'))
465  s += 2;
466 #endif
467 
468 #ifdef NOT_USED
469  if (base == 0)
470  {
471  if (*s == '0')
472  {
473  if (s[1]=='X' || s[1]=='x')
474  {
475  s += 2;
476  base = 16;
477  }
478  else
479  base = 8;
480  }
481  else
482  base = 10;
483  }
484 #endif
485 
486  save = s;
487 
488  cutoff = (~(uint64_t) 0) / (unsigned long int) base;
489  cutlim = (uint32_t) ((~(uint64_t) 0) % (unsigned long int) base);
490 
491  overflow = 0;
492  i = 0;
493  for ( ; s != e; s++)
494  {
495  unsigned char c= *s;
496 
497  if (c>='0' && c<='9')
498  c -= '0';
499  else if (c>='A' && c<='Z')
500  c = c - 'A' + 10;
501  else if (c>='a' && c<='z')
502  c = c - 'a' + 10;
503  else
504  break;
505  if (c >= base)
506  break;
507  if (i > cutoff || (i == cutoff && c > cutlim))
508  overflow = 1;
509  else
510  {
511  i *= (uint64_t) base;
512  i += c;
513  }
514  }
515 
516  if (s == save)
517  goto noconv;
518 
519  if (endptr != NULL)
520  *endptr = (char *) s;
521 
522  if (overflow)
523  {
524  err[0]= ERANGE;
525  return (~(uint64_t) 0);
526  }
527 
528  return (negative ? -((int64_t) i) : (int64_t) i);
529 
530 noconv:
531  err[0]= EDOM;
532  if (endptr != NULL)
533  *endptr = (char *) nptr;
534  return 0L;
535 }
536 
537 
538 /*
539  Read double from string
540 
541  SYNOPSIS:
542  my_strntod_8bit()
543  cs Character set information
544  str String to convert to double
545  length Optional length for string.
546  end result pointer to end of converted string
547  err Error number if failed conversion
548 
549  NOTES:
550  If length is not INT32_MAX or str[length] != 0 then the given str must
551  be writeable
552  If length == INT32_MAX the str must be \0 terminated.
553 
554  It's implemented this way to save a buffer allocation and a memory copy.
555 
556  RETURN
557  Value of number in string
558 */
559 
560 
561 double my_strntod_8bit(const charset_info_st * const,
562  char *str, size_t length,
563  char **end, int *err)
564 {
565  if (length == INT32_MAX)
566  length= 65535; /* Should be big enough */
567  *end= str + length;
568  return internal::my_strtod(str, end, err);
569 }
570 
571 
572 /*
573  This is a fast version optimized for the case of radix 10 / -10
574 
575  Assume len >= 1
576 */
577 
578 size_t my_long10_to_str_8bit(const charset_info_st * const,
579  char *dst, size_t len, int radix, long int val)
580 {
581  char buffer[66];
582  char *p, *e;
583  long int new_val;
584  uint32_t sign=0;
585  unsigned long int uval = (unsigned long int) val;
586 
587  e = p = &buffer[sizeof(buffer)-1];
588  *p= 0;
589 
590  if (radix < 0)
591  {
592  if (val < 0)
593  {
594  /* Avoid integer overflow in (-val) for INT64_MIN (BUG#31799). */
595  uval= (unsigned long int)0 - uval;
596  *dst++= '-';
597  len--;
598  sign= 1;
599  }
600  }
601 
602  new_val = (long) (uval / 10);
603  *--p = '0'+ (char) (uval - (unsigned long) new_val * 10);
604  val = new_val;
605 
606  while (val != 0)
607  {
608  new_val=val/10;
609  *--p = '0' + (char) (val-new_val*10);
610  val= new_val;
611  }
612 
613  len= min(len, (size_t) (e-p));
614  memcpy(dst, p, len);
615  return len+sign;
616 }
617 
618 
619 size_t my_int64_t10_to_str_8bit(const charset_info_st * const,
620  char *dst, size_t len, int radix,
621  int64_t val)
622 {
623  char buffer[65];
624  char *p, *e;
625  long long_val;
626  uint32_t sign= 0;
627  uint64_t uval = (uint64_t)val;
628 
629  if (radix < 0)
630  {
631  if (val < 0)
632  {
633  /* Avoid integer overflow in (-val) for INT64_MIN (BUG#31799). */
634  uval = (uint64_t)0 - uval;
635  *dst++= '-';
636  len--;
637  sign= 1;
638  }
639  }
640 
641  e = p = &buffer[sizeof(buffer)-1];
642  *p= 0;
643 
644  if (uval == 0)
645  {
646  *--p= '0';
647  len= 1;
648  goto cnv;
649  }
650 
651  while (uval > (uint64_t) LONG_MAX)
652  {
653  uint64_t quo= uval/(uint32_t) 10;
654  uint32_t rem= (uint32_t) (uval- quo* (uint32_t) 10);
655  *--p = '0' + rem;
656  uval= quo;
657  }
658 
659  long_val= (long) uval;
660  while (long_val != 0)
661  {
662  long quo= long_val/10;
663  *--p = (char) ('0' + (long_val - quo*10));
664  long_val= quo;
665  }
666 
667  len= min(len, (size_t) (e-p));
668 cnv:
669  memcpy(dst, p, len);
670  return len+sign;
671 }
672 
673 
/*
** Compare string against string with wildcard
** 0 if matched
** 1 if not matched
** -1 if not matched and the mismatch was found while handling a
** wildcard (a w_many search stops on any result <= 0)
*/
680 
681 inline static int likeconv(const charset_info_st *cs, const char c)
682 {
683 #ifdef LIKE_CMP_TOUPPER
684  return (unsigned char) cs->toupper(c);
685 #else
686  return cs->sort_order[(unsigned char)c];
687 #endif
688 }
689 
690 
691 inline static const char* inc_ptr(const charset_info_st *cs, const char *str, const char *str_end)
692 {
693  // (Strange this macro have been used. If str_end would actually
694  // have been used it would have made sense. /Gustaf)
695  (void)cs;
696  (void)str_end;
697  return str++;
698 }
699 
700 int my_wildcmp_8bit(const charset_info_st * const cs,
701  const char *str,const char *str_end,
702  const char *wildstr,const char *wildend,
703  int escape, int w_one, int w_many)
704 {
705  int result= -1; /* Not found, using wildcards */
706 
707  while (wildstr != wildend)
708  {
709  while (*wildstr != w_many && *wildstr != w_one)
710  {
711  if (*wildstr == escape && wildstr+1 != wildend)
712  wildstr++;
713 
714  if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
715  return 1; /* No match */
716  if (wildstr == wildend)
717  return(str != str_end); /* Match if both are at end */
718  result=1; /* Found an anchor char */
719  }
720  if (*wildstr == w_one)
721  {
722  do
723  {
724  if (str == str_end) /* Skip one char if possible */
725  return(result);
726  inc_ptr(cs,str,str_end);
727  } while (++wildstr < wildend && *wildstr == w_one);
728  if (wildstr == wildend)
729  break;
730  }
731  if (*wildstr == w_many)
732  { /* Found w_many */
733  unsigned char cmp;
734 
735  wildstr++;
736  /* Remove any '%' and '_' from the wild search string */
737  for (; wildstr != wildend ; wildstr++)
738  {
739  if (*wildstr == w_many)
740  continue;
741  if (*wildstr == w_one)
742  {
743  if (str == str_end)
744  return(-1);
745  inc_ptr(cs,str,str_end);
746  continue;
747  }
748  break; /* Not a wild character */
749  }
750  if (wildstr == wildend)
751  return 0; /* Ok if w_many is last */
752  if (str == str_end)
753  return(-1);
754 
755  if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
756  cmp= *++wildstr;
757 
758  inc_ptr(cs,wildstr,wildend); /* This is compared trough cmp */
759  cmp=likeconv(cs,cmp);
760  do
761  {
762  while (str != str_end && (unsigned char) likeconv(cs,*str) != cmp)
763  str++;
764  if (str++ == str_end) return(-1);
765  {
766  int tmp=my_wildcmp_8bit(cs,str,str_end,wildstr,wildend,escape,w_one,
767  w_many);
768  if (tmp <= 0)
769  return(tmp);
770  }
771  } while (str != str_end && wildstr[0] != w_many);
772  return(-1);
773  }
774  }
775  return(str != str_end ? 1 : 0);
776 }
777 
778 
779 /*
780 ** Calculate min_str and max_str that ranges a LIKE string.
781 ** Arguments:
782 ** ptr Pointer to LIKE string.
783 ** ptr_length Length of LIKE string.
784 ** escape Escape character in LIKE. (Normally '\').
785 ** All escape characters should be removed from
786 ** min_str and max_str
787 ** res_length Length of min_str and max_str.
788 ** min_str Smallest case sensitive string that ranges LIKE.
789 ** Should be space padded to res_length.
790 ** max_str Largest case sensitive string that ranges LIKE.
791 ** Normally padded with the biggest character sort value.
792 **
793 ** The function should return 0 if ok and 1 if the LIKE string can't be
794 ** optimized !
795 */
796 
797 bool my_like_range_simple(const charset_info_st * const cs,
798  const char *ptr, size_t ptr_length,
799  char escape, char w_one, char w_many,
800  size_t res_length,
801  char *min_str,char *max_str,
802  size_t *min_length, size_t *max_length)
803 {
804  const char *end= ptr + ptr_length;
805  char *min_org=min_str;
806  char *min_end=min_str+res_length;
807  size_t charlen= res_length / cs->mbmaxlen;
808 
809  for (; ptr != end && min_str != min_end && charlen > 0 ; ptr++, charlen--)
810  {
811  if (*ptr == escape && ptr+1 != end)
812  {
813  ptr++; /* Skip escape */
814  *min_str++= *max_str++ = *ptr;
815  continue;
816  }
817  if (*ptr == w_one) /* '_' in SQL */
818  {
819  *min_str++='\0'; /* This should be min char */
820  *max_str++= (char) cs->max_sort_char;
821  continue;
822  }
823  if (*ptr == w_many) /* '%' in SQL */
824  {
825  /* Calculate length of keys */
826  *min_length= ((cs->state & MY_CS_BINSORT) ?
827  (size_t) (min_str - min_org) :
828  res_length);
829  *max_length= res_length;
830  do
831  {
832  *min_str++= 0;
833  *max_str++= (char) cs->max_sort_char;
834  } while (min_str != min_end);
835  return 0;
836  }
837  *min_str++= *max_str++ = *ptr;
838  }
839 
840  *min_length= *max_length = (size_t) (min_str - min_org);
841  while (min_str != min_end)
842  *min_str++= *max_str++ = ' '; /* Because if key compression */
843  return 0;
844 }
845 
846 
847 size_t my_scan_8bit(const charset_info_st * const cs, const char *str, const char *end, int sq)
848 {
849  const char *str0= str;
850  switch (sq)
851  {
852  case MY_SEQ_INTTAIL:
853  if (*str == '.')
854  {
855  for(str++ ; str != end && *str == '0' ; str++) {}
856  return (size_t) (str - str0);
857  }
858  return 0;
859 
860  case MY_SEQ_SPACES:
861  for ( ; str < end ; str++)
862  {
863  if (!cs->isspace(*str))
864  break;
865  }
866  return (size_t) (str - str0);
867  default:
868  return 0;
869  }
870 }
871 
872 
873 void my_fill_8bit(const charset_info_st * const, char *s, size_t l, int fill)
874 {
875  memset(s, fill, l);
876 }
877 
878 
879 size_t my_numchars_8bit(const charset_info_st * const, const char *b, const char *e)
880 {
881  return (size_t) (e - b);
882 }
883 
884 
885 size_t my_numcells_8bit(const charset_info_st * const, const char *b, const char *e)
886 {
887  return (size_t) (e - b);
888 }
889 
890 
891 size_t my_charpos_8bit(const charset_info_st * const, const char *, const char *, size_t pos)
892 {
893  return pos;
894 }
895 
896 
897 size_t my_well_formed_len_8bit(const charset_info_st&, str_ref str, size_t nchars, int *error)
898 {
899  *error= 0;
900  return min(str.size(), nchars);
901 }
902 
903 
904 size_t my_lengthsp_8bit(const charset_info_st * const,
905  const char *ptr, size_t length)
906 {
907  const char *end;
908  end= (const char *) internal::skip_trailing_space((const unsigned char *)ptr, length);
909  return (size_t) (end-ptr);
910 }
911 
912 
913 uint32_t my_instr_simple(const charset_info_st * const cs,
914  const char *b, size_t b_length,
915  const char *s, size_t s_length,
916  my_match_t *match, uint32_t nmatch)
917 {
918  const unsigned char *str, *search, *end, *search_end;
919 
920  if (s_length <= b_length)
921  {
922  if (!s_length)
923  {
924  if (nmatch)
925  {
926  match->beg= 0;
927  match->end= 0;
928  match->mb_len= 0;
929  }
930  return 1; /* Empty string is always found */
931  }
932 
933  str= (const unsigned char*) b;
934  search= (const unsigned char*) s;
935  end= (const unsigned char*) b+b_length-s_length+1;
936  search_end= (const unsigned char*) s + s_length;
937 
938 skip:
939  while (str != end)
940  {
941  if (cs->sort_order[*str++] == cs->sort_order[*search])
942  {
943  const unsigned char *i,*j;
944 
945  i= str;
946  j= search+1;
947 
948  while (j != search_end)
949  if (cs->sort_order[*i++] != cs->sort_order[*j++])
950  goto skip;
951 
952  if (nmatch > 0)
953  {
954  match[0].beg= 0;
955  match[0].end= (size_t) (str- (const unsigned char*)b-1);
956  match[0].mb_len= match[0].end;
957 
958  if (nmatch > 1)
959  {
960  match[1].beg= match[0].end;
961  match[1].end= match[0].end+s_length;
962  match[1].mb_len= match[1].end-match[1].beg;
963  }
964  }
965  return 2;
966  }
967  }
968  }
969  return 0;
970 }
971 
972 
/*
  Work entry describing one Unicode plane (the high byte of a 16-bit
  code) while create_fromuni() builds the reverse mapping table.
*/
typedef struct
{
  int nchars;                           /* number of characters counted in the plane */
  MY_UNI_IDX uidx;                      /* from/to code range and lookup table of the plane */
} uni_idx;
978 
#define PLANE_SIZE 0x100
#define PLANE_NUM 0x100
/* Plane of a 16-bit code: its high byte, reduced modulo PLANE_NUM. */
inline static int plane_number(uint16_t x)
{
  const int high_byte= x >> 8;
  return high_byte % PLANE_NUM;
}
985 
986 static int pcmp(const void * f, const void * s)
987 {
988  const uni_idx *F= (const uni_idx*) f;
989  const uni_idx *S= (const uni_idx*) s;
990  int res;
991 
992  if (!(res=((S->nchars)-(F->nchars))))
993  res=((F->uidx.from)-(S->uidx.to));
994  return res;
995 }
996 
997 static bool create_fromuni(charset_info_st *cs, cs_alloc_func alloc)
998 {
999  uni_idx idx[PLANE_NUM];
1000  int i,n;
1001 
1002  /*
1003  Check that Unicode map is loaded.
1004  It can be not loaded when the collation is
1005  listed in Index.xml but not specified
1006  in the character set specific XML file.
1007  */
1008  if (!cs->tab_to_uni)
1009  return true;
1010 
1011  /* Clear plane statistics */
1012  memset(idx, 0, sizeof(idx));
1013 
1014  /* Count number of characters in each plane */
1015  for (i=0; i< 0x100; i++)
1016  {
1017  uint16_t wc=cs->tab_to_uni[i];
1018  int pl= plane_number(wc);
1019 
1020  if (wc || !i)
1021  {
1022  if (!idx[pl].nchars)
1023  {
1024  idx[pl].uidx.from=wc;
1025  idx[pl].uidx.to=wc;
1026  }else
1027  {
1028  idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from;
1029  idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to;
1030  }
1031  idx[pl].nchars++;
1032  }
1033  }
1034 
1035  /* Sort planes in descending order */
1036  qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp);
1037 
1038  for (i=0; i < PLANE_NUM; i++)
1039  {
1040  int ch,numchars;
1041 
1042  /* Skip empty plane */
1043  if (!idx[i].nchars)
1044  break;
1045 
1046  numchars=idx[i].uidx.to-idx[i].uidx.from+1;
1047  if (!(idx[i].uidx.tab=(unsigned char*) alloc(numchars * sizeof(*idx[i].uidx.tab))))
1048  return true;
1049 
1050  memset(idx[i].uidx.tab, 0, numchars*sizeof(*idx[i].uidx.tab));
1051 
1052  for (ch=1; ch < PLANE_SIZE; ch++)
1053  {
1054  uint16_t wc=cs->tab_to_uni[ch];
1055  if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
1056  {
1057  int ofs= wc - idx[i].uidx.from;
1058  idx[i].uidx.tab[ofs]= ch;
1059  }
1060  }
1061  }
1062 
1063  /* Allocate and fill reverse table for each plane */
1064  n=i;
1065  if (!(cs->tab_from_uni= (MY_UNI_IDX*) alloc(sizeof(MY_UNI_IDX)*(n+1))))
1066  return true;
1067 
1068  for (i=0; i< n; i++)
1069  cs->tab_from_uni[i]= idx[i].uidx;
1070 
1071  /* Set end-of-list marker */
1072  memset(&cs->tab_from_uni[i], 0, sizeof(MY_UNI_IDX));
1073  return false;
1074 }
1075 
1076 bool my_cset_init_8bit(charset_info_st *cs, cs_alloc_func alloc)
1077 {
1078  cs->caseup_multiply= 1;
1079  cs->casedn_multiply= 1;
1080  cs->pad_char= ' ';
1081  return create_fromuni(cs, alloc);
1082 }
1083 
1084 static void set_max_sort_char(charset_info_st *cs)
1085 {
1086  unsigned char max_char;
1087  uint32_t i;
1088 
1089  if (!cs->sort_order)
1090  return;
1091 
1092  max_char=cs->sort_order[(unsigned char) cs->max_sort_char];
1093  for (i= 0; i < 256; i++)
1094  {
1095  if ((unsigned char) cs->sort_order[i] > max_char)
1096  {
1097  max_char=(unsigned char) cs->sort_order[i];
1098  cs->max_sort_char= i;
1099  }
1100  }
1101 }
1102 
1103 bool my_coll_init_simple(charset_info_st *cs, cs_alloc_func)
1104 {
1105  set_max_sort_char(cs);
1106  return false;
1107 }
1108 
1109 
1110 int64_t my_strtoll10_8bit(const charset_info_st * const,
1111  const char *nptr, char **endptr, int *error)
1112 {
1113  return internal::my_strtoll10(nptr, endptr, error);
1114 }
1115 
1116 
1117 int my_mb_ctype_8bit(const charset_info_st * const cs, int *ctype,
1118  const unsigned char *s, const unsigned char *e)
1119 {
1120  if (s >= e)
1121  {
1122  *ctype= 0;
1123  return MY_CS_TOOSMALL;
1124  }
1125  *ctype= cs->ctype[*s + 1];
1126  return 1;
1127 }
1128 
1129 
#undef UINT64_MAX
#define UINT64_MAX (~(uint64_t) 0)

/* Largest value that can still take one more decimal digit ... */
#define CUTOFF (UINT64_MAX / 10)
/* ... and the largest digit that may follow a value equal to CUTOFF */
#define CUTLIM (UINT64_MAX % 10)
#define DIGITS_IN_ULONGLONG 20

/* Powers of ten: d10[k] == 10^k.  Read-only lookup table. */
static const uint64_t d10[DIGITS_IN_ULONGLONG]=
{
  1,
  10,
  100,
  1000,
  10000,
  100000,
  1000000,
  10000000,
  100000000,
  1000000000,
  10000000000ULL,
  100000000000ULL,
  1000000000000ULL,
  10000000000000ULL,
  100000000000000ULL,
  1000000000000000ULL,
  10000000000000000ULL,
  100000000000000000ULL,
  1000000000000000000ULL,
  10000000000000000000ULL
};
1160 
1161 
1162 /*
1163 
1164  Convert a string to uint64_t integer value
1165  with rounding.
1166 
1167  SYNOPSYS
1168  my_strntoull10_8bit()
1169  cs in pointer to character set
1170  str in pointer to the string to be converted
1171  length in string length
1172  unsigned_flag in whether the number is unsigned
1173  endptr out pointer to the stop character
1174  error out returned error code
1175 
1176  DESCRIPTION
1177  This function takes the decimal representation of integer number
1178  from string str and converts it to an signed or unsigned
1179  int64_t value.
1180  Space characters and tab are ignored.
1181  A sign character might precede the digit characters.
1182  The number may have any number of pre-zero digits.
1183  The number may have decimal point and exponent.
1184  Rounding is always done in "away from zero" style:
1185  0.5 -> 1
1186  -0.5 -> -1
1187 
1188  The function stops reading the string str after "length" bytes
1189  or at the first character that is not a part of correct number syntax:
1190 
1191  <signed numeric literal> ::=
1192  [ <sign> ] <exact numeric literal> [ E [ <sign> ] <unsigned integer> ]
1193 
1194  <exact numeric literal> ::=
1195  <unsigned integer> [ <period> [ <unsigned integer> ] ]
1196  | <period> <unsigned integer>
1197  <unsigned integer> ::= <digit>...
1198 
1199  RETURN VALUES
1200  Value of string as a signed/unsigned int64_t integer
1201 
1202  endptr cannot be NULL. The function will store the end pointer
1203  to the stop character here.
1204 
1205  The error parameter contains information how things went:
1206  0 ok
1207  ERANGE If the the value of the converted number is out of range
1208  In this case the return value is:
1209  - UINT64_MAX if unsigned_flag and the number was too big
1210  - 0 if unsigned_flag and the number was negative
1211  - INT64_MAX if no unsigned_flag and the number is too big
1212  - INT64_MIN if no unsigned_flag and the number it too big negative
1213 
1214  EDOM If the string didn't contain any digits.
1215  In this case the return value is 0.
1216 */
1217 
1218 uint64_t
1219 my_strntoull10rnd_8bit(const charset_info_st * const,
1220  const char *str, size_t length, int unsigned_flag,
1221  char **endptr, int *error)
1222 {
1223  const char *dot, *end9, *beg, *end= str + length;
1224  uint64_t ull;
1225  ulong ul;
1226  unsigned char ch;
1227  int shift= 0, digits= 0, negative, addon;
1228 
1229  /* Skip leading spaces and tabs */
1230  for ( ; str < end && (*str == ' ' || *str == '\t') ; str++) {}
1231 
1232  if (str >= end)
1233  goto ret_edom;
1234 
1235  if ((negative= (*str == '-')) || *str=='+') /* optional sign */
1236  {
1237  if (++str == end)
1238  goto ret_edom;
1239  }
1240 
1241  beg= str;
1242  end9= (str + 9) > end ? end : (str + 9);
1243  /* Accumulate small number into ulong, for performance purposes */
1244  for (ul= 0 ; str < end9 && (ch= (unsigned char) (*str - '0')) < 10; str++)
1245  {
1246  ul= ul * 10 + ch;
1247  }
1248 
1249  if (str >= end) /* Small number without dots and expanents */
1250  {
1251  *endptr= (char*) str;
1252  if (negative)
1253  {
1254  if (unsigned_flag)
1255  {
1256  *error= ul ? ERANGE : 0;
1257  return 0;
1258  }
1259  else
1260  {
1261  *error= 0;
1262  return (uint64_t) (int64_t) -(long) ul;
1263  }
1264  }
1265  else
1266  {
1267  *error=0;
1268  return (uint64_t) ul;
1269  }
1270  }
1271 
1272  digits= str - beg;
1273 
1274  /* Continue to accumulate into uint64_t */
1275  for (dot= NULL, ull= ul; str < end; str++)
1276  {
1277  if ((ch= (unsigned char) (*str - '0')) < 10)
1278  {
1279  if (ull < CUTOFF || (ull == CUTOFF && ch <= CUTLIM))
1280  {
1281  ull= ull * 10 + ch;
1282  digits++;
1283  continue;
1284  }
1285  /*
1286  Adding the next digit would overflow.
1287  Remember the next digit in "addon", for rounding.
1288  Scan all digits with an optional single dot.
1289  */
1290  if (ull == CUTOFF)
1291  {
1292  ull= UINT64_MAX;
1293  addon= 1;
1294  str++;
1295  }
1296  else
1297  addon= (*str >= '5');
1298  if (!dot)
1299  {
1300  for ( ; str < end && (ch= (unsigned char) (*str - '0')) < 10; shift++, str++) {}
1301  if (str < end && *str == '.')
1302  {
1303  str++;
1304  for ( ; str < end && (ch= (unsigned char) (*str - '0')) < 10; str++) {}
1305  }
1306  }
1307  else
1308  {
1309  shift= dot - str;
1310  for ( ; str < end && (ch= (unsigned char) (*str - '0')) < 10; str++) {}
1311  }
1312  goto exp;
1313  }
1314 
1315  if (*str == '.')
1316  {
1317  if (dot)
1318  {
1319  /* The second dot character */
1320  addon= 0;
1321  goto exp;
1322  }
1323  else
1324  {
1325  dot= str + 1;
1326  }
1327  continue;
1328  }
1329 
1330  /* Unknown character, exit the loop */
1331  break;
1332  }
1333  shift= dot ? dot - str : 0; /* Right shift */
1334  addon= 0;
1335 
1336 exp: /* [ E [ <sign> ] <unsigned integer> ] */
1337 
1338  if (!digits)
1339  {
1340  str= beg;
1341  goto ret_edom;
1342  }
1343 
1344  if (str < end && (*str == 'e' || *str == 'E'))
1345  {
1346  str++;
1347  if (str < end)
1348  {
1349  int negative_exp, exponent;
1350  if ((negative_exp= (*str == '-')) || *str=='+')
1351  {
1352  if (++str == end)
1353  goto ret_sign;
1354  }
1355  for (exponent= 0 ;
1356  str < end && (ch= (unsigned char) (*str - '0')) < 10;
1357  str++)
1358  {
1359  exponent= exponent * 10 + ch;
1360  }
1361  shift+= negative_exp ? -exponent : exponent;
1362  }
1363  }
1364 
1365  if (shift == 0) /* No shift, check addon digit */
1366  {
1367  if (addon)
1368  {
1369  if (ull == UINT64_MAX)
1370  goto ret_too_big;
1371  ull++;
1372  }
1373  goto ret_sign;
1374  }
1375 
1376  if (shift < 0) /* Right shift */
1377  {
1378  uint64_t d, r;
1379 
1380  if (-shift >= DIGITS_IN_ULONGLONG)
1381  goto ret_zero; /* Exponent is a big negative number, return 0 */
1382 
1383  d= d10[-shift];
1384  r= (ull % d) * 2;
1385  ull /= d;
1386  if (r >= d)
1387  ull++;
1388  goto ret_sign;
1389  }
1390 
1391  if (shift > DIGITS_IN_ULONGLONG) /* Huge left shift */
1392  {
1393  if (!ull)
1394  goto ret_sign;
1395  goto ret_too_big;
1396  }
1397 
1398  for ( ; shift > 0; shift--, ull*= 10) /* Left shift */
1399  {
1400  if (ull > CUTOFF)
1401  goto ret_too_big; /* Overflow, number too big */
1402  }
1403 
1404 ret_sign:
1405  *endptr= (char*) str;
1406 
1407  if (!unsigned_flag)
1408  {
1409  if (negative)
1410  {
1411  if (ull > (uint64_t) INT64_MIN)
1412  {
1413  *error= ERANGE;
1414  return (uint64_t) INT64_MIN;
1415  }
1416  *error= 0;
1417  return (uint64_t) -(int64_t) ull;
1418  }
1419  else
1420  {
1421  if (ull > (uint64_t) INT64_MAX)
1422  {
1423  *error= ERANGE;
1424  return (uint64_t) INT64_MAX;
1425  }
1426  *error= 0;
1427  return ull;
1428  }
1429  }
1430 
1431  /* Unsigned number */
1432  if (negative && ull)
1433  {
1434  *error= ERANGE;
1435  return 0;
1436  }
1437  *error= 0;
1438  return ull;
1439 
1440 ret_zero:
1441  *endptr= (char*) str;
1442  *error= 0;
1443  return 0;
1444 
1445 ret_edom:
1446  *endptr= (char*) str;
1447  *error= EDOM;
1448  return 0;
1449 
1450 ret_too_big:
1451  *endptr= (char*) str;
1452  *error= ERANGE;
1453  return unsigned_flag ?
1454  UINT64_MAX :
1455  negative ? (uint64_t) INT64_MIN : (uint64_t) INT64_MAX;
1456 }
1457 
1458 
/*
  Report whether a constant can be propagated by the optimizer.

  SYNOPSIS:
    my_propagate_simple()

  NOTES:
    The function takes no arguments and does not look at the
    constant; it answers for the collation as a whole.

    Propagating a constant is not always safe.  Consider:

      create table t1 (
        s char(5) character set latin1 collate latin1_german2_ci);
      insert into t1 values (0xf6); -- o-umlaut
      select * from t1 where length(s)=1 and s='oe';

    The above query should return one row, so it cannot be
    rewritten as:

      select * from t1 where length('oe')=1 and s='oe';

    Currently the constant itself is not checked.  Propagation is
    refused only when the collation allows tricky things such as
    expansions and contractions.  In the future a more
    sophisticated function could examine the constant: for
    example, 'oa' can always be safely propagated in German2
    because, unlike 'oe', it has no special meaning.

  RETURN
    true   the constant can be safely propagated (always, here)
*/
bool my_propagate_simple()
{
  return true;
}
1501 
/*
  Counterpart of my_propagate_simple(): unconditionally reports
  that it is NOT safe to propagate a constant.

  RETURN
    false  always
*/
bool my_propagate_complex()
{
  return false;
}
1506 
1507 /*
1508  Apply DESC and REVERSE collation rules.
1509 
1510  SYNOPSIS:
1511  my_strxfrm_desc_and_reverse()
1512  str - pointer to string
1513  strend - end of string
1514  flags - flags
1515  level - which level, starting from 0.
1516 
1517  NOTES:
1518  Apply DESC or REVERSE or both flags.
1519 
1520  If DESC flag is given, then the weights
1521  come out NOTed or negated for that level.
1522 
1523  If REVERSE flags is given, then the weights come out in
1524  reverse order for that level, that is, starting with
1525  the last character and ending with the first character.
1526 
1527  If nether DESC nor REVERSE flags are give,
1528  the string is not changed.
1529 
1530 */
1531 void my_strxfrm_desc_and_reverse(unsigned char *str, unsigned char *strend,
1532  uint32_t flags, uint32_t level)
1533 {
1534  if (flags & (MY_STRXFRM_DESC_LEVEL1 << level))
1535  {
1536  if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level))
1537  {
1538  for (strend--; str <= strend;)
1539  {
1540  unsigned char tmp= *str;
1541  *str++= ~*strend;
1542  *strend--= ~tmp;
1543  }
1544  }
1545  else
1546  {
1547  for (; str < strend; str++)
1548  *str= ~*str;
1549  }
1550  }
1551  else if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level))
1552  {
1553  for (strend--; str < strend;)
1554  {
1555  unsigned char tmp= *str;
1556  *str++= *strend;
1557  *strend--= tmp;
1558  }
1559  }
1560 }
1561 
1562 
1563 size_t
1564 my_strxfrm_pad_desc_and_reverse(const charset_info_st * const cs,
1565  unsigned char *str, unsigned char *frmend, unsigned char *strend,
1566  uint32_t nweights, uint32_t flags, uint32_t level)
1567 {
1568  if (nweights && frmend < strend && (flags & MY_STRXFRM_PAD_WITH_SPACE))
1569  {
1570  uint32_t fill_length= min((uint32_t) (strend - frmend), nweights * cs->mbminlen);
1571  cs->cset->fill(cs, (char*) frmend, fill_length, cs->pad_char);
1572  frmend+= fill_length;
1573  }
1574  my_strxfrm_desc_and_reverse(str, frmend, flags, level);
1575  return frmend - str;
1576 }
1577 
1578 } /* namespace drizzled */