Drizzled Public API Documentation

sum.h
1 /* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
3  *
4  * Copyright (C) 2008 Sun Microsystems, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; version 2 of the License.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18  */
19 
20 #pragma once
21 
22 /* classes for sum functions */
23 
24 #include <drizzled/tree.h>
25 #include <drizzled/hybrid_type.h>
26 #include <drizzled/item.h>
27 #include <drizzled/item/field.h>
28 #include <drizzled/item/bin_string.h>
29 #include <drizzled/charset.h>
30 
31 namespace drizzled {
32 
33 int group_concat_key_cmp_with_distinct(void* arg, const void* key1, const void* key2);
34 
35 int group_concat_key_cmp_with_order(void* arg, const void* key1, const void* key2);
36 
37 /*
38  Class Item_sum is the base class used for special expressions that SQL calls
39  'set functions'. These expressions are formed with the help of aggregate
40  functions such as SUM, MAX, GROUP_CONCAT etc.
41 
42  GENERAL NOTES
43 
44  A set function cannot be used in certain positions where expressions are
45  accepted. There are some quite explicable restrictions for the usage of
46  set functions.
47 
48  In the query:
49  SELECT AVG(b) FROM t1 WHERE SUM(b) > 20 GROUP by a
50  the usage of the set function AVG(b) is legal, while the usage of SUM(b)
51  is illegal. A WHERE condition must contain expressions that can be
52  evaluated for each row of the table. Yet the expression SUM(b) can be
53  evaluated only for each group of rows with the same value of column a.
54  In the query:
55  SELECT AVG(b) FROM t1 WHERE c > 30 GROUP BY a HAVING SUM(b) > 20
56  both set function expressions AVG(b) and SUM(b) are legal.
57 
58  We can say that in a query without nested selects an occurrence of a
59  set function in an expression of the SELECT list or/and in the HAVING
60  clause is legal, while in the WHERE clause it's illegal.
61 
62  The general rule to detect whether a set function is legal in a query with
63  nested subqueries is much more complicated.
64 
65  Consider the the following query:
66  SELECT t1.a FROM t1 GROUP BY t1.a
67  HAVING t1.a > ALL (SELECT t2.c FROM t2 WHERE SUM(t1.b) < t2.c).
68  The set function SUM(b) is used here in the WHERE clause of the subquery.
69  Nevertheless it is legal since it is under the HAVING clause of the query
70  to which this function relates. The expression SUM(t1.b) is evaluated
71  for each group defined in the main query, not for groups of the subquery.
72 
73  The problem of finding the query where to aggregate a particular
74  set function is not so simple as it seems to be.
75 
76  In the query:
77  SELECT t1.a FROM t1 GROUP BY t1.a
78  HAVING t1.a > ALL(SELECT t2.c FROM t2 GROUP BY t2.c
79  HAVING SUM(t1.a) < t2.c)
80  the set function can be evaluated for both outer and inner selects.
81  If we evaluate SUM(t1.a) for the outer query then we get the value of t1.a
82  multiplied by the cardinality of a group in table t1. In this case
83  in each correlated subquery SUM(t1.a) is used as a constant. But we also
84  can evaluate SUM(t1.a) for the inner query. In this case t1.a will be a
85  constant for each correlated subquery and summation is performed
86  for each group of table t2.
87  (Here it makes sense to remind that the query
88  SELECT c FROM t GROUP BY a HAVING SUM(1) < a
89  is quite legal in our SQL).
90 
91  So depending on what query we assign the set function to we
92  can get different result sets.
93 
94  The general rule to detect the query where a set function is to be
95  evaluated can be formulated as follows.
96  Consider a set function S(E) where E is an expression with occurrences
97  of column references C1, ..., CN. Resolve these column references against
98  subqueries that contain the set function S(E). Let Q be the innermost
99  subquery of those subqueries. (It should be noted here that S(E)
100  in no way can be evaluated in the subquery embedding the subquery Q,
101  otherwise S(E) would refer to at least one unbound column reference)
102  If S(E) is used in a construct of Q where set functions are allowed then
103  we evaluate S(E) in Q.
104  Otherwise we look for a innermost subquery containing S(E) of those where
105  usage of S(E) is allowed.
106 
107  Let's demonstrate how this rule is applied to the following queries.
108 
109  1. SELECT t1.a FROM t1 GROUP BY t1.a
110  HAVING t1.a > ALL(SELECT t2.b FROM t2 GROUP BY t2.b
111  HAVING t2.b > ALL(SELECT t3.c FROM t3 GROUP BY t3.c
112  HAVING SUM(t1.a+t2.b) < t3.c))
113  For this query the set function SUM(t1.a+t2.b) depends on t1.a and t2.b
114  with t1.a defined in the outermost query, and t2.b defined for its
115  subquery. The set function is in the HAVING clause of the subquery and can
116  be evaluated in this subquery.
117 
118  2. SELECT t1.a FROM t1 GROUP BY t1.a
119  HAVING t1.a > ALL(SELECT t2.b FROM t2
120  WHERE t2.b > ALL (SELECT t3.c FROM t3 GROUP BY t3.c
121  HAVING SUM(t1.a+t2.b) < t3.c))
122  Here the set function SUM(t1.a+t2.b)is in the WHERE clause of the second
123  subquery - the most upper subquery where t1.a and t2.b are defined.
124  If we evaluate the function in this subquery we violate the context rules.
125  So we evaluate the function in the third subquery (over table t3) where it
126  is used under the HAVING clause.
127 
128  3. SELECT t1.a FROM t1 GROUP BY t1.a
129  HAVING t1.a > ALL(SELECT t2.b FROM t2
130  WHERE t2.b > ALL (SELECT t3.c FROM t3
131  WHERE SUM(t1.a+t2.b) < t3.c))
132  In this query evaluation of SUM(t1.a+t2.b) is not legal neither in the second
133  nor in the third subqueries. So this query is invalid.
134 
135  Mostly set functions cannot be nested. In the query
136  SELECT t1.a from t1 GROUP BY t1.a HAVING AVG(SUM(t1.b)) > 20
137  the expression SUM(b) is not acceptable, though it is under a HAVING clause.
138  Yet it is acceptable in the query:
139  SELECT t.1 FROM t1 GROUP BY t1.a HAVING SUM(t1.b) > 20.
140 
141  An argument of a set function does not have to be a reference to a table
142  column as we saw it in examples above. This can be a more complex expression
143  SELECT t1.a FROM t1 GROUP BY t1.a HAVING SUM(t1.b+1) > 20.
144  The expression SUM(t1.b+1) has a very clear semantics in this context:
145  we sum up the values of t1.b+1 where t1.b varies for all values within a
146  group of rows that contain the same t1.a value.
147 
148  A set function for an outer query yields a constant within a subquery. So
149  the semantics of the query
150  SELECT t1.a FROM t1 GROUP BY t1.a
151  HAVING t1.a IN (SELECT t2.c FROM t2 GROUP BY t2.c
152  HAVING AVG(t2.c+SUM(t1.b)) > 20)
153  is still clear. For a group of the rows with the same t1.a values we
154  calculate the value of SUM(t1.b). This value 's' is substituted in the
155  the subquery:
156  SELECT t2.c FROM t2 GROUP BY t2.c HAVING AVG(t2.c+s)
157  than returns some result set.
158 
159  By the same reason the following query with a subquery
160  SELECT t1.a FROM t1 GROUP BY t1.a
161  HAVING t1.a IN (SELECT t2.c FROM t2 GROUP BY t2.c
162  HAVING AVG(SUM(t1.b)) > 20)
163  is also acceptable.
164 
165  IMPLEMENTATION NOTES
166 
167  Three methods were added to the class to check the constraints specified
168  in the previous section. These methods utilize several new members.
169 
170  The field 'nest_level' contains the number of the level for the subquery
171  containing the set function. The main SELECT is of level 0, its subqueries
172  are of levels 1, the subqueries of the latter are of level 2 and so on.
173 
174  The field 'aggr_level' is to contain the nest level of the subquery
175  where the set function is aggregated.
176 
177  The field 'max_arg_level' is for the maximun of the nest levels of the
178  unbound column references occurred in the set function. A column reference
179  is unbound within a set function if it is not bound by any subquery
180  used as a subexpression in this function. A column reference is bound by
181  a subquery if it is a reference to the column by which the aggregation
182  of some set function that is used in the subquery is calculated.
183  For the set function used in the query
184  SELECT t1.a FROM t1 GROUP BY t1.a
185  HAVING t1.a > ALL(SELECT t2.b FROM t2 GROUP BY t2.b
186  HAVING t2.b > ALL(SELECT t3.c FROM t3 GROUP BY t3.c
187  HAVING SUM(t1.a+t2.b) < t3.c))
188  the value of max_arg_level is equal to 1 since t1.a is bound in the main
189  query, and t2.b is bound by the first subquery whose nest level is 1.
190  Obviously a set function cannot be aggregated in the subquery whose
191  nest level is less than max_arg_level. (Yet it can be aggregated in the
192  subqueries whose nest level is greater than max_arg_level.)
193  In the query
194  SELECT t.a FROM t1 HAVING AVG(t1.a+(SELECT MIN(t2.c) FROM t2))
195  the value of the max_arg_level for the AVG set function is 0 since
196  the reference t2.c is bound in the subquery.
197 
198  The field 'max_sum_func_level' is to contain the maximum of the
199  nest levels of the set functions that are used as subexpressions of
200  the arguments of the given set function, but not aggregated in any
201  subquery within this set function. A nested set function s1 can be
202  used within set function s0 only if s1.max_sum_func_level <
203  s0.max_sum_func_level. Set function s1 is considered as nested
204  for set function s0 if s1 is not calculated in any subquery
205  within s0.
206 
207  A set function that is used as a subexpression in an argument of another
208  set function refers to the latter via the field 'in_sum_func'.
209 
210  The condition imposed on the usage of set functions are checked when
211  we traverse query subexpressions with the help of the recursive method
212  fix_fields. When we apply this method to an object of the class
213  Item_sum, first, on the descent, we call the method init_sum_func_check
214  that initialize members used at checking. Then, on the ascent, we
215  call the method check_sum_func that validates the set function usage
216  and reports an error if it is illegal.
217  The method register_sum_func serves to link the items for the set functions
218  that are aggregated in the embedding (sub)queries. Circular chains of such
219  functions are attached to the corresponding Select_Lex structures
220  through the field inner_sum_func_list.
221 
222  Exploiting the fact that the members mentioned above are used in one
223  recursive function we could have allocated them on the thread stack.
224  Yet we don't do it now.
225 
226  We assume that the nesting level of subquries does not exceed 127.
227  TODO: to catch queries where the limit is exceeded to make the
228  code clean here.
229 
230 */
231 
233 {
234 public:
235  enum Sumfunctype
236  { COUNT_FUNC, COUNT_DISTINCT_FUNC, SUM_FUNC, SUM_DISTINCT_FUNC, AVG_FUNC,
237  AVG_DISTINCT_FUNC, MIN_FUNC, MAX_FUNC, STD_FUNC,
238  VARIANCE_FUNC, SUM_BIT_FUNC, GROUP_CONCAT_FUNC
239  };
240 
241  Item **args, *tmp_args[2];
242  Item **ref_by; /* pointer to a ref to the object used to register it */
243  Item_sum *next; /* next in the circular chain of registered objects */
244  uint32_t arg_count;
245  Item_sum *in_sum_func; /* embedding set function if any */
246  Select_Lex * aggr_sel; /* select where the function is aggregated */
247  int8_t nest_level; /* number of the nesting level of the set function */
248  int8_t aggr_level; /* nesting level of the aggregating subquery */
249  int8_t max_arg_level; /* max level of unbound column references */
250  int8_t max_sum_func_level;/* max level of aggregation for embedded functions */
251  bool quick_group; /* If incremental update of fields */
252  /*
253  This list is used by the check for mixing non aggregated fields and
254  sum functions in the ONLY_FULL_GROUP_BY_MODE. We save all outer fields
255  directly or indirectly used under this function it as it's unclear
256  at the moment of fixing outer field whether it's aggregated or not.
257  */
258  List<Item_field> outer_fields;
259 
260 protected:
261  table_map used_tables_cache;
262  bool forced_const;
263 
264 public:
265 
266  void mark_as_sum_func();
267  Item_sum() :arg_count(0), quick_group(1), forced_const(false)
268  {
269  mark_as_sum_func();
270  }
271  Item_sum(Item *a) :args(tmp_args), arg_count(1), quick_group(1),
272  forced_const(false)
273  {
274  args[0]=a;
275  mark_as_sum_func();
276  }
277  Item_sum( Item *a, Item *b ) :args(tmp_args), arg_count(2), quick_group(1),
278  forced_const(false)
279  {
280  args[0]=a; args[1]=b;
281  mark_as_sum_func();
282  }
283  Item_sum(List<Item> &list);
284  //Copy constructor, need to perform subselects with temporary tables
285  Item_sum(Session *session, Item_sum *item);
286  enum Type type() const { return SUM_FUNC_ITEM; }
287  virtual enum Sumfunctype sum_func () const=0;
288 
289  /*
290  This method is similar to add(), but it is called when the current
291  aggregation group changes. Thus it performs a combination of
292  clear() and add().
293  */
294  inline bool reset() { clear(); return add(); };
295 
296  /*
297  Prepare this item for evaluation of an aggregate value. This is
298  called by reset() when a group changes, or, for correlated
299  subqueries, between subquery executions. E.g. for COUNT(), this
300  method should set count= 0;
301  */
302  virtual void clear()= 0;
303 
304  /*
305  This method is called for the next row in the same group. Its
306  purpose is to aggregate the new value to the previous values in
307  the group (i.e. since clear() was called last time). For example,
308  for COUNT(), do count++.
309  */
310  virtual bool add()=0;
311 
312  /*
313  Called when new group is started and results are being saved in
314  a temporary table. Similar to reset(), but must also store value in
315  result_field. Like reset() it is supposed to reset start value to
316  default.
317  This set of methods (reult_field(), reset_field, update_field()) of
318  Item_sum is used only if quick_group is not null. Otherwise
319  copy_or_same() is used to obtain a copy of this item.
320  */
321  virtual void reset_field()=0;
322  /*
323  Called for each new value in the group, when temporary table is in use.
324  Similar to add(), but uses temporary table field to obtain current value,
325  Updated value is then saved in the field.
326  */
327  virtual void update_field()=0;
328  virtual bool keep_field_type(void) const { return 0; }
329  virtual void fix_length_and_dec() { maybe_null=1; null_value=1; }
330  /*
331  This method is used for debug purposes to print the name of an
332  item to the debug log. The second use of this method is as
333  a helper function of print(), where it is applicable.
334  To suit both goals it should return a meaningful,
335  distinguishable and sintactically correct string. This method
336  should not be used for runtime type identification, use enum
337  {Sum}Functype and Item_func::functype()/Item_sum::sum_func()
338  instead.
339 
340  NOTE: for Items inherited from Item_sum, func_name() return part of
341  function name till first argument (including '(') to make difference in
342  names for functions with 'distinct' clause and without 'distinct' and
343  also to make printing of items inherited from Item_sum uniform.
344  */
345  virtual const char *func_name() const= 0;
346  virtual Item *result_item(Field *field)
347  { return new Item_field(field); }
348  table_map used_tables() const { return used_tables_cache; }
349  void update_used_tables ();
350  void cleanup()
351  {
352  Item::cleanup();
353  forced_const= false;
354  }
355  bool is_null() { return null_value; }
356  void make_const ()
357  {
358  used_tables_cache= 0;
359  forced_const= true;
360  }
361  virtual bool const_item() const { return forced_const; }
362  virtual bool const_during_execution() const { return false; }
363  void make_field(SendField *field);
364  virtual void print(String *str);
365  void fix_num_length_and_dec();
366 
367  /*
368  This function is called by the execution engine to assign 'NO ROWS
369  FOUND' value to an aggregate item, when the underlying result set
370  has no rows. Such value, in a general case, may be different from
371  the default value of the item after 'clear()': e.g. a numeric item
372  may be initialized to 0 by clear() and to NULL by
373  no_rows_in_result().
374  */
375  void no_rows_in_result() { clear(); }
376 
377  virtual bool setup(Session *) {return 0;}
378  virtual void make_unique(void) {}
379  Item *get_tmp_table_item(Session *session);
380  virtual Field *create_tmp_field(bool group, Table *table,
381  uint32_t convert_blob_length);
382  bool walk(Item_processor processor, bool walk_subquery, unsigned char *argument);
383  bool init_sum_func_check(Session *session);
384  bool check_sum_func(Session *session, Item **ref);
385  bool register_sum_func(Session *session, Item **ref);
386  Select_Lex *depended_from()
387  { return (nest_level == aggr_level ? 0 : aggr_sel); }
388 };
389 
390 
391 class Item_sum_num :public Item_sum
392 {
393 protected:
394  /*
395  val_xxx() functions may be called several times during the execution of a
396  query. Derived classes that require extensive calculation in val_xxx()
397  maintain cache of aggregate value. This variable governs the validity of
398  that cache.
399  */
400  bool is_evaluated;
401 public:
402  Item_sum_num() :Item_sum(),is_evaluated(false) {}
403  Item_sum_num(Item *item_par)
404  :Item_sum(item_par), is_evaluated(false) {}
405  Item_sum_num(Item *a, Item* b) :Item_sum(a,b),is_evaluated(false) {}
406  Item_sum_num(List<Item> &list)
407  :Item_sum(list), is_evaluated(false) {}
408  Item_sum_num(Session *session, Item_sum_num *item)
409  :Item_sum(session, item),is_evaluated(item->is_evaluated) {}
410  bool fix_fields(Session *, Item **);
411  int64_t val_int();
412  String *val_str(String*str);
414  void reset_field();
415 };
416 
417 
419 {
420 public:
421  Item_sum_int(Item *item_par) :Item_sum_num(item_par) {}
422  Item_sum_int(List<Item> &list) :Item_sum_num(list) {}
423  Item_sum_int(Session *session, Item_sum_int *item) :Item_sum_num(session, item) {}
424  double val_real() { assert(fixed == 1); return (double) val_int(); }
425  String *val_str(String*str);
427  enum Item_result result_type () const { return INT_RESULT; }
428  void fix_length_and_dec()
429  { decimals=0; max_length=21; maybe_null=null_value=0; }
430 };
431 
432 
434 {
435 protected:
436  Item_result hybrid_type;
437  double sum;
438  type::Decimal dec_buffs[2];
439  uint32_t curr_dec_buff;
440  void fix_length_and_dec();
441 
442 public:
443  Item_sum_sum(Item *item_par) :Item_sum_num(item_par) {}
444  Item_sum_sum(Session *session, Item_sum_sum *item);
445  enum Sumfunctype sum_func () const {return SUM_FUNC;}
446  void clear();
447  bool add();
448  double val_real();
449  int64_t val_int();
450  String *val_str(String*str);
452  enum Item_result result_type () const { return hybrid_type; }
453  void reset_field();
454  void update_field();
456  const char *func_name() const { return "sum("; }
457  Item *copy_or_same(Session* session);
458 };
459 
460 
461 
462 /* Common class for SUM(DISTINCT), AVG(DISTINCT) */
463 
465 {
466 protected:
467  /* storage for the summation result */
468  uint64_t count;
469  Hybrid_type val;
470  /* storage for unique elements */
471  Unique *tree;
472  Table *table;
473  enum enum_field_types table_field_type;
474  uint32_t tree_key_length;
475 protected:
477 public:
478  Item_sum_distinct(Item *item_par);
480 
481  bool setup(Session *session);
482  void clear();
483  void cleanup();
484  bool add();
485  double val_real();
487  int64_t val_int();
488  String *val_str(String *str);
489 
490  /* XXX: does it need make_unique? */
491 
492  enum Sumfunctype sum_func () const { return SUM_DISTINCT_FUNC; }
493  void reset_field() {} // not used
494  void update_field() {} // not used
495  virtual void no_rows_in_result() {}
496  void fix_length_and_dec();
497  enum Item_result result_type () const;
498  virtual void calculate_val_and_count();
499  virtual bool unique_walk_function(void *elem);
500 };
501 
502 
503 /*
504  Item_sum_sum_distinct - implementation of SUM(DISTINCT expr).
505  See also: MySQL manual, chapter 'Adding New Functions To MySQL'
506  and comments in item_sum.cc.
507 */
508 
510 {
511 private:
513  :Item_sum_distinct(session, item) {}
514 public:
515  Item_sum_sum_distinct(Item *item_arg) :Item_sum_distinct(item_arg) {}
516 
517  enum Sumfunctype sum_func () const { return SUM_DISTINCT_FUNC; }
518  const char *func_name() const { return "sum(distinct "; }
519  Item *copy_or_same(Session* session) { return new Item_sum_sum_distinct(session, this); }
520 };
521 
522 
523 /* Item_sum_avg_distinct - SELECT AVG(DISTINCT expr) FROM ... */
524 
526 {
527 private:
529  :Item_sum_distinct(session, original) {}
530 public:
531  uint32_t prec_increment;
532  Item_sum_avg_distinct(Item *item_arg) : Item_sum_distinct(item_arg) {}
533 
534  void fix_length_and_dec();
535  virtual void calculate_val_and_count();
536  enum Sumfunctype sum_func () const { return AVG_DISTINCT_FUNC; }
537  const char *func_name() const { return "avg(distinct "; }
538  Item *copy_or_same(Session* session) { return new Item_sum_avg_distinct(session, this); }
539 };
540 
541 
543 {
544  int64_t count;
545 
546  public:
547  Item_sum_count(Item *item_par)
548  :Item_sum_int(item_par),count(0)
549  {}
550  Item_sum_count(Session *session, Item_sum_count *item)
551  :Item_sum_int(session, item), count(item->count)
552  {}
553  enum Sumfunctype sum_func () const { return COUNT_FUNC; }
554  void clear();
555  void no_rows_in_result() { count=0; }
556  bool add();
557  void make_const_count(int64_t count_arg)
558  {
559  count=count_arg;
560  Item_sum::make_const();
561  }
562  int64_t val_int();
563  void reset_field();
564  void cleanup();
565  void update_field();
566  const char *func_name() const { return "count("; }
567  Item *copy_or_same(Session* session);
568 };
569 
570 
572 {
573  Table *table;
574  uint32_t *field_lengths;
575  Tmp_Table_Param *tmp_table_param;
576  bool force_copy_fields;
577  /*
578  If there are no blobs, we can use a tree, which
579  is faster than heap table. In that case, we still use the table
580  to help get things set up, but we insert nothing in it
581  */
582  Unique *tree;
583  /*
584  Storage for the value of count between calls to val_int() so val_int()
585  will not recalculate on each call. Validitiy of the value is stored in
586  is_evaluated.
587  */
588  int64_t count;
589  /*
590  Following is 0 normal object and pointer to original one for copy
591  (to correctly free resources)
592  */
593  Item_sum_count_distinct *original;
594  uint32_t tree_key_length;
595 
596 
597  bool always_null; // Set to 1 if the result is always NULL
598 
599 
600  friend int composite_key_cmp(void* arg, unsigned char* key1, unsigned char* key2);
601  friend int simple_str_key_cmp(void* arg, unsigned char* key1, unsigned char* key2);
602 
603 public:
605  :Item_sum_int(list), table(0), field_lengths(0), tmp_table_param(0),
606  force_copy_fields(0), tree(0), count(0),
607  original(0), always_null(false)
608  { quick_group= 0; }
610  :Item_sum_int(session, item), table(item->table),
611  field_lengths(item->field_lengths),
612  tmp_table_param(item->tmp_table_param),
613  force_copy_fields(0), tree(item->tree), count(item->count),
614  original(item), tree_key_length(item->tree_key_length),
615  always_null(item->always_null)
616  {}
618 
619  void cleanup();
620 
621  enum Sumfunctype sum_func () const { return COUNT_DISTINCT_FUNC; }
622  void clear();
623  bool add();
624  int64_t val_int();
625  void reset_field() { return ;} // Never called
626  void update_field() { return ; } // Never called
627  const char *func_name() const { return "count(distinct "; }
628  bool setup(Session *session);
629  void make_unique();
630  Item *copy_or_same(Session* session);
632 };
633 
634 
635 /* Item to get the value of a stored sum function */
636 
638 {
639 public:
640  Field *field;
641  Item_result hybrid_type;
642  uint32_t f_precision, f_scale, dec_bin_size;
643  uint32_t prec_increment;
644  Item_avg_field(Item_result res_type, Item_sum_avg *item);
645  enum Type type() const { return FIELD_AVG_ITEM; }
646  double val_real();
647  int64_t val_int();
649  bool is_null() { update_null_value(); return null_value; }
650  String *val_str(String*);
651  enum_field_types field_type() const
652  {
653  return hybrid_type == DECIMAL_RESULT ?
654  DRIZZLE_TYPE_DECIMAL : DRIZZLE_TYPE_DOUBLE;
655  }
656  void fix_length_and_dec() {}
657  enum Item_result result_type () const { return hybrid_type; }
658 };
659 
660 
662 {
663 public:
664  uint64_t count;
665  uint32_t prec_increment;
666  uint32_t f_precision, f_scale, dec_bin_size;
667 
668  Item_sum_avg(Item *item_par) :Item_sum_sum(item_par), count(0) {}
669  Item_sum_avg(Session *session, Item_sum_avg *item)
670  :Item_sum_sum(session, item), count(item->count),
671  prec_increment(item->prec_increment) {}
672 
673  void fix_length_and_dec();
674  enum Sumfunctype sum_func () const {return AVG_FUNC;}
675  void clear();
676  bool add();
677  double val_real();
678  // In SPs we might force the "wrong" type with select into a declare variable
679  int64_t val_int();
681  String *val_str(String *str);
682  void reset_field();
683  void update_field();
684  Item *result_item(Field *)
685  { return new Item_avg_field(hybrid_type, this); }
687  const char *func_name() const { return "avg("; }
688  Item *copy_or_same(Session* session);
689  Field *create_tmp_field(bool group, Table *table, uint32_t convert_blob_length);
690  void cleanup()
691  {
692  count= 0;
693  Item_sum_sum::cleanup();
694  }
695 };
696 
698 {
699 public:
700  Field *field;
701  Item_result hybrid_type;
702  uint32_t f_precision0, f_scale0;
703  uint32_t f_precision1, f_scale1;
704  uint32_t dec_bin_size0, dec_bin_size1;
705  uint32_t sample;
706  uint32_t prec_increment;
708  enum Type type() const {return FIELD_VARIANCE_ITEM; }
709  double val_real();
710  int64_t val_int();
712  { return val_string_from_real(str); }
714  { return val_decimal_from_real(dec_buf); }
715  bool is_null() { update_null_value(); return null_value; }
716  enum_field_types field_type() const
717  {
718  return hybrid_type == DECIMAL_RESULT ?
719  DRIZZLE_TYPE_DECIMAL : DRIZZLE_TYPE_DOUBLE;
720  }
721  void fix_length_and_dec() {}
722  enum Item_result result_type () const { return hybrid_type; }
723 };
724 
725 
726 /*
727  variance(a) =
728 
729  = sum (ai - avg(a))^2 / count(a) )
730  = sum (ai^2 - 2*ai*avg(a) + avg(a)^2) / count(a)
731  = (sum(ai^2) - sum(2*ai*avg(a)) + sum(avg(a)^2))/count(a) =
732  = (sum(ai^2) - 2*avg(a)*sum(a) + count(a)*avg(a)^2)/count(a) =
733  = (sum(ai^2) - 2*sum(a)*sum(a)/count(a) + count(a)*sum(a)^2/count(a)^2 )/count(a) =
734  = (sum(ai^2) - 2*sum(a)^2/count(a) + sum(a)^2/count(a) )/count(a) =
735  = (sum(ai^2) - sum(a)^2/count(a))/count(a)
736 
737 But, this falls prey to catastrophic cancellation. Instead, use the recurrence formulas
738 
739  M_{1} = x_{1}, ~ M_{k} = M_{k-1} + (x_{k} - M_{k-1}) / k newline
740  S_{1} = 0, ~ S_{k} = S_{k-1} + (x_{k} - M_{k-1}) times (x_{k} - M_{k}) newline
741  for 2 <= k <= n newline
742  ital variance = S_{n} / (n-1)
743 
744 */
745 
747 {
748  void fix_length_and_dec();
749 
750 public:
751  Item_result hybrid_type;
752  int cur_dec;
753  double recurrence_m, recurrence_s; /* Used in recurrence relation. */
754  uint64_t count;
755  uint32_t f_precision0, f_scale0;
756  uint32_t f_precision1, f_scale1;
757  uint32_t dec_bin_size0, dec_bin_size1;
758  uint32_t sample;
759  uint32_t prec_increment;
760 
761  Item_sum_variance(Item *item_par, uint32_t sample_arg) :Item_sum_num(item_par),
762  hybrid_type(REAL_RESULT), count(0), sample(sample_arg)
763  {}
765  enum Sumfunctype sum_func () const { return VARIANCE_FUNC; }
766  void clear();
767  bool add();
768  double val_real();
769  int64_t val_int();
771  void reset_field();
772  void update_field();
773  Item *result_item(Field *)
774  { return new Item_variance_field(this); }
776  const char *func_name() const
777  { return sample ? "var_samp(" : "variance("; }
778  Item *copy_or_same(Session* session);
779  Field *create_tmp_field(bool group, Table *table, uint32_t convert_blob_length);
780  enum Item_result result_type () const { return REAL_RESULT; }
781  void cleanup()
782  {
783  count= 0;
784  Item_sum_num::cleanup();
785  }
786 };
787 
789 {
790 public:
792  enum Type type() const { return FIELD_STD_ITEM; }
793  double val_real();
795  enum Item_result result_type () const { return REAL_RESULT; }
796  enum_field_types field_type() const { return DRIZZLE_TYPE_DOUBLE;}
797 };
798 
799 /*
800  standard_deviation(a) = sqrt(variance(a))
801 */
802 
804 {
805  public:
806  Item_sum_std(Item *item_par, uint32_t sample_arg)
807  :Item_sum_variance(item_par, sample_arg) {}
808  Item_sum_std(Session *session, Item_sum_std *item)
809  :Item_sum_variance(session, item)
810  {}
811  enum Sumfunctype sum_func () const { return STD_FUNC; }
812  double val_real();
813  Item *result_item(Field *)
814  { return new Item_std_field(this); }
815  const char *func_name() const { return "std("; }
816  Item *copy_or_same(Session* session);
817  enum Item_result result_type () const { return REAL_RESULT; }
818  enum_field_types field_type() const { return DRIZZLE_TYPE_DOUBLE;}
819 };
820 
821 // This class is a string or number function depending on num_func
822 
824 {
825 protected:
826  String value,tmp_value;
827  double sum;
828  int64_t sum_int;
829  type::Decimal sum_dec;
830  Item_result hybrid_type;
831  enum_field_types hybrid_field_type;
832  int cmp_sign;
833  bool was_values; // Set if we have found at least one row (for max/min only)
834 
835  public:
836  Item_sum_hybrid(Item *item_par,int sign)
837  :Item_sum(item_par), sum(0.0), sum_int(0),
838  hybrid_type(INT_RESULT), hybrid_field_type(DRIZZLE_TYPE_LONGLONG),
839  cmp_sign(sign), was_values(true)
840  { collation.set(&my_charset_bin); }
841  Item_sum_hybrid(Session *session, Item_sum_hybrid *item);
842  bool fix_fields(Session *, Item **);
843  void clear();
844  double val_real();
845  int64_t val_int();
847  void reset_field();
848  String *val_str(String *);
849  bool keep_field_type(void) const { return 1; }
850  enum Item_result result_type () const { return hybrid_type; }
851  enum enum_field_types field_type() const { return hybrid_field_type; }
852  void update_field();
853  void min_max_update_str_field();
854  void min_max_update_real_field();
855  void min_max_update_int_field();
857  void cleanup();
858  bool any_value() { return was_values; }
859  void no_rows_in_result();
860  Field *create_tmp_field(bool group, Table *table,
861  uint32_t convert_blob_length);
862 };
863 
864 
866 {
867 public:
868  Item_sum_min(Item *item_par) :Item_sum_hybrid(item_par,1) {}
869  Item_sum_min(Session *session, Item_sum_min *item) :Item_sum_hybrid(session, item) {}
870  enum Sumfunctype sum_func () const {return MIN_FUNC;}
871 
872  bool add();
873  const char *func_name() const { return "min("; }
874  Item *copy_or_same(Session* session);
875 };
876 
877 
879 {
880 public:
881  Item_sum_max(Item *item_par) :Item_sum_hybrid(item_par,-1) {}
882  Item_sum_max(Session *session, Item_sum_max *item) :Item_sum_hybrid(session, item) {}
883  enum Sumfunctype sum_func () const {return MAX_FUNC;}
884 
885  bool add();
886  const char *func_name() const { return "max("; }
887  Item *copy_or_same(Session* session);
888 };
889 
890 
892 {
893 protected:
894  uint64_t reset_bits,bits;
895 
896 public:
897  Item_sum_bit(Item *item_par,uint64_t reset_arg)
898  :Item_sum_int(item_par),reset_bits(reset_arg),bits(reset_arg) {}
899  Item_sum_bit(Session *session, Item_sum_bit *item):
900  Item_sum_int(session, item), reset_bits(item->reset_bits), bits(item->bits) {}
901  enum Sumfunctype sum_func () const {return SUM_BIT_FUNC;}
902  void clear();
903  int64_t val_int();
904  void reset_field();
905  void update_field();
906  void fix_length_and_dec()
907  { decimals= 0; max_length=21; unsigned_flag= 1; maybe_null= null_value= 0; }
908  void cleanup()
909  {
910  bits= reset_bits;
911  Item_sum_int::cleanup();
912  }
913 };
914 
915 
917 {
918 public:
919  Item_sum_or(Item *item_par) :Item_sum_bit(item_par,0) {}
920  Item_sum_or(Session *session, Item_sum_or *item) :Item_sum_bit(session, item) {}
921  bool add();
922  const char *func_name() const { return "bit_or("; }
923  Item *copy_or_same(Session* session);
924 };
925 
926 
928 {
929  public:
930  Item_sum_and(Item *item_par) :Item_sum_bit(item_par, UINT64_MAX) {}
931  Item_sum_and(Session *session, Item_sum_and *item) :Item_sum_bit(session, item) {}
932  bool add();
933  const char *func_name() const { return "bit_and("; }
934  Item *copy_or_same(Session* session);
935 };
936 
938 {
939  public:
940  Item_sum_xor(Item *item_par) :Item_sum_bit(item_par,0) {}
941  Item_sum_xor(Session *session, Item_sum_xor *item) :Item_sum_bit(session, item) {}
942  bool add();
943  const char *func_name() const { return "bit_xor("; }
944  Item *copy_or_same(Session* session);
945 };
946 
948 {
949  Tmp_Table_Param *tmp_table_param;
950  DRIZZLE_ERROR *warning;
951  String result;
952  String *separator;
953  Tree tree_base;
954  Tree *tree;
955 
964  Table *table;
965  Order **order;
966  Name_resolution_context *context;
968  uint32_t arg_count_order;
970  uint32_t arg_count_field;
971  uint32_t count_cut_values;
972  bool distinct;
973  bool warning_for_row;
974  bool always_null;
975  bool force_copy_fields;
976  bool no_appended;
977  /*
978  Following is 0 normal object and pointer to original one for copy
979  (to correctly free resources)
980  */
981  Item_func_group_concat *original;
982 
983  friend int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
984  const void* key2);
985  friend int group_concat_key_cmp_with_order(void* arg, const void* key1,
986  const void* key2);
987  friend int dump_leaf_key(unsigned char* key, uint32_t,
988  Item_func_group_concat *group_concat_item);
989 
990 public:
992  bool is_distinct, List<Item> *is_select,
993  SQL_LIST *is_order, String *is_separator);
994 
997  void cleanup();
998 
999  enum Sumfunctype sum_func () const {return GROUP_CONCAT_FUNC;}
1000  const char *func_name() const { return "group_concat"; }
1001  virtual Item_result result_type () const { return STRING_RESULT; }
1002  enum_field_types field_type() const
1003  {
1004  if (max_length/collation.collation->mbmaxlen > CONVERT_IF_BIGGER_TO_BLOB )
1005  return DRIZZLE_TYPE_BLOB;
1006  else
1007  return DRIZZLE_TYPE_VARCHAR;
1008  }
1009  void clear();
1010  bool add();
1011  void reset_field() { assert(0); } // not used
1012  void update_field() { assert(0); } // not used
1013  bool fix_fields(Session *,Item **);
1014  bool setup(Session *session);
1015  void make_unique();
1016  double val_real();
1017  int64_t val_int();
1019  {
1020  return val_decimal_from_string(decimal_value);
1021  }
1022  String* val_str(String* str);
1023  Item *copy_or_same(Session* session);
1025  virtual void print(String *str);
1026  virtual bool change_context_processor(unsigned char *cntx)
1027  { context= (Name_resolution_context *)cntx; return false; }
1028 };
1029 
1030 } /* namespace drizzled */
1031