Drizzled Public API Documentation

logging_query.cc
1 /* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
3  *
4  * Copyright (C) 2008, 2009 Sun Microsystems, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; version 2 of the License.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18  */
19 
20 #include <config.h>
21 #include <drizzled/item.h>
22 #include <drizzled/plugin.h>
23 #include <drizzled/plugin/logging.h>
24 #include <drizzled/gettext.h>
25 #include <drizzled/session.h>
26 #include <drizzled/session/times.h>
27 #include <drizzled/sql_parse.h>
28 #include PCRE_HEADER
29 #include <limits.h>
30 #include <sys/time.h>
31 #include <sys/types.h>
32 #include <sys/stat.h>
33 #include <fcntl.h>
34 #include <string>
35 #include <boost/format.hpp>
36 #include <boost/program_options.hpp>
38 #include <cstdio>
39 #include <cerrno>
40 
41 namespace po= boost::program_options;
42 using namespace drizzled;
43 using namespace std;
44 
45 #define ESCAPE_CHAR '\\'
46 #define SEPARATOR_CHAR ','
47 
48 namespace drizzle_plugin {
49 namespace logging_query {
50 
51 
52 static bool sysvar_logging_query_enable= false;
53 /* TODO fix these to not be unsigned long once we have sensible sys_var system */
54 static uint32_constraint sysvar_logging_query_threshold_slow;
55 static uint32_constraint sysvar_logging_query_threshold_big_resultset;
56 static uint32_constraint sysvar_logging_query_threshold_big_examined;
57 
58 bool updateFileName(Session *, set_var*);
59 bool updatePCRE(Session *, set_var*);
60 
61 
62 /* quote a string to be safe to include in a CSV line
63  that means backslash quoting all commas, doublequotes, backslashes,
64  and all the ASCII unprintable characters
65  as long as we pass the high-bit bytes unchanged
66  this is safe to do to a UTF8 string
67  we dont allow overrunning the targetbuffer
68  to avoid having a very long query overwrite memory
69 
70  TODO consider remapping the unprintables instead to "Printable
71  Representation", the Unicode characters from the area U+2400 to
72  U+2421 reserved for representing control characters when it is
73  necessary to print or display them rather than have them perform
74  their intended function.
75 
76 */
77 
78 static void quotify(const string &src, string &dst)
79 {
80  static const char hexit[]= { '0', '1', '2', '3', '4', '5', '6', '7',
81  '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
82  string::const_iterator src_iter;
83 
84  for (src_iter= src.begin(); src_iter < src.end(); ++src_iter)
85  {
86  if (static_cast<unsigned char>(*src_iter) > 0x7f)
87  {
88  dst.push_back(*src_iter);
89  }
90  else if (*src_iter == 0x00) // null
91  {
92  dst.push_back(ESCAPE_CHAR); dst.push_back('0');
93  }
94  else if (*src_iter == 0x07) // bell
95  {
96  dst.push_back(ESCAPE_CHAR); dst.push_back('a');
97  }
98  else if (*src_iter == 0x08) // backspace
99  {
100  dst.push_back(ESCAPE_CHAR); dst.push_back('b');
101  }
102  else if (*src_iter == 0x09) // horiz tab
103  {
104  dst.push_back(ESCAPE_CHAR); dst.push_back('t');
105  }
106  else if (*src_iter == 0x0a) // line feed
107  {
108  dst.push_back(ESCAPE_CHAR); dst.push_back('n');
109  }
110  else if (*src_iter == 0x0b) // vert tab
111  {
112  dst.push_back(ESCAPE_CHAR); dst.push_back('v');
113  }
114  else if (*src_iter == 0x0c) // formfeed
115  {
116  dst.push_back(ESCAPE_CHAR); dst.push_back('f');
117  }
118  else if (*src_iter == 0x0d) // carrage return
119  {
120  dst.push_back(ESCAPE_CHAR); dst.push_back('r');
121  }
122  else if (*src_iter == 0x1b) // escape
123  {
124  dst.push_back(ESCAPE_CHAR); dst.push_back('e');
125  }
126  else if (*src_iter == 0x22) // quotation mark
127  {
128  dst.push_back(ESCAPE_CHAR); dst.push_back(0x22);
129  }
130  else if (*src_iter == SEPARATOR_CHAR)
131  {
132  dst.push_back(ESCAPE_CHAR); dst.push_back(SEPARATOR_CHAR);
133  }
134  else if (*src_iter == ESCAPE_CHAR)
135  {
136  dst.push_back(ESCAPE_CHAR); dst.push_back(ESCAPE_CHAR);
137  }
138  else if ((*src_iter < 0x20) || (*src_iter == 0x7F)) // other unprintable ASCII
139  {
140  dst.push_back(ESCAPE_CHAR);
141  dst.push_back('x');
142  dst.push_back(hexit[(*src_iter >> 4) & 0x0f]);
143  dst.push_back(hexit[*src_iter & 0x0f]);
144  }
145  else // everything else
146  {
147  dst.push_back(*src_iter);
148  }
149  }
150 }
151 
152 
154 {
155  std::string sysvar_filename;
156  std::string sysvar_pcre;
157  int fd;
158  pcre *re;
159  pcre_extra *pe;
160 
162  boost::format formatter;
163 
164 public:
165 
166  Logging_query(const std::string &filename,
167  const std::string &query_pcre) :
168  drizzled::plugin::Logging("csv_query_log"),
169  sysvar_filename(filename),
170  sysvar_pcre(query_pcre),
171  fd(-1), re(NULL), pe(NULL),
172  formatter("%1%,%2%,%3%,\"%4%\",\"%5%\",\"%6%\",%7%,%8%,"
173  "%9%,%10%,%11%,%12%,%13%,%14%,\"%15%\"\n")
174  {
175 
176  /* if there is no destination filename, dont bother doing anything */
177  if (sysvar_filename.empty())
178  return;
179 
180  fd= open(sysvar_filename.c_str(),
181  O_WRONLY | O_APPEND | O_CREAT,
182  S_IRUSR|S_IWUSR);
183 
184  if (fd < 0)
185  {
186  sql_perror( _("fail open()"), sysvar_filename);
187  return;
188  }
189 
190  if (not sysvar_pcre.empty())
191  {
192  const char *this_pcre_error;
193  int this_pcre_erroffset;
194  re= pcre_compile(sysvar_pcre.c_str(), 0, &this_pcre_error,
195  &this_pcre_erroffset, NULL);
196  pe= pcre_study(re, 0, &this_pcre_error);
197  /* TODO emit error messages if there is a problem */
198  }
199  }
200 
201 
207  bool setFileName(std::string new_filename)
208  {
209  if (new_filename.empty())
210  return false;
211 
212  int tmp_fd= open(new_filename.c_str(),
213  O_WRONLY | O_APPEND | O_CREAT,
214  S_IRUSR|S_IWUSR);
215 
216  if (tmp_fd < 0)
217  {
218  sql_perror( _("fail open()"), new_filename);
219  return false;
220  }
221  if(fd >= 0)
222  {
223  close(fd);
224  }
225  fd= tmp_fd;
226  sysvar_filename= new_filename;
227  return true;
228  }
229 
235  bool setPCRE(std::string new_pcre)
236  {
237  if (not new_pcre.empty())
238  {
239  if (pe != NULL)
240  {
241  pcre_free(pe);
242  }
243 
244  if (re != NULL)
245  {
246  pcre_free(re);
247  }
248 
249  const char *tmp_this_pcre_error;
250  int tmp_this_pcre_erroffset;
251  re= pcre_compile(new_pcre.c_str(), 0, &tmp_this_pcre_error,
252  &tmp_this_pcre_erroffset, NULL);
253  pe= pcre_study(re, 0, &tmp_this_pcre_error);
254  /* TODO emit error messages if there is a problem */
255  }
256  sysvar_pcre= new_pcre;
257  return true;
258  }
259 
265  std::string& getFileName()
266  {
267  return sysvar_filename;
268  }
269 
275  std::string& getPCRE()
276  {
277  return sysvar_pcre;
278  }
279 
280  ~Logging_query()
281  {
282  if (fd >= 0)
283  {
284  close(fd);
285  }
286 
287  if (pe != NULL)
288  {
289  pcre_free(pe);
290  }
291 
292  if (re != NULL)
293  {
294  pcre_free(re);
295  }
296  }
297 
298  virtual bool post (Session *session)
299  {
300  size_t wrv;
301 
302  assert(session != NULL);
303 
304  if (fd < 0)
305  return false;
306 
307  /* Yes, we know that checking sysvar_logging_query_enable,
308  sysvar_logging_query_threshold_big_resultset, and
309  sysvar_logging_query_threshold_big_examined is not threadsafe,
310  because some other thread might change these sysvars. But we
311  don't care. We might start logging a little late as it spreads
312  to other threads. Big deal. */
313 
314  // return if not enabled or query was too fast or resultset was too small
315  if (sysvar_logging_query_enable == false)
316  return false;
317  if (session->sent_row_count < sysvar_logging_query_threshold_big_resultset.get())
318  return false;
319  if (session->examined_row_count < sysvar_logging_query_threshold_big_examined.get())
320  return false;
321 
322  /*
323  TODO, the session object should have a "utime command completed"
324  inside itself, so be more accurate, and so this doesnt have to
325  keep calling current_utime, which can be slow.
326  */
327  uint64_t t_mark= session->times.getCurrentTimestamp(false);
328 
329  if (session->times.getElapsedTime() < sysvar_logging_query_threshold_slow.get())
330  return false;
331 
332  Session::QueryString query_string(session->getQueryString());
333  if (query_string == NULL)
334  {
335  return false;
336  }
337 
338  if (re)
339  {
340  int this_pcre_rc;
341  this_pcre_rc= pcre_exec(re, pe, query_string->c_str(), query_string->length(), 0, 0, NULL, 0);
342  if (this_pcre_rc < 0)
343  return false;
344  }
345 
346  // buffer to quotify the query
347  string qs;
348 
349  // Since quotify() builds the quoted string incrementally, we can
350  // avoid some reallocating if we reserve some space up front.
351  qs.reserve(query_string->length());
352 
353  quotify(*query_string, qs);
354 
355  // to avoid trying to printf %s something that is potentially NULL
356  util::string::ptr schema(session->schema());
357  formatter % t_mark
358  % session->thread_id
359  % session->getQueryId()
360  % (schema ? schema->c_str() : "")
361  % qs
362  % getCommandName(session->command)
363  % (t_mark - session->times.getConnectMicroseconds())
364  % session->times.getElapsedTime()
365  % (t_mark - session->times.utime_after_lock)
366  % session->sent_row_count
367  % session->examined_row_count
368  % session->tmp_table
369  % session->total_warn_count
370  % session->getServerId()
371  % getServerHostname();
372 
373  string msgbuf= formatter.str();
374 
375  // a single write has a kernel thread lock, thus no need mutex guard this
376  wrv= write(fd, msgbuf.c_str(), msgbuf.length());
377  (void)(wrv);
378  assert(wrv == msgbuf.length());
379 
380  return false;
381  }
382 };
383 
384 static Logging_query *handler= NULL;
385 
391 bool updateFileName(Session *, set_var* var)
392 {
393  if (not var->value->str_value.empty())
394  {
395  std::string new_filename(var->value->str_value.data());
396  if (handler->setFileName(new_filename))
397  return false; //success
398  else
399  return true; // error
400  }
401  errmsg_printf(error::ERROR, _("logging_query_filename cannot be NULL"));
402  return true; // error
403 }
404 
410 bool updatePCRE(Session *, set_var* var)
411 {
412  if (not var->value->str_value.empty())
413  {
414  std::string new_pcre(var->value->str_value.data());
415  if (handler->setPCRE(new_pcre))
416  return false; //success
417  else
418  return true; // error
419  }
420  return false; // success
421 }
422 
423 static int init(drizzled::module::Context &context)
424 {
425  const module::option_map &vm= context.getOptions();
426 
427  if (vm.count("filename"))
428  {
429  handler= new Logging_query(vm["filename"].as<string>(),
430  vm["pcre"].as<string>());
431 
432  context.add(handler);
433  context.registerVariable(new sys_var_bool_ptr("enable", &sysvar_logging_query_enable));
434  context.registerVariable(new sys_var_std_string("filename", handler->getFileName(), NULL, &updateFileName));
435  context.registerVariable(new sys_var_std_string("pcre", handler->getPCRE(), NULL, &updatePCRE));
436  context.registerVariable(new sys_var_constrained_value<uint32_t>("threshold_slow", sysvar_logging_query_threshold_slow));
437  context.registerVariable(new sys_var_constrained_value<uint32_t>("threshold_big_resultset", sysvar_logging_query_threshold_big_resultset));
438  context.registerVariable(new sys_var_constrained_value<uint32_t>("threshold_big_examined", sysvar_logging_query_threshold_big_examined));
439  }
440 
441  return 0;
442 }
443 
444 static void init_options(drizzled::module::option_context &context)
445 {
446  context("enable",
447  po::value<bool>(&sysvar_logging_query_enable)->default_value(false)->zero_tokens(),
448  _("Enable logging to CSV file"));
449  context("filename",
450  po::value<string>(),
451  _("File to log to"));
452  context("pcre",
453  po::value<string>()->default_value(""),
454  _("PCRE to match the query against"));
455  context("threshold-slow",
456  po::value<uint32_constraint>(&sysvar_logging_query_threshold_slow)->default_value(0),
457  _("Threshold for logging slow queries, in microseconds"));
458  context("threshold-big-resultset",
459  po::value<uint32_constraint>(&sysvar_logging_query_threshold_big_resultset)->default_value(0),
460  _("Threshold for logging big queries, for rows returned"));
461  context("threshold-big-examined",
462  po::value<uint32_constraint>(&sysvar_logging_query_threshold_big_examined)->default_value(0),
463  _("Threshold for logging big queries, for rows examined"));
464 }
465 
466 } /* namespace logging_query */
467 
468 } /* namespace drizzle_plugin */
469 
470 DRIZZLE_DECLARE_PLUGIN
471 {
472  DRIZZLE_VERSION_ID,
473  "logging_query",
474  "0.2",
475  "Mark Atwood",
476  N_("Logs queries to a CSV file"),
477  PLUGIN_LICENSE_GPL,
478  drizzle_plugin::logging_query::init,
479  NULL,
480  drizzle_plugin::logging_query::init_options
481 }
482 DRIZZLE_DECLARE_PLUGIN_END;
ha_rows examined_row_count
Definition: session.h:419
bool setFileName(std::string new_filename)
An Proxy Wrapper around boost::program_options::variables_map.
boost::shared_ptr< const std::string > QueryString
Definition: session.h:137
ha_rows sent_row_count
Definition: session.h:414
query_id_t getQueryId() const
Definition: session.h:625
enum_server_command command
Definition: session.h:322
uint32_t getServerId() const
Definition: session.h:650
String str_value
Definition: item.h:107