Drizzled Public API Documentation

ha_innodb.cc
Go to the documentation of this file.
1 /*****************************************************************************
2 
3 Copyright (C) 2000, 2010, MySQL AB & Innobase Oy. All Rights Reserved.
4 Copyright (C) 2008, 2009 Google Inc.
5 Copyright (C) 2009, Percona Inc.
6 Copyright (C) 2011, Stewart Smith
7 
8 Portions of this file contain modifications contributed and copyrighted by
9 Google, Inc. Those modifications are gratefully acknowledged and are described
10 briefly in the InnoDB documentation. The contributions by Google are
11 incorporated with their permission, and subject to the conditions contained in
12 the file COPYING.Google.
13 
14 Portions of this file contain modifications contributed and copyrighted
15 by Percona Inc.. Those modifications are
16 gratefully acknowledged and are described briefly in the InnoDB
17 documentation. The contributions by Percona Inc. are incorporated with
18 their permission, and subject to the conditions contained in the file
19 COPYING.Percona.
20 
21 This program is free software; you can redistribute it and/or modify it under
22 the terms of the GNU General Public License as published by the Free Software
23 Foundation; version 2 of the License.
24 
25 This program is distributed in the hope that it will be useful, but WITHOUT
26 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
27 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
28 
29 You should have received a copy of the GNU General Public License along with
30 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
31 St, Fifth Floor, Boston, MA 02110-1301 USA
32 
33 *****************************************************************************/
34 
35 /* TODO list for the InnoDB Cursor in 5.0:
36  - fix savepoint functions to use savepoint storage area
37  - Find out what kind of problems the OS X case-insensitivity causes to
38  table and database names; should we 'normalize' the names like we do
39  in Windows?
40 */
41 
42 #include <config.h>
43 
44 #include <limits.h>
45 #include <fcntl.h>
46 
47 #include <drizzled/error.h>
48 #include <drizzled/errmsg_print.h>
49 #include <drizzled/internal/m_string.h>
50 #include <drizzled/internal/my_sys.h>
51 #include <drizzled/plugin.h>
52 #include <drizzled/show.h>
53 #include <drizzled/data_home.h>
54 #include <drizzled/catalog/local.h>
55 #include <drizzled/error.h>
56 #include <drizzled/field.h>
57 #include <drizzled/charset.h>
58 #include <drizzled/session.h>
59 #include <drizzled/current_session.h>
60 #include <drizzled/table.h>
61 #include <drizzled/field/blob.h>
62 #include <drizzled/field/varstring.h>
63 #include <drizzled/plugin/xa_storage_engine.h>
64 #include <drizzled/plugin/daemon.h>
65 #include <drizzled/memory/multi_malloc.h>
66 #include <drizzled/pthread_globals.h>
67 #include <drizzled/named_savepoint.h>
68 #include <drizzled/session/table_messages.h>
69 #include <drizzled/transaction_services.h>
72 #include <drizzled/statistics_variables.h>
73 #include <drizzled/system_variables.h>
74 #include <drizzled/session/times.h>
75 #include <drizzled/session/transactions.h>
76 #include <drizzled/typelib.h>
77 
78 #include <boost/algorithm/string.hpp>
79 #include <boost/program_options.hpp>
80 #include <boost/scoped_array.hpp>
81 #include <boost/filesystem.hpp>
83 #include <iostream>
84 
85 namespace po= boost::program_options;
86 namespace fs=boost::filesystem;
87 using namespace std;
88 
91 /* Include necessary InnoDB headers */
92 #include "univ.i"
93 #include "buf0lru.h"
94 #include "btr0sea.h"
95 #include "os0file.h"
96 #include "os0thread.h"
97 #include "srv0start.h"
98 #include "srv0srv.h"
99 #include "trx0roll.h"
100 #include "trx0trx.h"
101 #include "trx0sys.h"
102 #include "mtr0mtr.h"
103 #include "row0ins.h"
104 #include "row0mysql.h"
105 #include "row0sel.h"
106 #include "row0upd.h"
107 #include "log0log.h"
108 #include "lock0lock.h"
109 #include "dict0crea.h"
110 #include "create_replication.h"
111 #include "btr0cur.h"
112 #include "btr0btr.h"
113 #include "fsp0fsp.h"
114 #include "sync0sync.h"
115 #include "fil0fil.h"
116 #include "trx0xa.h"
117 #include "row0merge.h"
118 #include "thr0loc.h"
119 #include "dict0boot.h"
120 #include "ha_prototypes.h"
121 #include "ut0mem.h"
122 #include "ibuf0ibuf.h"
123 
124 #include "ha_innodb.h"
125 #include "data_dictionary.h"
126 #include "replication_dictionary.h"
127 #include "internal_dictionary.h"
128 #include "handler0vars.h"
129 
130 #include <iostream>
131 #include <sstream>
132 #include <string>
133 
134 #include <plugin/innobase/handler/status_function.h>
135 #include <plugin/innobase/handler/replication_log.h>
136 
137 #include <google/protobuf/io/zero_copy_stream.h>
138 #include <google/protobuf/io/zero_copy_stream_impl.h>
139 #include <google/protobuf/io/coded_stream.h>
140 #include <google/protobuf/text_format.h>
141 
142 #include <boost/thread/mutex.hpp>
143 
144 using namespace std;
145 using namespace drizzled;
146 
148 static boost::mutex innobase_share_mutex;
149 
151 static ulong commit_threads = 0;
152 static boost::condition_variable commit_cond;
153 static boost::mutex commit_cond_m;
154 static bool innodb_inited = 0;
155 
156 #define INSIDE_HA_INNOBASE_CC
157 
158 /* In the Windows plugin, the return value of current_session is
159 undefined. Map it to NULL. */
160 #if defined MYSQL_DYNAMIC_PLUGIN && defined __WIN__
161 # undef current_session
162 # define current_session NULL
163 # define EQ_CURRENT_SESSION(session) TRUE
164 #else /* MYSQL_DYNAMIC_PLUGIN && __WIN__ */
165 # define EQ_CURRENT_SESSION(session) ((session) == current_session)
166 #endif /* MYSQL_DYNAMIC_PLUGIN && __WIN__ */
167 
168 static plugin::XaStorageEngine* innodb_engine_ptr= NULL;
169 
171 static open_files_constraint innobase_open_files;
173 static mirrored_log_groups_constraint innobase_mirrored_log_groups;
175 static log_files_in_group_constraint innobase_log_files_in_group;
177 force_recovery_constraint innobase_force_recovery;
179 static log_buffer_constraint innobase_log_buffer_size;
181 static additional_mem_pool_constraint innobase_additional_mem_pool_size;
183 static autoextend_constraint innodb_auto_extend_increment;
185 static buffer_pool_constraint innobase_buffer_pool_size;
187 static buffer_pool_instances_constraint innobase_buffer_pool_instances;
188 typedef constrained_check<uint32_t,
189  (1 << UNIV_PAGE_SIZE_SHIFT_MAX),
190  (1 << 12)> page_size_constraint;
191 static page_size_constraint innobase_page_size;
192 typedef constrained_check<uint32_t,
193  (1 << UNIV_PAGE_SIZE_SHIFT_MAX),
194  (1 << 9)> log_block_size_constraint;
195 static log_block_size_constraint innobase_log_block_size;
197 static io_capacity_constraint innodb_io_capacity;
199 static purge_batch_constraint innodb_purge_batch_size;
201 static purge_threads_constraint innodb_n_purge_threads;
203 static trinary_constraint innodb_flush_log_at_trx_commit;
205 static max_dirty_pages_constraint innodb_max_dirty_pages_pct;
206 static uint64_constraint innodb_max_purge_lag;
207 static uint64_nonzero_constraint innodb_stats_sample_pages;
209 static io_threads_constraint innobase_read_io_threads;
210 static io_threads_constraint innobase_write_io_threads;
211 
213 static concurrency_constraint innobase_commit_concurrency;
214 static concurrency_constraint innobase_thread_concurrency;
215 static uint32_nonzero_constraint innodb_concurrency_tickets;
216 
218 static log_file_constraint innobase_log_file_size;
219 
220 static uint64_constraint innodb_replication_delay;
221 
222 static uint32_constraint buffer_pool_restore_at_startup;
223 
227 static old_blocks_constraint innobase_old_blocks_pct;
228 
229 static uint32_constraint innodb_sync_spin_loops;
230 static uint32_constraint innodb_spin_wait_delay;
231 static uint32_constraint innodb_thread_sleep_delay;
232 
234 static read_ahead_threshold_constraint innodb_read_ahead_threshold;
235 
236 static uint64_constraint ibuf_max_size;
237 
239 static binary_constraint ibuf_active_contract;
240 
242 static ibuf_accel_rate_constraint ibuf_accel_rate;
243 static uint32_constraint checkpoint_age_target;
244 static binary_constraint flush_neighbor_pages;
245 
246 static string sysvar_transaction_log_use_replicator;
247 
248 /* The default values for the following char* start-up parameters
249 are determined in innobase_init below: */
250 
251 std::string innobase_data_home_dir;
252 std::string innobase_data_file_path;
253 std::string innobase_log_group_home_dir;
254 static string innobase_file_format_name;
255 static string innobase_change_buffering;
256 
257 static string read_ahead;
258 static string adaptive_flushing_method;
259 
260 /* The highest file format being used in the database. The value can be
261 set by user, however, it will be adjusted to the newer file format if
262 a table of such format is created/opened. */
263 static string innobase_file_format_max;
264 
265 /* Below we have boolean-valued start-up parameters, and their default
266 values */
267 
268 static trinary_constraint innobase_fast_shutdown;
269 
270 /* "innobase_file_format_check" decides whether we would continue
271 booting the server if the file format stamped on the system
272 table space exceeds the maximum file format supported
273 by the server. Can be set during server startup at command
274 line or configure file, and a read only variable after
275 server startup */
276 
277 /* If a new file format is introduced, the file format
278 name needs to be updated accordingly. Please refer to
279 file_format_name_map[] defined in trx0sys.c for the next
280 file format name. */
281 
282 static my_bool innobase_file_format_check = TRUE;
283 static my_bool innobase_use_doublewrite = TRUE;
284 static my_bool innobase_use_checksums = TRUE;
285 static my_bool innobase_rollback_on_timeout = FALSE;
286 static my_bool innobase_create_status_file = FALSE;
287 static bool innobase_use_replication_log;
288 static bool support_xa;
289 static bool strict_mode;
291 static lock_wait_constraint lock_wait_timeout;
292 
293 static char* internal_innobase_data_file_path = NULL;
294 
295 /* The following counter is used to convey information to InnoDB
296 about server activity: in selects it is not sensible to call
297 srv_active_wake_master_thread after each fetch or search, we only do
298 it every INNOBASE_WAKE_INTERVAL'th step. */
299 
300 #define INNOBASE_WAKE_INTERVAL 32
301 static ulong innobase_active_counter = 0;
302 
303 static hash_table_t* innobase_open_tables;
304 
305 #ifdef __NETWARE__ /* some special cleanup for NetWare */
306 bool nw_panic = FALSE;
307 #endif
308 
/* Textual names of the change-buffering modes. The array has IBUF_USE_COUNT
slots and each entry is annotated with the IBUF_USE_* value it stands for. */
310 static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
311  "none", /* IBUF_USE_NONE */
312  "inserts", /* IBUF_USE_INSERT */
313  "deletes", /* IBUF_USE_DELETE_MARK */
314  "changes", /* IBUF_USE_INSERT_DELETE_MARK */
315  "purges", /* IBUF_USE_DELETE */
316  "all" /* IBUF_USE_ALL */
317 };
318 
/* Accepted spellings of the read-ahead option: four symbolic names (indexes
0..3) followed by the numeric aliases "0".."3" (indexes 4..7), so alias i+4
denotes the same choice as name i. The list ends with a NULL sentinel. */
320 static const char* read_ahead_names[] = {
321  "none", /* 0 */
322  "random",
323  "linear",
324  "both", /* 3 */
325  /* For compatibility with the older Percona patch */
326  "0", /* 4 ("none" + 4) */
327  "1",
328  "2",
329  "3", /* 7 ("both" + 4) */
330  NULL
331 };
332 
/* TYPELIB wrapper around read_ahead_names. The count is the array size
minus one, which leaves out the trailing NULL sentinel; the last initializer
(NULL) is passed in place of a lengths array. */
333 static TYPELIB read_ahead_typelib = {
334  array_elements(read_ahead_names) - 1, "read_ahead_typelib",
335  read_ahead_names, NULL
336 };
337 
/* Accepted spellings of the adaptive flushing method option: three symbolic
names (indexes 0..2) followed by the numeric aliases "0".."2" (indexes 3..5),
so alias i+3 denotes the same choice as name i. The list ends with a NULL
sentinel. */
339 static const char* adaptive_flushing_method_names[] = {
340  "native", /* 0 */
341  "estimate", /* 1 */
342  "keep_average", /* 2 */
343  /* For compatibility with the older Percona patch */
344  "0", /* 3 ("native" + 3) */
345  "1", /* 4 ("estimate" + 3) */
346  "2", /* 5 ("keep_average" + 3) */
347  NULL
348 };
349 
350 static TYPELIB adaptive_flushing_method_typelib = {
351  array_elements(adaptive_flushing_method_names) - 1,
352  "adaptive_flushing_method_typelib",
354 };
355 
356 /* "GEN_CLUST_INDEX" is the name reserved for Innodb default
357 system primary index. */
358 static const char innobase_index_reserve_name[]= "GEN_CLUST_INDEX";
359 
360 /********************************************************************
361 Gives the file extension of an InnoDB single-table tablespace. */
362 static const char* ha_innobase_exts[] = {
363  ".ibd",
364  NULL
365 };
366 
367 #define DEFAULT_FILE_EXTENSION ".dfe" // Deep Fried Elephant
368 
369 static INNOBASE_SHARE *get_share(const char *table_name);
370 static void free_share(INNOBASE_SHARE *share);
371 
/* Storage engine plugin class for InnoDB. It derives from
plugin::XaStorageEngine and declares the transaction, savepoint, XA, schema
and table-definition entry points; most of them are only declared here and
defined outside the class body. */
372 class InnobaseEngine : public plugin::XaStorageEngine
373 {
374 public:
 /* Registers the engine under name_arg with the HTON_* capability flags
 listed below, selects the default table definition file extension and adds
 "INNOBASE" as an alias. */
375  explicit InnobaseEngine(string name_arg) :
376  plugin::XaStorageEngine(name_arg,
377  HTON_NULL_IN_KEY |
378  HTON_CAN_INDEX_BLOBS |
379  HTON_PRIMARY_KEY_IN_READ_INDEX |
380  HTON_PARTIAL_COLUMN_READ |
381  HTON_TABLE_SCAN_ON_INDEX |
382  HTON_HAS_FOREIGN_KEYS |
383  HTON_HAS_DOES_TRANSACTIONS)
384  {
385  table_definition_ext= plugin::DEFAULT_DEFINITION_FILE_EXT;
386  addAlias("INNOBASE");
387  }
388 
 /* Tears the engine down, but only when innodb_inited is set: copies the
 fast-shutdown setting into srv_fast_shutdown, clears innodb_inited, frees
 the open-tables hash, calls innobase_shutdown_for_mysql() (its failure is
 currently ignored) and frees internal_innobase_data_file_path. */
389  virtual ~InnobaseEngine()
390  {
391  if (innodb_inited) {
392  srv_fast_shutdown = (ulint) innobase_fast_shutdown;
393  innodb_inited = 0;
394  hash_table_free(innobase_open_tables);
395  innobase_open_tables = NULL;
396  if (innobase_shutdown_for_mysql() != DB_SUCCESS) {
397  // Throw here?
398  }
 /* NOTE(review): the embedded listing numbering skips 399 here, so one
 source line is not visible in this listing - confirm against the real
 file before changing this destructor. */
400  free(internal_innobase_data_file_path);
401  }
402 
403  /* These get strdup'd from vm variables */
404 
405  }
406 
407 private:
408  virtual int doStartTransaction(Session *session, start_transaction_option_t options);
409  virtual void doStartStatement(Session *session);
410  virtual void doEndStatement(Session *session);
411 public:
412  virtual
413  int
414  close_connection(
415 /*======================*/
416  /* out: 0 or error number */
417  Session* session); /* in: handle to the MySQL thread of the user
418  whose resources should be free'd */
419 
420  virtual int doSetSavepoint(Session* session,
421  drizzled::NamedSavepoint &savepoint);
422  virtual int doRollbackToSavepoint(Session* session,
423  drizzled::NamedSavepoint &savepoint);
424  virtual int doReleaseSavepoint(Session* session,
425  drizzled::NamedSavepoint &savepoint);
426  virtual int doXaCommit(Session* session, bool all)
427  {
428  return doCommit(session, all); /* XA commit just does a SQL COMMIT */
429  }
430  virtual int doXaRollback(Session *session, bool all)
431  {
432  return doRollback(session, all); /* XA rollback just does a SQL ROLLBACK */
433  }
434  virtual uint64_t doGetCurrentTransactionId(Session *session);
435  virtual uint64_t doGetNewTransactionId(Session *session);
436  virtual int doCommit(Session* session, bool all);
437  virtual int doRollback(Session* session, bool all);
438 
439  /***********************************************************************
440  This function is used to prepare X/Open XA distributed transaction */
441  virtual
442  int
443  doXaPrepare(
444  /*================*/
445  /* out: 0 or error number */
446  Session* session, /* in: handle to the MySQL thread of the user
447  whose XA transaction should be prepared */
448  bool all); /* in: TRUE - commit transaction
449  FALSE - the current SQL statement ended */
450  /***********************************************************************
451  This function is used to recover X/Open XA distributed transactions */
452  virtual
453  int
454  doXaRecover(
455  /*================*/
456  /* out: number of prepared transactions
457  stored in xid_list */
458  ::drizzled::XID* xid_list, /* in/out: prepared transactions */
459  size_t len); /* in: number of slots in xid_list */
460  /***********************************************************************
461  This function is used to commit one X/Open XA distributed transaction
462  which is in the prepared state */
463  virtual
464  int
465  doXaCommitXid(
466  /*===================*/
467  /* out: 0 or error number */
468  ::drizzled::XID* xid); /* in: X/Open XA transaction identification */
469  /***********************************************************************
470  This function is used to rollback one X/Open XA distributed transaction
471  which is in the prepared state */
472  virtual
473  int
474  doXaRollbackXid(
475  /*=====================*/
476  /* out: 0 or error number */
477  ::drizzled::XID *xid); /* in: X/Open XA transaction identification */
478 
 /* Allocates a new ha_innobase cursor bound to this engine and to table;
 the object is returned as a raw pointer created with new. */
479  virtual Cursor *create(Table &table)
480  {
481  return new ha_innobase(*this, table);
482  }
483 
484  /*********************************************************************
485  Removes all tables in the named database inside InnoDB. */
486  bool
487  doDropSchema(
488  /*===================*/
489  /* out: error number */
490  const identifier::Schema &identifier); /* in: database path; inside InnoDB the name
491  of the last directory in the path is used as
492  the database name: for example, in 'mysql/data/test'
493  the database name is 'test' */
494 
495  /********************************************************************
496  Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
497  the logs, and the name of this function should be innobase_checkpoint. */
498  virtual
499  bool
500  flush_logs();
501  /*================*/
502  /* out: TRUE if error */
503 
504  /****************************************************************************
505  Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
506  Monitor to the client. */
507  virtual
508  bool
509  show_status(
510  /*===============*/
511  Session* session, /* in: the MySQL query thread of the caller */
512  stat_print_fn *stat_print,
513  enum ha_stat_type stat_type);
514 
515  virtual
516  int
517  doReleaseTemporaryLatches(
518  /*===============================*/
519  /* out: 0 */
520  Session* session); /* in: MySQL thread */
521 
522 
 /* Returns the file-scope, NULL-terminated extension list
 ha_innobase_exts. */
523  const char** bas_ext() const {
524  return(ha_innobase_exts);
525  }
526 
527  UNIV_INTERN int doCreateTable(Session &session,
528  Table &form,
529  const identifier::Table &identifier,
530  const message::Table&);
531  UNIV_INTERN int doRenameTable(Session&, const identifier::Table &from, const identifier::Table &to);
532  UNIV_INTERN int doDropTable(Session &session, const identifier::Table &identifier);
533 
534  UNIV_INTERN virtual bool get_error_message(int error, String *buf) const;
535 
536  UNIV_INTERN uint32_t max_supported_keys() const;
537  UNIV_INTERN uint32_t max_supported_key_length() const;
538  UNIV_INTERN uint32_t max_supported_key_part_length() const;
539 
540 
 /* Reports the same set of HA_* index capability flags for every key
 algorithm; the argument is ignored. */
541  UNIV_INTERN uint32_t index_flags(enum ha_key_alg) const
542  {
543  return (HA_READ_NEXT |
544  HA_READ_PREV |
545  HA_READ_ORDER |
546  HA_READ_RANGE |
547  HA_KEYREAD_ONLY);
548  }
549 
550  int doGetTableDefinition(drizzled::Session& session,
551  const identifier::Table &identifier,
552  drizzled::message::Table &table_proto);
553 
554  bool doDoesTableExist(drizzled::Session& session, const identifier::Table &identifier);
555 
556  void doGetTableIdentifiers(drizzled::CachedDirectory &directory,
557  const drizzled::identifier::Schema &schema_identifier,
558  drizzled::identifier::table::vector &set_of_identifiers);
559  bool validateCreateTableOption(const std::string &key, const std::string &state);
560  void dropTemporarySchema();
561 
562 };
563 
564 
565 bool InnobaseEngine::validateCreateTableOption(const std::string &key, const std::string &state)
566 {
567  if (boost::iequals(key, "ROW_FORMAT"))
568  {
569  if (boost::iequals(state, "COMPRESSED"))
570  return true;
571 
572  if (boost::iequals(state, "COMPACT"))
573  return true;
574 
575  if (boost::iequals(state, "DYNAMIC"))
576  return true;
577 
578  if (boost::iequals(state, "REDUNDANT"))
579  return true;
580  }
581 
582  return false;
583 }
584 
585 void InnobaseEngine::doGetTableIdentifiers(drizzled::CachedDirectory &directory,
586  const drizzled::identifier::Schema &schema_identifier,
587  drizzled::identifier::table::vector &set_of_identifiers)
588 {
589  CachedDirectory::Entries entries= directory.getEntries();
590 
591  std::string search_string(schema_identifier.getSchemaName());
592 
593  boost::algorithm::to_lower(search_string);
594 
595  if (search_string.compare("data_dictionary") == 0)
596  {
597  set_of_identifiers.push_back(identifier::Table(schema_identifier.getSchemaName(), "SYS_REPLICATION_LOG"));
598  }
599 
600  for (CachedDirectory::Entries::iterator entry_iter= entries.begin();
601  entry_iter != entries.end(); ++entry_iter)
602  {
603  CachedDirectory::Entry *entry= *entry_iter;
604  const string *filename= &entry->filename;
605 
606  assert(filename->size());
607 
608  const char *ext= strchr(filename->c_str(), '.');
609 
610  if (ext == NULL || system_charset_info->strcasecmp(ext, DEFAULT_FILE_EXTENSION) ||
611  (filename->compare(0, strlen(TMP_FILE_PREFIX), TMP_FILE_PREFIX) == 0))
612  { }
613  else
614  {
615  std::string path;
616  path+= directory.getPath();
617  path+= FN_LIBCHAR;
618  path+= entry->filename;
619 
620  message::Table definition;
621  if (StorageEngine::readTableFile(path, definition))
622  {
623  /*
624  Using schema_identifier here to stop unused warning, could use
625  definition.schema() instead
626  */
627  identifier::Table identifier(schema_identifier.getSchemaName(), definition.name());
628  set_of_identifiers.push_back(identifier);
629  }
630  }
631  }
632 }
633 
634 bool InnobaseEngine::doDoesTableExist(Session &session, const identifier::Table &identifier)
635 {
636  string proto_path(identifier.getPath());
637  proto_path.append(DEFAULT_FILE_EXTENSION);
638 
639  if (session.getMessageCache().doesTableMessageExist(identifier))
640  return true;
641 
642  std::string search_string(identifier.getPath());
643  boost::algorithm::to_lower(search_string);
644 
645  if (search_string.compare("data_dictionary/sys_replication_log") == 0)
646  return true;
647 
648  if (access(proto_path.c_str(), F_OK))
649  {
650  return false;
651  }
652 
653  return true;
654 }
655 
656 int InnobaseEngine::doGetTableDefinition(Session &session,
657  const identifier::Table &identifier,
658  message::Table &table_proto)
659 {
660  string proto_path(identifier.getPath());
661  proto_path.append(DEFAULT_FILE_EXTENSION);
662 
663  // First we check the temporary tables.
664  if (session.getMessageCache().getTableMessage(identifier, table_proto))
665  return EEXIST;
666 
667  if (read_replication_log_table_message(identifier.getTableName().c_str(), &table_proto) == 0)
668  return EEXIST;
669 
670  if (access(proto_path.c_str(), F_OK))
671  {
672  return errno;
673  }
674 
675  if (StorageEngine::readTableFile(proto_path, table_proto))
676  return EEXIST;
677 
678  return ENOENT;
679 }
680 
681 
682 /************************************************************/
/* NOTE(review): the embedded listing numbering skips 687 here, so the name
of this forward-declared static function is not visible in this listing.
What is visible: it returns uint and takes a const char* format_name. */
685 static
686 uint
688 /*=============================*/
689  const char* format_name);
691 /************************************************************/
/* NOTE(review): the embedded listing numbering skips 697 here, so the name
of this forward-declared static function is not visible in this listing.
What is visible: it returns int and takes a const char* format_max. */
695 static
696 int
698 /*================================*/
699  const char* format_max);
/* Engine name string. */
701 static const char innobase_engine_name[]= "InnoDB";
702 
703 
704 /*****************************************************************/
/* NOTE(review): the embedded listing numbering skips 708 here, so the name
of this forward-declared static function is not visible in this listing.
What is visible: it returns void and takes a trx_t* trx. */
706 static
707 void
709 /*================*/
710  trx_t* trx);
712 static drizzle_show_var innodb_status_variables[]= {
713  {"buffer_pool_pages_data",
714  (char*) &export_vars.innodb_buffer_pool_pages_data, SHOW_LONG},
715  {"buffer_pool_pages_dirty",
716  (char*) &export_vars.innodb_buffer_pool_pages_dirty, SHOW_LONG},
717  {"buffer_pool_pages_flushed",
718  (char*) &export_vars.innodb_buffer_pool_pages_flushed, SHOW_LONG},
719  {"buffer_pool_pages_free",
720  (char*) &export_vars.innodb_buffer_pool_pages_free, SHOW_LONG},
721 #ifdef UNIV_DEBUG
722  {"buffer_pool_pages_latched",
723  (char*) &export_vars.innodb_buffer_pool_pages_latched, SHOW_LONG},
724 #endif /* UNIV_DEBUG */
725  {"buffer_pool_pages_misc",
726  (char*) &export_vars.innodb_buffer_pool_pages_misc, SHOW_LONG},
727  {"buffer_pool_pages_total",
728  (char*) &export_vars.innodb_buffer_pool_pages_total, SHOW_LONG},
729  {"buffer_pool_read_ahead",
730  (char*) &export_vars.innodb_buffer_pool_read_ahead, SHOW_LONG},
731  {"buffer_pool_read_ahead_evicted",
733  {"buffer_pool_read_requests",
734  (char*) &export_vars.innodb_buffer_pool_read_requests, SHOW_LONG},
735  {"buffer_pool_reads",
736  (char*) &export_vars.innodb_buffer_pool_reads, SHOW_LONG},
737  {"buffer_pool_wait_free",
738  (char*) &export_vars.innodb_buffer_pool_wait_free, SHOW_LONG},
739  {"buffer_pool_write_requests",
740  (char*) &export_vars.innodb_buffer_pool_write_requests, SHOW_LONG},
741  {"data_fsyncs",
742  (char*) &export_vars.innodb_data_fsyncs, SHOW_LONG},
743  {"data_pending_fsyncs",
744  (char*) &export_vars.innodb_data_pending_fsyncs, SHOW_LONG},
745  {"data_pending_reads",
746  (char*) &export_vars.innodb_data_pending_reads, SHOW_LONG},
747  {"data_pending_writes",
748  (char*) &export_vars.innodb_data_pending_writes, SHOW_LONG},
749  {"data_read",
750  (char*) &export_vars.innodb_data_read, SHOW_LONG},
751  {"data_reads",
752  (char*) &export_vars.innodb_data_reads, SHOW_LONG},
753  {"data_writes",
754  (char*) &export_vars.innodb_data_writes, SHOW_LONG},
755  {"data_written",
756  (char*) &export_vars.innodb_data_written, SHOW_LONG},
757  {"dblwr_pages_written",
758  (char*) &export_vars.innodb_dblwr_pages_written, SHOW_LONG},
759  {"dblwr_writes",
760  (char*) &export_vars.innodb_dblwr_writes, SHOW_LONG},
761  {"have_atomic_builtins",
762  (char*) &export_vars.innodb_have_atomic_builtins, SHOW_BOOL},
763  {"log_waits",
764  (char*) &export_vars.innodb_log_waits, SHOW_LONG},
765  {"log_write_requests",
766  (char*) &export_vars.innodb_log_write_requests, SHOW_LONG},
767  {"log_writes",
768  (char*) &export_vars.innodb_log_writes, SHOW_LONG},
769  {"os_log_fsyncs",
770  (char*) &export_vars.innodb_os_log_fsyncs, SHOW_LONG},
771  {"os_log_pending_fsyncs",
772  (char*) &export_vars.innodb_os_log_pending_fsyncs, SHOW_LONG},
773  {"os_log_pending_writes",
774  (char*) &export_vars.innodb_os_log_pending_writes, SHOW_LONG},
775  {"os_log_written",
776  (char*) &export_vars.innodb_os_log_written, SHOW_LONG},
777  {"page_size",
778  (char*) &export_vars.innodb_page_size, SHOW_LONG},
779  {"pages_created",
780  (char*) &export_vars.innodb_pages_created, SHOW_LONG},
781  {"pages_read",
782  (char*) &export_vars.innodb_pages_read, SHOW_LONG},
783  {"pages_written",
784  (char*) &export_vars.innodb_pages_written, SHOW_LONG},
785  {"row_lock_current_waits",
786  (char*) &export_vars.innodb_row_lock_current_waits, SHOW_LONG},
787  {"row_lock_time",
788  (char*) &export_vars.innodb_row_lock_time, SHOW_LONGLONG},
789  {"row_lock_time_avg",
790  (char*) &export_vars.innodb_row_lock_time_avg, SHOW_LONG},
791  {"row_lock_time_max",
792  (char*) &export_vars.innodb_row_lock_time_max, SHOW_LONG},
793  {"row_lock_waits",
794  (char*) &export_vars.innodb_row_lock_waits, SHOW_LONG},
795  {"rows_deleted",
796  (char*) &export_vars.innodb_rows_deleted, SHOW_LONG},
797  {"rows_inserted",
798  (char*) &export_vars.innodb_rows_inserted, SHOW_LONG},
799  {"rows_read",
800  (char*) &export_vars.innodb_rows_read, SHOW_LONG},
801  {"rows_updated",
802  (char*) &export_vars.innodb_rows_updated, SHOW_LONG},
803  {NULL, NULL, SHOW_LONG}
804 };
805 
/* Builds a generator for the InnoDB status table function: forwards fields
to the base Generator and positions status_var_ptr on the first row of
innodb_status_variables.
NOTE(review): the embedded listing numbering skips 809 inside the body, so
one statement before the assignment is not visible in this listing. */
806 InnodbStatusTool::Generator::Generator(drizzled::Field **fields) :
807  plugin::TableFunction::Generator(fields)
808 {
810  status_var_ptr= innodb_status_variables;
811 }
812 
813 bool InnodbStatusTool::Generator::populate()
814 {
815  if (status_var_ptr->name)
816  {
817  std::ostringstream oss;
818  string return_value;
819  const char *value= status_var_ptr->value;
820 
821  /* VARIABLE_NAME */
822  push(status_var_ptr->name);
823 
824  switch (status_var_ptr->type)
825  {
826  case SHOW_LONG:
827  oss << *(int64_t*) value;
828  return_value= oss.str();
829  break;
830  case SHOW_LONGLONG:
831  oss << *(int64_t*) value;
832  return_value= oss.str();
833  break;
834  case SHOW_BOOL:
835  return_value= *(bool*) value ? "ON" : "OFF";
836  break;
837  default:
838  assert(0);
839  }
840 
841  /* VARIABLE_VALUE */
842  if (return_value.length())
843  push(return_value);
844  else
845  push(" ");
846 
847  status_var_ptr++;
848 
849  return true;
850  }
851  return false;
852 }
853 
854 /* General functions */
855 
856 /******************************************************************/
/* NOTE(review): the embedded listing numbering skips 868 and 870, so the
function name and its parameter list are not visible in this listing.
What is visible: a UNIV_INTERN function returning ibool whose body
unconditionally returns false. */
866 UNIV_INTERN
867 ibool
869 /*============================*/
871 {
872  return false;
873 }
874 
875 /******************************************************************/
/* Static inline helper taking a trx_t*. It returns immediately when
srv_thread_concurrency is zero, which is annotated as the likely case.
NOTE(review): the embedded listing numbering skips 880 (the function name)
and 889 (the statement executed when srv_thread_concurrency is non-zero),
so neither is visible in this listing. */
878 static inline
879 void
881 /*=========================*/
882  trx_t* trx)
883 {
884  if (UNIV_LIKELY(!srv_thread_concurrency)) {
885 
886  return;
887  }
888 
890 }
891 
892 /******************************************************************/
/* Static inline helper taking a trx_t*. It returns immediately when
trx->declared_to_be_inside_innodb is not set, which is annotated as the
likely case.
NOTE(review): the embedded listing numbering skips 897 (the function name)
and 906 (the statement executed when the flag is set), so neither is
visible in this listing. */
895 static inline
896 void
898 /*========================*/
899  trx_t* trx)
900 {
901  if (UNIV_LIKELY(!trx->declared_to_be_inside_innodb)) {
902 
903  return;
904  }
905 
907 }
908 
909 /******************************************************************/
/* Static inline helper taking a trx_t*. It tests two flags of the
transaction in turn: has_search_latch and declared_to_be_inside_innodb.
NOTE(review): the embedded listing numbering skips 916 (the function name)
and 921 / 927 (the statements inside the two branches), so the actions
taken are not visible in this listing; the surviving comment says the
second branch releases a possible ticket in the FIFO. */
914 static inline
915 void
917 /*============================*/
918  trx_t* trx)
919 {
920  if (trx->has_search_latch) {
922  }
923 
924  if (trx->declared_to_be_inside_innodb) {
925  /* Release our possible ticket in the FIFO */
926 
928  }
929 }
930 
931 /******************************************************************/
/* Returns, converted to ibool, the result of
session->transaction.all.hasModifiedNonTransData().
NOTE(review): the embedded listing numbering skips 942, so the function
name is not visible in this listing. The session pointer is dereferenced
without a NULL check. */
940 UNIV_INTERN
941 ibool
943 /*===========================*/
944  drizzled::Session *session)
945 {
946  return((ibool)session->transaction.all.hasModifiedNonTransData());
947 }
948 
949 /******************************************************************/
/* Returns whether the session's current SQL command is SQLCOM_SELECT.
NOTE(review): the embedded listing numbering skips 954, so the function
name is not visible in this listing. The session pointer is dereferenced
without a NULL check. */
952 UNIV_INTERN
953 ibool
955 /*==========*/
956  const drizzled::Session *session)
957 {
958  return(session->getSqlCommand() == SQLCOM_SELECT);
959 }
960 
961 /******************************************************************/
/* Returns the file-scope support_xa flag; as the TODO below says, no
per-session value is consulted.
NOTE(review): the embedded listing numbering skips 967 and 969-970, so the
function name and its parameter list are not visible in this listing. */
965 UNIV_INTERN
966 ibool
968 /*============*/
971 {
972  /* TODO: Add support here for per-session value */
973  return(support_xa);
974 }
975 
976 /******************************************************************/
/* Returns the file-scope lock_wait_timeout setting converted to ulong; as
the TODO below says, no per-session value is consulted.
NOTE(review): the embedded listing numbering skips 981 and 983-984, so the
function name and its parameter list are not visible in this listing. */
979 UNIV_INTERN
980 ulong
982 /*==================*/
985 {
986  /* TODO: Add support here for per-session value */
987  /* According to <drizzle/plugin.h>, passing session == NULL
988  returns the global value of the session variable. */
989  return((ulong)lock_wait_timeout.get());
990 }
991 
992 /******************************************************************/
/* Adds value to in_session->times.utime_after_lock; does nothing when
in_session is NULL.
NOTE(review): the embedded listing numbering skips 996, so the function
name is not visible in this listing. */
994 UNIV_INTERN
995 void
997 /*===================*/
998  drizzled::Session* in_session,
999  ulint value)
1000 {
1001  if (in_session)
1002  in_session->times.utime_after_lock+= value;
1003 }
1004 
1005 /********************************************************************/
1008 static inline
1009 trx_t*&
1011 /*=======*/
1012  Session* session)
1013 {
1014  return *(trx_t**) session->getEngineData(innodb_engine_ptr);
1015 }
1016 
1017 
1018 plugin::ReplicationReturnCode ReplicationLog::apply(Session &session,
1019  const message::Transaction &message)
1020 {
1021  char *data= new char[message.ByteSize()];
1022 
1023  message.SerializeToArray(data, message.ByteSize());
1024 
1025  trx_t *trx= session_to_trx(&session);
1026 
1027  uint64_t trx_id= message.transaction_context().transaction_id();
1028  uint32_t seg_id= message.segment_id();
1029  uint64_t end_timestamp= message.transaction_context().end_timestamp();
1030  bool is_end_segment= message.end_segment();
1031  trx->log_commit_id= TRUE;
1032 
1033  string server_uuid= session.getServerUUID();
1034  string originating_server_uuid= session.getOriginatingServerUUID();
1035  uint64_t originating_commit_id= session.getOriginatingCommitID();
1036  bool use_originating_server_uuid= session.isOriginatingServerUUIDSet();
1037 
1038  ulint error= insert_replication_message(data, message.ByteSize(), trx, trx_id,
1039  end_timestamp, is_end_segment, seg_id, server_uuid.c_str(),
1040  use_originating_server_uuid, originating_server_uuid.c_str(),
1041  originating_commit_id);
1042 
1043  (void)error;
1044 
1045  delete[] data;
1046 
1047  return plugin::SUCCESS;
1048 }
1049 
1050 /********************************************************************/
/** Per-session cleanup hook of the engine object.
The function name line and the statement inside the final if-block are
absent from this listing (listing lines 1056 and 1072). This is a member
function: it asserts that `this` is innodb_engine_ptr.
@param session  session being closed
@return 0 on every visible path */
1055 int
1057 /*===============================*/
1058  Session* session)
1059 {
1060  trx_t* trx;
1061 
1062  assert(this == innodb_engine_ptr);
1063 
1064  if (!innodb_inited) {
1065 
1066  return(0);
1067  }
1068 
1069  trx = session_to_trx(session);
1070 
1071  if (trx) {
1073  }
1074  return(0);
1075 }
1076 
1077 /********************************************************************/
/** Counts a call and acts on every INNOBASE_WAKE_INTERVAL-th one.
The function name line and the statement inside the if-block are absent
from this listing (listing lines 1084 and 1090), so the periodic action
itself is not visible here. innobase_active_counter is incremented with
no synchronization visible in this function. */
1082 static inline
1083 void
1085 /*=======================*/
1086 {
1087  innobase_active_counter++;
1088 
1089  if ((innobase_active_counter % INNOBASE_WAKE_INTERVAL) == 0) {
1091  }
1092 }
1093 
1094 /********************************************************************/
/** Translates an InnoDB DB_* status into the handler-level error code.
The function name line is absent from this listing (listing line 1101),
as are listing lines 1162 and 1198 inside the body.
@param error    InnoDB status code to translate
@param flags    not referenced on any line visible here (line 1198, the
                continuation of the my_error() call, is missing)
@param session  session used for warnings and for marking the
                transaction for rollback
@return 0 for DB_SUCCESS, -1 for DB_ERROR and any unlisted code,
        otherwise the matching HA_ERR_* / ER_* constant */
1099 UNIV_INTERN
1100 int
1102 /*========================*/
1103  int error,
1104  ulint flags,
1105  Session* session)
1106 {
1107  switch (error) {
1108  case DB_SUCCESS:
1109  return(0);
1110 
1111  case DB_INTERRUPTED:
1112  my_error(ER_QUERY_INTERRUPTED, MYF(0));
 /* NOTE(review): control falls into the next case, so an interrupted
 query also pushes the foreign-key cascade warning below before
 returning -1 -- confirm that this is intended. */
1113  /* fall through */
1114 
1115  case DB_FOREIGN_EXCEED_MAX_CASCADE:
1116  push_warning_printf(session, DRIZZLE_ERROR::WARN_LEVEL_WARN,
1117  HA_ERR_ROW_IS_REFERENCED,
1118  "InnoDB: Cannot delete/update "
1119  "rows with cascading foreign key "
1120  "constraints that exceed max "
1121  "depth of %d. Please "
1122  "drop extra constraints and try "
1123  "again", DICT_FK_MAX_RECURSIVE_LOAD);
1124  /* fall through */
1125 
1126  case DB_ERROR:
1127  default:
1128  return(-1); /* unspecified error */
1129 
1130  case DB_DUPLICATE_KEY:
1131  /* Be cautious with returning this error, since
1132  mysql could re-enter the storage layer to get
1133  duplicated key info, the operation requires a
1134  valid table handle and/or transaction information,
1135  which might not always be available in the error
1136  handling stage. */
1137  return(HA_ERR_FOUND_DUPP_KEY);
1138 
1139  case DB_FOREIGN_DUPLICATE_KEY:
1140  return(HA_ERR_FOREIGN_DUPLICATE_KEY);
1141 
1142  case DB_MISSING_HISTORY:
1143  return(HA_ERR_TABLE_DEF_CHANGED);
1144 
1145  case DB_RECORD_NOT_FOUND:
1146  return(HA_ERR_NO_ACTIVE_RECORD);
1147 
1148  case DB_DEADLOCK:
1149  /* Since we rolled back the whole transaction, we must
1150  tell it also to MySQL so that MySQL knows to empty the
1151  cached binlog for this transaction */
1152 
1153  session->markTransactionForRollback(TRUE);
1154 
1155  return(HA_ERR_LOCK_DEADLOCK);
1156 
1157  case DB_LOCK_WAIT_TIMEOUT:
1158  /* Starting from 5.0.13, we let MySQL just roll back the
1159  latest SQL statement in a lock wait timeout. Previously, we
1160  rolled back the whole transaction. */
1161 
 /* Listing line 1162 (one statement) is absent here. */
1163 
1164  return(HA_ERR_LOCK_WAIT_TIMEOUT);
1165 
1166  case DB_NO_REFERENCED_ROW:
1167  return(HA_ERR_NO_REFERENCED_ROW);
1168 
1169  case DB_ROW_IS_REFERENCED:
1170  return(HA_ERR_ROW_IS_REFERENCED);
1171 
1172  case DB_CANNOT_ADD_CONSTRAINT:
1173  case DB_CHILD_NO_INDEX:
1174  case DB_PARENT_NO_INDEX:
1175  return(HA_ERR_CANNOT_ADD_FOREIGN);
1176 
1177  case DB_CANNOT_DROP_CONSTRAINT:
1178 
1179  return(HA_ERR_ROW_IS_REFERENCED); /* TODO: This is a bit
1180  misleading, a new MySQL error
1181  code should be introduced */
1182 
1183  case DB_COL_APPEARS_TWICE_IN_INDEX:
1184  case DB_CORRUPTION:
1185  return(HA_ERR_CRASHED);
1186 
1187  case DB_OUT_OF_FILE_SPACE:
1188  return(HA_ERR_RECORD_FILE_FULL);
1189 
1190  case DB_TABLE_IS_BEING_USED:
1191  return(HA_ERR_WRONG_COMMAND);
1192 
1193  case DB_TABLE_NOT_FOUND:
1194  return(HA_ERR_NO_SUCH_TABLE);
1195 
1196  case DB_TOO_BIG_RECORD:
 /* The remaining argument(s) of this my_error() call are on listing
 line 1198, which is absent. */
1197  my_error(ER_TOO_BIG_ROWSIZE, MYF(0),
1199  return(HA_ERR_TO_BIG_ROW);
1200 
1201  case DB_NO_SAVEPOINT:
1202  return(HA_ERR_NO_SAVEPOINT);
1203 
1204  case DB_LOCK_TABLE_FULL:
1205  /* Since we rolled back the whole transaction, we must
1206  tell it also to MySQL so that MySQL knows to empty the
1207  cached binlog for this transaction */
1208 
1209  session->markTransactionForRollback(TRUE);
1210 
1211  return(HA_ERR_LOCK_TABLE_FULL);
1212 
1213  case DB_PRIMARY_KEY_IS_NULL:
1214  return(ER_PRIMARY_CANT_HAVE_NULL);
1215 
1216  case DB_TOO_MANY_CONCURRENT_TRXS:
1217 
1218  /* Once MySQL add the appropriate code to errmsg.txt then
1219  we can get rid of this #ifdef. NOTE: The code checked by
1220  the #ifdef is the suggested name for the error condition
1221  and the actual error code name could very well be different.
1222  This will require some monitoring, ie. the status
1223  of this request on our part.*/
1224 
1225  /* New error code HA_ERR_TOO_MANY_CONCURRENT_TRXS is only
1226  available in 5.1.38 and later, but the plugin should still
1227  work with previous versions of MySQL.
1228  In Drizzle we seem to not have this yet.
1229  */
1230 #ifdef HA_ERR_TOO_MANY_CONCURRENT_TRXS
1231  return(HA_ERR_TOO_MANY_CONCURRENT_TRXS);
1232 #else /* HA_ERR_TOO_MANY_CONCURRENT_TRXS */
1233  return(HA_ERR_RECORD_FILE_FULL);
1234 #endif /* HA_ERR_TOO_MANY_CONCURRENT_TRXS */
1235  case DB_UNSUPPORTED:
1236  return(HA_ERR_UNSUPPORTED);
1237  }
1238 }
1239 
1240 
1241 /*************************************************************/
/** Writes a short description of a session to a stdio stream.
The function name line is absent from this listing (listing line 1245).
Output is one line with the session id, query id, server hostname, user
address and user name, followed by the session's query string on its own
line.
@param f           stream written with fprintf/putc
@param in_session  session to describe; dereferenced without a NULL check
(the third, unnamed uint parameter is not used) */
1243 UNIV_INTERN
1244 void
1246 /*=====================*/
1247  FILE* f,
1248  drizzled::Session *in_session,
1249  uint )
1251 {
1252  drizzled::identifier::user::ptr user_identifier(in_session->user());
1253 
1254  fprintf(f,
1255  "Drizzle thread %"PRIu64", query id %"PRIu64", %s, %s, %s ",
1256  static_cast<uint64_t>(in_session->getSessionId()),
1257  static_cast<uint64_t>(in_session->getQueryId()),
1258  getServerHostname().c_str(),
1259  user_identifier->address().c_str(),
1260  user_identifier->username().c_str()
1261  );
1262  fprintf(f, "\n%s", in_session->getQueryString()->c_str());
1263  putc('\n', f);
1264 }
1265 
1266 /******************************************************************/
/** Looks up the minimum and maximum bytes-per-character of a charset.
The function name line is absent from this listing (listing line 1270).
@param cset      index into all_charsets; debug builds assert cset < 256
@param mbminlen  out: cs->mbminlen, or 0 when the charset is not loaded
@param mbmaxlen  out: cs->mbmaxlen, or 0 when the charset is not loaded */
1268 UNIV_INTERN
1269 void
1271 /*====================*/
1272  ulint cset,
1273  ulint* mbminlen,
1274  ulint* mbmaxlen)
1275 {
1276  charset_info_st* cs;
1277  ut_ad(cset < 256);
1278  ut_ad(mbminlen);
1279  ut_ad(mbmaxlen);
1280 
1281  cs = all_charsets[cset];
1282  if (cs) {
1283  *mbminlen = cs->mbminlen;
1284  *mbmaxlen = cs->mbmaxlen;
1285  ut_ad(*mbminlen < DATA_MBMAX);
1286  ut_ad(*mbmaxlen < DATA_MBMAX);
1287  } else {
 /* A missing charset entry is only tolerated for id 0; ut_a()
 enforces this. */
1288  ut_a(cset == 0);
1289  *mbminlen = *mbmaxlen = 0;
1290  }
1291 }
1292 
1293 /******************************************************************/
/** Copies an identifier into a caller-supplied buffer.
The function name line is absent from this listing (listing line 1297).
The first, unnamed pointer parameter is ignored; the copy is a plain
strncpy() with no character-set conversion.
@param to    destination buffer of at least len bytes
@param from  NUL-terminated source string
@param len   maximum number of bytes written to `to`
NOTE(review): strncpy() leaves `to` without a terminating NUL when
`from` is len bytes or longer -- confirm that callers cope with this. */
1295 UNIV_INTERN
1296 void
1298 /*===========================*/
1299  const void*,
1300  char* to,
1301  const char* from,
1302  ulint len)
1303 {
1304  strncpy(to, from, len);
1305 }
1306 
1307 /******************************************************************/
/** Copies an identifier into a caller-supplied buffer.
The function name line is absent from this listing (listing line 1311).
The first, unnamed pointer parameter is ignored; the copy is a plain
strncpy() with no character-set conversion.
@param to    destination buffer of at least len bytes
@param from  NUL-terminated source string
@param len   maximum number of bytes written to `to`
NOTE(review): strncpy() leaves `to` without a terminating NUL when
`from` is len bytes or longer -- confirm that callers cope with this. */
1309 UNIV_INTERN
1310 void
1312 /*=====================*/
1313  const void*,
1314  char* to,
1315  const char* from,
1316  ulint len)
1317 {
1318  strncpy(to, from, len);
1319 }
1320 
1321 /******************************************************************/
/** Compares two strings case-insensitively using system_charset_info.
The function name line is absent from this listing (listing line 1326).
@param a  first NUL-terminated string
@param b  second NUL-terminated string
@return the result of system_charset_info->strcasecmp(a, b) */
1324 UNIV_INTERN
1325 int
1327 /*================*/
1328  const char* a,
1329  const char* b)
1330 {
1331  return(system_charset_info->strcasecmp(a, b));
1332 }
1333 
1334 /******************************************************************/
/** Converts a string to lower case in place using system_charset_info.
The function name line is absent from this listing (listing line 1338).
@param a  NUL-terminated string, modified in place */
1336 UNIV_INTERN
1337 void
1339 /*================*/
1340  char* a)
1341 {
1342  system_charset_info->casedn_str(a);
1343 }
1344 
1345 UNIV_INTERN
1346 bool
1347 innobase_isspace(
1348  const void* cs,
1349  char char_to_test)
1350 {
1351  return static_cast<const charset_info_st*>(cs)->isspace(char_to_test);
1352 }
1353 
1354 #if defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN)
1355 /*******************************************************************/
/* Forward declaration only; no definition is visible in this file
section. The Windows code below calls it with GetLastError() results
(marked "map error" there). */
1358 void __cdecl
1359 _dosmaperr(
1360  unsigned long);
1362 /*********************************************************************/
/** Creates a temporary file and returns a CRT file descriptor for it
(Windows dynamic-plugin variant).
The function name line is absent from this listing (listing line 1367).
The file is created with GetTempFileName()/CreateFile() in the directory
given by my_tmpdir(), or in the GetTempPath() directory when that is
NULL, and is opened with FILE_FLAG_DELETE_ON_CLOSE.
@return file descriptor, or -1 on failure after the Windows error has
        been passed to _dosmaperr() */
1365 UNIV_INTERN
1366 int
1368 /*========================*/
1369 {
1370  int fd; /* handle of opened file */
1371  HANDLE osfh; /* OS handle of opened file */
1372  char* tmpdir; /* point to the directory
1373  where to create file */
1374  TCHAR path_buf[MAX_PATH - 14]; /* buffer for tmp file path.
1375  The length cannot be longer
1376  than MAX_PATH - 14, or
1377  GetTempFileName will fail. */
1378  char filename[MAX_PATH]; /* name of the tmpfile */
1379  DWORD fileaccess = GENERIC_READ /* OS file access */
1380  | GENERIC_WRITE
1381  | DELETE;
1382  DWORD fileshare = FILE_SHARE_READ /* OS file sharing mode */
1383  | FILE_SHARE_WRITE
1384  | FILE_SHARE_DELETE;
1385  DWORD filecreate = CREATE_ALWAYS; /* OS method of open/create */
1386  DWORD fileattrib = /* OS file attribute flags */
1387  FILE_ATTRIBUTE_NORMAL
1388  | FILE_FLAG_DELETE_ON_CLOSE
1389  | FILE_ATTRIBUTE_TEMPORARY
1390  | FILE_FLAG_SEQUENTIAL_SCAN;
1391 
1392  tmpdir = my_tmpdir(&mysql_tmpdir_list);
1393 
1394  /* The tmpdir parameter can not be NULL for GetTempFileName. */
1395  if (!tmpdir) {
1396  uint ret;
1397 
1398  /* Use GetTempPath to determine path for temporary files. */
 /* NOTE(review): GetTempPath() takes the buffer length in TCHARs,
 while sizeof(path_buf) is a byte count; the two agree only when
 TCHAR is one byte wide. Assigning path_buf to the char* tmpdir
 below relies on the same assumption -- confirm. */
1399  ret = GetTempPath(sizeof(path_buf), path_buf);
1400  if (ret > sizeof(path_buf) || (ret == 0)) {
1401 
1402  _dosmaperr(GetLastError()); /* map error */
1403  return(-1);
1404  }
1405 
1406  tmpdir = path_buf;
1407  }
1408 
1409  /* Use GetTempFileName to generate a unique filename. */
1410  if (!GetTempFileName(tmpdir, "ib", 0, filename)) {
1411 
1412  _dosmaperr(GetLastError()); /* map error */
1413  return(-1);
1414  }
1415 
1416  /* Open/Create the file. */
1417  osfh = CreateFile(filename, fileaccess, fileshare, NULL,
1418  filecreate, fileattrib, NULL);
1419  if (osfh == INVALID_HANDLE_VALUE) {
1420 
1421  /* open/create file failed! */
1422  _dosmaperr(GetLastError()); /* map error */
1423  return(-1);
1424  }
1425 
1426  do {
1427  /* Associates a CRT file descriptor with the OS file handle. */
1428  fd = _open_osfhandle((intptr_t) osfh, 0);
1429  } while (fd == -1 && errno == EINTR);
1430 
1431  if (fd == -1) {
1432  /* Open failed, close the file handle. */
1433 
1434  _dosmaperr(GetLastError()); /* map error */
1435  CloseHandle(osfh); /* no need to check if
1436  CloseHandle fails */
1437  }
1438 
1439  return(fd);
1440 }
1441 #else
1442 /*********************************************************************/
/** Creates a temporary file and returns a plain file descriptor for it
(non-Windows variant).
The function name line is absent from this listing (listing line 1447).
The descriptor from ::drizzled::tmpfile("ib") is duplicated with dup()
and the original is closed through internal::my_close(), so the caller
receives a descriptor it can close with close()/fclose().
@return duplicated file descriptor, or -1 when tmpfile() or dup() fails */
1445 UNIV_INTERN
1446 int
1448 /*========================*/
1449 {
1450  int fd2 = -1;
1451  int fd = ::drizzled::tmpfile("ib");
1452  if (fd >= 0) {
1453  /* Copy the file descriptor, so that the additional resources
1454  allocated by create_temp_file() can be freed by invoking
1455  internal::my_close().
1456 
1457  Because the file descriptor returned by this function
1458  will be passed to fdopen(), it will be closed by invoking
1459  fclose(), which in turn will invoke close() instead of
1460  internal::my_close(). */
1461  fd2 = dup(fd);
1462  if (fd2 < 0) {
 /* NOTE(review): the self-assignment below has no effect. */
1463  errno=errno;
1464  my_error(EE_OUT_OF_FILERESOURCES,
1465  MYF(ME_BELL+ME_WAITTANG),
1466  "ib*", errno);
1467  }
1468  internal::my_close(fd, MYF(MY_WME));
1469  }
1470  return(fd2);
1471 }
1472 #endif /* defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN) */
1473 
1474 
1475 /*******************************************************************/
/** Formats raw column data as text by delegating to ut_str_sql_format().
The function name line is absent from this listing (listing line 1486).
The third, unnamed ulint parameter is ignored.
@param data      bytes to format
@param data_len  length of data in bytes
@param buf       output buffer
@param buf_size  size of buf in bytes
@return the value returned by ut_str_sql_format() */
1484 UNIV_INTERN
1485 ulint
1487 /*================*/
1488  const char* data,
1489  ulint data_len,
1491  ulint ,
1492  char* buf,
1493  ulint buf_size)
1495 {
1496  return(ut_str_sql_format(data, data_len, buf, buf_size));
1497 }
1498 
1499 /*********************************************************************/
/** Computes the next auto-increment value, saturating at max_value.
The function name line is absent from this listing (listing line 1518).
@param current    current value
@param increment  step; must be > 0 (enforced with ut_a)
@param offset     offset; treated as 0 when it exceeds increment
@param max_value  upper bound of the column type
@return the next value, never greater than max_value */
1516 static
1517 uint64_t
1519 /*==================*/
1520  uint64_t current,
1521  uint64_t increment,
1522  uint64_t offset,
1523  uint64_t max_value)
1524 {
1525  uint64_t next_value;
1526 
1527  /* Should never be 0. */
1528  ut_a(increment > 0);
1529 
1530  /* According to MySQL documentation, if the offset is greater than
1531  the increment then the offset is ignored. */
1532  if (offset > increment) {
1533  offset = 0;
1534  }
1535 
1536  if (max_value <= current) {
1537  next_value = max_value;
1538  } else if (offset <= 1) {
1539  /* Offset 0 and 1 are the same, because there must be at
1540  least one node in the system. */
1541  if (max_value - current <= increment) {
1542  next_value = max_value;
1543  } else {
1544  next_value = current + increment;
1545  }
 /* The condition below is always true when reached, because the
 first branch already handled max_value <= current; the final else
 is therefore unreachable. */
1546  } else if (max_value > current) {
 /* Number of increment-sized steps between current and offset,
 plus one; multiplied by increment and shifted by offset below. */
1547  if (current > offset) {
1548  next_value = ((current - offset) / increment) + 1;
1549  } else {
1550  next_value = ((offset - current) / increment) + 1;
1551  }
1552 
1553  ut_a(increment > 0);
1554  ut_a(next_value > 0);
1555 
1556  /* Check for multiplication overflow. */
1557  if (increment > (max_value / next_value)) {
1558 
1559  next_value = max_value;
1560  } else {
1561  next_value *= increment;
1562 
1563  ut_a(max_value >= next_value);
1564 
1565  /* Check for overflow. */
1566  if (max_value - next_value <= offset) {
1567  next_value = max_value;
1568  } else {
1569  next_value += offset;
1570  }
1571  }
1572  } else {
1573  next_value = max_value;
1574  }
1575 
1576  ut_a(next_value <= max_value);
1577 
1578  return(next_value);
1579 }
1580 
1581 /*********************************************************************/
/** Copies session options into an InnoDB transaction object.
The function name line is absent from this listing (listing line 1585).
Sets trx->check_foreigns and trx->check_unique_secondary to the negation
of the session's OPTION_NO_FOREIGN_KEY_CHECKS and
OPTION_RELAXED_UNIQUE_CHECKS flags respectively.
@param session  session owning the transaction; asserted to equal
                trx->mysql_thd
@param trx      transaction object to update */
1583 static
1584 void
1586 /*==============*/
1587  Session* session,
1588  trx_t* trx)
1589 {
1590  assert(session == trx->mysql_thd);
1591 
1592  trx->check_foreigns = !session_test_options(
1593  session, OPTION_NO_FOREIGN_KEY_CHECKS);
1594 
1595  trx->check_unique_secondary = !session_test_options(
1596  session, OPTION_RELAXED_UNIQUE_CHECKS);
1597 
1598  return;
1599 }
1600 
1601 /*********************************************************************/
/** Allocates an InnoDB transaction object and binds it to a session.
The function name line is absent from this listing (listing line 1606).
The object comes from trx_allocate_for_mysql(); its mysql_thd is set to
the session and innobase_trx_init() is applied to it.
@param session  owning session; asserted to be non-NULL and to satisfy
                EQ_CURRENT_SESSION
@return the newly allocated transaction object */
1604 UNIV_INTERN
1605 trx_t*
1607 /*==================*/
1608  Session* session)
1609 {
1610  trx_t* trx;
1611 
1612  assert(session != NULL);
1613  assert(EQ_CURRENT_SESSION(session));
1614 
1615  trx = trx_allocate_for_mysql();
1616 
1617  trx->mysql_thd = session;
1618 
1619  innobase_trx_init(session, trx);
1620 
1621  return(trx);
1622 }
1623 
1624 /*********************************************************************/
/** Returns the session's InnoDB transaction object, creating it if needed.
The function name line and one statement before ut_error are absent from
this listing (listing lines 1631 and 1642).
@param session  session to look up
@return the session's transaction object, never NULL on return */
1629 static
1630 trx_t*
1632 /*=============*/
1633  Session* session)
1634 {
 /* Bound by reference: assigning to trx below stores the new object
 in the session's engine data slot. */
1635  trx_t*& trx = session_to_trx(session);
1636 
1637  ut_ad(EQ_CURRENT_SESSION(session));
1638 
1639  if (trx == NULL) {
1640  trx = innobase_trx_allocate(session);
1641  } else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) {
 /* The stored object does not carry the expected magic number. */
1643  ut_error;
1644  }
1645 
 /* Session options are re-applied on every call, not only at
 allocation. */
1646  innobase_trx_init(session, trx);
1647 
1648  return(trx);
1649 }
1650 
1651 
1652 /*********************************************************************/
1654 UNIV_INTERN
1655 ha_innobase::ha_innobase(plugin::StorageEngine &engine_arg,
1656  Table &table_arg)
1657  :Cursor(engine_arg, table_arg),
1658  primary_key(0), /* needs initialization because index_flags() may be called
1659  before this is set to the real value. It's ok to have any
1660  value here because it doesn't matter if we return the
1661  HA_DO_INDEX_COND_PUSHDOWN bit from those "early" calls */
1662  start_of_scan(0),
1663  num_write_row(0)
1664 {}
1665 
1666 /*********************************************************************/
/* Empty function body; its signature line is absent from this listing
(listing line 1669). Given its position right after the constructor it
is presumably the ha_innobase destructor -- confirm against the
original file. */
1668 UNIV_INTERN
1670 {
1671 }
1672 
1673 /*********************************************************************/
/** Points this handle at the given session.
The function name line and the statement inside the if-block are absent
from this listing (listing lines 1679 and 1690), so the action taken
when prebuilt->trx differs from the session's transaction is not
visible here.
@param session  session to attach; asserted to be non-NULL */
1677 UNIV_INTERN inline
1678 void
1680 /*====================*/
1681  Session* session)
1682 {
1683  trx_t* trx;
1684 
1685  assert(session);
 /* Fetches the session's transaction, allocating one if needed. */
1686  trx = check_trx_exists(session);
1687 
1688  if (prebuilt->trx != trx) {
1689 
1691  }
1692 
1693  user_session = session;
1694 }
1695 
1696 /*****************************************************************/
/** Copies an identifier into a buffer, quoting it when a quote character
applies.
The function name line is absent from this listing (listing line 1702).
No terminating NUL is written to buf.
@param buf      output buffer
@param buflen   size of buf in bytes
@param id       identifier bytes (need not be NUL-terminated)
@param idlen    length of id in bytes
@param session  only tested for NULL: a NULL session selects '"' as the
                quote character, otherwise
                get_quote_char_for_identifier() decides
@param file_id  when true, id is first decoded with
                identifier::Table::filename_to_tablename()
@return pointer just past the last byte written to buf */
1700 static
1701 char*
1703 /*========================*/
1704  char* buf,
1705  ulint buflen,
1706  const char* id,
1707  ulint idlen,
1708  drizzled::Session *session,
1709  ibool file_id)
1711 {
1712  char nz[NAME_LEN + 1];
 /* The decode buffer is heap-allocated on every call, even when
 file_id is false and it goes unused. */
1713  const size_t nz2_size= NAME_LEN + 1 + srv_mysql50_table_name_prefix.size();
1714  boost::scoped_array<char> nz2(new char[nz2_size]);
1715 
1716  const char* s = id;
1717  int q;
1718 
1719  if (file_id) {
1720  /* Decode the table name. The filename_to_tablename()
1721  function expects a NUL-terminated string. The input and
1722  output strings buffers must not be shared. */
1723 
 /* Longer identifiers are silently truncated to fit nz. */
1724  if (UNIV_UNLIKELY(idlen > (sizeof nz) - 1)) {
1725  idlen = (sizeof nz) - 1;
1726  }
1727 
1728  memcpy(nz, id, idlen);
1729  nz[idlen] = 0;
1730 
1731  s = nz2.get();
1732  idlen = identifier::Table::filename_to_tablename(nz, nz2.get(), nz2_size);
1733  }
1734 
1735  /* See if the identifier needs to be quoted. */
1736  if (UNIV_UNLIKELY(!session)) {
1737  q = '"';
1738  } else {
1739  q = get_quote_char_for_identifier();
1740  }
1741 
 /* EOF means "no quoting": copy as many bytes as fit and return. */
1742  if (q == EOF) {
1743  if (UNIV_UNLIKELY(idlen > buflen)) {
1744  idlen = buflen;
1745  }
1746  memcpy(buf, s, idlen);
1747  return(buf + idlen);
1748  }
1749 
1750  /* Quote the identifier. */
 /* Room for the opening and closing quote is required; otherwise
 nothing is written. */
1751  if (buflen < 2) {
1752  return(buf);
1753  }
1754 
1755  *buf++ = q;
1756  buflen--;
1757 
 /* Each iteration keeps one byte in reserve for the closing quote;
 an embedded quote character is doubled. Copying stops early when
 the buffer cannot take the next character. */
1758  for (; idlen; idlen--) {
1759  int c = *s++;
1760  if (UNIV_UNLIKELY(c == q)) {
1761  if (UNIV_UNLIKELY(buflen < 3)) {
1762  break;
1763  }
1764 
1765  *buf++ = c;
1766  *buf++ = c;
1767  buflen -= 2;
1768  } else {
1769  if (UNIV_UNLIKELY(buflen < 2)) {
1770  break;
1771  }
1772 
1773  *buf++ = c;
1774  buflen--;
1775  }
1776  }
1777 
1778  *buf++ = q;
1779  return(buf);
1780 }
1781 
1782 /*****************************************************************/
/** Converts a table or index name to a printable, possibly quoted form.
The function name line is absent from this listing (listing line 1788).
No terminating NUL is written to buf.
@param buf       output buffer
@param buflen    size of buf in bytes
@param id        name bytes
@param idlen     length of id in bytes
@param session   forwarded to innobase_convert_identifier()
@param table_id  true when id is a table name of the form "db/table"
@return pointer just past the last byte written to buf */
1786 UNIV_INTERN
1787 char*
1789 /*==================*/
1790  char* buf,
1791  ulint buflen,
1792  const char* id,
1793  ulint idlen,
1794  drizzled::Session *session,
1795  ibool table_id)
1797 {
1798  char* s = buf;
1799  const char* bufend = buf + buflen;
1800 
1801  if (table_id) {
1802  const char* slash = (const char*) memchr(id, '/', idlen);
1803  if (!slash) {
1804 
 /* No database part: convert the whole name in one go. */
1805  goto no_db_name;
1806  }
1807 
1808  /* Print the database name and table name separately. */
1809  s = innobase_convert_identifier(s, bufend - s, id, slash - id,
1810  session, TRUE);
1811  if (UNIV_LIKELY(s < bufend)) {
1812  *s++ = '.';
1813  s = innobase_convert_identifier(s, bufend - s,
1814  slash + 1, idlen
1815  - (slash - id) - 1,
1816  session, TRUE);
1817  }
 /* NOTE(review): *id is read here without checking idlen > 0. */
1818  } else if (UNIV_UNLIKELY(*id == TEMP_INDEX_PREFIX)) {
1819  /* Temporary index name (smart ALTER TABLE) */
1820  const char temp_index_suffix[]= "--temporary--";
1821 
 /* The prefix byte is skipped; the suffix is appended only when
 it fits in the remaining space. */
1822  s = innobase_convert_identifier(buf, buflen, id + 1, idlen - 1,
1823  session, FALSE);
1824  if (s - buf + (sizeof temp_index_suffix - 1) < buflen) {
1825  memcpy(s, temp_index_suffix,
1826  sizeof temp_index_suffix - 1);
1827  s += sizeof temp_index_suffix - 1;
1828  }
1829  } else {
1830 no_db_name:
1831  s = innobase_convert_identifier(buf, buflen, id, idlen,
1832  session, table_id);
1833  }
1834 
1835  return(s);
1836 
1837 }
1838 
1839 /**********************************************************************/
/** Tells whether the session owning a transaction has been killed.
The function name line is absent from this listing (listing line 1844).
@param trx  transaction object; may be NULL
@return nonzero only when trx and trx->mysql_thd are non-NULL and
        trx->mysql_thd->getKilled() is nonzero */
1842 UNIV_INTERN
1843 ibool
1845 /*===============*/
1846  trx_t* trx)
1847 {
1848  return(trx && trx->mysql_thd && trx->mysql_thd->getKilled());
1849 }
1850 
1851 /**********************************************************************/
/** Reports whether a transaction has an attached session.
The function name line is absent from this listing (listing line 1856).
The trailing "&& true" leaves the result dependent only on the two
pointer checks.
@param trx  transaction object; may be NULL
@return nonzero when both trx and trx->mysql_thd are non-NULL */
1854 UNIV_INTERN
1855 ibool
1857 /*==========*/
1858  trx_t* trx)
1859 {
1860  return(trx && trx->mysql_thd
1861  && true);
1862 }
1863 
1864 /**************************************************************/
/** Clears two key-read flags on a prebuilt struct.
The function name line is absent from this listing (listing line 1869).
@param prebuilt  struct whose keep_other_fields_on_keyread and
                 read_just_key members are set to 0 */
1867 static
1868 void
1870 /*===========*/
1871  row_prebuilt_t* prebuilt)
1872 {
1873  prebuilt->keep_other_fields_on_keyread = 0;
1874  prebuilt->read_just_key = 0;
1875 }
1876 
/**
 * Rounds a value down, in place, to the nearest multiple of align_val.
 *
 * @param value      value to align; replaced by value - (value % align_val)
 * @param align_val  alignment step, 1024 by default. A step of 0 has no
 *                   meaningful multiple, so value is left unchanged
 *                   instead of evaluating a modulo by zero.
 */
template<class T>
void align_value(T& value, size_t align_val= 1024)
{
  if (align_val == 0)
  {
    return;
  }

  /* Kept as a plain assignment so that T only needs conversion and
  assignment, not a compound operator. */
  value= value - (value % align_val);
}
1882 
/* The four callbacks below each copy the current value of one
configuration variable into the matching srv_* global. Both parameters
are unnamed and unused. */

/* Refreshes srv_auto_extend_increment from innodb_auto_extend_increment. */
1883 static void auto_extend_update(Session *, sql_var_t)
1884 {
1885  srv_auto_extend_increment= innodb_auto_extend_increment.get();
1886 }
1887 
/* Refreshes srv_io_capacity from innodb_io_capacity. */
1888 static void io_capacity_update(Session *, sql_var_t)
1889 {
1890  srv_io_capacity= innodb_io_capacity.get();
1891 }
1892 
/* Refreshes srv_purge_batch_size from innodb_purge_batch_size. */
1893 static void purge_batch_update(Session *, sql_var_t)
1894 {
1895  srv_purge_batch_size= innodb_purge_batch_size.get();
1896 }
1897 
/* Refreshes srv_n_purge_threads from innodb_n_purge_threads. */
1898 static void purge_threads_update(Session *, sql_var_t)
1899 {
1900  srv_n_purge_threads= innodb_n_purge_threads.get();
1901 }
1902 
1903 static void innodb_adaptive_hash_index_update(Session *, sql_var_t)
1904 {
1905  if (btr_search_enabled)
1906  {
1907  btr_search_enable();
1908  } else {
1909  btr_search_disable();
1910  }
1911 }
1912 
/* Update callbacks for further configuration variables. Both
parameters are unnamed and unused in every one of them. */

/* Passes the configured percentage to buf_LRU_old_ratio_update() and
stores the value it returns back into innobase_old_blocks_pct. */
1913 static void innodb_old_blocks_pct_update(Session *, sql_var_t)
1914 {
1915  innobase_old_blocks_pct= buf_LRU_old_ratio_update(innobase_old_blocks_pct.get(), TRUE);
1916 }
1917 
/* Refreshes srv_thread_concurrency from innobase_thread_concurrency. */
1918 static void innodb_thread_concurrency_update(Session *, sql_var_t)
1919 {
1920  srv_thread_concurrency= innobase_thread_concurrency.get();
1921 }
1922 
/* Refreshes srv_n_spin_wait_rounds from innodb_sync_spin_loops. */
1923 static void innodb_sync_spin_loops_update(Session *, sql_var_t)
1924 {
1925  srv_n_spin_wait_rounds= innodb_sync_spin_loops.get();
1926 }
1927 
/* Refreshes srv_spin_wait_delay from innodb_spin_wait_delay. */
1928 static void innodb_spin_wait_delay_update(Session *, sql_var_t)
1929 {
1930  srv_spin_wait_delay= innodb_spin_wait_delay.get();
1931 }
1932 
/* Refreshes srv_thread_sleep_delay from innodb_thread_sleep_delay. */
1933 static void innodb_thread_sleep_delay_update(Session *, sql_var_t)
1934 {
1935  srv_thread_sleep_delay= innodb_thread_sleep_delay.get();
1936 }
1937 
/* Refreshes srv_read_ahead_threshold from innodb_read_ahead_threshold. */
1938 static void innodb_read_ahead_threshold_update(Session *, sql_var_t)
1939 {
1940  srv_read_ahead_threshold= innodb_read_ahead_threshold.get();
1941 }
1942 
/* Refreshes srv_auto_lru_dump from buffer_pool_restore_at_startup. */
1943 static void auto_lru_dump_update(Session *, sql_var_t)
1944 {
1945  srv_auto_lru_dump= buffer_pool_restore_at_startup.get();
1946 }
1947 
/* Refreshes srv_ibuf_active_contract from ibuf_active_contract. */
1948 static void ibuf_active_contract_update(Session *, sql_var_t)
1949 {
1950  srv_ibuf_active_contract= ibuf_active_contract.get();
1951 }
1952 
/* Refreshes srv_ibuf_accel_rate from ibuf_accel_rate. */
1953 static void ibuf_accel_rate_update(Session *, sql_var_t)
1954 {
1955  srv_ibuf_accel_rate= ibuf_accel_rate.get();
1956 }
1957 
/* Refreshes srv_checkpoint_age_target from checkpoint_age_target. */
1958 static void checkpoint_age_target_update(Session *, sql_var_t)
1959 {
1960  srv_checkpoint_age_target= checkpoint_age_target.get();
1961 }
1962 
/* Refreshes srv_flush_neighbor_pages from flush_neighbor_pages. */
1963 static void flush_neighbor_pages_update(Session *, sql_var_t)
1964 {
1965  srv_flush_neighbor_pages= flush_neighbor_pages.get();
1966 }
1967 
1968 static int innodb_commit_concurrency_validate(Session *session, set_var *var)
1969 {
1970  uint64_t new_value= var->getInteger();
1971 
1972  if ((innobase_commit_concurrency.get() == 0 && new_value != 0) ||
1973  (innobase_commit_concurrency.get() != 0 && new_value == 0))
1974  {
1975  push_warning_printf(session,
1976  DRIZZLE_ERROR::WARN_LEVEL_WARN,
1977  ER_WRONG_ARGUMENTS,
1978  _("Once InnoDB is running, innodb_commit_concurrency "
1979  "must not change between zero and nonzero."));
1980  return 1;
1981  }
1982  return 0;
1983 }
1984 
1985 /*************************************************************/
/** Validates a requested file format name and, when valid, stores its
canonical name in innobase_file_format_name.
The function name line and the first line of the statement assigning
format_id are absent from this listing (listing lines 1991 and 2003).
The unnamed Session parameter is unused.
@param var  requested setting; its string value is examined
@return 0 when format_id <= DICT_TF_FORMAT_MAX, otherwise 1 (including
        a NULL string value) */
1989 static
1990 int
1992 /*=============================*/
1993  Session* ,
1994  set_var *var)
1995 {
1996  const char *file_format_input = var->value->str_value.ptr();
1997  if (file_format_input == NULL)
1998  return 1;
1999 
 /* Always true here: the NULL case returned above. */
2000  if (file_format_input != NULL) {
2001  uint format_id;
2002 
2004  file_format_input);
2005 
2006  if (format_id <= DICT_TF_FORMAT_MAX) {
2007  innobase_file_format_name =
2008  trx_sys_file_format_id_to_name(format_id);
2009 
2010  return(0);
2011  }
2012  }
2013 
2014  return(1);
2015 }
2016 
2017 /*************************************************************/
/** Validates a requested change-buffering mode and, when it is
recognised, applies it to ibuf_use.
The function name line and the second argument line of the
innobase_strcasecmp() call are absent from this listing (listing lines
2023 and 2039). The unnamed Session parameter is unused.
@param var  requested setting; its string value is compared
            case-insensitively, one candidate per index of
            innobase_change_buffering_values
@return 0 when a match is found (ibuf_use is set to the matching
        index), otherwise 1 (including a NULL string value) */
2021 static
2022 int
2024 /*=============================*/
2025  Session* ,
2026  set_var *var)
2027 {
2028  const char *change_buffering_input = var->value->str_value.ptr();
2029 
2030  if (change_buffering_input == NULL)
2031  return 1;
2032 
2033  ulint use;
2034 
2035  for (use = 0;
2036  use < UT_ARR_SIZE(innobase_change_buffering_values);
2037  ++use) {
2038  if (!innobase_strcasecmp(change_buffering_input,
2040  {
2041  ibuf_use= static_cast<ibuf_use_t>(use);
2042  return 0;
2043  }
2044  }
2045 
2046  return 1;
2047 }
2048 
2049 
2050 /*************************************************************/
/** Validates a requested maximum file format and, when valid, records
it in innobase_file_format_max and in the system tablespace.
The function name line and the trailing arguments of the final
push_warning_printf() call are absent from this listing (listing lines
2056 and 2096-2097).
@param session  session that receives the warning for an invalid value
@param var      requested setting; its string value is examined
@return 0 when the format was accepted, otherwise 1 */
2054 static
2055 int
2057 /*==============================*/
2058  Session* session,
2059  set_var *var)
2060 {
2061  const char *file_format_input = var->value->str_value.ptr();
2062  if (file_format_input == NULL)
2063  return 1;
2064 
 /* Always true here: the NULL case returned above. */
2065  if (file_format_input != NULL) {
2066  int format_id = innobase_file_format_validate_and_set(file_format_input);
2067 
2068  if (format_id > DICT_TF_FORMAT_MAX) {
2069  /* DEFAULT is "on", which is invalid at runtime. */
2070  return 1;
2071  }
2072 
 /* A negative id is handled as invalid input in the else branch. */
2073  if (format_id >= 0) {
2074  innobase_file_format_max.assign(
2075  trx_sys_file_format_id_to_name((uint)format_id));
2076 
2077  /* Update the max format id in the system tablespace. */
2078  const char *name_buff;
2079 
2080  if (trx_sys_file_format_max_set(format_id, &name_buff))
2081  {
2082  errmsg_printf(error::WARN,
2083  " [Info] InnoDB: the file format in the system "
2084  "tablespace is now set to %s.\n", name_buff);
2085  innobase_file_format_max= name_buff;
2086  }
2087  return(0);
2088 
2089  } else {
2090  push_warning_printf(session,
2091  DRIZZLE_ERROR::WARN_LEVEL_WARN,
2092  ER_WRONG_ARGUMENTS,
2093  "InnoDB: invalid innodb_file_format_max "
2094  "value; can be any format up to %s "
2095  "or equivalent id of %d",
2098  }
2099  }
2100 
2101  return(1);
2102 }
2103 
2104 /*********************************************************************/
/** Validates a requested read-ahead mode and, when it is recognised,
applies it to srv_read_ahead.
The function name line is absent from this listing (listing line 2110).
The unnamed Session parameter is unused.
@param var  requested setting; its string value is looked up in
            read_ahead_typelib
@return 0 when find_type() returns a positive result (srv_read_ahead is
        set to that result minus one), otherwise 1
NOTE(review): unlike the neighbouring validators, the string pointer is
not checked for NULL before the lookup. Also, the startup code in this
file assigns find_type_or_exit(...) + 1 to srv_read_ahead, whereas this
path stores res - 1 -- confirm which offset is intended. */
2108 static
2109 int
2111 /*================*/
2112  Session*,
2113  set_var* var)
2114 {
2115  const char *read_ahead_input = var->value->str_value.ptr();
2116  int res = read_ahead_typelib.find_type(read_ahead_input, TYPELIB::e_none); // e_none is wrong
2117 
2118  if (res > 0) {
2119  srv_read_ahead = res - 1;
2120  return 0;
2121  }
2122 
2123  return 1;
2124 }
2125 
2126 /*********************************************************************/
/** Validates a requested adaptive flushing method and, when it is
recognised, applies it to srv_adaptive_flushing_method.
The function name line is absent from this listing (listing line 2132).
The unnamed Session parameter is unused.
@param var  requested setting; its string value is looked up in
            adaptive_flushing_method_typelib
@return 0 when find_type() returns a positive result
        (srv_adaptive_flushing_method is set to that result minus one),
        otherwise 1
NOTE(review): unlike the neighbouring validators, the string pointer is
not checked for NULL before the lookup. Also, the startup code in this
file assigns find_type_or_exit(...) + 1 to the same variable, whereas
this path stores res - 1 -- confirm which offset is intended. */
2130 static
2131 int
2133 /*==============================*/
2134  Session*,
2135  set_var* var)
2136 {
2137  const char *adaptive_flushing_method_input = var->value->str_value.ptr();
2138  int res = adaptive_flushing_method_typelib.find_type(adaptive_flushing_method_input, TYPELIB::e_none); // e_none is wrong
2139 
2140  if (res > 0) {
2141  srv_adaptive_flushing_method = res - 1;
2142  return 0;
2143  }
2144  return 1;
2145 }
2146 
2147 
2148 /*********************************************************************/
2151 static
2152 int
2154 /*==========*/
2155  module::Context &context)
2156 {
2157  int err;
2158  bool ret;
2159  uint format_id;
2160  InnobaseEngine *actuall_engine_ptr;
2161  const module::option_map &vm= context.getOptions();
2162 
2163  srv_auto_extend_increment= innodb_auto_extend_increment.get();
2164  srv_io_capacity= innodb_io_capacity.get();
2165  srv_purge_batch_size= innodb_purge_batch_size.get();
2166  srv_n_purge_threads= innodb_n_purge_threads.get();
2167  srv_flush_log_at_trx_commit= innodb_flush_log_at_trx_commit.get();
2168  srv_max_buf_pool_modified_pct= innodb_max_dirty_pages_pct.get();
2169  srv_max_purge_lag= innodb_max_purge_lag.get();
2170  srv_stats_sample_pages= innodb_stats_sample_pages.get();
2171  srv_n_free_tickets_to_enter= innodb_concurrency_tickets.get();
2172  srv_replication_delay= innodb_replication_delay.get();
2173  srv_thread_concurrency= innobase_thread_concurrency.get();
2174  srv_n_spin_wait_rounds= innodb_sync_spin_loops.get();
2175  srv_spin_wait_delay= innodb_spin_wait_delay.get();
2176  srv_thread_sleep_delay= innodb_thread_sleep_delay.get();
2177  srv_read_ahead_threshold= innodb_read_ahead_threshold.get();
2178  srv_auto_lru_dump= buffer_pool_restore_at_startup.get();
2179  srv_ibuf_max_size= ibuf_max_size.get();
2180  srv_ibuf_active_contract= ibuf_active_contract.get();
2181  srv_ibuf_accel_rate= ibuf_accel_rate.get();
2182  srv_checkpoint_age_target= checkpoint_age_target.get();
2183  srv_flush_neighbor_pages= flush_neighbor_pages.get();
2184 
2185  srv_read_ahead = read_ahead_typelib.find_type_or_exit(vm["read-ahead"].as<string>().c_str(),
2186  "read_ahead_typelib") + 1;
2187 
2188  srv_adaptive_flushing_method = adaptive_flushing_method_typelib.find_type_or_exit(vm["adaptive-flushing-method"].as<string>().c_str(),
2189  "adaptive_flushing_method_typelib") + 1;
2190 
2191  /* Inverted Booleans */
2192 
2193  innobase_use_checksums= not vm.count("disable-checksums");
2194  innobase_use_doublewrite= not vm.count("disable-doublewrite");
2195  srv_adaptive_flushing= not vm.count("disable-adaptive-flushing");
2196  srv_use_sys_malloc= not vm.count("use-internal-malloc");
2197  srv_use_native_aio= not vm.count("disable-native-aio");
2198  support_xa= not vm.count("disable-xa");
2199  btr_search_enabled= not vm.count("disable-adaptive-hash-index");
2200 
2201  /* Hafta do this here because we need to late-bind the default value */
2202  innobase_data_home_dir= vm.count("data-home-dir") ? vm["data-home-dir"].as<string>() : getDataHome().file_string();
2203 
2204  if (vm.count("data-file-path"))
2205  {
2206  innobase_data_file_path= vm["data-file-path"].as<string>();
2207  }
2208 
2209 
2210  innodb_engine_ptr= actuall_engine_ptr= new InnobaseEngine(innobase_engine_name);
2211 
2212  ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)DRIZZLE_TYPE_VARCHAR);
2213 
2214 #ifdef UNIV_DEBUG
2215  static const char test_filename[] = "-@";
2216  const size_t test_tablename_size= sizeof test_filename
2218  boost::scoped_array test_tablename(new char[test_tablename_size]);
2219  if ((test_tablename_size) - 1
2220  != filename_to_tablename(test_filename, test_tablename.get(),
2221  test_tablename_size)
2222  || strncmp(test_tablename.get(),
2225  || strcmp(test_tablename.get()
2227  test_filename)) {
2228  errmsg_printf(error::ERROR, "tablename encoding has been changed");
2229  goto error;
2230  }
2231 #endif /* UNIV_DEBUG */
2232 
2233  srv_page_size = 0;
2234  srv_page_size_shift = 0;
2235 
2236  uint32_t page_size = innobase_page_size.get();
2237  uint32_t log_block_size = innobase_log_block_size.get();
2238 
2239  if (innobase_page_size != (1 << 14)) {
2240  uint n_shift;
2241 
2242  errmsg_printf(error::WARN,
2243  "InnoDB: Warning: innodb_page_size has been changed from default value 16384. (###EXPERIMENTAL### operation)\n");
2244  for (n_shift = 12; n_shift <= UNIV_PAGE_SIZE_SHIFT_MAX; n_shift++) {
2245  if (innobase_page_size == (1UL << n_shift)) {
2246  srv_page_size_shift = n_shift;
2247  srv_page_size = (1 << srv_page_size_shift);
2248  errmsg_printf(error::WARN,
2249  "InnoDB: The universal page size of the database is set to %lu.\n",
2250  srv_page_size);
2251  break;
2252  }
2253  }
2254  } else {
2255  srv_page_size_shift = 14;
2256  srv_page_size = (1 << srv_page_size_shift);
2257  }
2258 
2259  if (!srv_page_size_shift) {
2260  errmsg_printf(error::ERROR,
2261  "InnoDB: Error: %"PRIu32" is not a valid value for innodb_page_size.\n"
2262  "InnoDB: Error: Valid values are 4096, 8192, and 16384 (default=16384).\n",
2263  page_size);
2264  goto error;
2265  }
2266 
2267  srv_log_block_size = 0;
2268  if (log_block_size != (1 << 9)) {
2269  uint n_shift;
2270 
2271  errmsg_printf(error::WARN,
2272  "InnoDB: Warning: innodb_log_block_size has been changed from default value 512. (###EXPERIMENTAL### operation)\n");
2273  for (n_shift = 9; n_shift <= UNIV_PAGE_SIZE_SHIFT_MAX; n_shift++) {
2274  if (log_block_size == (1UL << n_shift)) {
2275  srv_log_block_size = (1 << n_shift);
2276  errmsg_printf(error::WARN, "InnoDB: The log block size is set to %"PRIu32".\n",
2277  srv_log_block_size);
2278  break;
2279  }
2280  }
2281  } else {
2282  srv_log_block_size = 512;
2283  }
2284 
2285  if (!srv_log_block_size) {
2286  errmsg_printf(error::ERROR,
2287  "InnoDB: Error: %"PRIu32" is not a valid value for innodb_log_block_size.\n"
2288  "InnoDB: Error: A valid value for innodb_log_block_size is\n"
2289  "InnoDB: Error: a power of 2 from 512 to 16384.\n",
2290  log_block_size);
2291  goto error;
2292  }
2293 
2294  os_innodb_umask = (ulint)internal::my_umask;
2295 
2296 
2297  /* Set InnoDB initialization parameters according to the values
2298  read from MySQL .cnf file */
2299 
2300  /*--------------- Data files -------------------------*/
2301 
2302  /* The default dir for data files is the datadir of MySQL */
2303 
2304  srv_data_home = (char *)innobase_data_home_dir.c_str();
2305 
2306  /* Set default InnoDB data file size to 10 MB and let it be
2307  auto-extending. Thus users can use InnoDB in >= 4.0 without having
2308  to specify any startup options. */
2309 
2310  if (innobase_data_file_path.empty())
2311  {
2312  innobase_data_file_path= std::string("ibdata1:10M:autoextend");
2313  }
2314 
2315  /* Since InnoDB edits the argument in the next call, we make another
2316  copy of it: */
2317 
2318  internal_innobase_data_file_path = strdup(innobase_data_file_path.c_str());
2319 
2321  internal_innobase_data_file_path);
2322  if (ret == FALSE) {
2323  errmsg_printf(error::ERROR, "InnoDB: syntax error in innodb_data_file_path");
2324 
2325 mem_free_and_error:
2327  free(internal_innobase_data_file_path);
2328  goto error;
2329  }
2330 
2331  /* -------------- Log files ---------------------------*/
2332 
2333  /* The default dir for log files is the datadir of MySQL */
2334 
2335  if (vm.count("log-group-home-dir"))
2336  {
2337  innobase_log_group_home_dir= vm["log-group-home-dir"].as<string>();
2338  }
2339  else
2340  {
2341  innobase_log_group_home_dir= getDataHome().file_string();
2342  }
2343 
2344  ret = (bool)
2345  srv_parse_log_group_home_dirs((char *)innobase_log_group_home_dir.c_str());
2346 
2347  if (ret == FALSE || innobase_mirrored_log_groups.get() != 1) {
2348  errmsg_printf(error::ERROR, _("syntax error in innodb_log_group_home_dir, or a "
2349  "wrong number of mirrored log groups"));
2350 
2351  goto mem_free_and_error;
2352  }
2353 
2354 
2355  /* Validate the file format by animal name */
2356  if (vm.count("file-format"))
2357  {
2359  vm["file-format"].as<string>().c_str());
2360 
2361  if (format_id > DICT_TF_FORMAT_MAX) {
2362 
2363  errmsg_printf(error::ERROR, "InnoDB: wrong innodb_file_format.");
2364 
2365  goto mem_free_and_error;
2366  }
2367  } else {
2368  /* Set it to the default file format id.*/
2369  format_id = 0;
2370  }
2371 
2372  srv_file_format = format_id;
2373 
2374  innobase_file_format_name =
2375  trx_sys_file_format_id_to_name(format_id);
2376 
2377  /* Check innobase_file_format_check variable */
2378  if (!innobase_file_format_check)
2379  {
2380  /* Set the value to disable checking. */
2382  } else {
2383  /* Set the value to the lowest supported format. */
2385  }
2386 
2387  /* Did the user specify a format name that we support?
2388  As a side effect it will update the variable
2389  srv_max_file_format_at_startup */
2390  if (innobase_file_format_validate_and_set(innobase_file_format_max.c_str()) < 0)
2391  {
2392  errmsg_printf(error::ERROR, _("InnoDB: invalid innodb_file_format_max value: "
2393  "should be any value up to %s or its equivalent numeric id"),
2395  goto mem_free_and_error;
2396  }
2397 
2398  if (vm.count("change-buffering"))
2399  {
2400  ulint use;
2401 
2402  for (use = 0;
2403  use < UT_ARR_SIZE(innobase_change_buffering_values);
2404  use++) {
2405  if (!innobase_strcasecmp(
2406  innobase_change_buffering.c_str(),
2408  ibuf_use = static_cast<ibuf_use_t>(use);
2409  goto innobase_change_buffering_inited_ok;
2410  }
2411  }
2412 
2413  errmsg_printf(error::ERROR, "InnoDB: invalid value innodb_change_buffering=%s",
2414  vm["change-buffering"].as<string>().c_str());
2415  goto mem_free_and_error;
2416  }
2417 
2418 innobase_change_buffering_inited_ok:
2419  ut_a((ulint) ibuf_use < UT_ARR_SIZE(innobase_change_buffering_values));
2420  innobase_change_buffering = innobase_change_buffering_values[ibuf_use];
2421 
2422  /* --------------------------------------------------*/
2423 
2424  if (vm.count("flush-method") != 0)
2425  {
2426  srv_file_flush_method_str = (char *)vm["flush-method"].as<string>().c_str();
2427  }
2428 
2429  srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
2430  srv_n_log_files = (ulint) innobase_log_files_in_group;
2431  srv_log_file_size = (ulint) innobase_log_file_size;
2432 
2433  srv_log_buffer_size = (ulint) innobase_log_buffer_size;
2434 
2435  srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
2436  srv_buf_pool_instances = (ulint) innobase_buffer_pool_instances;
2437 
2438  srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
2439 
2440  srv_n_read_io_threads = (ulint) innobase_read_io_threads;
2441  srv_n_write_io_threads = (ulint) innobase_write_io_threads;
2442 
2443  srv_read_ahead &= 3;
2444  srv_adaptive_flushing_method %= 3;
2445 
2446  srv_force_recovery = (ulint) innobase_force_recovery;
2447 
2448  srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
2449  srv_use_checksums = (ibool) innobase_use_checksums;
2450 
2451 #ifdef HAVE_LARGE_PAGES
2452  if ((os_use_large_pages = (ibool) my_use_large_pages))
2453  os_large_page_size = (ulint) opt_large_page_size;
2454 #endif
2455 
2456  row_rollback_on_timeout = (ibool) innobase_rollback_on_timeout;
2457 
2458  srv_locks_unsafe_for_binlog = (ibool) TRUE;
2459 
2460  srv_max_n_open_files = (ulint) innobase_open_files;
2461  srv_innodb_status = (ibool) innobase_create_status_file;
2462 
2463  srv_print_verbose_log = true;
2464 
2465  /* Store the default charset-collation number of this MySQL
2466  installation */
2467 
2468  data_mysql_default_charset_coll = (ulint)default_charset_info->number;
2469 
2470  /* Since we in this module access directly the fields of a trx
2471  struct, and due to different headers and flags it might happen that
2472  mutex_t has a different size in this module and in InnoDB
2473  modules, we check at run time that the size is the same in
2474  these compilation modules. */
2475 
2477 
2478  if (err != DB_SUCCESS)
2479  {
2480  goto mem_free_and_error;
2481  }
2482 
2483  err = dict_create_sys_replication_log();
2484 
2485  if (err != DB_SUCCESS) {
2486  goto mem_free_and_error;
2487  }
2488 
2489 
2490  innobase_old_blocks_pct = buf_LRU_old_ratio_update(innobase_old_blocks_pct.get(),
2491  TRUE);
2492 
2493  innobase_open_tables = hash_create(200);
2494  innodb_inited= 1;
2495 
2496  actuall_engine_ptr->dropTemporarySchema();
2497 
2498  context.add(new InnodbStatusTool);
2499  context.add(innodb_engine_ptr);
2500  context.add(new CmpTool(false));
2501  context.add(new CmpTool(true));
2502  context.add(new CmpmemTool(false));
2503  context.add(new CmpmemTool(true));
2504  context.add(new InnodbTrxTool("INNODB_TRX"));
2505  context.add(new InnodbTrxTool("INNODB_LOCKS"));
2506  context.add(new InnodbTrxTool("INNODB_LOCK_WAITS"));
2507  context.add(new InnodbSysTablesTool());
2508  context.add(new InnodbSysTableStatsTool());
2509  context.add(new InnodbSysIndexesTool());
2510  context.add(new InnodbSysColumnsTool());
2511  context.add(new InnodbSysFieldsTool());
2512  context.add(new InnodbSysForeignTool());
2513  context.add(new InnodbSysForeignColsTool());
2514  context.add(new InnodbInternalTables());
2515  context.add(new InnodbReplicationTable());
2516 
2517  if (innobase_use_replication_log)
2518  {
2519  ReplicationLog *replication_logger= new ReplicationLog();
2520  context.add(replication_logger);
2521  ReplicationLog::setup(replication_logger, sysvar_transaction_log_use_replicator);
2522  }
2523 
2524  context.registerVariable(new sys_var_const_string_val("data-home-dir", innobase_data_home_dir));
2525  context.registerVariable(new sys_var_const_string_val("flush-method",
2526  vm.count("flush-method") ? vm["flush-method"].as<string>() : ""));
2527  context.registerVariable(new sys_var_const_string_val("log-group-home-dir", innobase_log_group_home_dir));
2528  context.registerVariable(new sys_var_const_string_val("data-file-path", innobase_data_file_path));
2529  context.registerVariable(new sys_var_const_string_val("version", vm["version"].as<string>()));
2530 
2531 
2532  context.registerVariable(new sys_var_bool_ptr_readonly("replication_log", &innobase_use_replication_log));
2533  context.registerVariable(new sys_var_bool_ptr_readonly("checksums", &innobase_use_checksums));
2534  context.registerVariable(new sys_var_bool_ptr_readonly("doublewrite", &innobase_use_doublewrite));
2535  context.registerVariable(new sys_var_bool_ptr("file-per-table", &srv_file_per_table));
2536  context.registerVariable(new sys_var_bool_ptr_readonly("file-format-check", &innobase_file_format_check));
2537  context.registerVariable(new sys_var_bool_ptr("adaptive-flushing", &srv_adaptive_flushing));
2538  context.registerVariable(new sys_var_bool_ptr("status-file", &innobase_create_status_file));
2539  context.registerVariable(new sys_var_bool_ptr_readonly("use-sys-malloc", &srv_use_sys_malloc));
2540  context.registerVariable(new sys_var_bool_ptr_readonly("use-native-aio", &srv_use_native_aio));
2541 
2542  context.registerVariable(new sys_var_bool_ptr("support-xa", &support_xa));
2543  context.registerVariable(new sys_var_bool_ptr("strict_mode", &strict_mode));
2544  context.registerVariable(new sys_var_constrained_value<uint32_t>("lock_wait_timeout", lock_wait_timeout));
2545 
2546  context.registerVariable(new sys_var_constrained_value_readonly<size_t>("additional_mem_pool_size",innobase_additional_mem_pool_size));
2547  context.registerVariable(new sys_var_constrained_value<uint32_t>("autoextend_increment",
2548  innodb_auto_extend_increment,
2549  auto_extend_update));
2550  context.registerVariable(new sys_var_constrained_value<uint32_t>("io_capacity",
2551  innodb_io_capacity,
2552  io_capacity_update));
2553  context.registerVariable(new sys_var_constrained_value<uint32_t>("purge_batch_size",
2554  innodb_purge_batch_size,
2555  purge_batch_update));
2556  context.registerVariable(new sys_var_constrained_value<uint32_t>("purge_threads",
2557  innodb_n_purge_threads,
2558  purge_threads_update));
2559  context.registerVariable(new sys_var_constrained_value<uint32_t>("fast_shutdown", innobase_fast_shutdown));
2560  context.registerVariable(new sys_var_std_string("file_format",
2561  innobase_file_format_name,
2563  context.registerVariable(new sys_var_std_string("change_buffering",
2564  innobase_change_buffering,
2566  context.registerVariable(new sys_var_std_string("file_format_max",
2567  innobase_file_format_max,
2569  context.registerVariable(new sys_var_constrained_value_readonly<size_t>("buffer_pool_size", innobase_buffer_pool_size));
2570  context.registerVariable(new sys_var_constrained_value_readonly<int64_t>("log_file_size", innobase_log_file_size));
2571  context.registerVariable(new sys_var_constrained_value_readonly<uint32_t>("page_size", innobase_page_size));
2572  context.registerVariable(new sys_var_constrained_value_readonly<uint32_t>("log_block_size", innobase_log_block_size));
2573  context.registerVariable(new sys_var_constrained_value_readonly<uint32_t>("flush_log_at_trx_commit",
2574  innodb_flush_log_at_trx_commit));
2575  context.registerVariable(new sys_var_constrained_value_readonly<unsigned int>("max_dirty_pages_pct",
2576  innodb_max_dirty_pages_pct));
2577  context.registerVariable(new sys_var_constrained_value_readonly<uint64_t>("max_purge_lag", innodb_max_purge_lag));
2578  context.registerVariable(new sys_var_constrained_value_readonly<uint64_t>("stats_sample_pages", innodb_stats_sample_pages));
2579  context.registerVariable(new sys_var_bool_ptr("adaptive_hash_index", &btr_search_enabled, innodb_adaptive_hash_index_update));
2580 
2581  context.registerVariable(new sys_var_constrained_value<uint32_t>("commit_concurrency",
2582  innobase_commit_concurrency,
2583  innodb_commit_concurrency_validate));
2584  context.registerVariable(new sys_var_constrained_value<uint32_t>("concurrency_tickets",
2585  innodb_concurrency_tickets));
2586  context.registerVariable(new sys_var_constrained_value_readonly<uint32_t>("read_io_threads", innobase_read_io_threads));
2587  context.registerVariable(new sys_var_constrained_value_readonly<uint32_t>("write_io_threads", innobase_write_io_threads));
2588  context.registerVariable(new sys_var_constrained_value_readonly<uint64_t>("replication_delay", innodb_replication_delay));
2589  context.registerVariable(new sys_var_constrained_value_readonly<uint32_t>("force_recovery", innobase_force_recovery));
2590  context.registerVariable(new sys_var_constrained_value_readonly<size_t>("log_buffer_size", innobase_log_buffer_size));
2591  context.registerVariable(new sys_var_constrained_value_readonly<uint32_t>("log_files_in_group", innobase_log_files_in_group));
2592  context.registerVariable(new sys_var_constrained_value_readonly<uint32_t>("mirrored_log_groups", innobase_mirrored_log_groups));
2593  context.registerVariable(new sys_var_constrained_value_readonly<uint32_t>("open_files", innobase_open_files));
2594  context.registerVariable(new sys_var_constrained_value<uint32_t>("old_blocks_pct",
2595  innobase_old_blocks_pct,
2596  innodb_old_blocks_pct_update));
2597  context.registerVariable(new sys_var_uint32_t_ptr("old_blocks_time", &buf_LRU_old_threshold_ms));
2598  context.registerVariable(new sys_var_constrained_value<uint32_t>("sync_spin_loops", innodb_sync_spin_loops, innodb_sync_spin_loops_update));
2599  context.registerVariable(new sys_var_constrained_value<uint32_t>("spin_wait_delay", innodb_spin_wait_delay, innodb_spin_wait_delay_update));
2600  context.registerVariable(new sys_var_constrained_value<uint32_t>("thread_sleep_delay", innodb_thread_sleep_delay, innodb_thread_sleep_delay_update));
2601  context.registerVariable(new sys_var_constrained_value<uint32_t>("thread_concurrency",
2602  innobase_thread_concurrency,
2603  innodb_thread_concurrency_update));
2604  context.registerVariable(new sys_var_constrained_value<uint32_t>("read_ahead_threshold",
2605  innodb_read_ahead_threshold,
2606  innodb_read_ahead_threshold_update));
2607  context.registerVariable(new sys_var_constrained_value<uint32_t>("auto_lru_dump",
2608  buffer_pool_restore_at_startup,
2609  auto_lru_dump_update));
2610  context.registerVariable(new sys_var_constrained_value_readonly<uint64_t>("ibuf_max_size",
2611  ibuf_max_size));
2612  context.registerVariable(new sys_var_constrained_value<uint32_t>("ibuf_active_contract",
2613  ibuf_active_contract,
2614  ibuf_active_contract_update));
2615  context.registerVariable(new sys_var_constrained_value<uint32_t>("ibuf_accel_rate",
2616  ibuf_accel_rate,
2617  ibuf_accel_rate_update));
2618  context.registerVariable(new sys_var_constrained_value<uint32_t>("checkpoint_age_target",
2619  checkpoint_age_target,
2620  checkpoint_age_target_update));
2621  context.registerVariable(new sys_var_constrained_value<uint32_t>("flush_neighbor_pages",
2622  flush_neighbor_pages,
2623  flush_neighbor_pages_update));
2624  context.registerVariable(new sys_var_std_string("read_ahead",
2625  read_ahead,
2627  context.registerVariable(new sys_var_std_string("adaptive_flushing_method",
2628  adaptive_flushing_method,
2630  /* Get the current high water mark format. */
2631  innobase_file_format_max = trx_sys_file_format_max_get();
2632  btr_search_fully_disabled = (!btr_search_enabled);
2633 
2634  context.registerVariable(new sys_var_const_string("use-replicator",
2635  sysvar_transaction_log_use_replicator));
2636 
2637  return(FALSE);
2638 
2639 error:
2640  return(TRUE);
2641 }
2642 
2643 
2644 /****************************************************************/
/* Engine member function; its name line (listing line 2649) is not present
   in this extract.  As far as is visible here it asserts that it is invoked
   on the innodb_engine_ptr instance and returns `result`, which is
   initialised to 0 and never reassigned in the visible lines.
   NOTE(review): listing line 2656 is also absent, so any work performed
   between the assert and the return cannot be seen here. */
2648 bool
2650 /*=====================*/
2651 {
2652  bool result = 0;
2653 
2654  assert(this == innodb_engine_ptr);
2655 
2657 
2658  return(result);
2659 }
2660 
2661 /*****************************************************************/
2663 static
2664 void
2666 /*================*/
2667  trx_t* trx)
2668 {
2669  if (trx->conc_state == TRX_NOT_STARTED) {
2670 
2671  return;
2672  }
2673 
2674  trx_commit_for_mysql(trx);
2675 }
2676 
2677 /*****************************************************************/
/* Engine member function taking a Session and a start_transaction_option_t;
   its name line (listing line 2684) is not present in this extract.
   It obtains the session's trx via check_trx_exists(), optionally assigns a
   read view, and always returns 0.
   NOTE(review): listing lines 2698 and 2701 are absent, so the statements
   that belong to the "release ... search latch" and "start it" comments
   below are not visible here. */
2683 int
2685 /*====================================*/
2686  Session* session,
2688  start_transaction_option_t options)
2689 {
2690  assert(this == innodb_engine_ptr);
2691 
2692  /* Create a new trx struct for session, if it does not yet have one */
2693  trx_t *trx = check_trx_exists(session);
2694 
2695  /* This is just to play safe: release a possible FIFO ticket and
2696  search latch. Since we will reserve the kernel mutex, we have to
2697  release the search system latch first to obey the latching order. */
2699 
2700  /* If the transaction is not started yet, start it */
2702 
2703  /* Assign a read view if the transaction does not have it yet */
/* The read view is requested only when the caller asked for
   START_TRANS_OPT_WITH_CONS_SNAPSHOT; other option values skip this. */
2704  if (options == START_TRANS_OPT_WITH_CONS_SNAPSHOT)
2705  trx_assign_read_view(trx);
2706 
2707  return 0;
2708 }
2709 
2710 /*****************************************************************/
/* Engine member function taking a Session and a bool `all`; its name line
   (listing line 2715) is not present in this extract.
   When `all` is true it commits the session's transaction via
   innobase_commit_low(), optionally throttled by innobase_commit_concurrency.
   When `all` is false it marks the end of the SQL statement with
   trx_mark_sql_stat_end() and, if neither OPTION_NOT_AUTOCOMMIT nor
   OPTION_BEGIN is set for the session and the transaction has started,
   calls commit(session, TRUE).
   Always returns 0.
   NOTE(review): listing lines 2732, 2777, 2786, 2808, 2813 and 2820 are
   absent from this extract; the statements they held are not visible. */
2714 int
2716 /*============*/
2717  Session* session,
2719  bool all)
2721 {
2722  trx_t* trx;
2723 
2724  assert(this == innodb_engine_ptr);
2725 
2726  trx = check_trx_exists(session);
2727 
2728  /* Since we will reserve the kernel mutex, we have to release
2729  the search system latch first to obey the latching order. */
2730 
2731  if (trx->has_search_latch) {
2733  }
2734 
2735  if (all)
2736  {
2737  /* We were instructed to commit the whole transaction, or
2738  this is an SQL statement end and autocommit is on */
2739 
2740  /* We need current binlog position for ibbackup to work.
2741  Note, the position is current because of
2742  prepare_commit_mutex */
/* A commit_concurrency of 0 disables the admission gate below. */
2743  const uint32_t commit_concurrency= innobase_commit_concurrency.get();
2744  if (commit_concurrency)
2745  {
/* Admission gate: under commit_cond_m, tentatively count this thread in
   commit_threads.  If that exceeds the limit, undo the increment and wait
   on commit_cond before retrying.  scopedLock is declared inside the loop
   body, so it is re-acquired on every iteration and released on break. */
2746  do
2747  {
2748  boost::mutex::scoped_lock scopedLock(commit_cond_m);
2749  commit_threads++;
2750 
2751  if (commit_threads <= commit_concurrency)
2752  break;
2753 
2754  commit_threads--;
2755  commit_cond.wait(scopedLock);
2756  } while (1);
2757  }
2758 
2759  trx->mysql_log_file_name = NULL;
2760  trx->mysql_log_offset = 0;
2761 
2762  /* Don't do write + flush right now. For group commit
2763  to work we want to do the flush after releasing the
2764  prepare_commit_mutex. */
2765  trx->flush_log_later = TRUE;
2766  innobase_commit_low(trx);
2767  trx->flush_log_later = FALSE;
2768 
/* Leave the admission gate: drop this thread's slot and wake one waiter. */
2769  if (commit_concurrency)
2770  {
2771  boost::mutex::scoped_lock scopedLock(commit_cond_m);
2772  commit_threads--;
2773  commit_cond.notify_one();
2774  }
2775 
2776  /* Now do a write + flush of logs. */
2778 
2779  } else {
2780  /* We just mark the SQL statement ended and do not do a
2781  transaction commit */
2782 
2783  /* If we had reserved the auto-inc lock for some
2784  table in this SQL statement we release it now */
2785 
2787 
2788  /* Store the current undo_no of the transaction so that we
2789  know where to roll back if we have to roll back the next
2790  SQL statement */
2791 
2792  trx_mark_sql_stat_end(trx);
2793 
/* With neither OPTION_NOT_AUTOCOMMIT nor OPTION_BEGIN set, a started
   transaction is committed in full by calling commit() with all == TRUE. */
2794  if (! session_test_options(session, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
2795  {
2796  if (trx->conc_state != TRX_NOT_STARTED)
2797  {
2798  commit(session, TRUE);
2799  }
2800  }
2801  }
2802 
2803  trx->n_autoinc_rows = 0; /* Reset the number AUTO-INC rows required */
2804 
2805  if (trx->declared_to_be_inside_innodb) {
2806  /* Release our possible ticket in the FIFO */
2807 
2809  }
2810 
2811  /* Tell the InnoDB server that there might be work for utility
2812  threads: */
2814 
2815  if (trx->isolation_level <= TRX_ISO_READ_COMMITTED &&
2816  trx->global_read_view)
2817  {
2818  /* At low transaction isolation levels we let
2819  each consistent read set its own snapshot */
2821  }
2822 
2823  return(0);
2824 }
2825 
2826 /*****************************************************************/
/* Engine member function taking a Session and a bool `all`; its name line
   (listing line 2830) is not present in this extract.
   When `all` is true the whole transaction is rolled back with
   trx_rollback_for_mysql().  The InnoDB error code is translated by
   convert_error_code_to_mysql() and returned.
   NOTE(review): listing lines 2848, 2856, 2862 and 2870 are absent.  In
   particular the body of the `else` branch (the all == false case) is not
   visible here, so `error` appears to stay 0 on that path in this extract. */
2829 int
2831 /*==============*/
2832  Session* session,
2834  bool all)
2836 {
2837  int error = 0;
2838  trx_t* trx;
2839 
2840  assert(this == innodb_engine_ptr);
2841 
2842  trx = check_trx_exists(session);
2843 
2844  /* Release a possible FIFO ticket and search latch. Since we will
2845  reserve the kernel mutex, we have to release the search system latch
2846  first to obey the latching order. */
2847 
2849 
2850  trx->n_autoinc_rows = 0;
2851 
2852  /* If we had reserved the auto-inc lock for some table (if
2853  we come here to roll back the latest SQL statement) we
2854  release it now before a possibly lengthy rollback */
2855 
2857 
2858  if (all)
2859  {
2860  error = trx_rollback_for_mysql(trx);
2861  } else {
2863  }
2864 
2865  if (trx->isolation_level <= TRX_ISO_READ_COMMITTED &&
2866  trx->global_read_view)
2867  {
2868  /* At low transaction isolation levels we let
2869  each consistent read set its own snapshot */
2871  }
2872 
2873  return(convert_error_code_to_mysql(error, 0, NULL));
2874 }
2875 
2876 /*****************************************************************/
/* File-local helper taking a trx_t*; its name line (listing line 2881) is
   not present in this extract.  Presumably this is innobase_rollback_trx,
   which is called with a trx_t* further down in this file -- confirm.
   It rolls the transaction back with trx_rollback_for_mysql() and returns
   the result translated by convert_error_code_to_mysql().
   NOTE(review): listing lines 2891 and 2897 are absent, so the statements
   belonging to the two comments below are not visible here. */
2879 static
2880 int
2882 /*==================*/
2883  trx_t* trx)
2884 {
2885  int error = 0;
2886 
2887  /* Release a possible FIFO ticket and search latch. Since we will
2888  reserve the kernel mutex, we have to release the search system latch
2889  first to obey the latching order. */
2890 
2892 
2893  /* If we had reserved the auto-inc lock for some table (if
2894  we come here to roll back the latest SQL statement) we
2895  release it now before a possibly lengthy rollback */
2896 
2898 
2899  error = trx_rollback_for_mysql(trx);
2900 
2901  return(convert_error_code_to_mysql(error, 0, NULL));
2902 }
2903 
2904 /*****************************************************************/
/* Engine member function taking a Session and a NamedSavepoint; its name
   line (listing line 2909) is not present in this extract.
   It rolls the session's transaction back to the savepoint whose name is
   named_savepoint.getName(), using trx_rollback_to_savepoint_for_mysql(),
   and returns the result translated by convert_error_code_to_mysql().
   NOTE(review): listing line 2927 is absent, so the statement belonging to
   the "Release a possible FIFO ticket" comment is not visible here. */
2908 int
2910 /*===========================*/
2911  Session* session,
2913  drizzled::NamedSavepoint &named_savepoint)
2914 {
/* Passed by address to the rollback call below; its value is not read
   anywhere else in this function. */
2915  ib_int64_t mysql_binlog_cache_pos;
2916  int error = 0;
2917  trx_t* trx;
2918 
2919  assert(this == innodb_engine_ptr);
2920 
2921  trx = check_trx_exists(session);
2922 
2923  /* Release a possible FIFO ticket and search latch. Since we will
2924  reserve the kernel mutex, we have to release the search system latch
2925  first to obey the latching order. */
2926 
2928 
2929  error= (int)trx_rollback_to_savepoint_for_mysql(trx, named_savepoint.getName().c_str(),
2930  &mysql_binlog_cache_pos);
2931  return(convert_error_code_to_mysql(error, 0, NULL));
2932 }
2933 
2934 /*****************************************************************/
/* Engine member function taking a Session and a NamedSavepoint; its name
   line (listing line 2939) is not present in this extract.
   It releases the savepoint whose name is named_savepoint.getName() from the
   session's transaction via trx_release_savepoint_for_mysql() and returns
   the result translated by convert_error_code_to_mysql(). */
2938 int
2940 /*=======================*/
2941  Session* session,
2943  drizzled::NamedSavepoint &named_savepoint)
2944 {
2945  int error = 0;
2946  trx_t* trx;
2947 
2948  assert(this == innodb_engine_ptr);
2949 
2950  trx = check_trx_exists(session);
2951 
2952  error = (int) trx_release_savepoint_for_mysql(trx, named_savepoint.getName().c_str());
2953 
2954  return(convert_error_code_to_mysql(error, 0, NULL));
2955 }
2956 
2957 /*****************************************************************/
/* Engine member function taking a Session and a NamedSavepoint; its name
   line (listing line 2961) is not present in this extract.
   It creates a savepoint named named_savepoint.getName() in the session's
   transaction via trx_savepoint_for_mysql(), passing 0 as the last argument,
   and returns the result translated by convert_error_code_to_mysql().
   The transaction must already be started; this is checked with assert().
   NOTE(review): listing line 2983 is absent, so the statement belonging to
   the "Release a possible FIFO ticket" comment is not visible here. */
2960 int
2962 /*===============*/
2963  Session* session,
2964  drizzled::NamedSavepoint &named_savepoint)
2965 {
2966  int error = 0;
2967  trx_t* trx;
2968 
2969  assert(this == innodb_engine_ptr);
2970 
2971  /*
2972  In the autocommit mode there is no sense to set a savepoint
2973  (unless we are in sub-statement), so SQL layer ensures that
2974  this method is never called in such situation.
2975  */
2976 
2977  trx = check_trx_exists(session);
2978 
2979  /* Release a possible FIFO ticket and search latch. Since we will
2980  reserve the kernel mutex, we have to release the search system latch
2981  first to obey the latching order. */
2982 
2984 
2985  /* cannot happen outside of transaction */
2986  assert(trx->conc_state != TRX_NOT_STARTED);
2987 
2988  error = (int) trx_savepoint_for_mysql(trx, named_savepoint.getName().c_str(), (ib_int64_t)0);
2989 
2990  return(convert_error_code_to_mysql(error, 0, NULL));
2991 }
2992 
2993 /*****************************************************************/
/* Engine member function taking a Session; its name line (listing line
   2997) is not present in this extract.
   It looks up the session's trx with session_to_trx() (which must be
   non-NULL), rolls it back with innobase_rollback_trx(), releases the
   thread-local slot for trx->mysql_thread_id and frees the trx object.
   Always returns 0; the return value of innobase_rollback_trx() is not
   inspected. */
2996 int
2998 /*======================*/
2999  Session* session)
3001 {
3002  trx_t* trx;
3003 
3004  assert(this == innodb_engine_ptr);
3005  trx = session_to_trx(session);
3006 
3007  ut_a(trx);
3008 
/* Holds when the session has been killed or, failing that, when the
   transaction has not been started. */
3009  assert(session->getKilled() != Session::NOT_KILLED ||
3010  trx->conc_state == TRX_NOT_STARTED);
3011 
3012  /* Warn if rolling back some things... */
/* The warning is emitted only for a killed session whose started
   transaction has undo records, and only if log_warnings is enabled. */
3013  if (session->getKilled() != Session::NOT_KILLED &&
3014  trx->conc_state != TRX_NOT_STARTED &&
3015  trx->undo_no > 0 &&
3016  global_system_variables.log_warnings)
3017  {
3018  errmsg_printf(error::WARN,
3019  "Drizzle is closing a connection during a KILL operation\n"
3020  "that has an active InnoDB transaction. %llu row modifications will "
3021  "roll back.\n",
3022  (ullint) trx->undo_no);
3023  }
3024 
3025  innobase_rollback_trx(trx);
3026 
3027  thr_local_free(trx->mysql_thread_id);
3028  trx_free_for_mysql(trx);
3029 
3030  return(0);
3031 }
3032 
3033 
3034 /*************************************************************************/
3038 /****************************************************************/
/* Function taking one unnamed (unused) uint argument; its name line
   (listing line 3042) is not present in this extract.
   Returns the constant string "BTREE" regardless of the argument. */
3040 UNIV_INTERN
3041 const char*
3043 /*====================*/
3044  uint)
3046 {
3047  return("BTREE");
3048 }
3049 
3050 /****************************************************************/
/* Function whose name line (listing line 3055) is not present in this
   extract.  Returns the constant MAX_KEY. */
3053 UNIV_INTERN
3054 uint
3056 /*===================================*/
3057 {
3058  return(MAX_KEY);
3059 }
3060 
3061 /****************************************************************/
/* Function whose name line (listing line 3066) is not present in this
   extract.  Returns the fixed value 3500.
   NOTE(review): the comment below reasons from a 16 kB page, while the
   startup code in this file also accepts innodb_page_size values of 4096
   and 8192; the returned constant does not depend on the configured page
   size -- confirm whether that is intended. */
3064 UNIV_INTERN
3065 uint32_t
3067 /*=========================================*/
3068 {
3069  /* An InnoDB page must store >= 2 keys; a secondary key record
3070  must also contain the primary key value: max key length is
3071  therefore set to slightly less than 1 / 4 of page size which
3072  is 16 kB; but currently MySQL does not work with keys whose
3073  size is > MAX_KEY_LENGTH */
3074  return(3500);
3075 }
3076 
3077 /****************************************************************/
/* Function whose name line (listing line 3082) is not present in this
   extract.  Returns the address of key_map_full. */
3080 UNIV_INTERN
3081 const key_map*
3083 {
3084  return(&key_map_full);
3085 }
3086 
3087 
3088 /****************************************************************/
/* Function whose name line (listing line 3093) is not present in this
   extract.  Unconditionally returns true. */
3091 UNIV_INTERN
3092 bool
3094 {
3095  return(true);
3096 }
3097 
3098 /********************************************************************/
3101 static
3102 uint64_t
3104 /*===========================*/
3105  const Field* field)
3106 {
3107  uint64_t max_value = 0;
3108 
3109  switch(field->key_type()) {
3110  /* TINY */
3111  case HA_KEYTYPE_BINARY:
3112  max_value = 0xFFULL;
3113  break;
3114  /* LONG */
3115  case HA_KEYTYPE_ULONG_INT:
3116  max_value = 0xFFFFFFFFULL;
3117  break;
3118  case HA_KEYTYPE_LONG_INT:
3119  max_value = 0x7FFFFFFFULL;
3120  break;
3121  /* BIG */
3122  case HA_KEYTYPE_ULONGLONG:
3123  max_value = 0xFFFFFFFFFFFFFFFFULL;
3124  break;
3125  case HA_KEYTYPE_LONGLONG:
3126  max_value = 0x7FFFFFFFFFFFFFFFULL;
3127  break;
3128  case HA_KEYTYPE_DOUBLE:
3129  /* We use the maximum as per IEEE754-2008 standard, 2^53 */
3130  max_value = 0x20000000000000ULL;
3131  break;
3132  default:
3133  ut_error;
3134  }
3135 
3136  return(max_value);
3137 }
3138 
3139 /*******************************************************************/
3143 static
3144 ibool
3146 /*=========================*/
3147  const KeyInfo* key_info,
3149  const dict_index_t* index_info)
3151 {
3152  const KeyPartInfo* key_part;
3153  const KeyPartInfo* key_end;
3154  const dict_field_t* innodb_idx_fld;
3155  const dict_field_t* innodb_idx_fld_end;
3156 
3157  /* Check whether user defined index column count matches */
3158  if (key_info->key_parts != index_info->n_user_defined_cols) {
3159  return(FALSE);
3160  }
3161 
3162  key_part = key_info->key_part;
3163  key_end = key_part + key_info->key_parts;
3164  innodb_idx_fld = index_info->fields;
3165  innodb_idx_fld_end = index_info->fields + index_info->n_fields;
3166 
3167  /* Check each index column's datatype. We do not check
3168  column name because there exists case that index
3169  column name got modified in mysql but such change does not
3170  propagate to InnoDB.
3171  One hidden assumption here is that the index column sequences
3172  are matched up between those in mysql and Innodb. */
3173  for (; key_part != key_end; ++key_part) {
3174  ulint col_type;
3175  ibool is_unsigned;
3176  ulint mtype = innodb_idx_fld->col->mtype;
3177 
3178  /* Need to translate to InnoDB column type before
3179  comparison. */
3180  col_type = get_innobase_type_from_mysql_type(&is_unsigned,
3181  key_part->field);
3182 
3183  /* Ignore Innodb specific system columns. */
3184  while (mtype == DATA_SYS) {
3185  innodb_idx_fld++;
3186 
3187  if (innodb_idx_fld >= innodb_idx_fld_end) {
3188  return(FALSE);
3189  }
3190  }
3191 
3192  if (col_type != mtype) {
3193  /* Column Type mismatches */
3194  return(FALSE);
3195  }
3196 
3197  innodb_idx_fld++;
3198  }
3199 
3200  return(TRUE);
3201 }
3202 
3203 /*******************************************************************/
/* File-local helper whose name line (listing line 3216) is not present in
   this extract.  It fills share->idx_trans_tbl.index_mapping so that entry i
   holds the InnoDB index whose name equals table->key_info[i].name, for
   every key of the table.  The work is done while holding dict_sys->mutex.

   Returns TRUE if the table is already built (index_count non-zero) or was
   built successfully.  Returns FALSE if InnoDB has fewer indexes than the
   table has keys, if memory cannot be allocated, if an index name is not
   found, or if an index's columns do not match; on FALSE the mapping array
   is freed and the table's array_size and index_count are reset to 0. */
3214 static
3215 ibool
3217 /*=============================*/
3218  const Table* table,
3220  dict_table_t* ib_table,
3222  INNOBASE_SHARE* share)
3225 {
3226  ulint mysql_num_index;
3227  ulint ib_num_index;
3228  dict_index_t** index_mapping;
3229  ibool ret = TRUE;
3230 
3231  mutex_enter(&dict_sys->mutex);
3232 
3233  mysql_num_index = table->getShare()->keys;
3234  ib_num_index = UT_LIST_GET_LEN(ib_table->indexes);
3235 
/* Work on a local copy of the pointer; it is written back to the share at
   func_exit on every path. */
3236  index_mapping = share->idx_trans_tbl.index_mapping;
3237 
3238  /* If there exists inconsistency between MySQL and InnoDB dictionary
3239  (metadata) information, the number of index defined in MySQL
3240  could exceed that in InnoDB, do not build index translation
3241  table in such case */
3242  if (UNIV_UNLIKELY(ib_num_index < mysql_num_index)) {
3243  ret = FALSE;
3244  goto func_exit;
3245  }
3246 
3247  /* If index entry count is non-zero, nothing has
3248  changed since last update, directly return TRUE */
3249  if (share->idx_trans_tbl.index_count) {
3250  /* Index entry count should still match mysql_num_index */
3251  ut_a(share->idx_trans_tbl.index_count == mysql_num_index);
3252  goto func_exit;
3253  }
3254 
3255  /* The number of index increased, rebuild the mapping table */
3256  if (mysql_num_index > share->idx_trans_tbl.array_size) {
/* NOTE(review): the realloc() result overwrites index_mapping directly.  If
   realloc() returns NULL, this function no longer holds the previous array
   pointer, and func_exit then stores NULL into the share, so the old array
   looks like it would be leaked on that path -- confirm. */
3257  index_mapping = (dict_index_t**) realloc(index_mapping,
3258  mysql_num_index *
3259  sizeof(*index_mapping));
3260 
3261  if (!index_mapping) {
3262  /* Report an error if index_mapping continues to be
3263  NULL and mysql_num_index is a non-zero value */
3264  errmsg_printf(error::ERROR, "InnoDB: fail to allocate memory for "
3265  "index translation table. Number of Index:%lu, array size:%lu",
3266  mysql_num_index,
3267  share->idx_trans_tbl.array_size);
3268  ret = FALSE;
3269  goto func_exit;
3270  }
3271 
3272  share->idx_trans_tbl.array_size = mysql_num_index;
3273  }
3274 
3275  /* For each index in the mysql key_info array, fetch its
3276  corresponding InnoDB index pointer into index_mapping
3277  array. */
3278  for (ulint count = 0; count < mysql_num_index; count++) {
3279 
3280  /* Fetch index pointers into index_mapping according to mysql
3281  index sequence */
3282  index_mapping[count] = dict_table_get_index_on_name(
3283  ib_table, table->key_info[count].name);
3284 
3285  if (!index_mapping[count]) {
3286  errmsg_printf(error::ERROR, "Cannot find index %s in InnoDB index dictionary.",
3287  table->key_info[count].name);
3288  ret = FALSE;
3289  goto func_exit;
3290  }
3291 
3292  /* Double check fetched index has the same
3293  column info as those in mysql key_info. */
3294  if (!innobase_match_index_columns(&table->key_info[count], index_mapping[count])) {
3295  errmsg_printf(error::ERROR, "Found index %s whose column info does not match that of MySQL.",
3296  table->key_info[count].name);
3297  ret = FALSE;
3298  goto func_exit;
3299  }
3300  }
3301 
3302  /* Successfully built the translation table */
3303  share->idx_trans_tbl.index_count = mysql_num_index;
3304 
/* Common exit: on failure discard the mapping array, then publish the
   (possibly NULL) pointer to the share and release dict_sys->mutex. */
3305 func_exit:
3306  if (!ret) {
3307  /* Build translation table failed. */
3308  free(index_mapping);
3309 
3310  share->idx_trans_tbl.array_size = 0;
3311  share->idx_trans_tbl.index_count = 0;
3312  index_mapping = NULL;
3313  }
3314 
3315  share->idx_trans_tbl.index_mapping = index_mapping;
3316 
3317  mutex_exit(&dict_sys->mutex);
3318 
3319  return(ret);
3320 }
3321 
3322 /*******************************************************************/
/* Looks up position keynr in the index translation table attached to
share (share->idx_trans_tbl.index_mapping).
Parameters: share - table share that owns the translation table;
keynr - position to look up.
Returns the dict_index_t pointer stored at keynr, or NULL when no mapping
array is attached or keynr is not below idx_trans_tbl.index_count.
NOTE(review): listing lines 3333, 3336 and 3338 are absent from this
rendering; 3333 presumably carried the function name. */
3331 static
3332 dict_index_t*
3334 /*==================*/
3335  INNOBASE_SHARE* share,
3337  uint keynr)
3339 {
 /* Guard both against a missing mapping array and against an
 out-of-range position before indexing into the array. */
3340  if (!share->idx_trans_tbl.index_mapping
3341  || keynr >= share->idx_trans_tbl.index_count) {
3342  return(NULL);
3343  }
3344 
3345  return(share->idx_trans_tbl.index_mapping[keynr]);
3346 }
3347 
3348 /********************************************************************/
/* Computes the starting AUTOINC value for the open table and stores it
with dict_table_autoinc_initialize(prebuilt->table, auto_inc).
The value is 0 when the AUTOINC field cannot be determined, when
srv_force_recovery is at or above SRV_FORCE_NO_IBUF_MERGE, or when
row_search_max_autoinc() reports DB_RECORD_NOT_FOUND; otherwise it is the
value after the current column maximum, computed with
innobase_next_autoinc() using an increment and offset of 1.
NOTE(review): listing lines 3353 (presumably the function name) and 3396
are absent from this rendering. */
3351 UNIV_INTERN
3352 void
3354 /*======================================*/
3355 {
3356  uint64_t auto_inc;
3357  const Field* field = getTable()->found_next_number_field;
3358 
 /* Provisional value; every branch of the if/else chain below that
 runs for a non-NULL field assigns auto_inc again. */
3359  if (field != NULL) {
3360  auto_inc = innobase_get_int_col_max_value(field);
3361  } else {
3362  /* We have no idea what's been passed in to us as the
3363  autoinc column. We set it to 0, effectively disabling
3364  updates to the table. */
3365  auto_inc = 0;
3366 
3367  ut_print_timestamp(stderr);
3368  errmsg_printf(error::ERROR, "InnoDB: Unable to determine the AUTOINC column name");
3369  }
3370 
3371  if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
3372  /* If the recovery level is set so high that writes
3373  are disabled we force the AUTOINC counter to 0
3374  value effectively disabling writes to the table.
3375  Secondly, we avoid reading the table in case the read
3376  results in failure due to a corrupted table/index.
3377 
3378  We will not return an error to the client, so that the
3379  tables can be dumped with minimal hassle. If an error
3380  were returned in this case, the first attempt to read
3381  the table would fail and subsequent SELECTs would succeed. */
3382  auto_inc = 0;
3383  } else if (field == NULL) {
3384  /* This is a far more serious error, best to avoid
3385  opening the table and return failure. */
 /* NOTE(review): the function returns void and still falls
 through to dict_table_autoinc_initialize() below with
 auto_inc == 0; only my_error() signals the failure. */
3386  my_error(ER_AUTOINC_READ_FAILED, MYF(0));
3387  } else {
3388  dict_index_t* index;
3389  const char* col_name;
3390  uint64_t read_auto_inc;
3391  ulint err;
3392 
3393  update_session(getTable()->in_use);
3394  col_name = field->field_name;
3395 
3397 
 /* NOTE(review): index is passed on and later dereferenced
 (index->table->name) without a NULL check; confirm that
 innobase_get_index() cannot return NULL here. */
3398  index = innobase_get_index(getTable()->getShare()->next_number_index);
3399 
3400  /* Execute SELECT MAX(col_name) FROM TABLE; */
3401  err = row_search_max_autoinc(index, col_name, &read_auto_inc);
3402 
3403  switch (err) {
3404  case DB_SUCCESS: {
3405  uint64_t col_max_value;
3406 
3407  col_max_value = innobase_get_int_col_max_value(field);
3408 
3409  /* At this stage we do not know the increment
3410  nor the offset, so use a default increment of 1. */
3411 
3412  auto_inc = innobase_next_autoinc(read_auto_inc, 1, 1, col_max_value);
3413 
3414  break;
3415  }
3416  case DB_RECORD_NOT_FOUND:
3417  ut_print_timestamp(stderr);
3418  errmsg_printf(error::ERROR, "InnoDB: MySQL and InnoDB data dictionaries are out of sync.\n"
3419  "InnoDB: Unable to find the AUTOINC column %s in the InnoDB table %s.\n"
3420  "InnoDB: We set the next AUTOINC column value to 0,\n"
3421  "InnoDB: in effect disabling the AUTOINC next value generation.\n"
3422  "InnoDB: You can either set the next AUTOINC value explicitly using ALTER TABLE\n"
3423  "InnoDB: or fix the data dictionary by recreating the table.\n",
3424  col_name, index->table->name);
3425 
3426  /* This will disable the AUTOINC generation. */
3427  auto_inc = 0;
3428 
3429  /* We want the open to succeed, so that the user can
3430  take corrective action. ie. reads should succeed but
3431  updates should fail. */
 /* NOTE(review): err is local to this branch and is not
 read again after the switch, so this assignment has no
 visible effect. */
3432  err = DB_SUCCESS;
3433  break;
3434  default:
3435  /* row_search_max_autoinc() should only return
3436  one of DB_SUCCESS or DB_RECORD_NOT_FOUND. */
3437  ut_error;
3438  }
3439  }
3440 
3441  dict_table_autoinc_initialize(prebuilt->table, auto_inc);
3442 }
3443 
3444 /*****************************************************************/
/* Opens the handler on one table. In order, the body:
 - releases temporary latches held by the session, if there is one;
 - obtains the INNOBASE_SHARE for the table via get_share();
 - sizes upd_buff and key_val_buff;
 - fetches the table from the InnoDB dictionary cache (dict_table_get);
 - creates prebuilt with row_create_prebuilt();
 - builds the index translation table;
 - chooses ref_length from the primary key or DATA_ROW_ID_LEN;
 - initialises the table lock and, for tables with an AUTOINC field,
   the autoinc counter section;
 - refreshes statistics with info().
Both mode and test_if_locked are marked unused.
Returns 0 on success, 1 when the share or a buffer cannot be set up, and
HA_ERR_NO_SUCH_TABLE when the table or its .ibd file is missing.
NOTE(review): listing lines 3450, 3572, 3592, 3673, 3706 and 3721 are
absent from this rendering. Line 3450 presumably carried the function name
and the declaration of `identifier`, which the body uses. */
3448 UNIV_INTERN
3449 int
3451  int mode,
3452  uint test_if_locked)
3453 {
3454  dict_table_t* ib_table;
3455  Session* session;
3456 
3457  UT_NOT_USED(mode);
3458  UT_NOT_USED(test_if_locked);
3459 
3460  session= getTable()->in_use;
3461 
3462  /* Under some cases Drizzle seems to call this function while
3463  holding btr_search_latch. This breaks the latching order as
3464  we acquire dict_sys->mutex below and leads to a deadlock. */
3465  if (session != NULL) {
3466  getTransactionalEngine()->releaseTemporaryLatches(session);
3467  }
3468 
3469  user_session = NULL;
3470 
 /* Tables in the data_dictionary schema (matched case-insensitively)
 are keyed by their upper-cased table name; all other tables are
 keyed by identifier.getKeyPath(). */
3471  std::string search_string(identifier.getSchemaName());
3472  boost::algorithm::to_lower(search_string);
3473 
3474  if (search_string.compare("data_dictionary") == 0)
3475  {
3476  std::string table_name(identifier.getTableName());
3477  boost::algorithm::to_upper(table_name);
3478  if (!(share=get_share(table_name.c_str())))
3479  {
3480  return 1;
3481  }
3482  }
3483  else
3484  {
3485  if (!(share=get_share(identifier.getKeyPath().c_str())))
3486  {
3487  return(1);
3488  }
3489  }
3490 
3491  /* Create buffers for packing the fields of a record. Why
3492  table->stored_rec_length did not work here? Obviously, because char
3493  fields when packed actually became 1 byte longer, when we also
3494  stored the string length as the first byte. */
3495 
3496  upd_and_key_val_buff_len =
3497  getTable()->getShare()->sizeStoredRecord()
3498  + getTable()->getShare()->max_key_length
3499  + MAX_REF_PARTS * 3;
3500 
3501  upd_buff.resize(upd_and_key_val_buff_len);
3502 
 /* NOTE(review): this branch releases the share but does not
 return, so execution continues with a released share -- looks like
 a missing return; confirm. */
3503  if (upd_buff.size() < upd_and_key_val_buff_len)
3504  {
3505  free_share(share);
3506  }
3507 
3508  key_val_buff.resize(upd_and_key_val_buff_len);
 /* NOTE(review): this early return does not call free_share(share),
 unlike the error returns further down. */
3509  if (key_val_buff.size() < upd_and_key_val_buff_len)
3510  {
3511  return(1);
3512  }
3513 
3514  /* Get pointer to a table object in InnoDB dictionary cache */
3515  if (search_string.compare("data_dictionary") == 0)
3516  {
3517  std::string table_name(identifier.getTableName());
3518  boost::algorithm::to_upper(table_name);
3519  ib_table = dict_table_get(table_name.c_str(), TRUE);
3520  }
3521  else
3522  {
3523  ib_table = dict_table_get(identifier.getKeyPath().c_str(), TRUE);
3524  }
3525 
3526  if (NULL == ib_table) {
3527  errmsg_printf(error::ERROR, "Cannot find or open table %s from\n"
3528  "the internal data dictionary of InnoDB "
3529  "though the .frm file for the\n"
3530  "table exists. Maybe you have deleted and "
3531  "recreated InnoDB data\n"
3532  "files but have forgotten to delete the "
3533  "corresponding .frm files\n"
3534  "of InnoDB tables, or you have moved .frm "
3535  "files to another database?\n"
3536  "or, the table contains indexes that this "
3537  "version of the engine\n"
3538  "doesn't support.\n"
3539  "See " REFMAN "innodb-troubleshooting.html\n"
3540  "how you can resolve the problem.\n",
3541  identifier.getKeyPath().c_str());
 /* Undo what was set up above before reporting the failure. */
3542  free_share(share);
3543  upd_buff.resize(0);
3544  key_val_buff.resize(0);
3545  errno = ENOENT;
3546 
3547  return(HA_ERR_NO_SUCH_TABLE);
3548  }
3549 
 /* NOTE(review): session is dereferenced here without the NULL check
 that was applied to it near the top of the function. */
3550  if (ib_table->ibd_file_missing && ! session->doing_tablespace_operation()) {
3551  errmsg_printf(error::ERROR, "MySQL is trying to open a table handle but "
3552  "the .ibd file for\ntable %s does not exist.\n"
3553  "Have you deleted the .ibd file from the "
3554  "database directory under\nthe MySQL datadir, "
3555  "or have you used DISCARD TABLESPACE?\n"
3556  "See " REFMAN "innodb-troubleshooting.html\n"
3557  "how you can resolve the problem.\n",
3558  identifier.getKeyPath().c_str());
3559  free_share(share);
3560  upd_buff.resize(0);
3561  key_val_buff.resize(0);
3562  errno = ENOENT;
3563 
 /* This path also gives back the table obtained from
 dict_table_get() above. */
3564  dict_table_decrement_handle_count(ib_table, FALSE);
3565  return(HA_ERR_NO_SUCH_TABLE);
3566  }
3567 
3568  prebuilt = row_create_prebuilt(ib_table);
3569 
3570  prebuilt->mysql_row_len = getTable()->getShare()->sizeStoredRecord();
3571  prebuilt->default_rec = getTable()->getDefaultValues();
3573 
3574  /* Looks like MySQL-3.23 sometimes has primary key number != 0 */
3575 
3576  primary_key = getTable()->getShare()->getPrimaryKey();
3577  key_used_on_scan = primary_key;
3578 
 /* A failed build is only logged; the open carries on. */
3579  if (!innobase_build_index_translation(getTable(), ib_table, share)) {
3580  errmsg_printf(error::ERROR, "Build InnoDB index translation table for"
3581  " Table %s failed", identifier.getKeyPath().c_str());
3582  }
3583 
3584  /* Allocate a buffer for a 'row reference'. A row reference is
3585  a string of bytes of length ref_length which uniquely specifies
3586  a row in our table. Note that MySQL may also compare two row
3587  references for equality by doing a simple memcmp on the strings
3588  of length ref_length! */
3589 
3590  if (!row_table_got_default_clust_index(ib_table)) {
3591 
3593 
3594  if (UNIV_UNLIKELY(primary_key >= MAX_KEY)) {
3595  errmsg_printf(error::ERROR, "Table %s has a primary key in "
3596  "InnoDB data dictionary, but not "
3597  "in MySQL!", identifier.getTableName().c_str());
3598 
3599  /* This mismatch could cause further problems
3600  if not attended, bring this to the user's attention
3601  by printing a warning in addition to log a message
3602  in the errorlog */
3603  push_warning_printf(session, DRIZZLE_ERROR::WARN_LEVEL_WARN,
3604  ER_NO_SUCH_INDEX,
3605  "InnoDB: Table %s has a "
3606  "primary key in InnoDB data "
3607  "dictionary, but not in "
3608  "MySQL!", identifier.getTableName().c_str());
3609 
3610  /* If primary_key >= MAX_KEY, its (primary_key)
3611  value could be out of bound if continue to index
3612  into key_info[] array. Find InnoDB primary index,
3613  and assign its key_length to ref_length.
3614  In addition, since MySQL indexes are sorted starting
3615  with primary index, unique index etc., initialize
3616  ref_length to the first index key length in
3617  case we fail to find InnoDB cluster index.
3618 
3619  Please note, this will not resolve the primary
3620  index mismatch problem, other side effects are
3621  possible if users continue to use the table.
3622  However, we allow this table to be opened so
3623  that user can adopt necessary measures for the
3624  mismatch while still being accessible to the table
3625  data. */
3626  ref_length = getTable()->key_info[0].key_length;
3627 
3628  /* Find corresponding cluster index
3629  key length in MySQL's key_info[] array */
 /* NOTE(review): the result of innobase_get_index(i) is
 handed to dict_index_is_clust() unchecked; confirm it
 cannot be NULL, e.g. after the failed translation build
 tolerated above. */
3630  for (ulint i = 0; i < getTable()->getShare()->keys; i++) {
3631  dict_index_t* index;
3632  index = innobase_get_index(i);
3633  if (dict_index_is_clust(index)) {
3634  ref_length =
3635  getTable()->key_info[i].key_length;
3636  }
3637  }
3638  } else {
3639  /* MySQL allocates the buffer for ref.
3640  key_info->key_length includes space for all key
3641  columns + one byte for each column that may be
3642  NULL. ref_length must be as exact as possible to
3643  save space, because all row reference buffers are
3644  allocated based on ref_length. */
3645 
3646  ref_length = getTable()->key_info[primary_key].key_length;
3647  }
3648  } else {
3649  if (primary_key != MAX_KEY) {
3650  errmsg_printf(error::ERROR,
3651  "Table %s has no primary key in InnoDB data "
3652  "dictionary, but has one in MySQL! If you "
3653  "created the table with a MySQL version < "
3654  "3.23.54 and did not define a primary key, "
3655  "but defined a unique key with all non-NULL "
3656  "columns, then MySQL internally treats that "
3657  "key as the primary key. You can fix this "
3658  "error by dump + DROP + CREATE + reimport "
3659  "of the table.", identifier.getTableName().c_str());
3660 
3661  /* This mismatch could cause further problems
3662  if not attended, bring this to the user attention
3663  by printing a warning in addition to log a message
3664  in the errorlog */
3665  push_warning_printf(session, DRIZZLE_ERROR::WARN_LEVEL_WARN,
3666  ER_NO_SUCH_INDEX,
3667  "InnoDB: Table %s has no "
3668  "primary key in InnoDB data "
3669  "dictionary, but has one in "
3670  "MySQL!", identifier.getTableName().c_str());
3671  }
3672 
3674 
3675  ref_length = DATA_ROW_ID_LEN;
3676 
3677  /* If we automatically created the clustered index, then
3678  MySQL does not know about it, and MySQL must NOT be aware
3679  of the index used on scan, to make it avoid checking if we
3680  update the column of the index. That is why we assert below
3681  that key_used_on_scan is the undefined value MAX_KEY.
3682  The column is the row id in the automatic generation case,
3683  and it will never be updated anyway. */
3684 
 /* NOTE(review): despite the wording "assert" above, the code
 below only logs a warning. */
3685  if (key_used_on_scan != MAX_KEY) {
3686  errmsg_printf(error::WARN,
3687  "Table %s key_used_on_scan is %lu even "
3688  "though there is no primary key inside "
3689  "InnoDB.", identifier.getTableName().c_str(), (ulong) key_used_on_scan);
3690  }
3691  }
3692 
3693  /* Index block size in InnoDB: used by MySQL in query optimization */
3694  stats.block_size = 16 * 1024;
3695 
3696  /* Init table lock structure */
3697  lock.init(&share->lock);
3698 
3699  if (prebuilt->table) {
3700  /* We update the highest file format in the system table
3701  space, if this table has higher file format setting. */
3702 
 /* NOTE(review): strcpy() into a fixed 100-byte buffer assumes
 innobase_file_format_max is shorter than 100 bytes. Also,
 &changed_file_format_max is the address of an array, not of a
 char* variable, so the cast to const char ** deserves a check
 against what trx_sys_file_format_max_upgrade() expects. The
 remainder of the call (listing line 3706) is not visible. */
3703  char changed_file_format_max[100];
3704  strcpy(changed_file_format_max, innobase_file_format_max.c_str());
3705  trx_sys_file_format_max_upgrade((const char **)&changed_file_format_max,
3707  innobase_file_format_max= changed_file_format_max;
3708  }
3709 
3710  /* Only if the table has an AUTOINC column. */
3711  if (prebuilt->table != NULL && getTable()->found_next_number_field != NULL) {
3712 
3713  dict_table_autoinc_lock(prebuilt->table);
3714 
3715  /* Since a table can already be "open" in InnoDB's internal
3716  data dictionary, we only init the autoinc counter once, the
3717  first time the table is loaded. We can safely reuse the
3718  autoinc value from a previous Drizzle open. */
3719  if (dict_table_autoinc_read(prebuilt->table) == 0) {
3720 
3722  }
3723 
3724  dict_table_autoinc_unlock(prebuilt->table);
3725  }
3726 
3727  info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
3728 
3729  return(0);
3730 }
3731 
/* Reports the longest key part this engine accepts.
Returns DICT_MAX_INDEX_COL_LEN - 1, converted to uint32_t. */
3732 UNIV_INTERN
3733 uint32_t
3734 InnobaseEngine::max_supported_key_part_length() const
3735 {
3736  return(DICT_MAX_INDEX_COL_LEN - 1);
3737 }
3738 
3739 /******************************************************************/
/* Closes the handler: releases temporary latches held by the session (if
there is one), hands prebuilt to row_prebuilt_free(), clears upd_buff and
key_val_buff, and gives the share back with free_share().
Always returns 0.
NOTE(review): listing lines 3744 (presumably the function name) and 3763
(the statement announced by the last comment in the body) are absent from
this rendering. */
3742 UNIV_INTERN
3743 int
3745 /*====================*/
3746 {
3747  Session* session;
3748 
3749  session= getTable()->in_use;
3750  if (session != NULL) {
3751  getTransactionalEngine()->releaseTemporaryLatches(session);
3752  }
3753 
3754  row_prebuilt_free(prebuilt, FALSE);
3755 
3756  upd_buff.clear();
3757  key_val_buff.clear();
3758  free_share(share);
3759 
3760  /* Tell InnoDB server that there might be work for
3761  utility threads: */
3762 
3764 
3765  return(0);
3766 }
3767 
3768 /* The following accessor functions should really be inside MySQL code! */
3769 
3770 /**************************************************************/
/* Returns the distance, in bytes, from the start of the table's insert
record (table->getInsertRecord()) to field->ptr, cast to uint.
NOTE(review): listing line 3775 (presumably the function name) is absent
from this rendering; call sites elsewhere in the file use the name
get_field_offset with this (table, field) argument order. */
3773 static inline
3774 uint
3776 /*=============*/
3777  Table* table,
3778  Field* field)
3779 {
3780  return((uint) (field->ptr - table->getInsertRecord()));
3781 }
3782 
3783 /**************************************************************/
/* Tests the NULL flag of field inside the row image record.
The position of the flag byte is taken from the insert record
(field->null_ptr - table->getInsertRecord()) and applied to record.
Returns 1 when the field's null bit is set in record, 0 when it is clear
or when the field has no null_ptr at all.
NOTE(review): listing line 3789 (presumably the function name) is absent
from this rendering. */
3787 static inline
3788 uint
3790 /*====================*/
3791  Table* table,
3792  Field* field,
3793  char* record)
3794 {
3795  int null_offset;
3796 
 /* A field without a null_ptr is reported as not NULL. */
3797  if (!field->null_ptr) {
3798 
3799  return(0);
3800  }
3801 
 /* The difference is cast to uint and then stored in an int. */
3802  null_offset = (uint) ((char*) field->null_ptr
3803  - (char*) table->getInsertRecord());
3804 
3805  if (record[null_offset] & field->null_bit) {
3806 
3807  return(1);
3808  }
3809 
3810  return(0);
3811 }
3812 
3813 /**************************************************************/
/* Sets the NULL flag of field inside the row image record by OR-ing
field->null_bit into the flag byte, whose position is derived from the
insert record (field->null_ptr - table->getInsertRecord()).
NOTE(review): field->null_ptr is used without a check, unlike the NULL
test function directly above; presumably callers only pass nullable
fields -- confirm.
NOTE(review): listing line 3818 (presumably the function name) is absent
from this rendering. */
3816 static inline
3817 void
3819 /*========================*/
3820  Table* table,
3821  Field* field,
3822  char* record)
3823 {
3824  int null_offset;
3825 
3826  null_offset = (uint) ((char*) field->null_ptr
3827  - (char*) table->getInsertRecord());
3828 
3829  record[null_offset] = record[null_offset] | field->null_bit;
3830 }
3831 
3832 /*************************************************************/
/* Forward declaration of the string comparison routine; the parameter
list is the same as that of the definition which follows it.
NOTE(review): listing lines 3839 (presumably the function name) and 3845
are absent from this rendering. */
3838 UNIV_INTERN int
3840 /*===============*/
3841  int mysql_type,
3842  uint charset_number,
3843  const unsigned char* a,
3844  unsigned int a_length,
3846  const unsigned char* b, /* in: data field */
3847  unsigned int b_length); /* in: data field length,
3848  not UNIV_SQL_NULL */
3849 
/* Compares two non-NULL data fields a and b with the collation selected
by charset_number and normalises the collation result to -1, 0 or 1.
Only DRIZZLE_TYPE_BLOB and DRIZZLE_TYPE_VARCHAR are handled; any other
mysql_type reaches ut_error.
NOTE(review): listing line 3851 (presumably the function name) is absent
from this rendering. */
3850 int
3852 /*===============*/
3853  /* out: 1, 0, -1, if a is greater, equal, less than b, respectively */
3854  int mysql_type, /* in: MySQL type */
3855  uint charset_number, /* in: number of the charset */
3856  const unsigned char* a, /* in: data field */
3857  unsigned int a_length, /* in: data field length, not UNIV_SQL_NULL */
3858  const unsigned char* b, /* in: data field */
3859  unsigned int b_length) /* in: data field length, not UNIV_SQL_NULL */
3860 {
3861  const charset_info_st* charset;
3862  enum_field_types mysql_tp;
3863  int ret;
3864 
3865  assert(a_length != UNIV_SQL_NULL);
3866  assert(b_length != UNIV_SQL_NULL);
3867 
3868  mysql_tp = (enum_field_types) mysql_type;
3869 
3870  switch (mysql_tp) {
3871 
3872  case DRIZZLE_TYPE_BLOB:
3873  case DRIZZLE_TYPE_VARCHAR:
3874  /* Use the charset number to pick the right charset struct for
3875  the comparison. Since the MySQL function get_charset may be
3876  slow before Bar removes the mutex operation there, we first
3877  look at 2 common charsets directly. */
3878 
 /* NOTE(review): the comment above mentions two charsets, but
 only default_charset_info is checked before falling back to
 get_charset(). */
3879  if (charset_number == default_charset_info->number) {
3880  charset = default_charset_info;
3881  } else {
3882  charset = get_charset(charset_number);
3883 
3884  if (charset == NULL) {
3885  errmsg_printf(error::ERROR, "InnoDB needs charset %lu for doing "
3886  "a comparison, but MySQL cannot "
3887  "find that charset.",
3888  (ulong) charset_number);
 /* NOTE(review): charset is dereferenced below, so
 this relies on ut_a(0) not returning -- confirm. */
3889  ut_a(0);
3890  }
3891  }
3892 
3893  /* Starting from 4.1.3, we use strnncollsp() in comparisons of
3894  non-latin1_swedish_ci strings. NOTE that the collation order
3895  changes then: 'b\0\0...' is ordered BEFORE 'b ...'. Users
3896  having indexes on such data need to rebuild their tables! */
3897 
3898  ret = charset->coll->strnncollsp(charset,
3899  a, a_length,
3900  b, b_length, 0);
 /* Collapse any negative / positive result to -1 / 1. */
3901  if (ret < 0) {
3902  return(-1);
3903  } else if (ret > 0) {
3904  return(1);
3905  } else {
3906  return(0);
3907  }
3908  default:
3909  ut_error;
3910  }
3911 
 /* Reached only if control leaves the switch without returning,
 i.e. only if ut_error returns. */
3912  return(0);
3913 }
3914 
3915 /**************************************************************/
/* Maps a server-layer field to an InnoDB DATA_* main type.
Parameters: unsigned_flag - output, set to DATA_UNSIGNED or 0;
f - pointer that is reinterpreted as a const Field*.
Returns DATA_INT, DATA_BINARY, DATA_VARMYSQL, DATA_FIXBINARY, DATA_DOUBLE
or DATA_BLOB according to the field type; DRIZZLE_TYPE_NULL reaches
ut_error.
NOTE(review): listing lines 3922 (presumably the function name) and
3925-3928 are absent from this rendering. */
3920 UNIV_INTERN
3921 ulint
3923 /*==============================*/
3924  ulint* unsigned_flag,
3929  const void* f)
3930 {
3931  const class Field* field = reinterpret_cast<const class Field*>(f);
3932 
3933  /* The following asserts try to check that the MySQL type code fits in
3934  8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
3935  the type */
3936 
 /* Only DRIZZLE_TYPE_DOUBLE is actually checked here. */
3937  assert((ulint)DRIZZLE_TYPE_DOUBLE < 256);
3938 
3939  if (field->flags & UNSIGNED_FLAG) {
3940 
3941  *unsigned_flag = DATA_UNSIGNED;
3942  } else {
3943  *unsigned_flag = 0;
3944  }
3945 
 /* Enums are recognised through real_type() and returned before
 the switch on type() below. */
3946  if (field->real_type() == DRIZZLE_TYPE_ENUM)
3947  {
3948  /* MySQL has field->type() a string type for these, but the
3949  data is actually internally stored as an unsigned integer
3950  code! */
3951 
3952  *unsigned_flag = DATA_UNSIGNED; /* MySQL has its own unsigned
3953  flag set to zero, even though
3954  internally this is an unsigned
3955  integer type */
3956  return(DATA_INT);
3957  }
3958 
3959  switch (field->type()) {
3960  /* NOTE that we only allow string types in DATA_DRIZZLE and
3961  DATA_VARDRIZZLE */
 /* NOTE(review): the comment above names DATA_VARDRIZZLE while
 the code returns DATA_VARMYSQL; confirm which name is current. */
3962  case DRIZZLE_TYPE_VARCHAR: /* new >= 5.0.3 true VARCHAR */
3963  if (field->binary()) {
3964  return(DATA_BINARY);
3965  } else {
3966  return(DATA_VARMYSQL);
3967  }
3968  case DRIZZLE_TYPE_DECIMAL:
3969  case DRIZZLE_TYPE_MICROTIME:
3970  return(DATA_FIXBINARY);
3971  case DRIZZLE_TYPE_LONG:
3972  case DRIZZLE_TYPE_LONGLONG:
3973  case DRIZZLE_TYPE_DATETIME:
3974  case DRIZZLE_TYPE_TIME:
3975  case DRIZZLE_TYPE_DATE:
3976  case DRIZZLE_TYPE_TIMESTAMP:
3977  case DRIZZLE_TYPE_ENUM:
3978  return(DATA_INT);
3979  case DRIZZLE_TYPE_DOUBLE:
3980  return(DATA_DOUBLE);
3981  case DRIZZLE_TYPE_BLOB:
3982  return(DATA_BLOB);
3983  case DRIZZLE_TYPE_BOOLEAN:
3984  case DRIZZLE_TYPE_UUID:
3985  return(DATA_FIXBINARY);
3986  case DRIZZLE_TYPE_IPV6:
3987  return(DATA_FIXBINARY);
3988  case DRIZZLE_TYPE_NULL:
3989  ut_error;
3990  }
3991 
 /* Fallback for a type value not matched by any case above. */
3992  return(0);
3993 }
3994 
3995 /*******************************************************************/
/* Stores val into buf[0..1] in little-endian order: low byte first, high
byte second. val must be below 256 * 256 (enforced with ut_a).
NOTE(review): listing line 4000 (presumably the function name) is absent
from this rendering. */
3998 static inline
3999 void
4001 /*==============================*/
4002  byte* buf,
4003  ulint val)
4004 {
4005  ut_a(val < 256 * 256);
4006 
4007  buf[0] = (byte)(val & 0xFF);
4008  buf[1] = (byte)(val / 256);
4009 }
4010 
4011 /*******************************************************************/
/* Reads a 2-byte little-endian number from buf: buf[0] is the low byte
and buf[1] the high byte. Returns the value as uint.
NOTE(review): listing line 4017 (presumably the function name) is absent
from this rendering. */
4015 static inline
4016 uint
4018 /*===============================*/
4019  const unsigned char* buf)
4020 {
4021  return (uint) ((ulint)(buf[0]) + 256 * ((ulint)(buf[1])));
4022 }
4023 
4024 /*******************************************************************/
/* Packs the key columns of index keynr, taken from the row image record,
into buff using the key value format described in the body.
Parameters: keynr - position of the key in getTable()->key_info;
buff - output buffer, zero-filled on entry to the body;
buff_len - size of buff in bytes; record - row image to read from.
Returns the number of bytes of buff covered by the key value
(buff - buff_start after the last key part).
NOTE(review): listing lines 4029 (presumably the function name), 4033 and
4197 are absent from this rendering. */
4027 UNIV_INTERN
4028 uint
4030 /*===============================*/
4031  uint keynr,
4032  char* buff,
4034  uint buff_len,
4035  const unsigned char* record)
4036 {
4037  KeyInfo* key_info = &getTable()->key_info[keynr];
4038  KeyPartInfo* key_part = key_info->key_part;
4039  KeyPartInfo* end = key_part + key_info->key_parts;
4040  char* buff_start = buff;
4041  enum_field_types mysql_type;
4042  Field* field;
4043  ibool is_null;
4044 
4045  /* The format for storing a key field in MySQL is the following:
4046 
4047  1. If the column can be NULL, then in the first byte we put 1 if the
4048  field value is NULL, 0 otherwise.
4049 
4050  2. If the column is of a BLOB type (it must be a column prefix field
4051  in this case), then we put the length of the data in the field to the
4052  next 2 bytes, in the little-endian format. If the field is SQL NULL,
4053  then these 2 bytes are set to 0. Note that the length of data in the
4054  field is <= column prefix length.
4055 
4056  3. In a column prefix field, prefix_len next bytes are reserved for
4057  data. In a normal field the max field length next bytes are reserved
4058  for data. For a VARCHAR(n) the max field length is n. If the stored
4059  value is the SQL NULL then these data bytes are set to 0.
4060 
4061  4. We always use a 2 byte length for a true >= 5.0.3 VARCHAR. Note that
4062  in the MySQL row format, the length is stored in 1 or 2 bytes,
4063  depending on the maximum allowed length. But in the MySQL key value
4064  format, the length always takes 2 bytes.
4065 
4066  We have to zero-fill the buffer so that MySQL is able to use a
4067  simple memcmp to compare two key values to determine if they are
4068  equal. MySQL does this to compare contents of two 'ref' values. */
4069 
4070  bzero(buff, buff_len);
4071 
4072  for (; key_part != end; key_part++) {
4073  is_null = FALSE;
4074 
 /* Nullable key parts get one leading flag byte. */
4075  if (key_part->null_bit) {
4076  if (record[key_part->null_offset]
4077  & key_part->null_bit) {
4078  *buff = 1;
4079  is_null = TRUE;
4080  } else {
4081  *buff = 0;
4082  }
4083  buff++;
4084  }
4085 
4086  field = key_part->field;
4087  mysql_type = field->type();
4088 
4089  if (mysql_type == DRIZZLE_TYPE_VARCHAR) {
4090  /* >= 5.0.3 true VARCHAR */
4091  ulint lenlen;
4092  ulint len;
4093  const byte* data;
4094  ulint key_len;
4095  ulint true_len;
4096  const charset_info_st* cs;
4097  int error=0;
4098 
4099  key_len = key_part->length;
4100 
 /* NULL value: skip the 2 length bytes and the data area,
 leaving the zeros written by bzero() in place. */
4101  if (is_null) {
4102  buff += key_len + 2;
4103 
4104  continue;
4105  }
4106  cs = field->charset();
4107 
4108  lenlen = (ulint)
4109  (((Field_varstring*)field)->pack_length_no_ptr());
4110 
4111  data = row_mysql_read_true_varchar(&len,
4112  (byte*) (record
4113  + (ulint)get_field_offset(getTable(), field)),
4114  lenlen);
4115 
4116  true_len = len;
4117 
4118  /* For multi byte character sets we need to calculate
4119  the true length of the key */
4120 
 /* The error output of well_formed_len() is not examined. */
4121  if (len > 0 && cs->mbmaxlen > 1) {
4122  true_len = (ulint) cs->cset->well_formed_len(*cs, str_ref(data, len), (uint) (key_len / cs->mbmaxlen), &error);
4123  }
4124 
4125  /* In a column prefix index, we may need to truncate
4126  the stored value: */
4127 
4128  if (true_len > key_len) {
4129  true_len = key_len;
4130  }
4131 
4132  /* The length in a key value is always stored in 2
4133  bytes */
4134 
4135  row_mysql_store_true_var_len((byte*)buff, true_len, 2);
4136  buff += 2;
4137 
4138  memcpy(buff, data, true_len);
4139 
4140  /* Note that we always reserve the maximum possible
4141  length of the true VARCHAR in the key value, though
4142  only len first bytes after the 2 length bytes contain
4143  actual data. The rest of the space was reset to zero
4144  in the bzero() call above. */
4145 
4146  buff += key_len;
4147 
4148  } else if (mysql_type == DRIZZLE_TYPE_BLOB) {
4149 
4150  const charset_info_st* cs;
4151  ulint key_len;
4152  ulint true_len;
4153  int error=0;
4154  ulint blob_len;
4155  const byte* blob_data;
4156 
 /* A BLOB key part must be a column prefix. */
4157  ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
4158 
4159  key_len = key_part->length;
4160 
4161  if (is_null) {
4162  buff += key_len + 2;
4163 
4164  continue;
4165  }
4166 
4167  cs = field->charset();
4168 
4169  blob_data = row_mysql_read_blob_ref(&blob_len,
4170  (byte*) (record
4171  + (ulint)get_field_offset(getTable(), field)),
4172  (ulint) field->pack_length());
4173 
4174  true_len = blob_len;
4175 
4176  ut_a(get_field_offset(getTable(), field)
4177  == key_part->offset);
4178 
4179  /* For multi byte character sets we need to calculate
4180  the true length of the key */
4181 
4182  if (blob_len > 0 && cs->mbmaxlen > 1) {
4183  true_len = (ulint) cs->cset->well_formed_len(*cs, str_ref(blob_data, blob_len), (uint) (key_len / cs->mbmaxlen), &error);
4184  }
4185 
4186  /* All indexes on BLOB and TEXT are column prefix
4187  indexes, and we may need to truncate the data to be
4188  stored in the key value: */
4189 
4190  if (true_len > key_len) {
4191  true_len = key_len;
4192  }
4193 
4194  /* MySQL reserves 2 bytes for the length and the
4195  storage of the number is little-endian */
4196 
 /* NOTE(review): the opening of this call (listing line
 4197) is not visible; only its argument line remains. */
4198  (byte*)buff, true_len);
4199  buff += 2;
4200 
4201  memcpy(buff, blob_data, true_len);
4202 
4203  /* Note that we always reserve the maximum possible
4204  length of the BLOB prefix in the key value. */
4205 
4206  buff += key_len;
4207  } else {
4208  /* Here we handle all other data types except the
4209  true VARCHAR, BLOB and TEXT. Note that the column
4210  value we store may be also in a column prefix
4211  index. */
4212 
4213  ulint true_len;
4214  ulint key_len;
4215  const unsigned char* src_start;
4216  const charset_info_st* cs= field->charset();
4217 
4218  key_len = key_part->length;
4219 
4220  if (is_null) {
4221  buff += key_len;
4222 
4223  continue;
4224  }
4225 
4226  src_start = record + key_part->offset;
4227  true_len = key_len;
4228 
4229  /* Character set for the field is defined only
4230  to fields whose type is string and real field
4231  type is not enum or set. For these fields check
4232  if character set is multi byte. */
4233 
4234  memcpy(buff, src_start, true_len);
4235  buff += true_len;
4236 
4237  /* Pad the unused space with spaces. */
4238 
 /* NOTE(review): true_len was set equal to key_len above
 and is not changed in between, so this padding branch
 cannot run as the code stands. */
4239  if (true_len < key_len) {
4240  ulint pad_len = key_len - true_len;
4241  ut_a(!(pad_len % cs->mbminlen));
4242 
4243  cs->cset->fill(cs, buff, pad_len,
4244  0x20 /* space */);
4245  buff += pad_len;
4246  }
4247  }
4248  }
4249 
 /* NOTE(review): this bound is verified only after every key part
 has already been written into buff. */
4250  ut_a(buff <= buff_start + buff_len);
4251 
4252  return((uint)(buff - buff_start));
4253 }
4254 
4255 /**************************************************************/
/* Fills prebuilt->mysql_template with one entry per table column that has
to be fetched, and records the related settings in prebuilt
(template_type, null_bitmap_len, templ_contains_blob,
need_to_access_clustered, n_template, mysql_prefix_len).
Parameters: prebuilt - struct to fill; the unnamed Session* is not used;
table - server-layer table whose fields are described;
templ_type - requested template type, upgraded to ROW_MYSQL_WHOLE_ROW
when an exclusive lock is used or when all columns must be retrieved
outside a key-only read.
NOTE(review): listing lines 4260 (presumably the function name),
4264-4265 and 4268 are absent from this rendering. */
4258 static
4259 void
4261 /*===========*/
4262  row_prebuilt_t* prebuilt,
4263  Session* ,
4266  Table* table,
4267  uint templ_type)
4269 {
4270  dict_index_t* index;
4271  dict_index_t* clust_index;
4272  mysql_row_templ_t* templ;
4273  Field* field;
4274  ulint n_fields;
4275  ulint n_requested_fields = 0;
4276  ibool fetch_all_in_key = FALSE;
4277  ibool fetch_primary_key_cols = FALSE;
4278  ulint i= 0;
4279  /* byte offset of the end of last requested column */
4280  ulint mysql_prefix_len = 0;
4281 
4282  if (prebuilt->select_lock_type == LOCK_X) {
4283  /* We always retrieve the whole clustered index record if we
4284  use exclusive row level locks, for example, if the read is
4285  done in an UPDATE statement. */
4286 
4287  templ_type = ROW_MYSQL_WHOLE_ROW;
4288  }
4289 
4290  if (templ_type == ROW_MYSQL_REC_FIELDS) {
4291  if (prebuilt->hint_need_to_fetch_extra_cols
4292  == ROW_RETRIEVE_ALL_COLS) {
4293 
4294  /* We know we must at least fetch all columns in the
4295  key, or all columns in the table */
4296 
4297  if (prebuilt->read_just_key) {
4298  /* MySQL has instructed us that it is enough
4299  to fetch the columns in the key; looks like
4300  MySQL can set this flag also when there is
4301  only a prefix of the column in the key: in
4302  that case we retrieve the whole column from
4303  the clustered index */
4304 
4305  fetch_all_in_key = TRUE;
4306  } else {
4307  templ_type = ROW_MYSQL_WHOLE_ROW;
4308  }
4309  } else if (prebuilt->hint_need_to_fetch_extra_cols
4310  == ROW_RETRIEVE_PRIMARY_KEY) {
4311  /* We must at least fetch all primary key cols. Note
4312  that if the clustered index was internally generated
4313  by InnoDB on the row id (no primary key was
4314  defined), then row_search_for_mysql() will always
4315  retrieve the row id to a special buffer in the
4316  prebuilt struct. */
4317 
4318  fetch_primary_key_cols = TRUE;
4319  }
4320  }
4321 
4322  clust_index = dict_table_get_first_index(prebuilt->table);
4323 
 /* Field-by-field templates describe the index currently selected
 in prebuilt; whole-row templates describe the clustered index. */
4324  if (templ_type == ROW_MYSQL_REC_FIELDS) {
4325  index = prebuilt->index;
4326  } else {
4327  index = clust_index;
4328  }
4329 
4330  if (index == clust_index) {
4331  prebuilt->need_to_access_clustered = TRUE;
4332  } else {
4333  prebuilt->need_to_access_clustered = FALSE;
4334  /* Below we check column by column if we need to access
4335  the clustered index */
4336  }
4337 
4338  n_fields = (ulint)table->getShare()->sizeFields(); /* number of columns */
4339 
 /* The template array is allocated once, with room for every
 column, and kept for later calls. */
4340  if (!prebuilt->mysql_template) {
4341  prebuilt->mysql_template = (mysql_row_templ_t*)
4342  mem_alloc(n_fields * sizeof(mysql_row_templ_t));
4343  }
4344 
4345  prebuilt->template_type = templ_type;
4346  prebuilt->null_bitmap_len = table->getShare()->null_bytes;
4347 
4348  prebuilt->templ_contains_blob = FALSE;
4349 
4350  /* Note that in InnoDB, i is the column number. MySQL calls columns
4351  'fields'. */
4352  for (i = 0; i < n_fields; i++)
4353  {
4354  const dict_col_t *col= &index->table->cols[i];
 /* templ points at the next free slot; the slot is only
 consumed when the column reaches include_field. */
4355  templ = prebuilt->mysql_template + n_requested_fields;
4356  field = table->getField(i);
4357 
4358  if (UNIV_LIKELY(templ_type == ROW_MYSQL_REC_FIELDS)) {
4359  /* Decide which columns we should fetch
4360  and which we can skip. */
4361  register const ibool index_contains_field =
4362  dict_index_contains_col_or_prefix(index, i);
4363 
4364  if (!index_contains_field && prebuilt->read_just_key) {
4365  /* If this is a 'key read', we do not need
4366  columns that are not in the key */
4367 
4368  goto skip_field;
4369  }
4370 
4371  if (index_contains_field && fetch_all_in_key) {
4372  /* This field is needed in the query */
4373 
4374  goto include_field;
4375  }
4376 
4377  if (field->isReadSet() || field->isWriteSet())
4378  /* This field is needed in the query */
4379  goto include_field;
4380 
 /* These asserts are reached only when the field is in
 neither the read set nor the write set, because of the
 goto directly above. */
4381  assert(table->isReadSet(i) == field->isReadSet());
4382  assert(table->isWriteSet(i) == field->isWriteSet());
4383 
4384  if (fetch_primary_key_cols
4385  && dict_table_col_in_clustered_key(
4386  index->table, i)) {
4387  /* This field is needed in the query */
4388 
4389  goto include_field;
4390  }
4391 
4392  /* This field is not needed in the query, skip it */
4393 
4394  goto skip_field;
4395  }
4396 include_field:
4397  n_requested_fields++;
4398 
4399  templ->col_no = i;
4400  templ->clust_rec_field_no = dict_col_get_clust_pos(col, clust_index);
4401  ut_ad(templ->clust_rec_field_no != ULINT_UNDEFINED);
4402 
4403  if (index == clust_index) {
4404  templ->rec_field_no = templ->clust_rec_field_no;
4405  } else {
4406  templ->rec_field_no = dict_index_get_nth_col_pos(
4407  index, i);
 /* A requested column that the chosen index does not
 contain forces a clustered index lookup. */
4408  if (templ->rec_field_no == ULINT_UNDEFINED) {
4409  prebuilt->need_to_access_clustered = TRUE;
4410  }
4411  }
4412 
 /* mysql_null_byte_offset is written only for nullable
 fields; a zero bit mask marks the others. */
4413  if (field->null_ptr) {
4414  templ->mysql_null_byte_offset =
4415  (ulint) ((char*) field->null_ptr
4416  - (char*) table->getInsertRecord());
4417 
4418  templ->mysql_null_bit_mask = (ulint) field->null_bit;
4419  } else {
4420  templ->mysql_null_bit_mask = 0;
4421  }
4422 
4423  templ->mysql_col_offset = (ulint)
4424  get_field_offset(table, field);
4425 
4426  templ->mysql_col_len = (ulint) field->pack_length();
 /* Track the furthest end offset of any requested column. */
4427  if (mysql_prefix_len < templ->mysql_col_offset
4428  + templ->mysql_col_len) {
4429  mysql_prefix_len = templ->mysql_col_offset
4430  + templ->mysql_col_len;
4431  }
4432  templ->type = col->mtype;
4433  templ->mysql_type = (ulint)field->type();
4434 
 /* NOTE(review): mysql_type holds the value of field->type()
 but is compared against DATA_MYSQL_TRUE_VARCHAR; confirm the
 two enumerations agree. mysql_length_bytes is left unset for
 every other type. */
4435  if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
4436  templ->mysql_length_bytes = (ulint)
4437  (((Field_varstring*)field)->pack_length_no_ptr());
4438  }
4439 
4440  templ->charset = dtype_get_charset_coll(col->prtype);
4441  templ->mbminlen = dict_col_get_mbminlen(col);
4442  templ->mbmaxlen = dict_col_get_mbmaxlen(col);
4443  templ->is_unsigned = col->prtype & DATA_UNSIGNED;
4444  if (templ->type == DATA_BLOB) {
4445  prebuilt->templ_contains_blob = TRUE;
4446  }
4447 skip_field:
4448  ;
4449  }
4450 
4451  prebuilt->n_template = n_requested_fields;
4452  prebuilt->mysql_prefix_len = mysql_prefix_len;
4453 
4454  if (index != clust_index && prebuilt->need_to_access_clustered) {
4455  /* Change rec_field_no's to correspond to the clustered index
4456  record */
4457  for (i = 0; i < n_requested_fields; i++) {
4458  templ = prebuilt->mysql_template + i;
4459 
4460  templ->rec_field_no = templ->clust_rec_field_no;
4461  }
4462  }
4463 }
4464 
4465 /********************************************************************/
4472 UNIV_INTERN
4473 ulint
4475 /*====================================*/
4476 {
4477  ulint error = DB_SUCCESS;
4478 
4479  dict_table_autoinc_lock(prebuilt->table);
4480 
4481  return(ulong(error));
4482 }
4483 
4484 /********************************************************************/
4487 UNIV_INTERN
4488 ulint
4490 /*================================*/
4491  uint64_t autoinc)
4492 {
4493  dict_table_autoinc_lock(prebuilt->table);
4494  dict_table_autoinc_initialize(prebuilt->table, autoinc);
4495  dict_table_autoinc_unlock(prebuilt->table);
4496 
4497  return(ulong(DB_SUCCESS));
4498 }
4499 
4500 /********************************************************************/
4504 UNIV_INTERN
4505 ulint
4507 /*==================================*/
4508  uint64_t auto_inc)
4509 {
4510  dict_table_autoinc_lock(prebuilt->table);
4511  dict_table_autoinc_update_if_greater(prebuilt->table, auto_inc);
4512  dict_table_autoinc_unlock(prebuilt->table);
4513 
4514  return(ulong(DB_SUCCESS));
4515 }
4516 
4517 /********************************************************************/
4521 UNIV_INTERN
4522 int
4524 /*===================*/
4525  unsigned char* record)
4526 {
4527  ulint error = 0;
4528  int error_result= 0;
4529  ibool auto_inc_used= FALSE;
4530  ulint sql_command;
4532 
4533  if (prebuilt->trx != trx) {
4534  errmsg_printf(error::ERROR, "The transaction object for the table handle is at "
4535  "%p, but for the current thread it is at %p",
4536  (const void*) prebuilt->trx, (const void*) trx);
4537 
4538  fputs("InnoDB: Dump of 200 bytes around prebuilt: ", stderr);
4539  ut_print_buf(stderr, ((const byte*)prebuilt) - 100, 200);
4540  fputs("\n"
4541  "InnoDB: Dump of 200 bytes around ha_data: ",
4542  stderr);
4543  ut_print_buf(stderr, ((const byte*) trx) - 100, 200);
4544  putc('\n', stderr);
4545  ut_error;
4546  }
4547 
4548  sql_command = user_session->getSqlCommand();
4549 
4550  if ((sql_command == SQLCOM_ALTER_TABLE
4551  || sql_command == SQLCOM_CREATE_INDEX
4552  || sql_command == SQLCOM_DROP_INDEX)
4553  && num_write_row >= 10000) {
4554  /* ALTER TABLE is COMMITted at every 10000 copied rows.
4555  The IX table lock for the original table has to be re-issued.
4556  As this method will be called on a temporary table where the
4557  contents of the original table is being copied to, it is
4558  a bit tricky to determine the source table. The cursor
4559  position in the source table need not be adjusted after the
4560  intermediate COMMIT, since writes by other transactions are
4561  being blocked by a MySQL table lock TL_WRITE_ALLOW_READ. */
4562 
4563  dict_table_t* src_table;
4564  enum lock_mode mode;
4565 
4566  num_write_row = 0;
4567 
4568  /* Commit the transaction. This will release the table
4569  locks, so they have to be acquired again. */
4570 
4571  /* Altering an InnoDB table */
4572  /* Get the source table. */
4573  src_table = lock_get_src_table(
4574  prebuilt->trx, prebuilt->table, &mode);
4575  if (!src_table) {
4576 no_commit:
4577  /* Unknown situation: do not commit */
4578  /*
4579  ut_print_timestamp(stderr);
4580  fprintf(stderr,
4581  " InnoDB: ALTER TABLE is holding lock"
4582  " on %lu tables!\n",
4583  prebuilt->trx->mysql_n_tables_locked);
4584  */
4585  ;
4586  } else if (src_table == prebuilt->table) {
4587  /* Source table is not in InnoDB format:
4588  no need to re-acquire locks on it. */
4589 
4590  /* Altering to InnoDB format */
4591  getTransactionalEngine()->commit(user_session, 1);
4592  /* We will need an IX lock on the destination table. */
4593  prebuilt->sql_stat_start = TRUE;
4594  } else {
4595  /* Ensure that there are no other table locks than
4596  LOCK_IX and LOCK_AUTO_INC on the destination table. */
4597 
4599  prebuilt->trx)) {
4600  goto no_commit;
4601  }
4602 
4603  /* Commit the transaction. This will release the table
4604  locks, so they have to be acquired again. */
4605  getTransactionalEngine()->commit(user_session, 1);
4606  /* Re-acquire the table lock on the source table. */
4607  row_lock_table_for_mysql(prebuilt, src_table, mode);
4608  /* We will need an IX lock on the destination table. */
4609  prebuilt->sql_stat_start = TRUE;
4610  }
4611  }
4612 
4613  num_write_row++;
4614 
4615  /* This is the case where the table has an auto-increment column */
4616  if (getTable()->next_number_field && record == getTable()->getInsertRecord()) {
4617 
4618  /* Reset the error code before calling
4619  innobase_get_auto_increment(). */
4620  prebuilt->autoinc_error = DB_SUCCESS;
4621 
4622  if ((error = update_auto_increment())) {
4623  /* We don't want to mask autoinc overflow errors. */
4624 
4625  /* Handle the case where the AUTOINC sub-system
4626  failed during initialization. */
4627  if (prebuilt->autoinc_error == DB_UNSUPPORTED) {
4628  error_result = ER_AUTOINC_READ_FAILED;
4629  /* Set the error message to report too. */
4630  my_error(ER_AUTOINC_READ_FAILED, MYF(0));
4631  goto func_exit;
4632  } else if (prebuilt->autoinc_error != DB_SUCCESS) {
4633  error = (int) prebuilt->autoinc_error;
4634 
4635  goto report_error;
4636  }
4637 
4638  /* MySQL errors are passed straight back. */
4639  error_result = (int) error;
4640  goto func_exit;
4641  }
4642 
4643  auto_inc_used = TRUE;
4644  }
4645 
4646  if (prebuilt->mysql_template == NULL
4647  || prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
4648 
4649  /* Build the template used in converting quickly between
4650  the two database formats */
4651 
4652  build_template(prebuilt, NULL, getTable(), ROW_MYSQL_WHOLE_ROW);
4653  }
4654 
4656 
4657  error = row_insert_for_mysql((byte*) record, prebuilt);
4658 
4659  user_session->setXaId(trx->id);
4660 
4661  /* Handle duplicate key errors */
4662  if (auto_inc_used) {
4663  ulint err;
4664  uint64_t auto_inc;
4665  uint64_t col_max_value;
4666 
4667  /* Note the number of rows processed for this statement, used
4668  by get_auto_increment() to determine the number of AUTO-INC
4669  values to reserve. This is only useful for a mult-value INSERT
4670  and is a statement level counter.*/
4671  if (trx->n_autoinc_rows > 0) {
4672  --trx->n_autoinc_rows;
4673  }
4674 
4675  /* We need the upper limit of the col type to check for
4676  whether we update the table autoinc counter or not. */
4677  col_max_value = innobase_get_int_col_max_value(
4678  getTable()->next_number_field);
4679  /* Get the value that MySQL attempted to store in the table.*/
4680  auto_inc = getTable()->next_number_field->val_int();
4681 
4682  switch (error) {
4683  case DB_DUPLICATE_KEY:
4684 
4685  /* A REPLACE command and LOAD DATA INFILE REPLACE
4686  handle a duplicate key error themselves, but we
4687  must update the autoinc counter if we are performing
4688  those statements. */
4689 
4690  switch (sql_command) {
4691  case SQLCOM_LOAD:
4692  if ((trx->duplicates
4693  & (TRX_DUP_IGNORE | TRX_DUP_REPLACE))) {
4694 
4695  goto set_max_autoinc;
4696  }
4697  break;
4698 
4699  case SQLCOM_REPLACE:
4700  case SQLCOM_INSERT_SELECT:
4701  case SQLCOM_REPLACE_SELECT:
4702  goto set_max_autoinc;
4703 
4704  default:
4705  break;
4706  }
4707 
4708  break;
4709 
4710  case DB_SUCCESS:
4711  /* If the actual value inserted is greater than
4712  the upper limit of the interval, then we try and
4713  update the table upper limit. Note: last_value
4714  will be 0 if get_auto_increment() was not called.*/
4715 
4716  if (auto_inc >= prebuilt->autoinc_last_value) {
4717 set_max_autoinc:
4718  /* This should filter out the negative
4719  values set explicitly by the user. */
4720  if (auto_inc <= col_max_value) {
4722 
4723  uint64_t need;
4724  uint64_t offset;
4725 
4726  offset = prebuilt->autoinc_offset;
4727  need = prebuilt->autoinc_increment;
4728 
4729  auto_inc = innobase_next_autoinc(
4730  auto_inc,
4731  need, offset, col_max_value);
4732 
4734  auto_inc);
4735 
4736  if (err != DB_SUCCESS) {
4737  error = err;
4738  }
4739  }
4740  }
4741  break;
4742  }
4743  }
4744 
4746 
4747 report_error:
4748  error_result = convert_error_code_to_mysql((int) error,
4749  prebuilt->table->flags,
4750  user_session);
4751 
4752 func_exit:
4754 
4755  return(error_result);
4756 }
4757 
4758 /**********************************************************************
Compares the old and the new image of a row field by field and fills in
an update vector with the fields whose value (or NULL-ness) differs.
For DATA_BLOB columns, and for DATA_VARCHAR/DATA_BINARY/DATA_VARMYSQL
columns whose field type is DRIZZLE_TYPE_VARCHAR, the comparison is made
on the payload that o_ptr/n_ptr are redirected to, not on the packed field.
@param uvect update vector to fill; n_fields and info_bits are set here
@param old_row old row image in handler format
@param new_row new row image in handler format
@param table table object supplying the field definitions
@param upd_buff scratch buffer used when converting changed fields
@param buff_len length of upd_buff
@param prebuilt prebuilt struct; its table provides the column metadata
@param (unnamed Session*) not used
@return always 0 */
4762 static
4763 int
4765 /*================*/
4766  upd_t* uvect,
4767  unsigned char* old_row,
4768  unsigned char* new_row,
4769  Table* table,
4771  unsigned char* upd_buff,
4772  ulint buff_len,
4773  row_prebuilt_t* prebuilt,
4774  Session* )
4775 {
4776  unsigned char* original_upd_buff = upd_buff;
4777  enum_field_types field_mysql_type;
4778  uint n_fields;
4779  ulint o_len;
4780  ulint n_len;
4781  ulint col_pack_len;
4782  const byte* new_mysql_row_col;
4783  const byte* o_ptr;
4784  const byte* n_ptr;
4785  byte* buf;
4786  upd_field_t* ufield;
4787  ulint col_type;
4788  ulint n_changed = 0;
4789  dfield_t dfield;
4790  dict_index_t* clust_index;
4791  uint i= 0;
4792 
4793  n_fields = table->getShare()->sizeFields();
4794  clust_index = dict_table_get_first_index(prebuilt->table);
4795 
4796  /* We use upd_buff to convert changed fields */
4797  buf = (byte*) upd_buff;
4798 
4799  for (i = 0; i < n_fields; i++) {
4800  Field *field= table->getField(i);
4801 
4802  o_ptr = (const byte*) old_row + get_field_offset(table, field);
4803  n_ptr = (const byte*) new_row + get_field_offset(table, field);
4804 
4805  /* Save the unmodified pointer to the new value and the packed
 length in new_mysql_row_col and col_pack_len: o_ptr, n_ptr, o_len
 and n_len may be redirected below, but the conversion of a changed
 field needs the original values. */
4806 
4807  new_mysql_row_col = n_ptr;
4808  col_pack_len = field->pack_length();
4809 
4810  o_len = col_pack_len;
4811  n_len = col_pack_len;
4812 
4813  /* We use o_ptr and n_ptr to dig up the actual data for
4814  comparison. */
4815 
4816  field_mysql_type = field->type();
4817 
4818  col_type = prebuilt->table->cols[i].mtype;
4819 
4820  switch (col_type) {
4821 
4822  case DATA_BLOB:
4823  o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
4824  n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
4825 
4826  break;
4827 
4828  case DATA_VARCHAR:
4829  case DATA_BINARY:
4830  case DATA_VARMYSQL:
4831  if (field_mysql_type == DRIZZLE_TYPE_VARCHAR) {
4832  /* This is a >= 5.0.3 type true VARCHAR where
4833  the real payload data length is stored in
4834  1 or 2 bytes */
4835 
 /* NOTE(review): the first line of each of the two calls
 below is missing from this listing; presumably they assign
 o_ptr and n_ptr respectively - confirm against the full
 source. */
4837  &o_len, o_ptr,
4838  (ulint)
4839  (((Field_varstring*)field)->pack_length_no_ptr()));
4840 
4842  &n_len, n_ptr,
4843  (ulint)
4844  (((Field_varstring*)field)->pack_length_no_ptr()));
4845  }
4846 
4847  break;
4848  default:
4849  ;
4850  }
4851 
 /* For nullable fields a NULL value is represented by the length
 UNIV_SQL_NULL, so a NULL <-> non-NULL transition shows up as a
 length mismatch in the comparison below. */
4852  if (field->null_ptr) {
4853  if (field_in_record_is_null(table, field,
4854  (char*) old_row)) {
4855  o_len = UNIV_SQL_NULL;
4856  }
4857 
4858  if (field_in_record_is_null(table, field,
4859  (char*) new_row)) {
4860  n_len = UNIV_SQL_NULL;
4861  }
4862  }
4863 
4864  if (o_len != n_len || (o_len != UNIV_SQL_NULL &&
4865  0 != memcmp(o_ptr, n_ptr, o_len))) {
4866  /* The field has changed */
4867 
4868  ufield = uvect->fields + n_changed;
4869 
4870  /* Let us use a dummy dfield to make the conversion
4871  from the MySQL column format to the InnoDB format */
4872 
4873  dict_col_copy_type(prebuilt->table->cols + i,
4874  &dfield.type);
4875 
4876  if (n_len != UNIV_SQL_NULL) {
 /* NOTE(review): the first line of the call below is
 missing from this listing. buf is not visibly
 reassigned anywhere else in this function, so the
 missing line presumably advances it - confirm. */
4878  &dfield,
4879  (byte*)buf,
4880  TRUE,
4881  new_mysql_row_col,
4882  col_pack_len,
4883  dict_table_is_comp(prebuilt->table));
4884  dfield_copy_data(&ufield->new_val, &dfield);
4885  } else {
4886  dfield_set_null(&ufield->new_val);
4887  }
4888 
4889  ufield->exp = NULL;
4890  ufield->orig_len = 0;
4891  ufield->field_no = dict_col_get_clust_pos(
4892  &prebuilt->table->cols[i], clust_index);
4893  n_changed++;
4894  }
4895  }
4896 
4897  uvect->n_fields = n_changed;
4898  uvect->info_bits = 0;
4899 
 /* Sanity check: the conversion buffer must not have been overrun. */
4900  ut_a(buf <= (byte*)original_upd_buff + buff_len);
4901 
4902  return(0);
4903 }
4904 
4905 /**********************************************************************/
4913 UNIV_INTERN
4914 int
4916 /*====================*/
4917  const unsigned char* old_row,
4918  unsigned char* new_row)
4919 {
4920  upd_t* uvect;
4921  int error = 0;
4923 
4924  ut_a(prebuilt->trx == trx);
4925 
4926  if (prebuilt->upd_node) {
4927  uvect = prebuilt->upd_node->update;
4928  } else {
4930  }
4931 
4932  /* Build an update vector from the modified fields in the rows
4933  (uses upd_buff of the handle) */
4934 
4935  calc_row_difference(uvect, (unsigned char*) old_row, new_row, getTable(),
4936  &upd_buff[0], (ulint)upd_and_key_val_buff_len,
4938 
4939  /* This is not a delete */
4940  prebuilt->upd_node->is_delete = FALSE;
4941 
4942  ut_a(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
4943 
4944  if (getTable()->found_next_number_field)
4945  {
4946  uint64_t auto_inc;
4947  uint64_t col_max_value;
4948 
4949  auto_inc = getTable()->found_next_number_field->val_int();
4950 
4951  /* We need the upper limit of the col type to check for
4952  whether we update the table autoinc counter or not. */
4953  col_max_value = innobase_get_int_col_max_value(
4954  getTable()->found_next_number_field);
4955 
4956  uint64_t current_autoinc;
4957  ulint autoinc_error= innobase_get_autoinc(&current_autoinc);
4958  if (autoinc_error == DB_SUCCESS
4959  && auto_inc <= col_max_value && auto_inc != 0
4960  && auto_inc >= current_autoinc)
4961  {
4962 
4963  uint64_t need;
4964  uint64_t offset;
4965 
4966  offset = prebuilt->autoinc_offset;
4967  need = prebuilt->autoinc_increment;
4968 
4969  auto_inc = innobase_next_autoinc(
4970  auto_inc, need, offset, col_max_value);
4971 
4972  dict_table_autoinc_update_if_greater(prebuilt->table, auto_inc);
4973  }
4974 
4975  dict_table_autoinc_unlock(prebuilt->table);
4976  }
4977 
4979 
4980  error = row_update_for_mysql((byte*) old_row, prebuilt);
4981 
4982  user_session->setXaId(trx->id);
4983 
4984  /* We need to do some special AUTOINC handling for the following case:
4985 
4986  INSERT INTO t (c1,c2) VALUES(x,y) ON DUPLICATE KEY UPDATE ...
4987 
4988  We need to use the AUTOINC counter that was actually used by
4989  MySQL in the UPDATE statement, which can be different from the
4990  value used in the INSERT statement.*/
4991 
4992  if (error == DB_SUCCESS
4993  && getTable()->next_number_field
4994  && new_row == getTable()->getInsertRecord()
4995  && user_session->getSqlCommand() == SQLCOM_INSERT
4996  && (trx->duplicates & (TRX_DUP_IGNORE | TRX_DUP_REPLACE))
4997  == TRX_DUP_IGNORE) {
4998 
4999  uint64_t auto_inc;
5000  uint64_t col_max_value;
5001 
5002  auto_inc = getTable()->next_number_field->val_int();
5003 
5004  /* We need the upper limit of the col type to check for
5005  whether we update the table autoinc counter or not. */
5006  col_max_value = innobase_get_int_col_max_value(
5007  getTable()->next_number_field);
5008 
5009  if (auto_inc <= col_max_value && auto_inc != 0) {
5010 
5011  uint64_t need;
5012  uint64_t offset;
5013 
5014  offset = prebuilt->autoinc_offset;
5015  need = prebuilt->autoinc_increment;
5016 
5017  auto_inc = innobase_next_autoinc(
5018  auto_inc, need, offset, col_max_value);
5019 
5020  error = innobase_set_max_autoinc(auto_inc);
5021  }
5022  }
5023 
5025 
5026  error = convert_error_code_to_mysql(error,
5027  prebuilt->table->flags,
5028  user_session);
5029 
5030  if (error == 0 /* success */
5031  && uvect->n_fields == 0 /* no columns were updated */) {
5032 
5033  /* This is the same as success, but instructs
5034  MySQL that the row is not really updated and it
5035  should not increase the count of updated rows.
5036  This is fix for http://bugs.mysql.com/29157 */
5037  error = HA_ERR_RECORD_IS_THE_SAME;
5038  }
5039 
5040  /* Tell InnoDB server that there might be work for
5041  utility threads: */
5042 
5044 
5045  return(error);
5046 }
5047 
5048 /**********************************************************************/
5051 UNIV_INTERN
5052 int
5054 /*====================*/
5055  const unsigned char* record)
5056 {
5057  int error = 0;
5059 
5060  ut_a(prebuilt->trx == trx);
5061 
5062  if (!prebuilt->upd_node) {
5064  }
5065 
5066  /* This is a delete */
5067 
5068  prebuilt->upd_node->is_delete = TRUE;
5069 
5071 
5072  error = row_update_for_mysql((byte*) record, prebuilt);
5073 
5074  user_session->setXaId(trx->id);
5075 
5077 
5079  error, prebuilt->table->flags, user_session);
5080 
5081  /* Tell the InnoDB server that there might be work for
5082  utility threads: */
5083 
5085 
5086  return(error);
5087 }
5088 
5089 /**********************************************************************/
5093 UNIV_INTERN
5094 void
5096 /*=========================*/
5097 {
5098  /* Consistent read does not take any locks, thus there is
5099  nothing to unlock. */
5100 
5101  if (prebuilt->select_lock_type == LOCK_NONE) {
5102  return;
5103  }
5104 
5105  switch (prebuilt->row_read_type) {
5106  case ROW_READ_WITH_LOCKS:
5108  && prebuilt->trx->isolation_level
5109  > TRX_ISO_READ_COMMITTED) {
5110  break;
5111  }
5112  /* fall through */
5113  case ROW_READ_TRY_SEMI_CONSISTENT:
5115  break;
5116  case ROW_READ_DID_SEMI_CONSISTENT:
5117  prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
5118  break;
5119  }
5120 
5121  return;
5122 }
5123 
5124 /* See Cursor.h and row0mysql.h for docs on this function. */
5125 UNIV_INTERN
5126 bool
5128 /*=======================================*/
5129 {
5130  return(prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT);
5131 }
5132 
5133 /* See Cursor.h and row0mysql.h for docs on this function. */
5134 UNIV_INTERN
5135 void
5137 /*===========================================*/
5138 {
5139  ut_a(prebuilt->trx == session_to_trx(getTable()->in_use));
5140 
5141  /* Row read type is set to semi consistent read if this was
5142  requested by the MySQL and either innodb_locks_unsafe_for_binlog
5143  option is used or this session is using READ COMMITTED isolation
5144  level. */
5145 
5146  if (yes
5148  || prebuilt->trx->isolation_level <= TRX_ISO_READ_COMMITTED)) {
5149  prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
5150  } else {
5151  prebuilt->row_read_type = ROW_READ_WITH_LOCKS;
5152  }
5153 }
5154 
5155 /******************************************************************/
5158 UNIV_INTERN
5159 int
5161 /*====================*/
5162  uint keynr,
5163  bool )
5164 {
5165  return(change_active_index(keynr));
5166 }
5167 
5168 /******************************************************************/
5171 UNIV_INTERN
5172 int
5174 /*========================*/
5175 {
5176  int error = 0;
5177  active_index=MAX_KEY;
5178  return(error);
5179 }
5180 
5181 /*********************************************************************
Translates a handler search flag (enum ha_rkey_function) into the InnoDB
page cursor search mode:
 HA_READ_KEY_EXACT, HA_READ_KEY_OR_NEXT, HA_READ_PREFIX -> PAGE_CUR_GE
 HA_READ_KEY_OR_PREV, HA_READ_PREFIX_LAST,
 HA_READ_PREFIX_LAST_OR_PREV -> PAGE_CUR_LE
 HA_READ_AFTER_KEY -> PAGE_CUR_G
 HA_READ_BEFORE_KEY -> PAGE_CUR_L
 HA_READ_MBR_* -> PAGE_CUR_UNSUPP
@param find_flag search flag to translate
@return the PAGE_CUR_* search mode; PAGE_CUR_UNSUPP if the flag is not
supported */
5184 static inline
5185 ulint
5187 /*============================*/
5188  enum ha_rkey_function find_flag)
5189 {
5190  switch (find_flag) {
5191  case HA_READ_KEY_EXACT:
5192  /* this does not require the index to be UNIQUE */
5193  return(PAGE_CUR_GE);
5194  case HA_READ_KEY_OR_NEXT:
5195  return(PAGE_CUR_GE);
5196  case HA_READ_KEY_OR_PREV:
5197  return(PAGE_CUR_LE);
5198  case HA_READ_AFTER_KEY:
5199  return(PAGE_CUR_G);
5200  case HA_READ_BEFORE_KEY:
5201  return(PAGE_CUR_L);
5202  case HA_READ_PREFIX:
5203  return(PAGE_CUR_GE);
5204  case HA_READ_PREFIX_LAST:
5205  return(PAGE_CUR_LE);
5206  case HA_READ_PREFIX_LAST_OR_PREV:
5207  return(PAGE_CUR_LE);
5208  /* In MySQL-4.0 HA_READ_PREFIX and HA_READ_PREFIX_LAST always
5209  pass a complete-field prefix of a key value as the search
5210  tuple. I.e., it is not allowed that the last field would
5211  just contain n first bytes of the full field value.
5212  MySQL uses a 'padding' trick to convert LIKE 'abc%'
5213  type queries so that it can use as a search tuple
5214  a complete-field-prefix of a key value. Thus, the InnoDB
5215  search mode PAGE_CUR_LE_OR_EXTENDS is never used.
5216  TODO: when/if MySQL starts to use also partial-field
5217  prefixes, we have to deal with stripping of spaces
5218  and comparison of non-latin1 char type fields in
5219  innobase_mysql_cmp() to get PAGE_CUR_LE_OR_EXTENDS to
5220  work correctly. */
5221  case HA_READ_MBR_CONTAIN:
5222  case HA_READ_MBR_INTERSECT:
5223  case HA_READ_MBR_WITHIN:
5224  case HA_READ_MBR_DISJOINT:
5225  case HA_READ_MBR_EQUAL:
5226  return(PAGE_CUR_UNSUPP);
5227  /* do not use "default:" in order to produce a gcc warning:
5228  enumeration value '...' not handled in switch
5229  (if -Wswitch or -Wall is used) */
5230  }
5231 
 /* Reached only when find_flag matched none of the cases above:
 raise a "not implemented" error and report the mode as unsupported. */
5232  my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "this functionality");
5233 
5234  return(PAGE_CUR_UNSUPP);
5235 }
5236 
5237 /*
5238  BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED
5239  ---------------------------------------------------
5240 The following does not cover all the details, but explains how we determine
5241 the start of a new SQL statement, and what is associated with it.
5242 
5243 For each table in the database the MySQL interpreter may have several
5244 table handle instances in use, also in a single SQL query. For each table
5245 handle instance there is an InnoDB 'prebuilt' struct which contains most
5246 of the InnoDB data associated with this table handle instance.
5247 
5248  A) if the user has not explicitly set any MySQL table level locks:
5249 
5250  1) Drizzle calls StorageEngine::doStartStatement(), indicating to
5251  InnoDB that a new SQL statement has begun.
5252 
5253  2a) For each InnoDB-managed table in the SELECT, Drizzle calls ::external_lock
5254  to set an 'intention' table level lock on the table of the Cursor instance.
5255  There we set prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should
5256  be set true if we are taking this table handle instance to use in a new SQL
5257  statement issued by the user.
5258 
5259  2b) If prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search
5260 instructions to prebuilt->template of the table handle instance in
5261 ::index_read. The template is used to save CPU time in large joins.
5262 
5263  3) In row_search_for_mysql, if prebuilt->sql_stat_start is true, we
5264 allocate a new consistent read view for the trx if it does not yet have one,
5265 or in the case of a locking read, set an InnoDB 'intention' table level
5266 lock on the table.
5267 
5268  4) We do the SELECT. MySQL may repeatedly call ::index_read for the
5269 same table handle instance, if it is a join.
5270 
5271 5) When the SELECT ends, the Drizzle kernel calls doEndStatement()
5272 
5273  (a) we execute a COMMIT there if the autocommit is on. The Drizzle interpreter
5274  does NOT execute autocommit for pure read transactions, though it should.
5275  That is why we must execute the COMMIT in ::doEndStatement().
5276  (b) we also release possible 'SQL statement level resources' InnoDB may
5277  have for this SQL statement.
5278 
5279  @todo
5280 
5281  Remove need for InnoDB to call autocommit for read-only trx
5282 
5283  @todo Check the below is still valid (I don't think it is...)
5284 
5285  B) If the user has explicitly set MySQL table level locks, then MySQL
5286 does NOT call ::external_lock at the start of the statement. To determine
5287 when we are at the start of a new SQL statement, at the start of
5288 ::index_read we also compare the query id with the latest query id for which
5289 the table handle instance was used. If it has changed, we know we are at the
5290 start of a new SQL statement. Since the query id can theoretically
5291 wrap around, we use this test only as a secondary way of determining the
5292 start of a new SQL statement. */
5293 
5294 
5295 /**********************************************************************
Positions the cursor on prebuilt->index according to a search key and a
search flag, and fetches the row found into buf. Also increments the
ha_read_key_count statistic and records the match mode in last_match_mode.
@param buf buffer that receives the row
@param key_ptr search key in handler format; if NULL, the search tuple is
 emptied and the cursor is positioned on the first or the last index entry
@param key_len length of the key passed in key_ptr
@param find_flag search flag, translated with
 convert_search_mode_to_innobase()
@return 0 on success; HA_ERR_CRASHED if prebuilt->index is NULL;
HA_ERR_TABLE_DEF_CHANGED if the index is not usable; HA_ERR_KEY_NOT_FOUND
if no record was found; otherwise the result of
convert_error_code_to_mysql() */
5299 UNIV_INTERN
5300 int
5302 /*====================*/
5303  unsigned char* buf,
5305  const unsigned char* key_ptr,
5314  uint key_len,
5315  enum ha_rkey_function find_flag)
5316 {
5317  ulint mode;
5318  dict_index_t* index;
5319  ulint match_mode = 0;
5320  int error;
5321  ulint ret;
5322 
5324 
5325  ha_statistic_increment(&system_status_var::ha_read_key_count);
5326 
5327  index = prebuilt->index;
5328 
5329  if (UNIV_UNLIKELY(index == NULL)) {
5330  prebuilt->index_usable = FALSE;
5331  return(HA_ERR_CRASHED);
5332  }
5333 
5334  if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
5335  return(HA_ERR_TABLE_DEF_CHANGED);
5336  }
5337 
5338  /* Note that the index for which the search template is built is not
5339  necessarily prebuilt->index; it can also be the clustered index */
5340 
5341  if (prebuilt->sql_stat_start) {
5342  build_template(prebuilt, user_session, getTable(),
5343  ROW_MYSQL_REC_FIELDS);
5344  }
5345 
5346  if (key_ptr) {
5347  /* Convert the search key value to InnoDB format into
5348  prebuilt->search_tuple */
5349 
 /* NOTE(review): the opening line(s) of the conversion call are
 missing from this listing; only its trailing arguments follow. */
5352  (byte*) &key_val_buff[0],
5353  (ulint)upd_and_key_val_buff_len,
5354  index,
5355  (byte*) key_ptr,
5356  (ulint) key_len,
5357  prebuilt->trx);
5358  } else {
5359  /* We position the cursor to the last or the first entry
5360  in the index */
5361 
5362  dtuple_set_n_fields(prebuilt->search_tuple, 0);
5363  }
5364 
5365  mode = convert_search_mode_to_innobase(find_flag);
5366 
 /* Exact and prefix searches additionally constrain the match;
 every other flag leaves match_mode at 0. */
5367  match_mode = 0;
5368 
5369  if (find_flag == HA_READ_KEY_EXACT) {
5370 
5371  match_mode = ROW_SEL_EXACT;
5372 
5373  } else if (find_flag == HA_READ_PREFIX
5374  || find_flag == HA_READ_PREFIX_LAST) {
5375 
5376  match_mode = ROW_SEL_EXACT_PREFIX;
5377  }
5378 
 /* Stored in the handler so that a later fetch can pass the same
 match mode. */
5379  last_match_mode = (uint) match_mode;
5380 
5381  if (mode != PAGE_CUR_UNSUPP) {
5382 
5384 
5385  ret = row_search_for_mysql((byte*) buf, mode, prebuilt,
5386  match_mode, 0);
5387 
5389  } else {
5390 
 /* Unsupported search flag: no search is attempted. */
5391  ret = DB_UNSUPPORTED;
5392  }
5393 
 /* Translate the InnoDB result into a handler error code and table
 status. "Not found" and "end of index" both map to
 HA_ERR_KEY_NOT_FOUND here. */
5394  switch (ret) {
5395  case DB_SUCCESS:
5396  error = 0;
5397  getTable()->status = 0;
5398  break;
5399  case DB_RECORD_NOT_FOUND:
5400  error = HA_ERR_KEY_NOT_FOUND;
5401  getTable()->status = STATUS_NOT_FOUND;
5402  break;
5403  case DB_END_OF_INDEX:
5404  error = HA_ERR_KEY_NOT_FOUND;
5405  getTable()->status = STATUS_NOT_FOUND;
5406  break;
5407  default:
5408  error = convert_error_code_to_mysql((int) ret,
5409  prebuilt->table->flags,
5410  user_session);
5411  getTable()->status = STATUS_NOT_FOUND;
5412  break;
5413  }
5414 
5415  return(error);
5416 }
5417 
5418 /*******************************************************************/
5422 UNIV_INTERN
5423 int
5425 /*=========================*/
5426  unsigned char* buf,
5427  const unsigned char* key_ptr,
5429  uint key_len)
5431 {
5432  return(index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST));
5433 }
5434 
5435 /********************************************************************
Returns the InnoDB index that corresponds to a handler key number.
If keynr is MAX_KEY, or the table share has no keys, the first index of
prebuilt->table is returned. Otherwise the index is looked up with
innobase_index_lookup(); if that finds nothing, the index is searched in
prebuilt->table by the name of the key. Also increments the
ha_read_key_count statistic.
@param keynr key number, or MAX_KEY
@return the index, or NULL (after logging an error) if none was found */
5438 UNIV_INTERN
5439 dict_index_t*
5441 /*============================*/
5442  uint keynr)
5445 {
5446  dict_index_t* index = 0;
5447 
5448  ha_statistic_increment(&system_status_var::ha_read_key_count);
5449 
5450  if (keynr != MAX_KEY && getTable()->getShare()->sizeKeys() > 0)
5451  {
5452  KeyInfo *key = getTable()->key_info + keynr;
5453  index = innobase_index_lookup(share, keynr);
5454 
5455  if (index) {
 /* The index found must carry the same name as the key. */
5456  ut_a(ut_strcmp(index->name, key->name) == 0);
5457  } else {
5458  /* Can't find index with keynr in the translation
5459  table. Only print message if the index translation
5460  table exists */
 /* NOTE(review): the condition guarding the message below is
 missing from this listing; the closing brace after the call
 belongs to it. */
 /* NOTE(review): key is tested for NULL in the message
 arguments, but key->name is dereferenced unconditionally
 above and below - the NULL test looks redundant; confirm. */
5462  errmsg_printf(error::ERROR,
5463  "InnoDB could not find "
5464  "index %s key no %u for "
5465  "table %s through its "
5466  "index translation table",
5467  key ? key->name : "NULL",
5468  keynr,
5469  prebuilt->table->name);
5470  }
5471 
 /* Fall back to a lookup by name. */
5472  index = dict_table_get_index_on_name(prebuilt->table,
5473  key->name);
5474  }
5475  } else {
5476  index = dict_table_get_first_index(prebuilt->table);
5477  }
5478 
5479  if (!index) {
 /* NOTE(review): this path can also be reached with
 keynr == MAX_KEY (else branch above); indexes(keynr) is then
 evaluated with MAX_KEY - confirm that this is in range. */
5480  errmsg_printf(error::ERROR,
5481  "Innodb could not find key n:o %u with name %s "
5482  "from dict cache for table %s",
5483  keynr, getTable()->getShare()->getTableMessage()->indexes(keynr).name().c_str(),
5484  prebuilt->table->name);
5485  }
5486 
5487  return(index);
5488 }
5489 
5490 /********************************************************************
Makes the key with number keynr the active index of this handle: sets
active_index and prebuilt->index, sizes and types prebuilt->search_tuple
for the index, and rebuilds the row template with ROW_MYSQL_REC_FIELDS.
@param keynr key number to activate
@return 0 on success; 1 if no index was found for keynr (index_usable is
cleared); 2 if the index is not usable (a warning is pushed to the
session) */
5493 UNIV_INTERN
5494 int
5496 /*=============================*/
5497  uint keynr)
5500 {
5501  ut_ad(user_session == table->in_use);
5503 
5504  active_index = keynr;
5505 
5506  prebuilt->index = innobase_get_index(keynr);
5507 
5508  if (UNIV_UNLIKELY(!prebuilt->index)) {
5509  errmsg_printf(error::WARN, "InnoDB: change_active_index(%u) failed",
5510  keynr);
5511  prebuilt->index_usable = FALSE;
5512  return(1);
5513  }
5514 
 /* NOTE(review): the first line of the statement below is missing from
 this listing; presumably it assigns prebuilt->index_usable, which is
 tested right after - confirm against the full source. */
5516  prebuilt->index);
5517 
5518  if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
5519  push_warning_printf(user_session, DRIZZLE_ERROR::WARN_LEVEL_WARN,
5520  HA_ERR_TABLE_DEF_CHANGED,
5521  "InnoDB: insufficient history for index %u",
5522  keynr);
5523  /* The caller seems to ignore this. Thus, we must check
5524  this again in row_search_for_mysql(). */
5525  return(2);
5526  }
5527 
5528  ut_a(prebuilt->search_tuple != 0);
5529 
 /* Shape the search tuple after the newly selected index. */
5530  dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields);
5531 
5532  dict_index_copy_types(prebuilt->search_tuple, prebuilt->index,
5533  prebuilt->index->n_fields);
5534 
5535  /* MySQL changes the active index for a handle also during some
5536  queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX()
5537  and then calculates the sum. Previously we played safe and used
5538  the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary
5539  copying. Starting from MySQL-4.1 we use a more efficient flag here. */
5540 
5541  build_template(prebuilt, user_session, getTable(), ROW_MYSQL_REC_FIELDS);
5542 
5543  return(0);
5544 }
5545 
5546 /**********************************************************************/
5551 UNIV_INTERN
5552 int
5554 /*========================*/
5555  unsigned char* buf,
5557  uint keynr,
5558  const unsigned char* key,
5561  uint key_len,
5562  enum ha_rkey_function find_flag)
5563 {
5564  if (change_active_index(keynr)) {
5565 
5566  return(1);
5567  }
5568 
5569  return(index_read(buf, key, key_len, find_flag));
5570 }
5571 
5572 /***********************************************************************
Fetches a row relative to the current cursor position by calling
row_search_for_mysql() with search mode 0 and the given direction, then
translates the result into a handler error code and table status.
@param buf buffer that receives the row
@param direction fetch direction; callers in this file pass ROW_SEL_NEXT
 or ROW_SEL_PREV
@param match_mode match mode; callers in this file pass 0 or
 last_match_mode
@return 0 on success; HA_ERR_END_OF_FILE if no record was found or the
end of the index was reached; otherwise a converted error code */
5576 UNIV_INTERN
5577 int
5579 /*=======================*/
5580  unsigned char* buf,
5582  uint direction,
5583  uint match_mode)
5585 {
5586  ulint ret;
5587  int error = 0;
5588 
5590 
5592 
5593  ret = row_search_for_mysql(
5594  (byte*)buf, 0, prebuilt, match_mode, direction);
5595 
5597 
 /* Unlike a keyed read, "not found" and "end of index" are both
 reported as HA_ERR_END_OF_FILE here. */
5598  switch (ret) {
5599  case DB_SUCCESS:
5600  error = 0;
5601  getTable()->status = 0;
5602  break;
5603  case DB_RECORD_NOT_FOUND:
5604  error = HA_ERR_END_OF_FILE;
5605  getTable()->status = STATUS_NOT_FOUND;
5606  break;
5607  case DB_END_OF_INDEX:
5608  error = HA_ERR_END_OF_FILE;
5609  getTable()->status = STATUS_NOT_FOUND;
5610  break;
5611  default:
 /* NOTE(review): the first line of the statement below is missing
 from this listing; presumably it assigns error from an error-code
 conversion call - confirm against the full source. */
5613  (int) ret, prebuilt->table->flags, user_session);
5614  getTable()->status = STATUS_NOT_FOUND;
5615  break;
5616  }
5617 
5618  return(error);
5619 }
5620 
5621 /***********************************************************************/
/* Reads the next row in the forward direction.
   NOTE(review): listing line 5627 (presumably the function name, likely
   ha_innobase::index_next -- confirm) is absent from this extract.
   buf: row buffer, forwarded to general_fetch()
   Bumps the ha_read_next_count statistic and returns the result of
   general_fetch(buf, ROW_SEL_NEXT, 0). */
5625 UNIV_INTERN
5626 int
5628 /*====================*/
5629  unsigned char* buf)
5631 {
5632  ha_statistic_increment(&system_status_var::ha_read_next_count);
5633 
5634  return(general_fetch(buf, ROW_SEL_NEXT, 0));
5635 }
5636 
5637 /*******************************************************************/
/* Reads the next row in the forward direction using the match mode stored
   in last_match_mode.
   NOTE(review): listing line 5642 (presumably the function name, likely
   ha_innobase::index_next_same -- confirm) is absent from this extract.
   buf: row buffer, forwarded to general_fetch()
   The second and third parameters are unnamed and unused here.
   Bumps the ha_read_next_count statistic and returns the result of
   general_fetch(buf, ROW_SEL_NEXT, last_match_mode). */
5640 UNIV_INTERN
5641 int
5643 /*=========================*/
5644  unsigned char* buf,
5645  const unsigned char* ,
5646  uint )
5647 {
5648  ha_statistic_increment(&system_status_var::ha_read_next_count);
5649 
5650  return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode));
5651 }
5652 
5653 /***********************************************************************/
/* Reads the next row in the backward direction.
   NOTE(review): listing line 5659 (presumably the function name, likely
   ha_innobase::index_prev -- confirm) is absent from this extract.
   buf: row buffer, forwarded to general_fetch()
   Bumps the ha_read_prev_count statistic and returns the result of
   general_fetch(buf, ROW_SEL_PREV, 0). */
5657 UNIV_INTERN
5658 int
5660 /*====================*/
5661  unsigned char* buf)
5662 {
5663  ha_statistic_increment(&system_status_var::ha_read_prev_count);
5664 
5665  return(general_fetch(buf, ROW_SEL_PREV, 0));
5666 }
5667 
5668 /********************************************************************/
/* Positions on the first row of the active index by calling index_read()
   with a NULL key of length 0 and HA_READ_AFTER_KEY.
   NOTE(review): listing line 5674 (presumably the function name; another
   function in this file calls index_first(buf) -- confirm) is absent from
   this extract.
   buf: row buffer, forwarded to index_read()
   Bumps the ha_read_first_count statistic. Returns the index_read() result,
   except that HA_ERR_KEY_NOT_FOUND is replaced by HA_ERR_END_OF_FILE. */
5672 UNIV_INTERN
5673 int
5675 /*=====================*/
5676  unsigned char* buf)
5677 {
5678  int error;
5679 
5680  ha_statistic_increment(&system_status_var::ha_read_first_count);
5681 
5682  error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);
5683 
5684  /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
5685 
5686  if (error == HA_ERR_KEY_NOT_FOUND) {
5687  error = HA_ERR_END_OF_FILE;
5688  }
5689 
5690  return(error);
5691 }
5692 
5693 /********************************************************************/
/* Positions on the last row of the active index by calling index_read()
   with a NULL key of length 0 and HA_READ_BEFORE_KEY.
   NOTE(review): listing line 5699 (presumably the function name, likely
   ha_innobase::index_last -- confirm) is absent from this extract.
   buf: row buffer, forwarded to index_read()
   Bumps the ha_read_last_count statistic. Returns the index_read() result,
   except that HA_ERR_KEY_NOT_FOUND is replaced by HA_ERR_END_OF_FILE. */
5697 UNIV_INTERN
5698 int
5700 /*====================*/
5701  unsigned char* buf)
5702 {
5703  int error;
5704 
5705  ha_statistic_increment(&system_status_var::ha_read_last_count);
5706 
5707  error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);
5708 
5709  /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
5710 
5711  if (error == HA_ERR_KEY_NOT_FOUND) {
5712  error = HA_ERR_END_OF_FILE;
5713  }
5714 
5715  return(error);
5716 }
5717 
5718 /****************************************************************/
/* Prepares a table scan: selects the index to scan on and sets
   start_of_scan to 1.
   NOTE(review): listing lines 5723 (presumably the function name, likely
   ha_innobase::doStartTableScan -- confirm), 5732 and 5742 are absent from
   this extract.
   scan: when false, the (missing) body of the `if (!scan)` block runs; per
         the comment above it, that is where semi-consistent read is
         presumably disabled -- confirm.
   Returns the result of the change_active_index() call. */
5721 UNIV_INTERN
5722 int
5724 /*==================*/
5725  bool scan)
5726 {
5727  int err;
5728 
  /* NOTE(review): nothing in the visible code stores the previous active
     index, so the comment below may be stale -- confirm. */
5729  /* Store the active index value so that we can restore the original
5730  value after a scan */
5731 
  /* NOTE(review): listing line 5732, the `if (...) {` that opens this
     branch, is missing. A later function in this file guards the same
     MAX_KEY / primary_key choice with a comment about tables that have no
     primary key -- presumably the same test applies here; confirm. */
5733  err = change_active_index(MAX_KEY);
5734  } else {
5735  err = change_active_index(primary_key);
5736  }
5737 
5738  /* Don't use semi-consistent read in random row reads (by position).
5739  This means we must disable semi_consistent_read if scan is false */
5740 
5741  if (!scan) {
5743  }
5744 
5745  start_of_scan = 1;
5746 
5747  return(err);
5748 }
5749 
5750 /*****************************************************************/
/* Ends a table scan by delegating to doEndIndexScan() and returning its
   result.
   NOTE(review): listing line 5755 (presumably the function name, likely
   ha_innobase::doEndTableScan -- confirm) is absent from this extract. */
5753 UNIV_INTERN
5754 int
5756 /*======================*/
5757 {
5758  return(doEndIndexScan());
5759 }
5760 
5761 /*****************************************************************/
/* Reads the next row of a table scan.
   NOTE(review): listing line 5767 (presumably the function name, likely
   ha_innobase::rnd_next -- confirm) is absent from this extract.
   buf: row buffer, forwarded to index_first() or general_fetch()
   On the first call after start_of_scan has been set, reads via
   index_first() and clears start_of_scan; afterwards reads via
   general_fetch(buf, ROW_SEL_NEXT, 0). Bumps ha_read_rnd_next_count.
   Returns the error code of the underlying read, with
   HA_ERR_KEY_NOT_FOUND from index_first() replaced by HA_ERR_END_OF_FILE. */
5765 UNIV_INTERN
5766 int
5768 /*==================*/
5769  unsigned char* buf)
5771 {
5772  int error;
5773 
5774  ha_statistic_increment(&system_status_var::ha_read_rnd_next_count);
5775 
5776  if (start_of_scan) {
5777  error = index_first(buf);
5778 
5779  if (error == HA_ERR_KEY_NOT_FOUND) {
5780  error = HA_ERR_END_OF_FILE;
5781  }
5782 
5783  start_of_scan = 0;
5784  } else {
5785  error = general_fetch(buf, ROW_SEL_NEXT, 0);
5786  }
5787 
5788  return(error);
5789 }
5790 
5791 /**********************************************************************/
/* Fetches a row by its stored row reference.
   NOTE(review): listing lines 5796 (presumably the function name, likely
   ha_innobase::rnd_pos -- confirm) and 5811 are absent from this extract.
   buf: row buffer, forwarded to index_read()
   pos: row reference of ref_length bytes, used as the exact-match key
   Temporarily switches the active index (MAX_KEY or primary_key), performs
   index_read() with HA_READ_KEY_EXACT, then switches back to the index that
   was active on entry. Bumps ha_read_rnd_count.
   Returns the change_active_index() error if the switch fails, otherwise
   the index_read() result. */
5794 UNIV_INTERN
5795 int
5797 /*=================*/
5798  unsigned char* buf,
5799  unsigned char* pos)
5803 {
5804  int error;
  /* Remember the index active on entry so it can be re-selected below. */
5805  uint keynr = active_index;
5806 
5807  ha_statistic_increment(&system_status_var::ha_read_rnd_count);
5808 
5809  ut_a(prebuilt->trx == session_to_trx(getTable()->in_use));
5810 
  /* NOTE(review): listing line 5811, the `if (...) {` that opens this
     branch, is missing from the extract. */
5812  /* No primary key was defined for the table and we
5813  generated the clustered index from the row id: the
5814  row reference is the row id, not any key value
5815  that MySQL knows of */
5816 
5817  error = change_active_index(MAX_KEY);
5818  } else {
5819  error = change_active_index(primary_key);
5820  }
5821 
  /* NOTE(review): on this early return the original index (keynr) is not
     re-selected. */
5822  if (error) {
5823  return(error);
5824  }
5825 
5826  /* Note that we assume the length of the row reference is fixed
5827  for the table, and it is == ref_length */
5828 
5829  error = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT);
5830 
  /* Empty body: a read error needs no extra handling here and is simply
     returned below. */
5831  if (error) {
5832  }
5833 
  /* The result of re-selecting the original index is ignored. */
5834  change_active_index(keynr);
5835 
5836  return(error);
5837 }
5838 
5839 /*********************************************************************/
/* Stores a reference to the given row in the handler's ref buffer.
   NOTE(review): listing lines 5849 (presumably the function name, likely
   ha_innobase::position -- confirm) and 5857 are absent from this extract.
   record: row image from which the primary-key value is built in the
           else-branch via store_key_val_for_row()
   In the first branch the reference is DATA_ROW_ID_LEN bytes copied from
   prebuilt->row_id; otherwise it is the primary-key value written by
   store_key_val_for_row(). A length different from ref_length is only
   logged; nothing is returned. */
5847 UNIV_INTERN
5848 void
5850 /*==================*/
5851  const unsigned char* record)
5852 {
5853  uint len;
5854 
5855  ut_a(prebuilt->trx == session_to_trx(getTable()->in_use));
5856 
  /* NOTE(review): listing line 5857, the `if (...) {` that opens this
     branch, is missing from the extract. */
5858  /* No primary key was defined for the table and we
5859  generated the clustered index from row id: the
5860  row reference will be the row id, not any key value
5861  that MySQL knows of */
5862 
5863  len = DATA_ROW_ID_LEN;
5864 
5865  memcpy(ref, prebuilt->row_id, len);
5866  } else {
5867  len = store_key_val_for_row(primary_key, (char*)ref,
5868  ref_length, record);
5869  }
5870 
5871  /* We assume that the 'ref' value len is always fixed for the same
5872  table. */
5873 
5874  if (len != ref_length) {
5875  errmsg_printf(error::ERROR, "Stored ref len is %lu, but table ref len is %lu",
5876  (ulong) len, (ulong) ref_length);
5877  }
5878 }
5879 
5880 
5881 /*****************************************************************/
/* Builds an in-memory InnoDB table definition from the columns of form and
   passes it to row_create_table_for_mysql().
   NOTE(review): listing line 5885 (presumably the function name; a caller
   in this file invokes create_table_def(trx, &form, name, temp_path, flags)
   with a matching argument list -- confirm) and the per-parameter doc
   comments are absent from this extract.
   trx:                transaction handed to row_create_table_for_mysql();
                       trx->mysql_thd receives the warnings
   form:               supplies the column count and the Field objects
   table_name:         name given to dict_mem_table_create()
   path_of_temp_table: when non-NULL, duplicated into
                       table->dir_path_of_temp_table
   flags:              passed to dict_mem_table_create() and to
                       convert_error_code_to_mysql()
   Returns the value produced by convert_error_code_to_mysql(), or
   ER_CANT_CREATE_TABLE directly when a string column's charset number is
   256 or larger. */
5883 static
5884 int
5886 /*=============*/
5887  trx_t* trx,
5888  Table* form,
5890  const char* table_name,
5891  const char* path_of_temp_table,
5899  ulint flags)
5900 {
5901  Field* field;
5902  dict_table_t* table;
5903  ulint n_cols;
5904  int error;
5905  ulint col_type;
5906  ulint col_len;
5907  ulint nulls_allowed;
5908  ulint unsigned_type;
5909  ulint binary_type;
5910  ulint long_true_varchar;
5911  ulint charset_no;
5912  ulint i;
5913 
5914  n_cols = form->getShare()->sizeFields();
5915 
5916  /* We pass 0 as the space id, and determine at a lower level the space
5917  id where to store the table */
5918 
5919  table = dict_mem_table_create(table_name, 0, n_cols, flags);
5920 
5921  if (path_of_temp_table) {
5922  table->dir_path_of_temp_table =
5923  mem_heap_strdup(table->heap, path_of_temp_table);
5924  }
5925 
  /* Translate every column of form into an InnoDB column definition. */
5926  for (i = 0; i < n_cols; i++) {
5927  field = form->getField(i);
5928 
5929  col_type = get_innobase_type_from_mysql_type(&unsigned_type,
5930  field);
5931 
  /* A zero col_type is treated as an unsupported column type. */
5932  if (!col_type) {
5933  push_warning_printf(
5934  trx->mysql_thd,
5935  DRIZZLE_ERROR::WARN_LEVEL_WARN,
5936  ER_CANT_CREATE_TABLE,
5937  "Error creating table '%s' with "
5938  "column '%s'. Please check its "
5939  "column type and try to re-create "
5940  "the table with an appropriate "
5941  "column type.",
5942  table->name, (char*) field->field_name);
5943  goto err_col;
5944  }
5945 
  /* A non-NULL null_ptr leaves the column nullable; otherwise the
     DATA_NOT_NULL flag is set. */
5946  if (field->null_ptr) {
5947  nulls_allowed = 0;
5948  } else {
5949  nulls_allowed = DATA_NOT_NULL;
5950  }
5951 
5952  if (field->binary()) {
5953  binary_type = DATA_BINARY_TYPE;
5954  } else {
5955  binary_type = 0;
5956  }
5957 
5958  charset_no = 0;
5959 
5960  if (dtype_is_string_type(col_type)) {
5961 
5962  charset_no = (ulint)field->charset()->number;
5963 
5964  if (UNIV_UNLIKELY(charset_no >= 256)) {
5965  /* in data0type.h we assume that the
5966  number fits in one byte in prtype */
5967  push_warning_printf(
5968  trx->mysql_thd,
5969  DRIZZLE_ERROR::WARN_LEVEL_ERROR,
5970  ER_CANT_CREATE_TABLE,
5971  "In InnoDB, charset-collation codes"
5972  " must be below 256."
5973  " Unsupported code %lu.",
5974  (ulong) charset_no);
  /* NOTE(review): unlike the err_col path below, this return does not
     call dict_mem_table_free(table) or trx_commit_for_mysql(trx), and it
     bypasses convert_error_code_to_mysql(). Possibly a leak of the table
     object -- confirm. */
5975  return(ER_CANT_CREATE_TABLE);
5976  }
5977  }
5978 
5979  ut_a(field->type() < 256); /* we assume in dtype_form_prtype()
5980  that this fits in one byte */
5981  col_len = field->pack_length();
5982 
5983  /* The MySQL pack length contains 1 or 2 bytes length field
5984  for a true VARCHAR. Let us subtract that, so that the InnoDB
5985  column length in the InnoDB data dictionary is the real
5986  maximum byte length of the actual data. */
5987 
5988  long_true_varchar = 0;
5989 
5990  if (field->type() == DRIZZLE_TYPE_VARCHAR) {
5991  col_len -= ((Field_varstring*)field)->pack_length_no_ptr();
5992 
5993  if (((Field_varstring*)field)->pack_length_no_ptr() == 2) {
5994  long_true_varchar = DATA_LONG_TRUE_VARCHAR;
5995  }
5996  }
5997 
5998  /* First check whether the column to be added has a
5999  system reserved name. */
6000  if (dict_col_name_is_reserved(field->field_name)){
6001  my_error(ER_WRONG_COLUMN_NAME, MYF(0), field->field_name);
6002 
  /* Shared failure exit, also reached by the goto above: frees the table
     object, commits trx and reports DB_ERROR through error_ret. */
6003  err_col:
6004  dict_mem_table_free(table);
6005  trx_commit_for_mysql(trx);
6006 
6007  error = DB_ERROR;
6008  goto error_ret;
6009  }
6010 
6011  dict_mem_table_add_col(table, table->heap,
6012  (char*) field->field_name,
6013  col_type,
6014  dtype_form_prtype(
6015  (ulint)field->type()
6016  | nulls_allowed | unsigned_type
6017  | binary_type | long_true_varchar,
6018  charset_no),
6019  col_len);
6020  }
6021 
6022  error = row_create_table_for_mysql(table, trx);
6023 
  /* On a duplicate-key result, report ER_TABLE_EXISTS_ERROR with the
     converted table name. One byte of buf is held back for the
     terminator written through buf_end. */
6024  if (error == DB_DUPLICATE_KEY) {
6025  char buf[100];
6026  char* buf_end = innobase_convert_identifier(
6027  buf, sizeof buf - 1, table_name, strlen(table_name),
6028  trx->mysql_thd, TRUE);
6029 
6030  *buf_end = '\0';
6031  my_error(ER_TABLE_EXISTS_ERROR, MYF(0), buf);
6032  }
6033 
6034 error_ret:
6035  error = convert_error_code_to_mysql(error, flags, NULL);
6036 
6037  return(error);
6038 }
6039 
6040 /*****************************************************************/
/* Creates one InnoDB index from key number key_num of form.
   NOTE(review): listing lines 6044 (presumably the function name; a caller
   in this file invokes create_index(trx, &form, flags, name, key_num) with
   a matching argument list -- confirm), 6048 and 6118 are absent from this
   extract.
   trx:        transaction handed to row_create_index_for_mysql()
   form:       supplies key_info and the Field objects
   flags:      passed to convert_error_code_to_mysql()
   table_name: table name given to dict_mem_index_create() and used in the
               diagnostic message
   key_num:    position of the key in form->key_info
   Returns the value produced by convert_error_code_to_mysql(). */
6042 static
6043 int
6045 /*=========*/
6046  trx_t* trx,
6047  Table* form,
6049  ulint flags,
6050  const char* table_name,
6051  uint key_num)
6052 {
6053  Field* field;
6054  dict_index_t* index;
6055  int error;
6056  ulint n_fields;
6057  KeyInfo* key;
6058  KeyPartInfo* key_part;
6059  ulint ind_type;
6060  ulint col_type;
6061  ulint prefix_len;
6062  ulint is_unsigned;
6063  ulint i;
6064  ulint j;
6065  ulint* field_lengths;
6066 
6067  key = &form->key_info[key_num];
6068 
6069  n_fields = key->key_parts;
6070 
6071  /* Assert that "GEN_CLUST_INDEX" cannot be used as non-primary index */
6072  ut_a(innobase_strcasecmp(key->name, innobase_index_reserve_name) != 0);
6073 
6074  ind_type = 0;
6075 
  /* The primary key becomes the clustered index. */
6076  if (key_num == form->getShare()->getPrimaryKey()) {
6077  ind_type = ind_type | DICT_CLUSTERED;
6078  }
6079 
6080  if (key->flags & HA_NOSAME ) {
6081  ind_type = ind_type | DICT_UNIQUE;
6082  }
6083 
6084  /* We pass 0 as the space id, and determine at a lower level the space
6085  id where to store the table */
6086 
6087  index = dict_mem_index_create(table_name, key->name, 0,
6088  ind_type, n_fields);
6089 
  /* NOTE(review): the malloc() result is not checked before
     field_lengths[i] is written in the loop below. */
6090  field_lengths = (ulint*) malloc(sizeof(ulint) * n_fields);
6091 
6092  for (i = 0; i < n_fields; i++) {
6093  key_part = key->key_part + i;
6094 
6095  /* (The flag HA_PART_KEY_SEG denotes in MySQL a column prefix
6096  field in an index: we only store a specified number of first
6097  bytes of the column to the index field.) The flag does not
6098  seem to be properly set by MySQL. Let us fall back on testing
6099  the length of the key part versus the column. */
6100 
  /* Find the table column whose name matches this key part
     (case-insensitive comparison). */
6101  field = NULL;
6102  for (j = 0; j < form->getShare()->sizeFields(); j++)
6103  {
6104 
6105  field = form->getField(j);
6106 
6107  if (0 == innobase_strcasecmp(
6108  field->field_name,
6109  key_part->field->field_name)) {
6110  /* Found the corresponding column */
6111 
6112  break;
6113  }
6114  }
6115 
  /* The lookup above must have stopped on a match. */
6116  ut_a(j < form->getShare()->sizeFields());
6117 
  /* NOTE(review): listing line 6118 is missing. The line below is the
     argument tail of a call whose result is presumably assigned to
     col_type, which is read right after -- confirm. */
6119  &is_unsigned, key_part->field);
6120 
  /* Use a column prefix when the column is a BLOB or when the key part is
     shorter than the column's data length (for VARCHAR, the pack length
     minus its length bytes). */
6121  if (DATA_BLOB == col_type
6122  || (key_part->length < field->pack_length()
6123  && field->type() != DRIZZLE_TYPE_VARCHAR)
6124  || (field->type() == DRIZZLE_TYPE_VARCHAR
6125  && key_part->length < field->pack_length()
6126  - ((Field_varstring*)field)->pack_length_no_ptr())) {
6127 
6128  prefix_len = key_part->length;
6129 
  /* Numeric types cannot take a prefix: log the problem and fall back to
     indexing the full column. */
6130  if (col_type == DATA_INT
6131  || col_type == DATA_FLOAT
6132  || col_type == DATA_DOUBLE
6133  || col_type == DATA_DECIMAL) {
6134  errmsg_printf(error::ERROR,
6135  "MySQL is trying to create a column "
6136  "prefix index field, on an "
6137  "inappropriate data type. Table "
6138  "name %s, column name %s.",
6139  table_name,
6140  key_part->field->field_name);
6141 
6142  prefix_len = 0;
6143  }
6144  } else {
6145  prefix_len = 0;
6146  }
6147 
6148  field_lengths[i] = key_part->length;
6149 
6150  dict_mem_index_add_field(index,
6151  (char*) key_part->field->field_name, prefix_len);
6152  }
6153 
6154  /* Even though we've defined max_supported_key_part_length, we
6155  still do our own checking using field_lengths to be absolutely
6156  sure we don't create too long indexes. */
6157  error = row_create_index_for_mysql(index, trx, field_lengths);
6158 
6159  error = convert_error_code_to_mysql(error, flags, NULL);
6160 
6161  free(field_lengths);
6162 
6163  return(error);
6164 }
6165 
6166 /*****************************************************************/
/* Creates a clustered index named innobase_index_reserve_name with zero
   user fields, for a table that has no primary key.
   NOTE(review): listing line 6171 (presumably the function name; a caller
   in this file invokes create_clustered_index_when_no_primary(trx, flags,
   name) with a matching argument list -- confirm) is absent from this
   extract.
   trx:        transaction handed to row_create_index_for_mysql()
   flags:      passed to convert_error_code_to_mysql()
   table_name: table name given to dict_mem_index_create()
   Returns the value produced by convert_error_code_to_mysql(). */
6169 static
6170 int
6172 /*===================================*/
6173  trx_t* trx,
6174  ulint flags,
6175  const char* table_name)
6176 {
6177  dict_index_t* index;
6178  int error;
6179 
6180  /* We pass 0 as the space id, and determine at a lower level the space
6181  id where to store the table */
6182 
6183  index = dict_mem_index_create(table_name,
6184  innobase_index_reserve_name,
6185  0, DICT_CLUSTERED, 0);
6186 
  /* No field-length array is supplied (NULL) since the index has no user
     fields. */
6187  error = row_create_index_for_mysql(index, trx, NULL);
6188 
6189  error = convert_error_code_to_mysql(error, flags, NULL);
6190 
6191  return(error);
6192 }
6193 
6194 /*****************************************************************/
/* Compiled out with #if 0. As written, the function only checks the
   session's strict_mode setting and then returns TRUE on every path; the
   ROW_FORMAT validation announced by the last comment is not implemented.
   session:      session whose strict_mode variable is consulted
   form:         unused in the visible body
   create_proto: unused in the visible body */
6200 #if 0
6201 static
6202 ibool
6203 create_options_are_valid(
6204 /*=====================*/
6205  Session* session,
6206  Table& form,
6208  message::Table& create_proto)
6209 {
  /* kbs_specified is declared but never read in the visible body. */
6210  ibool kbs_specified = FALSE;
6211  ibool ret = TRUE;
6212 
6213 
6214  ut_ad(session != NULL);
6215 
6216  /* If innodb_strict_mode is not set don't do any validation. */
6217  if (!(SessionVAR(session, strict_mode))) {
6218  return(TRUE);
6219  }
6220 
6221  /* Now check for ROW_FORMAT specifier. */
6222  return(ret);
6223 }
6224 #endif
6225 
6226 /*********************************************************************
6227 Creates a new table in an InnoDB database. */
/* Creates a table and its indexes in InnoDB from a table message.
   NOTE(review): several listing lines are absent from this extract (6230,
   presumably the function name, likely InnobaseEngine::doCreateTable --
   confirm; also 6275, 6337, 6339-6340, 6440, 6476, 6482, 6530, 6548).
   session:      session issuing the statement; receives warnings and the
                 XA id of the internal transaction
   form:         table object supplying fields and keys
   identifier:   supplies schema name and key path (used as the InnoDB
                 table name)
   create_proto: table message supplying type, engine options and the
                 AUTO_INCREMENT start value
   Visible steps: reject schema "data_dictionary" and tables with more than
   1000 fields; allocate a transaction and lock the data dictionary; derive
   the flags from the ROW_FORMAT engine option; create the table
   definition, the clustered index and the remaining indexes; process the
   statement text (foreign-key related, judging by the warnings); commit;
   update the file-format maximum and the auto-increment counter; store the
   table definition.
   Returns 0 on success, otherwise an error code (after committing and
   freeing the internal transaction at the cleanup label). */
6228 UNIV_INTERN
6229 int
6231  /*================*/
6232  Session &session,
6233  Table& form,
6234  const identifier::Table &identifier,
6235  const message::Table& create_proto)
6236 {
6237  int error;
6238  dict_table_t* innobase_table;
6239  trx_t* parent_trx;
6240  trx_t* trx;
6241  int primary_key_no;
6242  uint i;
6243  ib_int64_t auto_inc_value;
6244  ulint iflags;
6245  /* Cache the value of innodb_file_format, in case it is
6246  modified by another thread while the table is being created. */
6247  const ulint file_format = srv_file_format;
6248  bool lex_identified_temp_table= (create_proto.type() == message::Table::TEMPORARY);
6249  const char* stmt;
6250  size_t stmt_len;
6251 
  /* Tables in the schema "data_dictionary" (compared case-insensitively)
     cannot be created through this engine. */
6252  std::string search_string(identifier.getSchemaName());
6253  boost::algorithm::to_lower(search_string);
6254 
6255  if (search_string.compare("data_dictionary") == 0)
6256  {
6257  return HA_WRONG_CREATE_OPTION;
6258  }
6259 
6260  if (form.getShare()->sizeFields() > 1000) {
6261  /* The limit probably should be REC_MAX_N_FIELDS - 3 = 1020,
6262  but we play safe here */
6263 
6264  return(HA_ERR_TO_BIG_ROW);
6265  }
6266 
6267  /* Get the transaction associated with the current session, or create one
6268  if not yet created */
6269 
6270  parent_trx = check_trx_exists(&session);
6271 
6272  /* In case MySQL calls this in the middle of a SELECT query, release
6273  possible adaptive hash latch to avoid deadlocks of threads */
6274 
  /* NOTE(review): listing line 6275, presumably the call announced by the
     comment above, is missing from the extract. */
6276 
6277  trx = innobase_trx_allocate(&session);
6278 
6279  srv_lower_case_table_names = TRUE;
6280 
6281  /* Latch the InnoDB data dictionary exclusively so that no deadlocks
6282  or lock waits can happen in it during a table create operation.
6283  Drop table etc. do this latching in row0mysql.c. */
6284 
6285  row_mysql_lock_data_dictionary(trx);
6286 
6287  /* Create the table definition in InnoDB */
6288 
6289  iflags = 0;
6290 
6291 #if 0 // Since we validate the options before this stage, we no longer need to do this.
6292  /* Validate create options if innodb_strict_mode is set. */
6293  if (! create_options_are_valid(&session, form, create_proto)) {
6294  error = ER_ILLEGAL_HA_CREATE_OPTION;
6295  goto cleanup;
6296  }
6297 #endif
6298 
6299  // We assume compact format by default
6300  iflags= DICT_TF_COMPACT;
6301 
  /* Derive the flags from the ROW_FORMAT engine option.
     NOTE(review): as written, COMPACT selects DICT_TF_FORMAT_ZIP (the same
     as COMPRESSED) while DYNAMIC and REDUNDANT select DICT_TF_COMPACT.
     This mapping looks suspicious -- confirm it is intended. Any option
     not named ROW_FORMAT hits assert(0). */
6302  size_t num_engine_options= create_proto.engine().options_size();
6303  for (size_t x= 0; x < num_engine_options; ++x)
6304  {
6305  if (boost::iequals(create_proto.engine().options(x).name(), "ROW_FORMAT"))
6306  {
6307  if (boost::iequals(create_proto.engine().options(x).state(), "COMPRESSED"))
6308  {
6309  iflags= DICT_TF_FORMAT_ZIP;
6310  }
6311  else if (boost::iequals(create_proto.engine().options(x).state(), "COMPACT"))
6312  {
6313  iflags= DICT_TF_FORMAT_ZIP;
6314  }
6315  else if (boost::iequals(create_proto.engine().options(x).state(), "DYNAMIC"))
6316  {
6317  iflags= DICT_TF_COMPACT;
6318  }
6319  else if (boost::iequals(create_proto.engine().options(x).state(), "REDUNDANT"))
6320  {
6321  iflags= DICT_TF_COMPACT;
6322  }
6323  }
6324  else
6325  {
6326  assert(0); // This should never happen since we have already validated the options.
6327  }
6328  }
6329 
6330  if (iflags == DICT_TF_FORMAT_ZIP)
6331  {
6332  /*
6333  ROW_FORMAT=COMPRESSED without KEY_BLOCK_SIZE implies half the maximum KEY_BLOCK_SIZE.
6334  @todo implement KEY_BLOCK_SIZE
6335  */
  /* NOTE(review): listing lines 6337, 6339 and 6340 are missing, so the
     flag expression below is incomplete in this extract. */
6336  iflags= (DICT_TF_ZSSIZE_MAX - 1)
6338  | DICT_TF_COMPACT
6341 
  /* In strict mode, warn when the server settings do not match the
     compressed row format. strict_mode is not declared in the visible
     part of this function. */
6342  if (strict_mode)
6343  {
6344  if (! srv_file_per_table)
6345  {
6346  push_warning_printf(
6347  &session,
6348  DRIZZLE_ERROR::WARN_LEVEL_WARN,
6349  ER_ILLEGAL_HA_CREATE_OPTION,
6350  "InnoDB: ROW_FORMAT=COMPRESSED requires innodb_file_per_table.");
6351  }
6352  else if (file_format < DICT_TF_FORMAT_ZIP)
6353  {
6354  push_warning_printf(
6355  &session,
6356  DRIZZLE_ERROR::WARN_LEVEL_WARN,
6357  ER_ILLEGAL_HA_CREATE_OPTION,
6358  "InnoDB: ROW_FORMAT=compressed requires innodb_file_format > Antelope.");
6359  }
6360  }
6361  }
6362 
6363  /* Look for a primary key */
6364 
6365  primary_key_no= (form.getShare()->hasPrimaryKey() ?
6366  (int) form.getShare()->getPrimaryKey() :
6367  -1);
6368 
6369  /* Our function innobase_get_mysql_key_number_for_index assumes
6370  the primary key is always number 0, if it exists */
6371 
6372  assert(primary_key_no == -1 || primary_key_no == 0);
6373 
6374  /* Check for name conflicts (with reserved name) for
6375  any user indices to be created. */
6376  if (innobase_index_name_is_reserved(trx, form.key_info,
6377  form.getShare()->keys)) {
6378  error = -1;
6379  goto cleanup;
6380  }
6381 
6382  if (lex_identified_temp_table)
6383  iflags |= DICT_TF2_TEMPORARY << DICT_TF2_SHIFT;
6384 
  /* The key path serves as the InnoDB table name; for temporary tables it
     is also passed as the temp-table path. */
6385  error= create_table_def(trx, &form, identifier.getKeyPath().c_str(),
6386  lex_identified_temp_table ? identifier.getKeyPath().c_str() : NULL,
6387  iflags);
6388 
6389  session.setXaId(trx->id);
6390 
6391  if (error) {
6392  goto cleanup;
6393  }
6394 
6395  /* Create the keys */
6396 
6397  if (form.getShare()->sizeKeys() == 0 || primary_key_no == -1) {
6398  /* Create an index which is used as the clustered index;
6399  order the rows by their row id which is internally generated
6400  by InnoDB */
6401 
6402  error = create_clustered_index_when_no_primary(trx, iflags, identifier.getKeyPath().c_str());
6403  if (error) {
6404  goto cleanup;
6405  }
6406  }
6407 
6408  if (primary_key_no != -1) {
6409  /* In InnoDB the clustered index must always be created first */
6410  if ((error = create_index(trx, &form, iflags, identifier.getKeyPath().c_str(),
6411  (uint) primary_key_no))) {
6412  goto cleanup;
6413  }
6414  }
6415 
  /* Create the remaining (non-primary) indexes. */
6416  for (i = 0; i < form.getShare()->sizeKeys(); i++) {
6417  if (i != (uint) primary_key_no) {
6418 
6419  if ((error = create_index(trx, &form, iflags, identifier.getKeyPath().c_str(),
6420  i))) {
6421  goto cleanup;
6422  }
6423  }
6424  }
6425 
6426  stmt= session.getQueryStringCopy(stmt_len);
6427 
6428  if (stmt) {
6429  string generated_create_table;
6430  const char *query= stmt;
6431 
  /* For CREATE TABLE the statement text is regenerated from the table
     message instead of using the session's query string. */
6432  if (session.getSqlCommand() == SQLCOM_CREATE_TABLE)
6433  {
6434  message::transformTableDefinitionToSql(create_proto,
6435  generated_create_table,
6436  message::DRIZZLE, true);
6437  query= generated_create_table.c_str();
6438  }
6439 
  /* NOTE(review): listing line 6440 is missing. The lines below are the
     argument tail of a call whose result is presumably assigned to error;
     judging by the warnings that follow, it deals with foreign-key
     constraints -- confirm. */
6441  query, strlen(query),
6442  identifier.getKeyPath().c_str(),
6443  lex_identified_temp_table);
6444  switch (error) {
6445 
6446  case DB_PARENT_NO_INDEX:
6447  push_warning_printf(
6448  &session, DRIZZLE_ERROR::WARN_LEVEL_WARN,
6449  HA_ERR_CANNOT_ADD_FOREIGN,
6450  "Create table '%s' with foreign key constraint"
6451  " failed. There is no index in the referenced"
6452  " table where the referenced columns appear"
6453  " as the first columns.\n", identifier.getKeyPath().c_str());
6454  break;
6455 
6456  case DB_CHILD_NO_INDEX:
6457  push_warning_printf(
6458  &session, DRIZZLE_ERROR::WARN_LEVEL_WARN,
6459  HA_ERR_CANNOT_ADD_FOREIGN,
6460  "Create table '%s' with foreign key constraint"
6461  " failed. There is no index in the referencing"
6462  " table where referencing columns appear"
6463  " as the first columns.\n", identifier.getKeyPath().c_str());
6464  break;
6465  }
6466 
6467  error = convert_error_code_to_mysql(error, iflags, NULL);
6468 
6469  if (error) {
6470  goto cleanup;
6471  }
6472  }
6473 
6474  innobase_commit_low(trx);
6475 
  /* NOTE(review): listing line 6476 is missing; since the data dictionary
     was locked above, an unlock call is presumably here -- confirm. */
6477 
6478  /* Flush the log to reduce probability that the .frm files and
6479  the InnoDB data dictionary get out-of-sync if the user runs
6480  with innodb_flush_log_at_trx_commit = 0 */
6481 
6483 
6484  innobase_table = dict_table_get(identifier.getKeyPath().c_str(), FALSE);
6485 
6486  assert(innobase_table != 0);
6487 
6488  if (innobase_table) {
6489  /* We update the highest file format in the system table
6490  space, if this table has higher file format setting. */
6491 
  /* NOTE(review): strcpy() into a 100-byte buffer has no length check, and
     the cast below passes the address of the array itself, not of a
     pointer to it -- confirm this matches what
     trx_sys_file_format_max_upgrade() expects. */
6492  char changed_file_format_max[100];
6493  strcpy(changed_file_format_max, innobase_file_format_max.c_str());
6494  trx_sys_file_format_max_upgrade((const char **)&changed_file_format_max,
6495  dict_table_get_format(innobase_table));
6496  innobase_file_format_max= changed_file_format_max;
6497  }
6498 
6499  /* Note: We can't call update_session() as prebuilt will not be
6500  setup at this stage and so we use session. */
6501 
6502  /* We need to copy the AUTOINC value from the old table if
6503  this is an ALTER TABLE or CREATE INDEX because CREATE INDEX
6504  does a table copy too. */
6505 
6506  if ((create_proto.options().has_auto_increment_value()
6507  || session.getSqlCommand() == SQLCOM_ALTER_TABLE
6508  || session.getSqlCommand() == SQLCOM_CREATE_INDEX)
6509  && create_proto.options().auto_increment_value() != 0) {
6510 
6511  /* Query was one of :
6512  CREATE TABLE ...AUTO_INCREMENT = x; or
6513  ALTER TABLE...AUTO_INCREMENT = x; or
6514  CREATE INDEX x on t(...);
6515  Find out a table definition from the dictionary and get
6516  the current value of the auto increment field. Set a new
6517  value to the auto increment field if the value is greater
6518  than the maximum value in the column. */
6519 
6520  auto_inc_value = create_proto.options().auto_increment_value();
6521 
  /* NOTE(review): innobase_table is used here outside the null check
     above; only the assert() guards it. */
6522  dict_table_autoinc_lock(innobase_table);
6523  dict_table_autoinc_initialize(innobase_table, auto_inc_value);
6524  dict_table_autoinc_unlock(innobase_table);
6525  }
6526 
6527  /* Tell the InnoDB server that there might be work for
6528  utility threads: */
6529 
6531 
6532  trx_free_for_mysql(trx);
6533 
  /* Store the table definition: in the session's message cache for
     temporary tables, otherwise via writeDefinitionFromPath(). */
6534  if (lex_identified_temp_table)
6535  {
6536  session.getMessageCache().storeTableMessage(identifier, create_proto);
6537  }
6538  else
6539  {
6540  StorageEngine::writeDefinitionFromPath(identifier, create_proto);
6541  }
6542 
6543  return(0);
6544 
  /* Failure exit: commit and free the internal transaction, then report
     the error. Listing line 6548 is missing from the extract. */
6545 cleanup:
6546  innobase_commit_low(trx);
6547 
6549 
6550  trx_free_for_mysql(trx);
6551 
6552  return(error);
6553 }
6554 
6555 /*****************************************************************/
/* Discards or imports the tablespace of the handler's table.
   NOTE(review): listing line 6560 (presumably the function name, likely
   ha_innobase::discard_or_import_tablespace -- confirm) is absent from
   this extract.
   discard: non-zero calls row_discard_tablespace_for_mysql(), zero calls
            row_import_tablespace_for_mysql()
   Returns the result converted by convert_error_code_to_mysql(). */
6558 UNIV_INTERN
6559 int
6561 /*======================================*/
6562  my_bool discard)
6563 {
6564  dict_table_t* dict_table;
6565  trx_t* trx;
6566  int err;
6567 
  /* Sanity checks: the prebuilt struct must carry a valid transaction that
     belongs to the session using this table. */
6568  ut_a(prebuilt->trx);
6569  ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
6570  ut_a(prebuilt->trx == session_to_trx(getTable()->in_use));
6571 
6572  dict_table = prebuilt->table;
6573  trx = prebuilt->trx;
6574 
6575  if (discard) {
6576  err = row_discard_tablespace_for_mysql(dict_table->name, trx);
6577  } else {
6578  err = row_import_tablespace_for_mysql(dict_table->name, trx);
6579  }
6580 
6581  err = convert_error_code_to_mysql(err, dict_table->flags, NULL);
6582 
6583  return(err);
6584 }
6585 
6586 /*****************************************************************/
/* Removes all rows of the table, but only for the TRUNCATE command.
   NOTE(review): listing lines 6591 (presumably the function name, likely
   ha_innobase::delete_all_rows -- confirm), 6611 and 6617 are absent from
   this extract.
   For any other SQL command, or when the truncate step reports DB_ERROR,
   returns HA_ERR_WRONG_COMMAND (also stored in errno) so that the caller
   falls back to row-by-row deletion, as the comments below describe.
   Otherwise returns the converted error code. */
6589 UNIV_INTERN
6590 int
6592 /*==============================*/
6593 {
6594  int error;
6595 
6596  /* Get the transaction associated with the current session, or create one
6597  if not yet created, and update prebuilt->trx */
6598 
6599  update_session(getTable()->in_use);
6600 
6601  if (user_session->getSqlCommand() != SQLCOM_TRUNCATE) {
  /* This label sits inside the if-body and is also the target of the goto
     below. */
6602  fallback:
6603  /* We only handle TRUNCATE TABLE t as a special case.
6604  DELETE FROM t will have to use ha_innobase::doDeleteRecord(),
6605  because DELETE is transactional while TRUNCATE is not. */
6606  return(errno=HA_ERR_WRONG_COMMAND);
6607  }
6608 
6609  /* Truncate the table in InnoDB */
6610 
  /* NOTE(review): listing line 6611, which presumably performs the
     truncate and assigns error, is missing from the extract. */
6612  if (error == DB_ERROR) {
6613  /* Cannot truncate; resort to ha_innobase::doDeleteRecord() */
6614  goto fallback;
6615  }
6616 
  /* NOTE(review): listing line 6617 is missing; the line below is the
     argument tail of a call that presumably converts error -- confirm. */
6618  NULL);
6619 
6620  return(error);
6621 }
6622 
6623 /*****************************************************************/
/* Drops a table from InnoDB and removes its stored definition.
   NOTE(review): listing lines 6632 (presumably the function name, likely
   InnobaseEngine::doDropTable -- confirm), 6659, 6677 and 6682 are absent
   from this extract.
   session:    session issuing the statement; receives the XA id of the
               internal transaction
   identifier: supplies schema name, path, key path and table type
   Returns HA_ERR_TABLE_READONLY for schema "data_dictionary"; otherwise
   ENOENT unchanged, or the result of convert_error_code_to_mysql(). */
6630 UNIV_INTERN
6631 int
6633 /*======================*/
6634  Session &session,
6635  const identifier::Table &identifier)
6636 {
6637  int error;
6638  trx_t* parent_trx;
6639  trx_t* trx;
6640 
6641  ut_a(identifier.getPath().length() < 1000);
6642 
  /* Tables in the schema "data_dictionary" (compared case-insensitively)
     cannot be dropped through this engine. */
6643  std::string search_string(identifier.getSchemaName());
6644  boost::algorithm::to_lower(search_string);
6645 
6646  if (search_string.compare("data_dictionary") == 0)
6647  {
6648  return HA_ERR_TABLE_READONLY;
6649  }
6650 
6651  /* Get the transaction associated with the current session, or create one
6652  if not yet created */
6653 
6654  parent_trx = check_trx_exists(&session);
6655 
6656  /* In case MySQL calls this in the middle of a SELECT query, release
6657  possible adaptive hash latch to avoid deadlocks of threads */
6658 
6660 
6661  trx = innobase_trx_allocate(&session);
6662 
6663  srv_lower_case_table_names = TRUE;
6664 
6665  /* Drop the table in InnoDB */
6666 
  /* The third argument is true only while a whole database is being
     dropped (SQLCOM_DROP_DB). */
6667  error = row_drop_table_for_mysql(identifier.getKeyPath().c_str(), trx,
6668  session.getSqlCommand()
6669  == SQLCOM_DROP_DB);
6670 
6671  session.setXaId(trx->id);
6672 
6673  /* Flush the log to reduce probability that the .frm files and
6674  the InnoDB data dictionary get out-of-sync if the user runs
6675  with innodb_flush_log_at_trx_commit = 0 */
6676 
6678 
6679  /* Tell the InnoDB server that there might be work for
6680  utility threads: */
6681 
6683 
6684  innobase_commit_low(trx);
6685 
6686  trx_free_for_mysql(trx);
6687 
  /* ENOENT is passed through untranslated; every other code is converted
     to a handler error. */
6688  if (error != ENOENT)
6689  error = convert_error_code_to_mysql(error, 0, NULL);
6690 
  /* On success, or when the table did not exist, also remove the stored
     definition. */
6691  if (error == 0 || error == ENOENT)
6692  {
6693  if (identifier.getType() == message::Table::TEMPORARY)
6694  {
6695  session.getMessageCache().removeTableMessage(identifier);
6696  ulint sql_command = session.getSqlCommand();
6697 
6698  // If this was the final removal to an alter table then we will need
6699  // to remove the .dfe that was left behind.
6700  if ((sql_command == SQLCOM_ALTER_TABLE
6701  || sql_command == SQLCOM_CREATE_INDEX
6702  || sql_command == SQLCOM_DROP_INDEX))
6703  {
6704  string path(identifier.getPath());
6705 
6706  path.append(DEFAULT_FILE_EXTENSION);
6707 
6708  (void)internal::my_delete(path.c_str(), MYF(0));
6709  }
6710  }
6711  else
6712  {
  /* Non-temporary table: delete the definition file next to the table
     path; the result of the delete is deliberately ignored. */
6713  string path(identifier.getPath());
6714 
6715  path.append(DEFAULT_FILE_EXTENSION);
6716 
6717  (void)internal::my_delete(path.c_str(), MYF(0));
6718  }
6719  }
6720 
6721  return(error);
6722 }
6723 
6724 /*****************************************************************/
/* Drops all InnoDB tables of a schema.
   NOTE(review): listing lines 6727 (presumably the function name, likely
   InnobaseEngine::doDropSchema -- confirm), 6753, 6764 and 6769 are absent
   from this extract.
   identifier: supplies the schema path; "/" is appended before it is
               passed to row_drop_database_for_mysql()
   Always returns false; the error from row_drop_database_for_mysql() is
   not propagated. */
6726 bool
6728 /*===================*/
6729  const identifier::Schema &identifier)
6734 {
6735  trx_t* trx;
6736  int error;
6737  string schema_path(identifier.getPath());
6738  Session* session = current_session;
6739 
6740  /* Get the transaction associated with the current session, or create one
6741  if not yet created */
6742 
6743  assert(this == innodb_engine_ptr);
6744 
6745  /* In the Windows plugin, session = current_session is always NULL */
6746  if (session) {
6747  trx_t* parent_trx = check_trx_exists(session);
6748 
6749  /* In case Drizzle calls this in the middle of a SELECT
6750  query, release possible adaptive hash latch to avoid
6751  deadlocks of threads */
6752 
6754  }
6755 
6756  schema_path.append("/");
  /* NOTE(review): session may be NULL here according to the comment above,
     yet it is passed on unconditionally -- confirm that
     innobase_trx_allocate() tolerates a NULL session. */
6757  trx = innobase_trx_allocate(session);
6758  error = row_drop_database_for_mysql(schema_path.c_str(), trx);
6759 
6760  /* Flush the log to reduce probability that the .frm files and
6761  the InnoDB data dictionary get out-of-sync if the user runs
6762  with innodb_flush_log_at_trx_commit = 0 */
6763 
6765 
6766  /* Tell the InnoDB server that there might be work for
6767  utility threads: */
6768 
6770 
6771  innobase_commit_low(trx);
6772  trx_free_for_mysql(trx);
6773 
  /* The error is currently ignored (open question left by the author). */
6774  if (error) {
6775  // What do we do here?
6776  }
6777 
6778  return false; // We are just a listener since we lack control over DDL, so we give no positive acknowledgement.
6779 }
6780 
/* Drops every InnoDB table under the temporary schema path
   (GLOBAL_TEMPORARY_EXT followed by "/"), using a transaction that is not
   attached to any session and has foreign-key and unique-secondary checks
   switched off. The result of row_drop_database_for_mysql() is ignored.
   NOTE(review): listing lines 6799 and 6804 are absent from this extract;
   they presumably hold the calls announced by the two comments below. */
6781 void InnobaseEngine::dropTemporarySchema()
6782 {
6783  string schema_path(GLOBAL_TEMPORARY_EXT);
6784  schema_path += "/";
6785 
6786  trx_t* trx = trx_allocate_for_mysql();
6787 
  /* No session is associated with this transaction. */
6788  trx->mysql_thd = NULL;
6789 
6790  trx->check_foreigns = false;
6791  trx->check_unique_secondary = false;
6792 
6793  (void)row_drop_database_for_mysql(schema_path.c_str(), trx);
6794 
6795  /* Flush the log to reduce probability that the .frm files and
6796  the InnoDB data dictionary get out-of-sync if the user runs
6797  with innodb_flush_log_at_trx_commit = 0 */
6798 
6800 
6801  /* Tell the InnoDB server that there might be work for
6802  utility threads: */
6803 
6805 
6806  innobase_commit_low(trx);
6807  trx_free_for_mysql(trx);
6808 }
6809 /*********************************************************************/
/* Renames a table inside InnoDB.
   NOTE(review): listing lines 6814 (presumably the function name; a caller
   in this file invokes innobase_rename_table(trx, from, to, TRUE) with a
   matching argument list -- confirm), 6820, 6846 and 6852 are absent from
   this extract.
   trx:             transaction handed to row_rename_table_for_mysql()
   from:            identifier whose key path is the current table name
   to:              identifier whose key path is the new table name
   lock_and_commit: when true, the data dictionary is locked around the
                    rename; the value is also forwarded to
                    row_rename_table_for_mysql()
   Returns the raw result of row_rename_table_for_mysql(); it is not
   converted to a handler error code here. */
6812 static
6813 int
6815 /*==================*/
6816  trx_t* trx,
6817  const identifier::Table &from,
6818  const identifier::Table &to,
6819  ibool lock_and_commit)
6821 {
6822  int error;
6823 
6824  srv_lower_case_table_names = TRUE;
6825 
6826  /* Serialize data dictionary operations with dictionary mutex:
6827  no deadlocks can occur then in these operations */
6828 
6829  if (lock_and_commit) {
6830  row_mysql_lock_data_dictionary(trx);
6831  }
6832 
6833  error = row_rename_table_for_mysql(from.getKeyPath().c_str(), to.getKeyPath().c_str(), trx, lock_and_commit);
6834 
  /* On failure, write a diagnostic line naming both tables to
     dict_foreign_err_file. */
6835  if (error != DB_SUCCESS) {
6836  FILE* ef = dict_foreign_err_file;
6837 
6838  fputs("InnoDB: Renaming table ", ef);
6839  ut_print_name(ef, trx, TRUE, from.getKeyPath().c_str());
6840  fputs(" to ", ef);
6841  ut_print_name(ef, trx, TRUE, to.getKeyPath().c_str());
6842  fputs(" failed!\n", ef);
6843  }
6844 
  /* NOTE(review): listing lines 6846 and 6852 are missing. Given the lock
     taken above, line 6846 presumably releases the data dictionary lock,
     and 6852 presumably performs the log flush described below --
     confirm. */
6845  if (lock_and_commit) {
6847 
6848  /* Flush the log to reduce probability that the .frm
6849  files and the InnoDB data dictionary get out-of-sync
6850  if the user runs with innodb_flush_log_at_trx_commit = 0 */
6851 
6853  }
6854 
6855  return error;
6856 }
6857 /*********************************************************************/
/* Engine entry point for renaming a table.
   If both identifiers are temporary tables, only the cached table message is
   renamed and 0 is returned. Otherwise a new transaction is allocated for the
   session, innobase_rename_table() is called with lock_and_commit = TRUE, the
   session's XA id is set to the id of that transaction, and the transaction
   is committed and freed. DB_DUPLICATE_KEY is reported as
   ER_TABLE_EXISTS_ERROR and turned into DB_ERROR before the code is passed
   through convert_error_code_to_mysql(). When the converted code is zero the
   stored table definition is renamed as well.
   @param session  session requesting the rename
   @param from     identifier of the existing table
   @param to       identifier of the new name
   @return 0 on success, otherwise the converted error code */
6860 UNIV_INTERN int InnobaseEngine::doRenameTable(Session &session, const identifier::Table &from, const identifier::Table &to)
6861 {
6862  // A temp table alter table/rename is a shallow rename and only the
6863  // definition needs to be updated.
6864  if (to.getType() == message::Table::TEMPORARY && from.getType() == message::Table::TEMPORARY)
6865  {
6866  session.getMessageCache().renameTableMessage(from, to);
6867  return 0;
6868  }
6869 
6870  trx_t* trx;
6871  int error;
6872  trx_t* parent_trx;
6873 
6874  /* Get the transaction associated with the current session, or create one
6875  if not yet created */
6876 
 /* NOTE(review): parent_trx is not referenced by any statement visible
 below; presumably the latch-release call announced by the next comment
 uses it -- confirm. */
6877  parent_trx = check_trx_exists(&session);
6878 
6879  /* In case MySQL calls this in the middle of a SELECT query, release
6880  possible adaptive hash latch to avoid deadlocks of threads */
6881 
6883 
6884  trx = innobase_trx_allocate(&session);
6885 
6886  error = innobase_rename_table(trx, from, to, TRUE);
6887 
 /* trx->id is read here, before the transaction is committed and freed. */
6888  session.setXaId(trx->id);
6889 
6890  /* Tell the InnoDB server that there might be work for
6891  utility threads: */
6892 
6894 
6895  innobase_commit_low(trx);
6896  trx_free_for_mysql(trx);
6897 
6898  /* Add a special case to handle the Duplicated Key error
6899  and return DB_ERROR instead.
6900  This is to avoid a possible SIGSEGV error from mysql error
6901  handling code. Currently, mysql handles the Duplicated Key
6902  error by re-entering the storage layer and getting dup key
6903  info by calling get_dup_key(). This operation requires a valid
6904  table handle ('row_prebuilt_t' structure) which could no
6905  longer be available in the error handling stage. The suggested
6906  solution is to report a 'table exists' error message (since
6907  the dup key error here is due to an existing table whose name
6908  is the one we are trying to rename to) and return the generic
6909  error code. */
6910  if (error == (int) DB_DUPLICATE_KEY) {
6911  my_error(ER_TABLE_EXISTS_ERROR, to);
6912  error = DB_ERROR;
6913  }
6914 
6915  error = convert_error_code_to_mysql(error, 0, NULL);
6916 
6917  if (not error)
6918  {
6919  // If this fails, we are in trouble
 // NOTE(review): the return value of this call is not checked.
6920  plugin::StorageEngine::renameDefinitionFromPath(to, from);
6921  }
6922 
6923  return(error);
6924 }
6925 
6926 /*********************************************************************/
/* Estimates the number of rows in a key range of index `keynr`.
   Two search tuples are built on a private memory heap, the key flags are
   translated with convert_search_mode_to_innobase(), and
   btr_estimate_n_rows_in_range() supplies the estimate.
   @param keynr    number of the index to look in; also stored in active_index
   @param min_key  lower bound of the range, may be NULL (then a zero-length
                   key and HA_READ_KEY_EXACT are used)
   @param max_key  upper bound of the range, may be NULL (same defaults)
   @return the estimate cast to ha_rows; an estimate of 0 is raised to 1.
           HA_POS_ERROR is produced when the index cannot be found or a search
           mode is unsupported, and HA_ERR_TABLE_DEF_CHANGED is stored in the
           row count when the index is not usable by this transaction.
   NOTE(review): the line carrying the function name and the callee names of
   the two key-conversion calls are not visible here. The malloc() result for
   key_val_buff2 is not checked for NULL in the visible code. */
6929 UNIV_INTERN
6930 ha_rows
6932 /*==========================*/
6933  uint keynr,
6934  key_range *min_key,
6936  key_range *max_key)
6938 {
6939  KeyInfo* key;
6940  dict_index_t* index;
 /* Scratch buffer for the converted max_key; released at func_exit. */
6941  unsigned char* key_val_buff2 = (unsigned char*) malloc(
6942  getTable()->getShare()->sizeStoredRecord()
6943  + getTable()->getShare()->max_key_length + 100);
 /* Same size expression as the allocation above. */
6944  ulint buff2_len = getTable()->getShare()->sizeStoredRecord()
6945  + getTable()->getShare()->max_key_length + 100;
6946  dtuple_t* range_start;
6947  dtuple_t* range_end;
6948  ib_int64_t n_rows;
6949  ulint mode1;
6950  ulint mode2;
6951  mem_heap_t* heap;
6952 
6953  ut_a(prebuilt->trx == session_to_trx(getTable()->in_use));
6954 
6955  prebuilt->trx->op_info = "estimating records in index range";
6956 
6957  /* In case MySQL calls this in the middle of a SELECT query, release
6958  possible adaptive hash latch to avoid deadlocks of threads */
6959 
6961 
6962  active_index = keynr;
6963 
6964  key = &getTable()->key_info[active_index];
6965 
6966  index = innobase_get_index(keynr);
6967 
6968  /* There exists possibility of not being able to find requested
6969  index due to inconsistency between MySQL and InnoDB dictionary info.
6970  Necessary message should have been printed in innobase_get_index() */
6971  if (UNIV_UNLIKELY(!index)) {
6972  n_rows = HA_POS_ERROR;
6973  goto func_exit;
6974  }
6975 
 /* NOTE(review): an error code is stored in the row count here and is
 returned cast to ha_rows -- confirm callers expect that. */
6976  if (UNIV_UNLIKELY(!row_merge_is_index_usable(prebuilt->trx, index))) {
6977  n_rows = HA_ERR_TABLE_DEF_CHANGED;
6978  goto func_exit;
6979  }
6980 
 /* The heap is created only after the early exits above, so func_exit
 must not (and does not) free it. */
6981  heap = mem_heap_create(2 * (key->key_parts * sizeof(dfield_t)
6982  + sizeof(dtuple_t)));
6983 
6984  range_start = dtuple_create(heap, key->key_parts);
6985  dict_index_copy_types(range_start, index, key->key_parts);
6986 
6987  range_end = dtuple_create(heap, key->key_parts);
6988  dict_index_copy_types(range_end, index, key->key_parts);
6989 
 /* Arguments for converting min_key into range_start. */
6991  range_start, (byte*) &key_val_buff[0],
6992  (ulint)upd_and_key_val_buff_len,
6993  index,
6994  (byte*) (min_key ? min_key->key :
6995  (const unsigned char*) 0),
6996  (ulint) (min_key ? min_key->length : 0),
6997  prebuilt->trx);
6998 
 /* Arguments for converting max_key into range_end. */
7000  range_end, (byte*) key_val_buff2,
7001  buff2_len, index,
7002  (byte*) (max_key ? max_key->key :
7003  (const unsigned char*) 0),
7004  (ulint) (max_key ? max_key->length : 0),
7005  prebuilt->trx);
7006 
7007  mode1 = convert_search_mode_to_innobase(min_key ? min_key->flag :
7008  HA_READ_KEY_EXACT);
7009  mode2 = convert_search_mode_to_innobase(max_key ? max_key->flag :
7010  HA_READ_KEY_EXACT);
7011 
7012  if (mode1 != PAGE_CUR_UNSUPP && mode2 != PAGE_CUR_UNSUPP) {
7013 
7014  n_rows = btr_estimate_n_rows_in_range(index, range_start,
7015  mode1, range_end,
7016  mode2);
7017  } else {
7018 
7019  n_rows = HA_POS_ERROR;
7020  }
7021 
7022  mem_heap_free(heap);
7023 
7024 func_exit:
7025  free(key_val_buff2);
7026 
7027  prebuilt->trx->op_info = "";
7028 
7029  /* The MySQL optimizer seems to believe an estimate of 0 rows is
7030  always accurate and may return the result 'Empty set' based on that.
7031  The accuracy is not guaranteed, and even if it were, for a locking
7032  read we should anyway perform the search to set the next-key lock.
7033  Add 1 to the value to make sure MySQL does not make the assumption! */
7034 
7035  if (n_rows == 0) {
7036  n_rows = 1;
7037  }
7038 
7039  return((ha_rows) n_rows);
7040 }
7041 
7042 /*********************************************************************/
/* Computes an upper bound for the number of rows in the table.
   The bound is 2 * (leaf pages of the first index * UNIV_PAGE_SIZE) divided
   by the minimum record length reported by dict_index_calc_min_rec_len().
   The function asserts that the leaf-page statistic is greater than zero.
   @return the computed bound cast to ha_rows
   NOTE(review): the line carrying the function name is not visible here; the
   name is taken from the call in read_time(). */
7046 UNIV_INTERN
7047 ha_rows
7049 /*======================================*/
7050 {
7051  dict_index_t* index;
7052  uint64_t estimate;
7053  uint64_t local_data_file_length;
7054  ulint stat_n_leaf_pages;
7055 
7056  /* We do not know if MySQL can call this function before calling
7057  external_lock(). To be safe, update the session of the current table
7058  handle. */
7059 
7060  update_session(getTable()->in_use);
7061 
7062  prebuilt->trx->op_info = (char*)
7063  "calculating upper bound for table rows";
7064 
7065  /* In case MySQL calls this in the middle of a SELECT query, release
7066  possible adaptive hash latch to avoid deadlocks of threads */
7067 
7069 
7070  index = dict_table_get_first_index(prebuilt->table);
7071 
7072  stat_n_leaf_pages = index->stat_n_leaf_pages;
7073 
7074  ut_a(stat_n_leaf_pages > 0);
7075 
 /* Widen before multiplying so the byte count is computed in 64 bits. */
7076  local_data_file_length =
7077  ((uint64_t) stat_n_leaf_pages) * UNIV_PAGE_SIZE;
7078 
7079 
7080  /* Calculate a minimum length for a clustered index record and from
7081  that an upper bound for the number of rows. Since we only calculate
7082  new statistics in row0mysql.c when a table has grown by a threshold
7083  factor, we must add a safety factor 2 in front of the formula below. */
7084 
7085  estimate = 2 * local_data_file_length /
7086  dict_index_calc_min_rec_len(index);
7087 
7088  prebuilt->trx->op_info = "";
7089 
7090  return((ha_rows) estimate);
7091 }
7092 
7093 /*********************************************************************/
/* Returns the cost estimate for a full table scan.
   @return prebuilt->table->stat_clustered_index_size converted to double,
           with no scaling applied
   NOTE(review): the line carrying the function name is not visible here; the
   name is taken from the call in read_time(). */
7098 UNIV_INTERN
7099 double
7101 /*====================*/
7102 {
7103  /* Since MySQL seems to favor table scans too much over index
7104  searches, we pretend that a sequential read takes the same time
7105  as a random disk read, that is, we do not divide the following
7106  by 10, which would be physically realistic. */
7107 
7108  return((double) (prebuilt->table->stat_clustered_index_size));
7109 }
7110 
7111 /******************************************************************/
/* Estimates the time needed to read `rows` rows in `ranges` ranges through
   index `index`.
   @param index   index number
   @param ranges  number of ranges to read
   @param rows    estimated number of rows in those ranges
   @return - Cursor::read_time() when `index` is not the primary key;
           - `rows` itself when rows <= 2;
           - scan_time() when estimate_rows_upper_bound() is smaller than
             `rows`;
           - otherwise ranges + rows / total_rows * scan_time()
   NOTE(review): the line carrying the function name is not visible here; the
   name is assumed from the Cursor::read_time() fallback. */
7115 UNIV_INTERN
7116 double
7118 /*===================*/
7119  uint index,
7120  uint ranges,
7121  ha_rows rows)
7122 {
7123  ha_rows total_rows;
7124  double time_for_scan;
7125 
7126  if (index != getTable()->getShare()->getPrimaryKey()) {
7127  /* Not clustered */
7128  return(Cursor::read_time(index, ranges, rows));
7129  }
7130 
7131  if (rows <= 2) {
7132 
7133  return((double) rows);
7134  }
7135 
7136  /* Assume that the read time is proportional to the scan time for all
7137  rows + at most one seek per range. */
7138 
7139  time_for_scan = scan_time();
7140 
 /* If more rows are requested than the upper bound, cap at a full scan. */
7141  if ((total_rows = estimate_rows_upper_bound()) < rows) {
7142 
7143  return(time_for_scan);
7144  }
7145 
7146  return(ranges + (double) rows / (double) total_rows * time_for_scan);
7147 }
7148 
7149 /*********************************************************************/
/* Maps an InnoDB index object to the key number used by the server layer.
   Lookup order in the visible code:
   1. if `index` belongs to a table other than `ib_table`, count its position
      in index->table's own index list;
   2. search share->idx_trans_tbl.index_mapping, when that table exists;
   3. compare `index` with dict_table_get_index_on_name() for every key name
      of `table`.
   @param share     share structure holding the index translation table
   @param table     table definition with the key_info array
   @param ib_table  InnoDB table the share refers to
   @param index     index to look up
   @return the key number; 0 after logging an error when nothing matches
   NOTE(review): the line carrying the function name is not visible here; the
   name is taken from the call that sets errkey. The line that guards
   `ut_a(i > 0); i--;` also appears to be missing in both stanzas below. */
7158 static
7159 unsigned int
7161 /*====================================*/
7162  INNOBASE_SHARE* share,
7164  const drizzled::Table* table,
7166  dict_table_t* ib_table,
7168  const dict_index_t* index)
7169 {
7170  const dict_index_t* ind;
7171  unsigned int i;
7172 
7173  ut_ad(index);
7174  ut_ad(ib_table);
7175  ut_ad(table);
7176  ut_ad(share);
7177 
7178  /* If index does not belong to the table of share structure. Search
7179  index->table instead */
7180  if (index->table != ib_table) {
7181  i = 0;
7182  ind = dict_table_get_first_index(index->table);
7183 
 /* Walk the list until the index is found; i counts the steps. */
7184  while (index != ind) {
7185  ind = dict_table_get_next_index(ind);
7186  i++;
7187  }
7188 
7190  ut_a(i > 0);
7191  i--;
7192  }
7193 
7194  return(i);
7195  }
7196 
 /* NOTE(review): the stanza below repeats the one above verbatim. If the
 first stanza returns whenever index->table != ib_table, this copy can
 never run -- confirm and remove the duplicate. */
7197  /* If index does not belong to the table of share structure. Search
7198  index->table instead */
7199  if (index->table != ib_table) {
7200  i = 0;
7201  ind = dict_table_get_first_index(index->table);
7202 
7203  while (index != ind) {
7204  ind = dict_table_get_next_index(ind);
7205  i++;
7206  }
7207 
7209  ut_a(i > 0);
7210  i--;
7211  }
7212 
7213  return(i);
7214  }
7215 
7216  /* If index translation table exists, we will first check
7217  the index through index translation table for a match. */
7218  if (share->idx_trans_tbl.index_mapping) {
7219  for (i = 0; i < share->idx_trans_tbl.index_count; i++) {
7220  if (share->idx_trans_tbl.index_mapping[i] == index) {
7221  return(i);
7222  }
7223  }
7224 
7225  /* Print an error message if we cannot find the index
7226  ** in the "index translation table". */
7227  errmsg_printf(error::ERROR,
7228  "Cannot find index %s in InnoDB index "
7229  "translation table.", index->name);
7230  }
7231 
7232  /* If we do not have an "index translation table", or not able
7233  to find the index in the translation table, we'll directly find
7234  matching index in the dict_index_t list */
7235  for (i = 0; i < table->getShare()->keys; i++) {
7236  ind = dict_table_get_index_on_name(
7237  ib_table, table->key_info[i].name);
7238 
7239  if (index == ind) {
7240  return(i);
7241  }
7242  }
7243 
7244  errmsg_printf(error::ERROR,
7245  "Cannot find matching index number for index %s "
7246  "in InnoDB index list.", index->name);
7247 
 /* Falls back to key 0, which is indistinguishable from a real match. */
7248  return(0);
7249 }
7250 /*********************************************************************/
/* Fills the `stats` member (and related fields) with table information,
   depending on the bits set in `flag`:
   - HA_STATUS_TIME:     refreshes statistics with dict_update_statistics()
                         and sets stats.create_time from the ctime of the
                         table's ".dfe" file;
   - HA_STATUS_VARIABLE: sets records, deleted, data_file_length,
                         index_file_length, delete_length, check_time and
                         mean_rec_length;
   - HA_STATUS_CONST:    fills key_info[i].rec_per_key[j] for every key part;
   - HA_STATUS_ERRKEY:   sets errkey from the transaction's error info;
   - HA_STATUS_AUTO:     sets stats.auto_increment_value when the table has
                         an auto-increment field.
   The ERRKEY and AUTO parts are skipped when
   srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE.
   @param flag  bitwise OR of HA_STATUS_* values
   @return always 0
   NOTE(review): the line carrying the function name is not visible here; the
   name is taken from the call `info(...)` in the ANALYZE entry point. Some
   statements announced by comments (latch release, autoinc reset, the operand
   of index_file_length) are not visible either. */
7253 UNIV_INTERN
7254 int
7256 /*==============*/
7257  uint flag)
7258 {
7259  dict_table_t* ib_table;
7260  dict_index_t* index;
7261  ha_rows rec_per_key;
7262  ib_int64_t n_rows;
7263  os_file_stat_t stat_info;
7264 
7265  /* If we are forcing recovery at a high level, we will suppress
7266  statistics calculation on tables, because that may crash the
7267  server if an index is badly corrupted. */
7268 
7269  /* We do not know if MySQL can call this function before calling
7270  external_lock(). To be safe, update the session of the current table
7271  handle. */
7272 
7273  update_session(getTable()->in_use);
7274 
7275  /* In case MySQL calls this in the middle of a SELECT query, release
7276  possible adaptive hash latch to avoid deadlocks of threads */
7277 
7278  prebuilt->trx->op_info = "returning various info to MySQL";
7279 
7281 
7282  ib_table = prebuilt->table;
7283 
7284  if (flag & HA_STATUS_TIME) {
7285  /* In Analyze we call with this flag: update
7286  the statistics so that they are up-to-date */
7287 
7288  prebuilt->trx->op_info = "updating table statistics";
7289 
7290  dict_update_statistics(ib_table,
7291  FALSE /* update even if stats
7292  are initialized */);
7293 
7294 
7295  prebuilt->trx->op_info = "returning various info to MySQL";
7296 
 /* Build <local catalog path>/<InnoDB table name> and stat it.
 NOTE(review): the result of fs::change_extension() is discarded in
 the visible code -- confirm the ".dfe" extension is really applied. */
7297  fs::path get_status_path(catalog::local_identifier().getPath());
7298  get_status_path /= ib_table->name;
7299  fs::change_extension(get_status_path, "dfe");
7300 
7301  /* Note that we do not know the access time of the table,
7302  nor the CHECK TABLE time, nor the UPDATE or INSERT time. */
7303 
7304  if (os_file_get_status(get_status_path.file_string().c_str(), &stat_info)) {
7305  stats.create_time = (ulong) stat_info.ctime;
7306  }
7307  }
7308 
7309  if (flag & HA_STATUS_VARIABLE) {
7310 
7311  dict_table_stats_lock(ib_table, RW_S_LATCH);
7312 
7313  n_rows = ib_table->stat_n_rows;
7314 
7315  /* Because we do not protect stat_n_rows by any mutex in a
7316  delete, it is theoretically possible that the value can be
7317  smaller than zero! TODO: fix this race.
7318 
7319  The MySQL optimizer seems to assume in a left join that n_rows
7320  is an accurate estimate if it is zero. Of course, it is not,
7321  since we do not have any locks on the rows yet at this phase.
7322  Since SHOW TABLE STATUS seems to call this function with the
7323  HA_STATUS_TIME flag set, while the left join optimizer does not
7324  set that flag, we add one to a zero value if the flag is not
7325  set. That way SHOW TABLE STATUS will show the best estimate,
7326  while the optimizer never sees the table empty. */
7327 
7328  if (n_rows < 0) {
7329  n_rows = 0;
7330  }
7331 
7332  if (n_rows == 0 && !(flag & HA_STATUS_TIME)) {
7333  n_rows++;
7334  }
7335 
7336  /* Fix bug#40386: Not flushing query cache after truncate.
7337  n_rows can not be 0 unless the table is empty, set to 1
7338  instead. The original problem of bug#29507 is actually
7339  fixed in the server code. */
7340  if (user_session->getSqlCommand() == SQLCOM_TRUNCATE) {
7341 
7342  n_rows = 1;
7343 
7344  /* We need to reset the prebuilt value too, otherwise
7345  checks for values greater than the last value written
7346  to the table will fail and the autoinc counter will
7347  not be updated. This will force doInsertRecord() into
7348  attempting an update of the table's AUTOINC counter. */
7349 
7351  }
7352 
7353  stats.records = (ha_rows)n_rows;
7354  stats.deleted = 0;
 /* Sizes are kept in pages; multiply by the page size to get bytes. */
7355  stats.data_file_length = ((uint64_t)
7356  ib_table->stat_clustered_index_size)
7357  * UNIV_PAGE_SIZE;
7358  stats.index_file_length = ((uint64_t)
7360  * UNIV_PAGE_SIZE;
7361 
7362  dict_table_stats_unlock(ib_table, RW_S_LATCH);
7363 
7364  /* Since fsp_get_available_space_in_free_extents() is
7365  acquiring latches inside InnoDB, we do not call it if we
7366  are asked by MySQL to avoid locking. Another reason to
7367  avoid the call is that it uses quite a lot of CPU.
7368  See Bug#38185. */
7369  if (flag & HA_STATUS_NO_LOCK) {
7370  /* We do not update delete_length if no
7371  locking is requested so the "old" value can
7372  remain. delete_length is initialized to 0 in
7373  the ha_statistics' constructor. */
7374  } else if (UNIV_UNLIKELY
7375  (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE)) {
7376  /* Avoid accessing the tablespace if
7377  innodb_crash_recovery is set to a high value. */
7378  stats.delete_length = 0;
7379  } else {
7380  ullint avail_space;
7381 
7382  avail_space = fsp_get_available_space_in_free_extents(ib_table->space);
7383 
7384  if (avail_space == ULLINT_UNDEFINED) {
7385  Session* session;
7386 
7387  session= getTable()->in_use;
7388  assert(session);
7389 
7390  push_warning_printf(
7391  session,
7392  DRIZZLE_ERROR::WARN_LEVEL_WARN,
7393  ER_CANT_GET_STAT,
7394  "InnoDB: Trying to get the free "
7395  "space for table %s but its "
7396  "tablespace has been discarded or "
7397  "the .ibd file is missing. Setting "
7398  "the free space to zero.",
7399  ib_table->name);
7400 
7401  stats.delete_length = 0;
7402  } else {
 /* presumably avail_space is in kilobytes, hence
 the factor 1024 -- TODO confirm */
7403  stats.delete_length = avail_space * 1024;
7404  }
7405  }
7406 
7407  stats.check_time = 0;
7408 
 /* Guard against division by zero when no rows are recorded. */
7409  if (stats.records == 0) {
7410  stats.mean_rec_length = 0;
7411  } else {
7412  stats.mean_rec_length = (ulong) (stats.data_file_length / stats.records);
7413  }
7414  }
7415 
7416  if (flag & HA_STATUS_CONST) {
7417  ulong i;
7418  /* Verify the number of index in InnoDB and MySQL
7419  matches up. If prebuilt->clust_index_was_generated
7420  holds, InnoDB defines GEN_CLUST_INDEX internally */
7421  ulint num_innodb_index = UT_LIST_GET_LEN(ib_table->indexes) - prebuilt->clust_index_was_generated;
7422 
 /* A mismatch is only logged; processing continues. */
7423  if (getTable()->getShare()->keys != num_innodb_index) {
7424  errmsg_printf(error::ERROR, "Table %s contains %lu "
7425  "indexes inside InnoDB, which "
7426  "is different from the number of "
7427  "indexes %u defined in the MySQL ",
7428  ib_table->name, num_innodb_index,
7429  getTable()->getShare()->keys);
7430  }
7431 
7432  dict_table_stats_lock(ib_table, RW_S_LATCH);
7433 
7434  for (i = 0; i < getTable()->getShare()->sizeKeys(); i++) {
7435  ulong j;
7436  /* We could get index quickly through internal
7437  index mapping with the index translation table.
7438  The identity of index (match up index name with
7439  that of table->key_info[i]) is already verified in
7440  innobase_get_index(). */
7441  index = innobase_get_index(i);
7442 
 /* Stop at the first key that has no InnoDB index. */
7443  if (index == NULL) {
7444  errmsg_printf(error::ERROR, "Table %s contains fewer "
7445  "indexes inside InnoDB than "
7446  "are defined in the MySQL "
7447  ".frm file. Have you mixed up "
7448  ".frm files from different "
7449  "installations? See "
7450  REFMAN
7451  "innodb-troubleshooting.html\n",
7452  ib_table->name);
7453  break;
7454  }
7455 
7456  for (j = 0; j < getTable()->key_info[i].key_parts; j++) {
7457 
 /* The server asks about more key parts than InnoDB
 treats as unique: log it and skip the rest of this
 key. */
7458  if (j + 1 > index->n_uniq) {
7459  errmsg_printf(error::ERROR,
7460 "Index %s of %s has %lu columns unique inside InnoDB, but MySQL is asking "
7461 "statistics for %lu columns. Have you mixed up .frm files from different "
7462 "installations? "
7463 "See " REFMAN "innodb-troubleshooting.html\n",
7464  index->name,
7465  ib_table->name,
7466  (unsigned long)
7467  index->n_uniq, j + 1);
7468  break;
7469  }
7470 
 /* stat_n_diff_key_vals is indexed with j + 1 here;
 a zero entry falls back to the full record count. */
7471  if (index->stat_n_diff_key_vals[j + 1] == 0) {
7472 
7473  rec_per_key = stats.records;
7474  } else {
7475  rec_per_key = (ha_rows)(stats.records /
7476  index->stat_n_diff_key_vals[j + 1]);
7477  }
7478 
7479  /* Since MySQL seems to favor table scans
7480  too much over index searches, we pretend
7481  index selectivity is 2 times better than
7482  our estimate: */
7483 
7484  rec_per_key = rec_per_key / 2;
7485 
7486  if (rec_per_key == 0) {
7487  rec_per_key = 1;
7488  }
7489 
 /* Clamp to the largest ulong before storing. */
7490  getTable()->key_info[i].rec_per_key[j]=
7491  rec_per_key >= ~(ulong) 0 ? ~(ulong) 0 :
7492  (ulong) rec_per_key;
7493  }
7494  }
7495 
7496  dict_table_stats_unlock(ib_table, RW_S_LATCH);
7497  }
7498 
 /* Under forced recovery the ERRKEY and AUTO handling below is skipped. */
7499  if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
7500  goto func_exit;
7501  }
7502 
7503  if (flag & HA_STATUS_ERRKEY) {
7504  const dict_index_t* err_index;
7505 
7506  ut_a(prebuilt->trx);
7507  ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
7508 
7509  err_index = trx_get_error_info(prebuilt->trx);
7510 
 /* Prefer the index object recorded on the transaction; otherwise
 fall back to the transaction's stored key number. */
7511  if (err_index) {
7512  errkey = (unsigned int)
7513  innobase_get_mysql_key_number_for_index(share, getTable(), ib_table,
7514  err_index);
7515  } else {
7516  errkey = (unsigned int) prebuilt->trx->error_key_num;
7517  }
7518  }
7519 
7520  if ((flag & HA_STATUS_AUTO) && getTable()->found_next_number_field) {
7521  stats.auto_increment_value = innobase_peek_autoinc();
7522  }
7523 
7524 func_exit:
7525  prebuilt->trx->op_info = "";
7526 
7527  return(0);
7528 }
7529 
7530 /**********************************************************************/
/* Refreshes table statistics by calling info() with HA_STATUS_TIME,
   HA_STATUS_CONST and HA_STATUS_VARIABLE set. The session argument is
   unnamed and unused.
   @return always 0
   NOTE(review): the line carrying the function name is not visible here;
   the name `analyze` is assumed from the comment inside info(). */
7534 UNIV_INTERN
7535 int
7537 /*=================*/
7538  Session*)
7539 {
7540  /* Simply call ::info() with all the flags */
7541  info(HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE);
7542 
7543  return(0);
7544 }
7545 
7546 /*******************************************************************/
/* Checks the table's indexes for corruption (CHECK TABLE).
   For every index of prebuilt->table the B-tree is validated with
   btr_validate_index() and then scanned with row_check_index_for_mysql();
   the entry count of each index is compared with the count of the first
   index. Finally the adaptive hash index is validated. The scan runs at
   REPEATABLE READ; the previous isolation level is restored afterwards.
   srv_fatal_semaphore_wait_threshold is raised by 7200 for the duration of
   the check and lowered again at the end.
   @param session  session running the check; must equal getTable()->in_use
   @return HA_ADMIN_OK when no problem was found, HA_ADMIN_CORRUPT otherwise
           (also returned early from the ".ibd file does not exist" branch)
   NOTE(review): the line carrying the function name, the condition that
   guards the ".ibd file" error branch, and the start of the statement that
   assigns prebuilt->index_usable are not visible here. */
7551 UNIV_INTERN
7552 int
7554 /*===============*/
7555  Session* session)
7556 {
7557  dict_index_t* index;
7558  ulint n_rows;
7559  ulint n_rows_in_table = ULINT_UNDEFINED;
7560  ibool is_ok = TRUE;
7561  ulint old_isolation_level;
7562 
7563  assert(session == getTable()->in_use);
7564  ut_a(prebuilt->trx);
7565  ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
7566  ut_a(prebuilt->trx == session_to_trx(session));
7567 
7568  if (prebuilt->mysql_template == NULL) {
7569  /* Build the template; we will use a dummy template
7570  in index scans done in checking */
7571 
7572  build_template(prebuilt, NULL, getTable(), ROW_MYSQL_WHOLE_ROW);
7573  }
7574 
7576  errmsg_printf(error::ERROR, "InnoDB: Error:\n"
7577  "InnoDB: MySQL is trying to use a table handle"
7578  " but the .ibd file for\n"
7579  "InnoDB: table %s does not exist.\n"
7580  "InnoDB: Have you deleted the .ibd file"
7581  " from the database directory under\n"
7582  "InnoDB: the MySQL datadir, or have you"
7583  " used DISCARD TABLESPACE?\n"
7584  "InnoDB: Please refer to\n"
7585  "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
7586  "InnoDB: how you can resolve the problem.\n",
7587  prebuilt->table->name);
7588  return(HA_ADMIN_CORRUPT);
7589  }
7590 
7591  prebuilt->trx->op_info = "checking table";
7592 
7593  old_isolation_level = prebuilt->trx->isolation_level;
7594 
7595  /* We must run the index record counts at an isolation level
7596  >= READ COMMITTED, because a dirty read can see a wrong number
7597  of records in some index; to play safe, we use always
7598  REPEATABLE READ here */
7599 
7600  prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ;
7601 
7602  /* Enlarge the fatal lock wait timeout during CHECK TABLE. */
7603  mutex_enter(&kernel_mutex);
7604  srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */
7605  mutex_exit(&kernel_mutex);
7606 
7607  for (index = dict_table_get_first_index(prebuilt->table);
7608  index != NULL;
7609  index = dict_table_get_next_index(index)) {
7610 #if 0
7611  fputs("Validating index ", stderr);
7612  ut_print_name(stderr, trx, FALSE, index->name);
7613  putc('\n', stderr);
7614 #endif
7615 
 /* A structurally broken B-tree is reported and the index is not
 scanned any further. */
7616  if (!btr_validate_index(index, prebuilt->trx)) {
7617  is_ok = FALSE;
7618  push_warning_printf(session, DRIZZLE_ERROR::WARN_LEVEL_WARN,
7619  ER_NOT_KEYFILE,
7620  "InnoDB: The B-tree of"
7621  " index '%-.200s' is corrupted.",
7622  index->name);
7623  continue;
7624  }
7625 
7626  /* Instead of invoking change_active_index(), set up
7627  a dummy template for non-locking reads, disabling
7628  access to the clustered index. */
7629  prebuilt->index = index;
7630 
7632  prebuilt->trx, prebuilt->index);
7633 
 /* An unusable index only produces a warning; is_ok is left
 unchanged on this path. */
7634  if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
7635  push_warning_printf(session, DRIZZLE_ERROR::WARN_LEVEL_WARN,
7636  HA_ERR_TABLE_DEF_CHANGED,
7637  "InnoDB: Insufficient history for"
7638  " index '%-.200s'",
7639  index->name);
7640  continue;
7641  }
7642 
7643  prebuilt->sql_stat_start = TRUE;
7644  prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE;
7645  prebuilt->n_template = 0;
7647 
7648  dtuple_set_n_fields(prebuilt->search_tuple, 0);
7649 
7650  prebuilt->select_lock_type = LOCK_NONE;
7651 
 /* Scan the index; n_rows receives the entry count. */
7652  if (!row_check_index_for_mysql(prebuilt, index, &n_rows)) {
7653  push_warning_printf(session, DRIZZLE_ERROR::WARN_LEVEL_WARN,
7654  ER_NOT_KEYFILE,
7655  "InnoDB: The B-tree of"
7656  " index '%-.200s' is corrupted.",
7657  index->name);
7658  is_ok = FALSE;
7659  }
7660 
 /* Abandon the remaining indexes if the session was killed. */
7661  if (user_session->getKilled()) {
7662  break;
7663  }
7664 
7665 #if 0
7666  fprintf(stderr, "%lu entries in index %s\n", n_rows,
7667  index->name);
7668 #endif
7669 
 /* The first index sets the reference count; every later index
 must contain the same number of entries. */
7670  if (index == dict_table_get_first_index(prebuilt->table)) {
7671  n_rows_in_table = n_rows;
7672  } else if (n_rows != n_rows_in_table) {
7673  push_warning_printf(session, DRIZZLE_ERROR::WARN_LEVEL_WARN,
7674  ER_NOT_KEYFILE,
7675  "InnoDB: Index '%-.200s'"
7676  " contains %lu entries,"
7677  " should be %lu.",
7678  index->name,
7679  (ulong) n_rows,
7680  (ulong) n_rows_in_table);
7681  is_ok = FALSE;
7682  }
7683  }
7684 
7685  /* Restore the original isolation level */
7686  prebuilt->trx->isolation_level = old_isolation_level;
7687 
7688  /* We validate also the whole adaptive hash index for all tables
7689  at every CHECK TABLE */
7690 
7691  if (!btr_search_validate()) {
7692  push_warning(session, DRIZZLE_ERROR::WARN_LEVEL_WARN,
7693  ER_NOT_KEYFILE,
7694  "InnoDB: The adaptive hash index is corrupted.");
7695  is_ok = FALSE;
7696  }
7697 
7698  /* Restore the fatal lock wait timeout after CHECK TABLE. */
7699  mutex_enter(&kernel_mutex);
7700  srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */
7701  mutex_exit(&kernel_mutex);
7702 
7703  prebuilt->trx->op_info = "";
 /* A killed session additionally raises ER_QUERY_INTERRUPTED; the return
 value below still reflects only is_ok. */
7704  if (user_session->getKilled()) {
7705  my_error(ER_QUERY_INTERRUPTED, MYF(0));
7706  }
7707 
7708  return(is_ok ? HA_ADMIN_OK : HA_ADMIN_CORRUPT);
7709 }
7710 
7711 /*************************************************************/
/* Builds an extended table comment: the given comment, then "; " (only when
   the comment is non-empty), then "InnoDB free: <n> kB" followed by whatever
   dict_print_info_on_foreign_keys() prints. The generated text is staged in
   srv_dict_tmpfile under srv_dict_tmpfile_mutex and the total is capped so
   that the result stays within 64000 bytes.
   @param comment  existing table comment
   @return a buffer obtained from malloc() holding the combined text, or the
           `comment` pointer itself when the comment is longer than
           64000 - 3 bytes or the allocation fails. Callers must compare the
           result with `comment` before freeing it.
   NOTE(review): the line carrying the function name is not visible here;
   the name is assumed from the op_info text "returning table comment". */
7716 UNIV_INTERN
7717 char*
7719 /*==============================*/
7720  const char* comment)
7721 {
7722  uint length = (uint) strlen(comment);
7723  char* str;
7724  long flen;
7725 
7726  /* We do not know if MySQL can call this function before calling
7727  external_lock(). To be safe, update the session of the current table
7728  handle. */
7729 
7730  if (length > 64000 - 3) {
7731  return((char*)comment); /* string too long */
7732  }
7733 
7734  update_session(getTable()->in_use);
7735 
7736  prebuilt->trx->op_info = "returning table comment";
7737 
7738  /* In case MySQL calls this in the middle of a SELECT query, release
7739  possible adaptive hash latch to avoid deadlocks of threads */
7740 
7742  str = NULL;
7743 
7744  /* output the data to a temporary file */
7745 
7746  mutex_enter(&srv_dict_tmpfile_mutex);
7747  rewind(srv_dict_tmpfile);
7748 
7749  fprintf(srv_dict_tmpfile, "InnoDB free: %llu kB",
7750  fsp_get_available_space_in_free_extents(
7751  prebuilt->table->space));
7752 
7753  dict_print_info_on_foreign_keys(FALSE, srv_dict_tmpfile,
7754  prebuilt->trx, prebuilt->table);
 /* The file position after writing is the length of the generated text;
 it is truncated so comment + "; " + text + NUL fits in 64000 bytes. */
7755  flen = ftell(srv_dict_tmpfile);
7756  if (flen < 0) {
7757  flen = 0;
7758  } else if (length + flen + 3 > 64000) {
7759  flen = 64000 - 3 - length;
7760  }
7761 
7762  /* allocate buffer for the full string, and
7763  read the contents of the temporary file */
7764 
 /* +3 leaves room for the "; " separator and the terminating NUL. */
7765  str = (char*) malloc(length + flen + 3);
7766 
7767  if (str) {
7768  char* pos = str + length;
7769  if (length) {
7770  memcpy(str, comment, length);
7771  *pos++ = ';';
7772  *pos++ = ' ';
7773  }
7774  rewind(srv_dict_tmpfile);
 /* Use the number of bytes actually read to place the terminator. */
7775  flen = (uint) fread(pos, 1, flen, srv_dict_tmpfile);
7776  pos[flen] = 0;
7777  }
7778 
7779  mutex_exit(&srv_dict_tmpfile_mutex);
7780 
7781  prebuilt->trx->op_info = "";
7782 
7783  return(str ? str : (char*) comment);
7784 }
7785 
7786 /*******************************************************************/
/* Returns the text that dict_print_info_on_foreign_keys() produces for
   prebuilt->table when called with TRUE as its first argument. The text is
   staged in srv_dict_tmpfile under srv_dict_tmpfile_mutex and then copied
   into a NUL-terminated buffer.
   @return a buffer obtained from malloc(), or NULL when the allocation
           fails; the caller owns the buffer
   NOTE(review): the line carrying the function name is not visible here;
   the name is assumed from the op_info text and the matching free routine. */
7791 UNIV_INTERN
7792 char*
7794 /*==========================================*/
7795 {
7796  char* str = 0;
7797  long flen;
7798 
7799  ut_a(prebuilt != NULL);
7800 
7801  /* We do not know if MySQL can call this function before calling
7802  external_lock(). To be safe, update the session of the current table
7803  handle. */
7804 
7805  update_session(getTable()->in_use);
7806 
7807  prebuilt->trx->op_info = "getting info on foreign keys";
7808 
7809  /* In case MySQL calls this in the middle of a SELECT query,
7810  release possible adaptive hash latch to avoid
7811  deadlocks of threads */
7812 
7814 
7815  mutex_enter(&srv_dict_tmpfile_mutex);
7816  rewind(srv_dict_tmpfile);
7817 
7818  /* output the data to a temporary file */
7819  dict_print_info_on_foreign_keys(TRUE, srv_dict_tmpfile,
7820  prebuilt->trx, prebuilt->table);
7821  prebuilt->trx->op_info = "";
7822 
 /* The file position after writing is the length of the text. */
7823  flen = ftell(srv_dict_tmpfile);
7824  if (flen < 0) {
7825  flen = 0;
7826  }
7827 
7828  /* allocate buffer for the string, and
7829  read the contents of the temporary file */
7830 
7831  str = (char*) malloc(flen + 1);
7832 
7833  if (str) {
7834  rewind(srv_dict_tmpfile);
 /* Terminate at the number of bytes actually read. */
7835  flen = (uint) fread(str, 1, flen, srv_dict_tmpfile);
7836  str[flen] = 0;
7837  }
7838 
7839  mutex_exit(&srv_dict_tmpfile_mutex);
7840 
7841  return(str);
7842 }
7843 
7844 
/* Appends one ForeignKeyInfo entry to f_key_list for every foreign key
   constraint visited by the loop below, holding dict_sys->mutex while the
   list is walked. Each entry carries the constraint id (without its leading
   "<db>/" part), the referenced database and table names, the column name
   lists, the ON DELETE / ON UPDATE actions and, when available, the name of
   the referenced index. All strings are created with
   session->make_lex_string().
   @return always 0
   NOTE(review): the signature line and the statement that initialises
   `foreign` are not visible here, so the declarations of `session`,
   `f_key_list` and `foreign` cannot be confirmed from this text. */
7845 UNIV_INTERN
7846 int
7848 {
7849  ut_a(prebuilt != NULL);
7850  update_session(getTable()->in_use);
7851  prebuilt->trx->op_info = "getting list of foreign keys";
7853  mutex_enter(&(dict_sys->mutex));
7855 
7856  while (foreign != NULL)
7857  {
7858  uint ulen;
7859  char uname[NAME_LEN + 1]; /* Unencoded name */
7860  char db_name[NAME_LEN + 1];
7861  const char *tmp_buff;
7862 
 /* Skip everything up to and including the first '/' of the id.
 NOTE(review): the scan assumes the id contains a '/'. */
7864  tmp_buff = foreign->id;
7865  uint i = 0;
7866  while (tmp_buff[i] != '/')
7867  i++;
7868  tmp_buff += i + 1;
7869  lex_string_t *tmp_foreign_id = session->make_lex_string(NULL, str_ref(tmp_buff));
7870 
7871  /* Database name */
7872  tmp_buff = foreign->referenced_table_name;
7873 
 /* Copy the part before the '/' into db_name.
 NOTE(review): the copy is not bounded by sizeof(db_name). */
7874  i= 0;
7875  while (tmp_buff[i] != '/')
7876  {
7877  db_name[i]= tmp_buff[i];
7878  i++;
7879  }
7880  db_name[i] = 0;
7881  ulen= identifier::Table::filename_to_tablename(db_name, uname, sizeof(uname));
7882  lex_string_t *tmp_referenced_db = session->make_lex_string(NULL, str_ref(uname, ulen));
7883 
7884  /* Table name */
7885  tmp_buff += i + 1;
7886  ulen= identifier::Table::filename_to_tablename(tmp_buff, uname, sizeof(uname));
7887  lex_string_t *tmp_referenced_table = session->make_lex_string(NULL, str_ref(uname, ulen));
7888 
7890  List<lex_string_t> tmp_foreign_fields;
7891  List<lex_string_t> tmp_referenced_fields;
 /* The exit test comes after the body, so column 0 is always pushed,
 whatever n_fields is. */
7892  for (i= 0;;)
7893  {
7894  tmp_foreign_fields.push_back(session->make_lex_string(NULL, str_ref(foreign->foreign_col_names[i])));
7895  tmp_referenced_fields.push_back(session->make_lex_string(NULL, str_ref(foreign->referenced_col_names[i])));
7896  if (++i >= foreign->n_fields)
7897  break;
7898  }
7899 
 /* ON DELETE action; RESTRICT when no delete flag is set. */
7900  if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE)
7901  tmp_buff= "CASCADE";
7902  else if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL)
7903  tmp_buff= "SET NULL";
7904  else if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION)
7905  tmp_buff= "NO ACTION";
7906  else
7907  tmp_buff= "RESTRICT";
7908  lex_string_t *tmp_delete_method = session->make_lex_string(NULL, str_ref(tmp_buff));
7909 
 /* ON UPDATE action; RESTRICT when no update flag is set. */
7910  if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE)
7911  tmp_buff= "CASCADE";
7912  else if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL)
7913  tmp_buff= "SET NULL";
7914  else if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION)
7915  tmp_buff= "NO ACTION";
7916  else
7917  tmp_buff= "RESTRICT";
7918  lex_string_t *tmp_update_method = session->make_lex_string(NULL, str_ref(tmp_buff));
7919 
 /* The referenced key name is optional and may stay NULL. */
7920  lex_string_t *tmp_referenced_key_name = foreign->referenced_index && foreign->referenced_index->name
7921  ? session->make_lex_string(NULL, str_ref(foreign->referenced_index->name))
7922  : NULL;
7923 
7924  ForeignKeyInfo f_key_info(
7925  tmp_foreign_id, tmp_referenced_db, tmp_referenced_table,
7926  tmp_update_method, tmp_delete_method, tmp_referenced_key_name,
7927  tmp_foreign_fields, tmp_referenced_fields);
7928 
 /* The entry stored in the list is a byte-wise copy of the local object
 made with session->mem.memdup(). */
7929  f_key_list->push_back((ForeignKeyInfo*)session->mem.memdup(&f_key_info, sizeof(ForeignKeyInfo)));
7930  foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
7931  }
7932  mutex_exit(&(dict_sys->mutex));
7933  prebuilt->trx->op_info = "";
7934 
7935  return 0;
7936 }
7937 
7938 /*****************************************************************/
/** Inspects the table's foreign key constraints while holding the data
dictionary lock and reports the outcome in can_switch.
NOTE(review): this listing is missing the line with the function name as
well as listing lines 7956-7957 and 7959, which presumably compute
can_switch and release the dictionary lock again -- confirm against the
original source before relying on this block.
@return the value stored in can_switch */
7943 UNIV_INTERN
7944 bool
7946 /*=================================*/
7947 {
7948  bool can_switch;
7949 
  /* The handle must be bound to the transaction of the session that is
  currently using the table. */
7950  ut_a(prebuilt->trx == session_to_trx(getTable()->in_use));
7951 
7952  prebuilt->trx->op_info =
7953  "determining if there are foreign key constraints";
7954  row_mysql_lock_data_dictionary(prebuilt->trx);
7955 
7958 
7960  prebuilt->trx->op_info = "";
7961 
7962  return(can_switch);
7963 }
7964 
7965 /*******************************************************************/
/** Reports whether prebuilt->table is referenced by a foreign key, as
determined by dict_table_is_referenced_by_foreign_key().
NOTE(review): the line carrying the function name is missing from this
listing.
@return 1 if the table is referenced by a foreign key, 0 otherwise */
7971 UNIV_INTERN
7972 uint
7974 /*========================================*/
7975 {
7976  if (dict_table_is_referenced_by_foreign_key(prebuilt->table)) {
7977 
7978  return(1);
7979  }
7980 
7981  return(0);
7982 }
7983 
7984 /*******************************************************************/
/** Releases a heap-allocated string by passing it to free().
NOTE(review): the line carrying the function name is missing from this
listing; which allocation this is paired with cannot be seen here.
@param str buffer to release */
7987 UNIV_INTERN
7988 void
7990 /*======================================*/
7991  char* str)
7992 {
7993  free(str);
7994 }
7995 
7996 /*******************************************************************/
/** Acts on a hint passed down by the SQL layer. The key-read hints toggle
prebuilt->read_just_key; the duplicate-handling hints set or clear the
TRX_DUP_IGNORE / TRX_DUP_REPLACE bits on the transaction of the session
currently using the table. Any other operation is ignored.
NOTE(review): this listing is missing the function-name line and listing
lines 8004, 8013, 8017 and 8026, so the bodies of the HA_EXTRA_FLUSH,
HA_EXTRA_RESET_STATE and HA_EXTRA_KEYREAD_PRESERVE_FIELDS cases are not
visible here -- confirm against the original source.
@param operation the hint to act on
@return always 0 */
7999 UNIV_INTERN
8000 int
8002 /*===============*/
8003  enum ha_extra_function operation)
8005 {
8006  /* Warning: since it is not sure that MySQL calls external_lock
8007  before calling this function, the trx field in prebuilt can be
8008  obsolete! */
8009 
8010  switch (operation) {
8011  case HA_EXTRA_FLUSH:
8012  if (prebuilt->blob_heap) {
8014  }
8015  break;
8016  case HA_EXTRA_RESET_STATE:
8018  break;
8019  case HA_EXTRA_NO_KEYREAD:
8020  prebuilt->read_just_key = 0;
8021  break;
8022  case HA_EXTRA_KEYREAD:
8023  prebuilt->read_just_key = 1;
8024  break;
8025  case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
8027  break;
8028 
8029  /* IMPORTANT: prebuilt->trx can be obsolete in
8030  this method, because it is not sure that MySQL
8031  calls external_lock before this method with the
8032  parameters below. We must not invoke update_session()
8033  either, because the calling threads may change.
8034  CAREFUL HERE, OR MEMORY CORRUPTION MAY OCCUR! */
  /* For that reason the cases below look the transaction up through
  the session instead of going through prebuilt->trx. */
8035  case HA_EXTRA_IGNORE_DUP_KEY:
8036  session_to_trx(getTable()->in_use)->duplicates |= TRX_DUP_IGNORE;
8037  break;
8038  case HA_EXTRA_WRITE_CAN_REPLACE:
8039  session_to_trx(getTable()->in_use)->duplicates |= TRX_DUP_REPLACE;
8040  break;
8041  case HA_EXTRA_WRITE_CANNOT_REPLACE:
8042  session_to_trx(getTable()->in_use)->duplicates &= ~TRX_DUP_REPLACE;
8043  break;
8044  case HA_EXTRA_NO_IGNORE_DUP_KEY:
8045  session_to_trx(getTable()->in_use)->duplicates &=
8046  ~(TRX_DUP_IGNORE | TRX_DUP_REPLACE);
8047  break;
8048  default:/* Do nothing */
8049  ;
8050  }
8051 
8052  return(0);
8053 }
8054 
/** Resets handler state; according to the comments below this includes a
statement-level counter.
NOTE(review): this listing is missing the function-name line and listing
lines 8060, 8063 and 8069, i.e. every statement that performs the actual
reset (the body of the blob_heap check included) -- confirm against the
original source.
@return always 0 */
8055 UNIV_INTERN
8056 int
8058 {
8059  if (prebuilt->blob_heap) {
8061  }
8062 
8064 
8065  /* TODO: This should really be reset in reset_template() but for now
8066  it's safer to do it explicitly here. */
8067 
8068  /* This is a statement level counter. */
8070 
8071  return(0);
8072 }
8073 
8074 /******************************************************************/
/** Translates a server-level ISO_* isolation constant into the InnoDB
TRX_ISO_* constant of the same name.
NOTE(review): the line carrying the function name is missing from this
listing; the signature matches the innobase_map_isolation_level() call
made elsewhere in this file -- confirm.
@param iso isolation level to translate
@return the matching TRX_ISO_* value; any other input trips ut_a(0) */
8077 static inline
8078 ulint
8080 /*=========================*/
8081  enum_tx_isolation iso)
8082 {
8083  switch(iso) {
8084  case ISO_REPEATABLE_READ: return(TRX_ISO_REPEATABLE_READ);
8085  case ISO_READ_COMMITTED: return(TRX_ISO_READ_COMMITTED);
8086  case ISO_SERIALIZABLE: return(TRX_ISO_SERIALIZABLE);
8087  case ISO_READ_UNCOMMITTED: return(TRX_ISO_READ_UNCOMMITTED);
8088  default: ut_a(0); return(0);
8089  }
8090 }
8091 
8092 /******************************************************************/
/** Called when the SQL layer takes or releases a table lock for a
statement. Binds the handle to the session via update_session(), marks the
start of a new SQL statement in prebuilt, picks the row lock mode for the
statement (LOCK_X for write locks, LOCK_S for otherwise non-locking reads
in a SERIALIZABLE multi-statement transaction) and keeps
prebuilt->mysql_has_locked and trx->mysql_n_tables_locked up to date.
NOTE(review): this listing is missing the function-name line and listing
lines 8109, 8111, 8118 and 8138 -- confirm the omitted statements against
the original source.
@param session session taking or releasing the lock
@param lock_type F_WRLCK, F_UNLCK, or another value for a read lock
@return always 0 */
8097 UNIV_INTERN
8098 int
8100 /*=======================*/
8101  Session* session,
8102  int lock_type)
8103 {
8104  update_session(session);
8105 
8106  trx_t *trx= prebuilt->trx;
8107 
8108  prebuilt->sql_stat_start = TRUE;
8110 
8112 
8113  if (lock_type == F_WRLCK) {
8114 
8115  /* If this is a SELECT, then it is in UPDATE TABLE ...
8116  or SELECT ... FOR UPDATE */
8117  prebuilt->select_lock_type = LOCK_X;
8119  }
8120 
8121  if (lock_type != F_UNLCK) {
8122  /* MySQL is setting a new table lock */
8123 
8124  if (trx->isolation_level == TRX_ISO_SERIALIZABLE
8125  && prebuilt->select_lock_type == LOCK_NONE
8126  && session_test_options(session,
8127  OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
8128 
8129  /* To get serializable execution, we let InnoDB
8130  conceptually add 'LOCK IN SHARE MODE' to all SELECTs
8131  which otherwise would have been consistent reads. An
8132  exception is consistent reads in the AUTOCOMMIT=1 mode:
8133  we know that they are read-only transactions, and they
8134  can be serialized also if performed as consistent
8135  reads. */
8136 
8137  prebuilt->select_lock_type = LOCK_S;
8139  }
8140 
8141  /* Starting from 4.1.9, no InnoDB table lock is taken in LOCK
8142  TABLES if AUTOCOMMIT=1. It does not make much sense to acquire
8143  an InnoDB table lock if it is released immediately at the end
8144  of LOCK TABLES, and InnoDB's table locks in that case cause
8145  VERY easily deadlocks.
8146 
8147  We do not set InnoDB table locks if user has not explicitly
8148  requested a table lock. Note that session_in_lock_tables(session)
8149  can hold in some cases, e.g., at the start of a stored
8150  procedure call (SQLCOM_CALL). */
8151 
  /* Only statements that take row locks count towards the number
  of locked tables. */
8152  if (prebuilt->select_lock_type != LOCK_NONE) {
8153  trx->mysql_n_tables_locked++;
8154  }
8155 
8156  prebuilt->mysql_has_locked = TRUE;
8157 
8158  return(0);
8159  }
8160 
8161  /* MySQL is releasing a table lock */
8162  prebuilt->mysql_has_locked = FALSE;
8163  trx->mysql_n_tables_locked= 0;
8164 
8165  return(0);
8166 }
8167 
8168 /************************************************************************/
/** Writes the InnoDB monitor output to srv_monitor_file, copies at most
MAX_STATUS_SIZE bytes of it into a malloc()ed buffer and hands that buffer
to stat_print. When the output does not fit, either the beginning of the
active transaction list is replaced by truncated_msg or the end of the
output is cut off.
NOTE(review): this listing is missing the function-name line (the
signature matches the innodb_show_status() call made elsewhere in this
file) and listing line 8189, so what is done with trx after the lookup is
not visible here -- confirm against the original source.
@param engine must be innodb_engine_ptr (asserted)
@param session session the status is printed for
@param stat_print callback that receives the status text
@return TRUE if the buffer could not be allocated, FALSE otherwise; the
result of stat_print is not inspected */
8171 static
8172 bool
8174 /*===============*/
8175  plugin::StorageEngine* engine,
8176  Session* session,
8177  stat_print_fn *stat_print)
8178 {
8179  trx_t* trx;
8180  static const char truncated_msg[] = "... truncated...\n";
8181  const long MAX_STATUS_SIZE = 1048576;
8182  ulint trx_list_start = ULINT_UNDEFINED;
8183  ulint trx_list_end = ULINT_UNDEFINED;
8184 
8185  assert(engine == innodb_engine_ptr);
8186 
8187  trx = check_trx_exists(session);
8188 
8190 
8191  /* We let the InnoDB Monitor to output at most MAX_STATUS_SIZE
8192  bytes of text. */
8193 
8194  long flen, usable_len;
8195  char* str;
8196 
  /* The monitor file is shared, so it is rewritten and read back
  under srv_monitor_file_mutex. */
8197  mutex_enter(&srv_monitor_file_mutex);
8198  rewind(srv_monitor_file);
8199  srv_printf_innodb_monitor(srv_monitor_file, FALSE,
8200  &trx_list_start, &trx_list_end);
8201  flen = ftell(srv_monitor_file);
8202  os_file_set_eof(srv_monitor_file);
8203 
  /* ftell() reports failure with a negative value; treat that as an
  empty report. */
8204  if (flen < 0) {
8205  flen = 0;
8206  }
8207 
8208  if (flen > MAX_STATUS_SIZE) {
8209  usable_len = MAX_STATUS_SIZE;
8210  srv_truncated_status_writes++;
8211  } else {
8212  usable_len = flen;
8213  }
8214 
8215  /* allocate buffer for the string, and
8216  read the contents of the temporary file */
8217 
8218  if (!(str = (char*) malloc(usable_len + 1))) {
8219  mutex_exit(&srv_monitor_file_mutex);
8220  return(TRUE);
8221  }
8222 
8223  rewind(srv_monitor_file);
8224  if (flen < MAX_STATUS_SIZE) {
8225  /* Display the entire output. */
8226  flen = (long) fread(str, 1, flen, srv_monitor_file);
8227  } else if (trx_list_end < (ulint) flen
8228  && trx_list_start < trx_list_end
8229  && trx_list_start + (flen - trx_list_end)
8230  < MAX_STATUS_SIZE - sizeof truncated_msg - 1) {
8231  /* Omit the beginning of the list of active transactions. */
  /* Layout: everything before the list, then truncated_msg, then
  as much of the end of the file as still fits. */
8232  long len = (long) fread(str, 1, trx_list_start, srv_monitor_file);
8233  memcpy(str + len, truncated_msg, sizeof truncated_msg - 1);
8234  len += sizeof truncated_msg - 1;
8235  usable_len = (MAX_STATUS_SIZE - 1) - len;
8236  fseek(srv_monitor_file, flen - usable_len, SEEK_SET);
8237  len += (long) fread(str + len, 1, usable_len, srv_monitor_file);
8238  flen = len;
8239  } else {
8240  /* Omit the end of the output. */
8241  flen = (long) fread(str, 1, MAX_STATUS_SIZE - 1, srv_monitor_file);
8242  }
8243 
8244  mutex_exit(&srv_monitor_file_mutex);
8245 
8246  stat_print(session, innobase_engine_name, strlen(innobase_engine_name),
8247  STRING_WITH_LEN(""), str, flen);
8248 
8249  free(str);
8250 
8251  return(FALSE);
8252 }
8253 
8254 /************************************************************************/
/** Emits wait statistics for InnoDB mutexes and rw-locks through
stat_print, one row per object. Objects that never had an OS wait are
skipped. Buffer pool block mutexes and block rw-locks are not printed
individually; their OS waits are summed and reported as one "combined" row
each. In UNIV_DEBUG builds mutexes get a more detailed row, and those with
mutex_type == 1 are aggregated into a final "rw_lock_mutexes" row.
NOTE(review): the line carrying the function name is missing from this
listing; the signature matches the innodb_mutex_show_status() call made
elsewhere in this file -- confirm.
@param engine must be innodb_engine_ptr (asserted)
@param session session the rows are printed for
@param stat_print callback that receives each row
@return 1 as soon as stat_print reports failure, FALSE otherwise */
8257 static
8258 bool
8260 /*=====================*/
8261  plugin::StorageEngine* engine,
8262  Session* session,
8264  stat_print_fn* stat_print)
8266 {
8267  char buf1[IO_SIZE], buf2[IO_SIZE];
8268  mutex_t* mutex;
8269  rw_lock_t* lock;
8270  ulint block_mutex_oswait_count = 0;
8271  ulint block_lock_oswait_count = 0;
8272  mutex_t* block_mutex = NULL;
8273  rw_lock_t* block_lock = NULL;
8274 #ifdef UNIV_DEBUG
8275  ulint rw_lock_count= 0;
8276  ulint rw_lock_count_spin_loop= 0;
8277  ulint rw_lock_count_spin_rounds= 0;
8278  ulint rw_lock_count_os_wait= 0;
8279  ulint rw_lock_count_os_yield= 0;
8280  uint64_t rw_lock_wait_time= 0;
8281 #endif /* UNIV_DEBUG */
8282  uint engine_name_len= strlen(innobase_engine_name), buf1len, buf2len;
8283  assert(engine == innodb_engine_ptr);
8284 
  /* Pass 1: walk the global mutex list under its list mutex. */
8285  mutex_enter(&mutex_list_mutex);
8286 
8287  for (mutex = UT_LIST_GET_FIRST(mutex_list); mutex != NULL;
8288  mutex = UT_LIST_GET_NEXT(list, mutex)) {
8289  if (mutex->count_os_wait == 0) {
8290  continue;
8291  }
8292 
8293 
  /* Block mutexes are only counted here; the last one seen is
  remembered so its file/line can label the combined row. */
8294  if (buf_pool_is_block_mutex(mutex)) {
8295  block_mutex = mutex;
8296  block_mutex_oswait_count += mutex->count_os_wait;
8297  continue;
8298  }
8299 #ifdef UNIV_DEBUG
8300  if (mutex->mutex_type != 1) {
8301  if (mutex->count_using > 0) {
8302  buf1len= my_snprintf(buf1, sizeof(buf1),
8303  "%s:%s",
8304  mutex->cmutex_name, mutex->cfile_name);
8305  buf2len= my_snprintf(buf2, sizeof(buf2),
8306  "count=%lu, spin_waits=%lu,"
8307  " spin_rounds=%lu, "
8308  "os_waits=%lu, os_yields=%lu,"
8309  " os_wait_times=%lu",
8310  mutex->count_using,
8311  mutex->count_spin_loop,
8312  mutex->count_spin_rounds,
8313  mutex->count_os_wait,
8314  mutex->count_os_yield,
8315  (ulong) (mutex->lspent_time/1000));
8316 
8317  if (stat_print(session, innobase_engine_name,
8318  engine_name_len, buf1, buf1len,
8319  buf2, buf2len)) {
8320  mutex_exit(&mutex_list_mutex);
8321  return(1);
8322  }
8323  }
8324  } else {
8325  rw_lock_count += mutex->count_using;
8326  rw_lock_count_spin_loop += mutex->count_spin_loop;
8327  rw_lock_count_spin_rounds += mutex->count_spin_rounds;
8328  rw_lock_count_os_wait += mutex->count_os_wait;
8329  rw_lock_count_os_yield += mutex->count_os_yield;
8330  rw_lock_wait_time += mutex->lspent_time;
8331  }
8332 #else /* UNIV_DEBUG */
8333  buf1len= snprintf(buf1, sizeof(buf1), "%s:%lu",
8334  mutex->cfile_name, (ulong) mutex->cline);
8335  buf2len= snprintf(buf2, sizeof(buf2), "os_waits=%lu",
8336  (ulong) mutex->count_os_wait);
8337 
8338  if (stat_print(session, innobase_engine_name,
8339  engine_name_len, buf1, buf1len,
8340  buf2, buf2len)) {
8341  mutex_exit(&mutex_list_mutex);
8342  return(1);
8343  }
8344 #endif /* UNIV_DEBUG */
8345  }
8346 
8347  if (block_mutex) {
8348  buf1len = snprintf(buf1, sizeof buf1,
8349  "combined %s:%lu",
8350  block_mutex->cfile_name,
8351  (ulong) block_mutex->cline);
8352  buf2len = snprintf(buf2, sizeof buf2,
8353  "os_waits=%lu",
8354  (ulong) block_mutex_oswait_count);
8355 
8356  if (stat_print(session, innobase_engine_name,
8357  strlen(innobase_engine_name), buf1, buf1len,
8358  buf2, buf2len)) {
8359  mutex_exit(&mutex_list_mutex);
8360  return(1);
8361  }
8362  }
8363 
8364  mutex_exit(&mutex_list_mutex);
8365 
  /* Pass 2: the same treatment for the global rw-lock list. */
8366  mutex_enter(&rw_lock_list_mutex);
8367 
8368  for (lock = UT_LIST_GET_FIRST(rw_lock_list); lock != NULL;
8369  lock = UT_LIST_GET_NEXT(list, lock)) {
8370  if (lock->count_os_wait == 0) {
8371  continue;
8372  }
8373 
8374  if (buf_pool_is_block_lock(lock)) {
8375  block_lock = lock;
8376  block_lock_oswait_count += lock->count_os_wait;
8377  continue;
8378  }
8379 
8380  buf1len = snprintf(buf1, sizeof buf1, "%s:%lu",
8381  lock->cfile_name, (ulong) lock->cline);
8382  buf2len = snprintf(buf2, sizeof buf2, "os_waits=%lu",
8383  (ulong) lock->count_os_wait);
8384 
8385  if (stat_print(session, innobase_engine_name,
8386  strlen(innobase_engine_name), buf1, buf1len,
8387  buf2, buf2len)) {
8388  mutex_exit(&rw_lock_list_mutex);
8389  return(1);
8390  }
8391  }
8392 
8393  if (block_lock) {
8394  buf1len = snprintf(buf1, sizeof buf1,
8395  "combined %s:%lu",
8396  block_lock->cfile_name,
8397  (ulong) block_lock->cline);
8398  buf2len = snprintf(buf2, sizeof buf2,
8399  "os_waits=%lu",
8400  (ulong) block_lock_oswait_count);
8401 
8402  if (stat_print(session, innobase_engine_name,
8403  strlen(innobase_engine_name), buf1, buf1len,
8404  buf2, buf2len)) {
8405  mutex_exit(&rw_lock_list_mutex);
8406  return(1);
8407  }
8408  }
8409 
8410  mutex_exit(&rw_lock_list_mutex);
8411 
8412 #ifdef UNIV_DEBUG
  /* Totals gathered in pass 1 from mutexes with mutex_type == 1. */
8413  buf2len = snprintf(buf2, sizeof buf2,
8414  "count=%lu, spin_waits=%lu, spin_rounds=%lu, "
8415  "os_waits=%lu, os_yields=%lu, os_wait_times=%lu",
8416  (ulong) rw_lock_count,
8417  (ulong) rw_lock_count_spin_loop,
8418  (ulong) rw_lock_count_spin_rounds,
8419  (ulong) rw_lock_count_os_wait,
8420  (ulong) rw_lock_count_os_yield,
8421  (ulong) (rw_lock_wait_time / 1000));
8422 
8423  if (stat_print(session, innobase_engine_name, engine_name_len,
8424  STRING_WITH_LEN("rw_lock_mutexes"), buf2, buf2len)) {
8425  return(1);
8426  }
8427 #endif /* UNIV_DEBUG */
8428 
8429  return(FALSE);
8430 }
8431 
/** Dispatches a status request to the matching printer:
HA_ENGINE_STATUS goes to innodb_show_status() and HA_ENGINE_MUTEX to
innodb_mutex_show_status().
@param session session the status is printed for
@param stat_print callback that receives the output
@param stat_type which kind of status is requested
@return the printer's result, or FALSE for any other stat_type */
8432 bool InnobaseEngine::show_status(Session* session,
8433  stat_print_fn* stat_print,
8434  enum ha_stat_type stat_type)
8435 {
8436  assert(this == innodb_engine_ptr);
8437 
8438  switch (stat_type) {
8439  case HA_ENGINE_STATUS:
8440  return innodb_show_status(this, session, stat_print);
8441  case HA_ENGINE_MUTEX:
8442  return innodb_mutex_show_status(this, session, stat_print);
8443  default:
8444  return(FALSE);
8445  }
8446 }
8447 
8448 /************************************************************************/
/** Returns the INNOBASE_SHARE registered for table_name in
innobase_open_tables, creating and inserting a new one when none exists,
and increments its use count. The whole operation runs under
innobase_share_mutex.
@param table_name key the share is looked up and stored under
@return the share, with use_count already incremented */
8453 static INNOBASE_SHARE* get_share(const char* table_name)
8454 {
8455  INNOBASE_SHARE *share;
8456  boost::mutex::scoped_lock scopedLock(innobase_share_mutex);
8457 
8458  ulint fold = ut_fold_string(table_name);
8459 
8460  HASH_SEARCH(table_name_hash, innobase_open_tables, fold,
8461  INNOBASE_SHARE*, share,
8462  ut_ad(share->use_count > 0),
8463  !strcmp(share->table_name, table_name));
8464 
8465  if (!share) {
8466  /* TODO: invoke HASH_MIGRATE if innobase_open_tables
8467  grows too big */
8468 
8469  share= new INNOBASE_SHARE(table_name);
8470 
8471  HASH_INSERT(INNOBASE_SHARE, table_name_hash,
8472  innobase_open_tables, fold, share);
8473 
8474  thr_lock_init(&share->lock);
8475 
8476  /* Index translation table initialization */
8477  share->idx_trans_tbl.index_mapping = NULL;
8478  share->idx_trans_tbl.index_count = 0;
8479  share->idx_trans_tbl.array_size = 0;
8480  }
8481 
8482  share->use_count++;
8483 
8484  return(share);
8485 }
8486 
/** Drops one reference to a share. When the use count reaches zero the
share is removed from innobase_open_tables, its index translation table is
freed and the share itself is deleted. Runs under innobase_share_mutex.
@param share share to release; in UNIV_DEBUG builds it is verified to be
the entry registered under its own table name */
8487 static void free_share(INNOBASE_SHARE* share)
8488 {
8489  boost::mutex::scoped_lock scopedLock(innobase_share_mutex);
8490 
8491 #ifdef UNIV_DEBUG
8492  INNOBASE_SHARE* share2;
8493  ulint fold = ut_fold_string(share->table_name);
8494 
8495  HASH_SEARCH(table_name_hash, innobase_open_tables, fold,
8496  INNOBASE_SHARE*, share2,
8497  ut_ad(share->use_count > 0),
8498  !strcmp(share->table_name, share2->table_name));
8499 
8500  ut_a(share2 == share);
8501 #endif /* UNIV_DEBUG */
8502 
8503  if (!--share->use_count) {
  /* Last user gone. NOTE: in UNIV_DEBUG builds this declaration
  shadows the outer fold, which holds the same value. */
8504  ulint fold = ut_fold_string(share->table_name);
8505 
8506  HASH_DELETE(INNOBASE_SHARE, table_name_hash,
8507  innobase_open_tables, fold, share);
8508 
8509  /* Free any memory from index translation table */
8510  free(share->idx_trans_tbl.index_mapping);
8511 
8512  delete share;
8513 
8514  /* TODO: invoke HASH_MIGRATE if innobase_open_tables
8515  shrinks too much */
8516  }
8517 }
8518 
8519 /*****************************************************************/
/** Records the table-level lock request for this handle and decides which
row lock mode the coming statement will use. The row lock mode is written
to prebuilt->select_lock_type / stored_select_lock_type; the (possibly
downgraded) table lock type is stored in lock.type when the handle is
currently unlocked, and &lock is appended to the output array.
NOTE(review): this listing is missing the function-name line, the
parameter comment lines, listing line 8586 (the opening of the `if`
condition whose continuation starts with `||` below) and listing line
8615 -- confirm against the original source.
@param session session making the request; must be the current session
@param to array position where this handle's lock is stored
@param lock_type table lock type requested by the SQL layer
@return `to` advanced past the stored entry */
8528 UNIV_INTERN
8529 THR_LOCK_DATA**
8531 /*====================*/
8532  Session* session,
8533  THR_LOCK_DATA** to,
8538  enum thr_lock_type lock_type)
8541 {
8542  trx_t* trx;
8543 
8544  /* Note that trx in this function is NOT necessarily prebuilt->trx
8545  because we call update_session() later, in ::external_lock()! Failure to
8546  understand this caused a serious memory corruption bug in 5.1.11. */
8547 
8548  trx = check_trx_exists(session);
8549 
8550  assert(EQ_CURRENT_SESSION(session));
8551  const uint32_t sql_command = session->getSqlCommand();
8552 
8553  if (sql_command == SQLCOM_DROP_TABLE) {
8554 
8555  /* MySQL calls this function in DROP Table though this table
8556  handle may belong to another session that is running a query.
8557  Let us in that case skip any changes to the prebuilt struct. */
8558 
8559  } else if (lock_type == TL_READ_WITH_SHARED_LOCKS
8560  || lock_type == TL_READ_NO_INSERT
8561  || (lock_type != TL_IGNORE
8562  && sql_command != SQLCOM_SELECT)) {
8563 
8564  /* The OR cases above are in this order:
8565  1) MySQL is doing LOCK TABLES ... READ LOCAL, or we
8566  are processing a stored procedure or function, or
8567  2) (we do not know when TL_READ_HIGH_PRIORITY is used), or
8568  3) this is a SELECT ... IN SHARE MODE, or
8569  4) we are doing a complex SQL statement like
8570  INSERT INTO ... SELECT ... and the logical logging (MySQL
8571  binlog) requires the use of a locking read, or
8572  MySQL is doing LOCK TABLES ... READ.
8573  5) we let InnoDB do locking reads for all SQL statements that
8574  are not simple SELECTs; note that select_lock_type in this
8575  case may get strengthened in ::external_lock() to LOCK_X.
8576  Note that we MUST use a locking read in all data modifying
8577  SQL statements, because otherwise the execution would not be
8578  serializable, and also the results from the update could be
8579  unexpected if an obsolete consistent read view would be
8580  used. */
8581 
8582  ulint isolation_level;
8583 
8584  isolation_level = trx->isolation_level;
8585 
8587  || isolation_level <= TRX_ISO_READ_COMMITTED)
8588  && isolation_level != TRX_ISO_SERIALIZABLE
8589  && (lock_type == TL_READ || lock_type == TL_READ_NO_INSERT)
8590  && (sql_command == SQLCOM_INSERT_SELECT
8591  || sql_command == SQLCOM_REPLACE_SELECT
8592  || sql_command == SQLCOM_UPDATE
8593  || sql_command == SQLCOM_CREATE_TABLE
8594  || sql_command == SQLCOM_SET_OPTION)) {
8595 
8596  /* If we either have innobase_locks_unsafe_for_binlog
8597  option set or this session is using READ COMMITTED
8598  isolation level and isolation level of the transaction
8599  is not set to serializable and MySQL is doing
8600  INSERT INTO...SELECT or REPLACE INTO...SELECT
8601  or UPDATE ... = (SELECT ...) or CREATE ...
8602  SELECT... or SET ... = (SELECT ...) without
8603  FOR UPDATE or IN SHARE MODE in select,
8604  then we use consistent read for select. */
8605 
8606  prebuilt->select_lock_type = LOCK_NONE;
8607  prebuilt->stored_select_lock_type = LOCK_NONE;
8608  } else if (sql_command == SQLCOM_CHECKSUM) {
8609  /* Use consistent read for checksum table */
8610 
8611  prebuilt->select_lock_type = LOCK_NONE;
8612  prebuilt->stored_select_lock_type = LOCK_NONE;
8613  } else {
8614  prebuilt->select_lock_type = LOCK_S;
8616  }
8617 
8618  } else if (lock_type != TL_IGNORE) {
8619 
8620  /* We set possible LOCK_X value in external_lock, not yet
8621  here even if this would be SELECT ... FOR UPDATE */
8622 
8623  prebuilt->select_lock_type = LOCK_NONE;
8624  prebuilt->stored_select_lock_type = LOCK_NONE;
8625  }
8626 
  /* Only record a new table lock type while the handle is unlocked. */
8627  if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) {
8628 
8629  /* If we are not doing a LOCK TABLE, DISCARD/IMPORT
8630  TABLESPACE or TRUNCATE TABLE then allow multiple
8631  writers. Note that ALTER TABLE uses a TL_WRITE_ALLOW_READ
8632  < TL_WRITE_CONCURRENT_INSERT.
8633  */
8634 
8635  if ((lock_type >= TL_WRITE_CONCURRENT_INSERT
8636  && lock_type <= TL_WRITE)
8637  && ! session->doing_tablespace_operation()
8638  && sql_command != SQLCOM_TRUNCATE
8639  && sql_command != SQLCOM_CREATE_TABLE) {
8640 
8641  lock_type = TL_WRITE_ALLOW_WRITE;
8642  }
8643 
8644  /* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
8645  MySQL would use the lock TL_READ_NO_INSERT on t2, and that
8646  would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
8647  to t2. Convert the lock to a normal read lock to allow
8648  concurrent inserts to t2.
8649  */
8650 
8651  if (lock_type == TL_READ_NO_INSERT) {
8652 
8653  lock_type = TL_READ;
8654  }
8655 
8656  lock.type = lock_type;
8657  }
8658 
8659  *to++= &lock;
8660 
8661  return(to);
8662 }
8663 
8664 /*********************************************************************/
/** Acquires the table's AUTOINC lock and reads the current counter value.
On success the AUTOINC lock is still held when this returns and the caller
must release it with dict_table_autoinc_unlock(). If the counter reads 0
(it was never initialized), the lock is released here and the error is
reported, so the caller must NOT unlock again.
NOTE(review): the line carrying the function name is missing from this
listing; the signature matches the innobase_get_autoinc() call made
elsewhere in this file -- confirm.
@param value out: the counter value, or 0 on failure
@return DB_SUCCESS with the AUTOINC lock held, or DB_UNSUPPORTED with the
lock already released; the same code is stored in prebuilt->autoinc_error */
8669 UNIV_INTERN
8670 ulint
8672 /*==============================*/
8673  uint64_t* value)
8674 {
8675  *value = 0;
8676 
8677  dict_table_autoinc_lock(prebuilt->table);
8678  prebuilt->autoinc_error= DB_SUCCESS;
8679  /* Determine the first value of the interval */
8680  *value = dict_table_autoinc_read(prebuilt->table);
8681 
8682  /* It should have been initialized during open. */
8683  if (*value == 0) {
8684  prebuilt->autoinc_error = DB_UNSUPPORTED;
8685  dict_table_autoinc_unlock(prebuilt->table);
8686  }
8687 
  /* Report the recorded status rather than an unconditional DB_SUCCESS:
  on failure the lock has already been released above, and a caller that
  believed the call succeeded would go on to unlock it a second time. */
8688  return(prebuilt->autoinc_error);
8689 }
8690 
8691 /*******************************************************************/
/** Reads the table's current AUTOINC counter under the AUTOINC lock
without changing it. A value of 0 is logged as an error ("AUTOINC next
value generation is disabled") but is still returned.
NOTE(review): the line carrying the function name is missing from this
listing.
@return the counter value read from prebuilt->table */
8695 UNIV_INTERN
8696 uint64_t
8698 /*====================================*/
8699 {
8700  uint64_t auto_inc;
8701  dict_table_t* innodb_table;
8702 
8703  ut_a(prebuilt != NULL);
8704  ut_a(prebuilt->table != NULL);
8705 
8706  innodb_table = prebuilt->table;
8707 
8708  dict_table_autoinc_lock(innodb_table);
8709 
8710  auto_inc = dict_table_autoinc_read(innodb_table);
8711 
8712  if (auto_inc == 0) {
8713  ut_print_timestamp(stderr);
8714  errmsg_printf(error::ERROR, " InnoDB: AUTOINC next value generation is disabled for '%s'\n", innodb_table->name);
8715  }
8716 
8717  dict_table_autoinc_unlock(innodb_table);
8718 
8719  return(auto_inc);
8720 }
8721 
8722 /*********************************************************************/
/** Reserves an interval of AUTOINC values for the current statement.
Reads the table counter through innobase_get_autoinc(), derives the first
value and the number of reserved values, computes the end of the interval
with innobase_next_autoinc(), and remembers offset and increment in
prebuilt for later row writes. If reading the counter fails, *first_value
is set to the maximum uint64_t value and nothing else is touched.
NOTE(review): this listing is missing the function-name line and listing
line 8816 (the arguments of the dict_table_autoinc_update_if_greater()
call) -- confirm against the original source.
@param offset AUTOINC offset to apply
@param increment AUTOINC step to apply
@param nb_desired_values number of values the caller would like; 0 is
treated as 1 on the first call of a statement
@param first_value in/out: first value of the reserved interval
@param nb_reserved_values out: number of values reserved */
8729 UNIV_INTERN
8730 void
8732 /*============================*/
8733  uint64_t offset,
8734  uint64_t increment,
8735  uint64_t nb_desired_values,
8736  uint64_t *first_value,
8737  uint64_t *nb_reserved_values)
8738 {
8739  trx_t* trx;
8740  ulint error;
8741  uint64_t autoinc = 0;
8742 
8743  /* Prepare prebuilt->trx in the table handle */
8744  update_session(getTable()->in_use);
8745 
8746  error = innobase_get_autoinc(&autoinc);
8747 
8748  if (error != DB_SUCCESS) {
8749  *first_value = (~(uint64_t) 0);
8750  return;
8751  }
8752 
8753  /* This is a hack, since nb_desired_values seems to be accurate only
8754  for the first call to get_auto_increment() for multi-row INSERT and
8755  meaningless for other statements e.g, LOAD etc. Subsequent calls to
8756  this method for the same statement results in different values which
8757  don't make sense. Therefore we store the value the first time we are
8758  called and count down from that as rows are written (see doInsertRecord()).
8759  */
8760 
8761  trx = prebuilt->trx;
8762 
8763  /* Note: We can't rely on *first_value since some MySQL engines,
8764  in particular the partition engine, don't initialize it to 0 when
8765  invoking this method. So we are not sure if it's guaranteed to
8766  be 0 or not. */
8767 
8768  /* We need the upper limit of the col type to check for
8769  whether we update the table autoinc counter or not. */
8770  uint64_t col_max_value = innobase_get_int_col_max_value(getTable()->next_number_field);
8771 
8772  /* Called for the first time ? */
8773  if (trx->n_autoinc_rows == 0) {
8774 
8775  trx->n_autoinc_rows = (ulint) nb_desired_values;
8776 
8777  /* It's possible for nb_desired_values to be 0:
8778  e.g., INSERT INTO T1(C) SELECT C FROM T2; */
8779  if (nb_desired_values == 0) {
8780 
8781  trx->n_autoinc_rows = 1;
8782  }
8783 
8784  set_if_bigger(*first_value, autoinc);
8785  /* Not in the middle of a multi-row INSERT. */
8786  } else if (prebuilt->autoinc_last_value == 0) {
8787  set_if_bigger(*first_value, autoinc);
8788  /* Check for -ve values. */
8789  } else if (*first_value > col_max_value && trx->n_autoinc_rows > 0) {
8790  /* Set to next logical value. */
8791  ut_a(autoinc > trx->n_autoinc_rows);
8792  *first_value = (autoinc - trx->n_autoinc_rows) - 1;
8793  }
8794 
8795  *nb_reserved_values = trx->n_autoinc_rows;
8796 
8797  /* This all current style autoinc. */
8798  {
8799  uint64_t need;
8800  uint64_t current;
8801  uint64_t next_value;
8802 
8803  current = *first_value > col_max_value ? autoinc : *first_value;
8804  need = *nb_reserved_values * increment;
8805 
8806  /* Compute the last value in the interval */
8807  next_value = innobase_next_autoinc(current, need, offset, col_max_value);
8808 
8809  prebuilt->autoinc_last_value = next_value;
8810 
  /* An end value below the start is reported to the caller by
  setting *first_value to the maximum value. */
8811  if (prebuilt->autoinc_last_value < *first_value) {
8812  *first_value = (~(unsigned long long) 0);
8813  } else {
8814  /* Update the table autoinc variable */
8815  dict_table_autoinc_update_if_greater(
8817  }
8818  }
8819 
8820  /* The increment to be used to increase the AUTOINC value, we use
8821  this in doInsertRecord() and doUpdateRecord() to increase the autoinc counter
8822  for columns that are filled by the user. We need the offset and
8823  the increment. */
8824  prebuilt->autoinc_offset = offset;
8825  prebuilt->autoinc_increment = increment;
8826 
  /* Releases the AUTOINC lock taken inside innobase_get_autoinc(). */
8827  dict_table_autoinc_unlock(prebuilt->table);
8828 }
8829 
8830 /*******************************************************************/
/** Resets the table's AUTOINC counter to the given value through
innobase_reset_autoinc(); a requested value of 0 is replaced by 1.
NOTE(review): this listing is missing the function-name line and listing
line 8846, which presumably assigns `error` before it is tested below --
as shown here `error` would be read uninitialized; confirm against the
original source.
@param value new counter value
@return 0 on success, otherwise the error code produced by
convert_error_code_to_mysql() */
8836 UNIV_INTERN
8837 int
8839 /*==============================*/
8840  uint64_t value)
8841 {
8842  int error;
8843 
8844  update_session(getTable()->in_use);
8845 
8847 
8848  if (error != DB_SUCCESS) {
8849  error = convert_error_code_to_mysql(error,
8850  prebuilt->table->flags,
8851  user_session);
8852 
8853  return(error);
8854  }
8855 
8856  /* The next value can never be 0. */
8857  if (value == 0) {
8858  value = 1;
8859  }
8860 
8861  innobase_reset_autoinc(value);
8862 
8863  return 0;
8864 }
8865 
8866 /* See comment in Cursor.cc */
/** Copies the detailed error text of the current session's transaction
into buf, using system_charset_info as the character set.
The unnamed int parameter (an error number, presumably) is ignored.
@param buf out: receives trx->detailed_error
@return always FALSE */
8867 UNIV_INTERN
8868 bool
8869 InnobaseEngine::get_error_message(int, String *buf) const
8870 {
8871  trx_t* trx = check_trx_exists(current_session);
8872 
8873  buf->copy(trx->detailed_error, (uint) strlen(trx->detailed_error),
8874  system_charset_info);
8875 
8876  return(FALSE);
8877 }
8878 
8879 /*******************************************************************/
/** Compares two row references. A reference that is an InnoDB row id is
compared bytewise over DATA_ROW_ID_LEN bytes; otherwise the references are
compared field by field over the primary key parts, using the field's own
comparison (with BLOB prefixes handled through Field_blob::cmp()).
NOTE(review): this listing is missing the function-name line, the
parameter comment lines, listing line 8901 (the condition guarding the
row-id branch) and listing lines 8924-8925 (which presumably read len1
and len2 from the 2-byte length prefixes) -- confirm against the original
source.
@param ref1 first reference
@param ref2 second reference
@return 0 if equal, otherwise the non-zero result of the first differing
comparison */
8884 UNIV_INTERN
8885 int
8887 /*=================*/
8888  const unsigned char* ref1,
8890  const unsigned char* ref2)
8892 {
8893  enum_field_types mysql_type;
8894  Field* field;
8895  KeyPartInfo* key_part;
8896  KeyPartInfo* key_part_end;
8897  uint len1;
8898  uint len2;
8899  int result;
8900 
8902  /* The 'ref' is an InnoDB row id */
8903 
8904  return(memcmp(ref1, ref2, DATA_ROW_ID_LEN));
8905  }
8906 
8907  /* Do a type-aware comparison of primary key fields. PK fields
8908  are always NOT NULL, so no checks for NULL are performed. */
8909 
8910  key_part = getTable()->key_info[getTable()->getShare()->getPrimaryKey()].key_part;
8911 
8912  key_part_end = key_part
8913  + getTable()->key_info[getTable()->getShare()->getPrimaryKey()].key_parts;
8914 
8915  for (; key_part != key_part_end; ++key_part) {
8916  field = key_part->field;
8917  mysql_type = field->type();
8918 
8919  if (mysql_type == DRIZZLE_TYPE_BLOB) {
8920 
8921  /* In the MySQL key value format, a column prefix of
8922  a BLOB is preceded by a 2-byte length field */
8923 
8926 
  /* Step over the 2-byte length prefix of each reference. */
8927  ref1 += 2;
8928  ref2 += 2;
8929  result = ((Field_blob*)field)->cmp( ref1, len1,
8930  ref2, len2);
8931  } else {
8932  result = field->key_cmp(ref1, ref2);
8933  }
8934 
8935  if (result) {
8936 
8937  return(result);
8938  }
8939 
8940  ref1 += key_part->store_length;
8941  ref2 += key_part->store_length;
8942  }
8943 
8944  return(0);
8945 }
8946 
8947 /**********************************************************************
8948 This function is used to find the storage length in bytes of the first n
8949 characters for prefix indexes using a multibyte character set. The function
8950 finds charset information and returns length of prefix_len characters in the
8951 index field in bytes.
NOTE(review): the line carrying the function name is missing from this
listing.
@param charset_id id handed to get_charset() to look up the character set
@param prefix_len prefix length in bytes; divided by the charset's mbmaxlen
to obtain the maximum number of characters
@param data_len length of str in bytes
@param str the column value
8952 @return number of bytes occupied by the first n characters */
8953 
8954 ulint
8956 /*===========================*/
8957  ulint charset_id,
8958  ulint prefix_len,
8961  ulint data_len,
8962  const char* str)
8963 {
8964  ulint char_length;
8965  ulint n_chars;
8966  const charset_info_st* charset;
8968  charset = get_charset((uint) charset_id);
8969 
  /* Debug-only checks; charset is dereferenced unconditionally below. */
8970  ut_ad(charset);
8971  ut_ad(charset->mbmaxlen);
8972 
8973  /* Calculate how many characters at most the prefix index contains */
8974 
8975  n_chars = prefix_len / charset->mbmaxlen;
8976 
8977  /* If the charset is multi-byte, then we must find the length of the
8978  first at most n chars in the string. If the string contains less
8979  characters than n, then we return the length to the end of the last
8980  character. */
8981 
8982  if (charset->mbmaxlen > 1) {
8983  /* my_charpos() returns the byte length of the first n_chars
8984  characters, or a value bigger than the length of str, if
8985  there were not enough full characters in str.
8986 
8987  Why does the code below work:
8988  Suppose that we are looking for n UTF-8 characters.
8989 
8990  1) If the string is long enough, then the prefix contains at
8991  least n complete UTF-8 characters + maybe some extra
8992  characters + an incomplete UTF-8 character. No problem in
8993  this case. The function returns the pointer to the
8994  end of the nth character.
8995 
8996  2) If the string is not long enough, then the string contains
8997  the complete value of a column, that is, only complete UTF-8
8998  characters, and we can store in the column prefix index the
8999  whole string. */
9000 
9001  char_length = my_charpos(charset, str,
9002  str + data_len, (int) n_chars);
9003  if (char_length > data_len) {
9004  char_length = data_len;
9005  }
9006  } else {
  /* Single-byte charset: the answer is min(data_len, prefix_len). */
9007  if (data_len < prefix_len) {
9008  char_length = data_len;
9009  } else {
9010  char_length = prefix_len;
9011  }
9012  }
9013 
9014  return(char_length);
9015 }
/** Prepares the session's InnoDB transaction for a new statement: makes
sure the transaction object exists, clears its detailed error message and
sets its isolation level from the session's current setting.
NOTE(review): the line carrying the function name is missing from this
listing.
@param session session that is starting a statement */
9022 void
9024  Session *session)
9025 {
9026  /*
9027  * Create the InnoDB transaction structure
9028  * for the session
9029  */
9030  trx_t *trx= check_trx_exists(session);
9031 
9032  /* "reset" the error message for the transaction */
9033  trx->detailed_error[0]= '\0';
9034 
9035  /* Set the isolation level of the transaction. */
9036  trx->isolation_level= innobase_map_isolation_level(session->getTxIsolation());
9037 }
9038 
/** Called at the end of a statement for the given session.
NOTE(review): listing line 9049 is missing; going by the comment below it
releases a possible FIFO ticket and the search latch held by trx, but the
statement itself is not visible here -- confirm against the original
source.
@param session session whose statement has ended */
9039 void
9040 InnobaseEngine::doEndStatement(
9041  Session *session)
9042 {
9043  trx_t *trx= check_trx_exists(session);
9044 
9045  /* Release a possible FIFO ticket and search latch. Since we
9046  may reserve the kernel mutex, we have to release the search
9047  system latch first to obey the latching order. */
9048 
9050 
9051 }
9052 
9053 /*******************************************************************/
/** Prepares the session's transaction for an XA two-phase commit, or just
marks the end of the SQL statement. Returns immediately when the
transaction was started without XA support. A full prepare via
trx_prepare_for_mysql() is done when `all` is set or when the session is
in autocommit mode; otherwise only trx_mark_sql_stat_end() is called.
NOTE(review): this listing is missing the function-name line, the
parameter comment lines and listing lines 9085, 9103 and 9115 (the
statements announced by the "Release a possible FIFO ticket", "release
it now" and "Tell the InnoDB server" comments) -- confirm against the
original source.
@param session session whose transaction is prepared
@param all true to prepare the whole transaction, false at the end of a
single statement
@return 0, or the result of trx_prepare_for_mysql() cast to int */
9056 int
9058 /*================*/
9059  Session* session,
9062  bool all)
9065 {
9066  int error = 0;
9067  trx_t* trx = check_trx_exists(session);
9068 
9069  assert(this == innodb_engine_ptr);
9070 
9071  /* we use support_xa value as it was seen at transaction start
9072  time, not the current session variable value. Any possible changes
9073  to the session variable take effect only in the next transaction */
9074  if (!trx->support_xa) {
9075 
9076  return(0);
9077  }
9078 
  /* Copy the session's XID into the transaction. */
9079  session->get_xid(reinterpret_cast<DrizzleXid*>(&trx->xid));
9080 
9081  /* Release a possible FIFO ticket and search latch. Since we will
9082  reserve the kernel mutex, we have to release the search system latch
9083  first to obey the latching order. */
9084 
9086 
9087  if (all
9088  || (!session_test_options(session, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
9089 
9090  /* We were instructed to prepare the whole transaction, or
9091  this is an SQL statement end and autocommit is on */
9092 
9093  ut_ad(trx->conc_state != TRX_NOT_STARTED);
9094 
9095  error = (int) trx_prepare_for_mysql(trx);
9096  } else {
9097  /* We just mark the SQL statement ended and do not do a
9098  transaction prepare */
9099 
9100  /* If we had reserved the auto-inc lock for some
9101  table in this SQL statement we release it now */
9102 
9104 
9105  /* Store the current undo_no of the transaction so that we
9106  know where to roll back if we have to roll back the next
9107  SQL statement */
9108 
9109  trx_mark_sql_stat_end(trx);
9110  }
9111 
9112  /* Tell the InnoDB server that there might be work for utility
9113  threads: */
9114 
9116 
9117  return(error);
9118 }
9119 
9120 uint64_t InnobaseEngine::doGetCurrentTransactionId(Session *session)
9121 {
9122  trx_t *trx= session_to_trx(session);
9123  return (trx->id);
9124 }
9125 
9126 uint64_t InnobaseEngine::doGetNewTransactionId(Session *session)
9127 {
9128  trx_t*& trx = session_to_trx(session);
9129 
9130  if (trx == NULL)
9131  {
9132  trx = innobase_trx_allocate(session);
9133 
9134  innobase_trx_init(session, trx);
9135  }
9136 
9137  mutex_enter(&kernel_mutex);
9138  trx->id= trx_sys_get_new_trx_id();
9139  mutex_exit(&kernel_mutex);
9140 
9141  uint64_t transaction_id= trx->id;
9142 
9143  return transaction_id;
9144 }
9145 
9146 /*******************************************************************/
/* Hands a caller-supplied XID array to trx_recover_for_mysql().
NOTE(review): the line naming this function (listing line 9150) is absent
from this listing.
Parameters:
  xid_list - array that receives the XIDs; cast to ::XID* for InnoDB
  len      - number of slots in xid_list
Returns 0 without calling into InnoDB when len is 0 or xid_list is NULL;
otherwise the value returned by trx_recover_for_mysql(). */
9149 int
9151 /*================*/
9152  ::drizzled::XID* xid_list,
9153  size_t len)
9154 {
9155  assert(this == innodb_engine_ptr);
9156 
9157  if (len == 0 || xid_list == NULL) {
9158 
9159  return(0);
9160  }
9161 
9162  return(trx_recover_for_mysql((::XID *)xid_list, len));
9163 }
9164 
9165 /*******************************************************************/
/* Commits the transaction identified by an XID.
NOTE(review): the line naming this function (listing line 9170) is absent
from this listing.
Parameters:
  xid - XID used to look the transaction up via trx_get_trx_by_xid()
Returns XA_OK after innobase_commit_low() has been called on the
transaction found, or XAER_NOTA when no transaction matches the XID. */
9169 int
9171 /*===================*/
9172  ::drizzled::XID* xid)
9173 {
9174  trx_t* trx;
9175 
9176  assert(this == innodb_engine_ptr);
9177 
9178  trx = trx_get_trx_by_xid((::XID *)xid);
9179 
9180  if (trx) {
9181  innobase_commit_low(trx);
9182 
9183  return(XA_OK);
9184  } else {
9185  return(XAER_NOTA);
9186  }
9187 }
9188 
9189 /*******************************************************************/
/* Rolls back the transaction identified by an XID.
NOTE(review): the line naming this function (listing line 9194) is absent
from this listing.
Parameters:
  xid - XID used to look the transaction up via trx_get_trx_by_xid()
Returns the result of innobase_rollback_trx() for the transaction found,
or XAER_NOTA when no transaction matches the XID. */
9193 int
9195 /*=====================*/
9196  ::drizzled::XID* xid)
9198 {
9199  trx_t* trx;
9200 
9201  assert(this == innodb_engine_ptr);
9202 
9203  trx = trx_get_trx_by_xid((::XID *)xid);
9204 
9205  if (trx) {
9206  return(innobase_rollback_trx(trx));
9207  } else {
9208  return(XAER_NOTA);
9209  }
9210 }
9211 
9212 
9213 /************************************************************/
/* Translates a file format given either as a decimal id or as a format
name into its numeric id.
NOTE(review): the line naming this function (listing line 9218) is absent
from this listing; presumably it is innobase_file_format_name_lookup(),
which the function below calls with the same contract -- confirm.
Parameters:
  format_name - format id or name; must not be NULL (checked with ut_a)
Returns the format id when it is recognised, DICT_TF_FORMAT_MAX + 1
otherwise. A purely numeric string above DICT_TF_FORMAT_MAX is not retried
as a name. */
9216 static
9217 uint
9219 /*=============================*/
9220  const char* format_name)
9221 {
9222  char* endp;
9223  uint format_id;
9224 
9225  ut_a(format_name != NULL);
9226 
9227  /* The format name can contain the format id itself instead of
9228  the name and we check for that. */
/* NOTE(review): the unsigned long from strtoul() is narrowed to uint
here, so a very large number could wrap into the valid range -- confirm
whether that matters for callers. */
9229  format_id = (uint) strtoul(format_name, &endp, 10);
9230 
9231  /* Check for valid parse. */
/* The whole string must have been consumed and it must not be empty. */
9232  if (*endp == '\0' && *format_name != '\0') {
9233 
9234  if (format_id <= DICT_TF_FORMAT_MAX) {
9235 
9236  return(format_id);
9237  }
9238  } else {
9239 
/* Not a number: compare case-insensitively against the name of every
known format id. */
9240  for (format_id = 0; format_id <= DICT_TF_FORMAT_MAX;
9241  format_id++) {
9242  const char* name;
9243 
9244  name = trx_sys_file_format_id_to_name(format_id);
9245 
9246  if (!innobase_strcasecmp(format_name, name)) {
9247 
9248  return(format_id);
9249  }
9250  }
9251  }
9252 
9253  return(DICT_TF_FORMAT_MAX + 1);
9254 }
9255 
9256 /************************************************************/
/* Validates a file format name or id and, when it is valid, records it
in srv_max_file_format_at_startup.
NOTE(review): the line naming this function (listing line 9262) is absent
from this listing.
Parameters:
  format_max - format name or decimal id, resolved with
               innobase_file_format_name_lookup()
Returns the format id on success, -1 when the value is not recognised (in
which case srv_max_file_format_at_startup is left untouched). */
9260 static
9261 int
9263 /*================================*/
9264  const char* format_max)
9265 {
9266  uint format_id;
9267 
9268  format_id = innobase_file_format_name_lookup(format_max);
9269 
/* The lookup signals "unknown" with DICT_TF_FORMAT_MAX + 1. */
9270  if (format_id < DICT_TF_FORMAT_MAX + 1) {
9271  srv_max_file_format_at_startup = format_id;
9272  return((int) format_id);
9273  } else {
9274  return(-1);
9275  }
9276 }
9277 
9278 
9279 
9280 static void init_options(drizzled::module::option_context &context)
9281 {
9282  context("disable-checksums",
9283  "Disable InnoDB checksums validation.");
9284  context("data-home-dir",
9285  po::value<string>(),
9286  "The common part for InnoDB table spaces.");
9287  context("disable-doublewrite",
9288  "Disable InnoDB doublewrite buffer.");
9289  context("io-capacity",
9290  po::value<io_capacity_constraint>(&innodb_io_capacity)->default_value(200),
9291  "Number of IOPs the server can do. Tunes the background IO rate");
9292  context("fast-shutdown",
9293  po::value<trinary_constraint>(&innobase_fast_shutdown)->default_value(1),
9294  "Speeds up the shutdown process of the InnoDB storage engine. Possible values are 0, 1 (faster) or 2 (fastest - crash-like).");
9295  context("purge-batch-size",
9296  po::value<purge_batch_constraint>(&innodb_purge_batch_size)->default_value(20),
9297  "Number of UNDO logs to purge in one batch from the history list. "
9298  "Default is 20.");
9299  context("purge-threads",
9300  po::value<purge_threads_constraint>(&innodb_n_purge_threads)->default_value(1),
9301  "Purge threads can be either 0 or 1. Default is 1.");
9302  context("file-per-table",
9303  po::value<bool>(&srv_file_per_table)->default_value(false)->zero_tokens(),
9304  "Stores each InnoDB table to an .ibd file in the database dir.");
9305  context("file-format-max",
9306  po::value<string>(&innobase_file_format_max)->default_value("Antelope"),
9307  "The highest file format in the tablespace.");
9308  context("file-format-check",
9309  po::value<bool>(&innobase_file_format_check)->default_value(true)->zero_tokens(),
9310  "Whether to perform system file format check.");
9311  context("file-format",
9312  po::value<string>(&innobase_file_format_name)->default_value("Antelope"),
9313  "File format to use for new tables in .ibd files.");
9314  context("flush-log-at-trx-commit",
9315  po::value<trinary_constraint>(&innodb_flush_log_at_trx_commit)->default_value(1),
9316  "Set to 0 (write and flush once per second), 1 (write and flush at each commit) or 2 (write at commit, flush once per second).");
9317  context("flush-method",
9318  po::value<string>(),
9319  "With which method to flush data.");
9320  context("log-group-home-dir",
9321  po::value<string>(),
9322  "Path to InnoDB log files.");
9323  context("max-dirty-pages-pct",
9324  po::value<max_dirty_pages_constraint>(&innodb_max_dirty_pages_pct)->default_value(75),
9325  "Percentage of dirty pages allowed in bufferpool.");
9326  context("disable-adaptive-flushing",
9327  "Do not attempt flushing dirty pages to avoid IO bursts at checkpoints.");
9328  context("max-purge-lag",
9329  po::value<uint64_constraint>(&innodb_max_purge_lag)->default_value(0),
9330  "Desired maximum length of the purge queue (0 = no limit)");
9331  context("status-file",
9332  po::value<bool>(&innobase_create_status_file)->default_value(false)->zero_tokens(),
9333  "Enable SHOW INNODB STATUS output in the innodb_status.<pid> file");
9334  context("disable-stats-on-metadata",
9335  "Disable statistics gathering for metadata commands such as SHOW TABLE STATUS (on by default)");
9336  context("stats-sample-pages",
9337  po::value<uint64_nonzero_constraint>(&innodb_stats_sample_pages)->default_value(8),
9338  "The number of index pages to sample when calculating statistics (default 8)");
9339  context("disable-adaptive-hash-index",
9340  "Enable InnoDB adaptive hash index (enabled by default)");
9341  context("replication-delay",
9342  po::value<uint64_constraint>(&innodb_replication_delay)->default_value(0),
9343  "Replication thread delay (ms) on the slave server if innodb_thread_concurrency is reached (0 by default)");
9344  context("additional-mem-pool-size",
9345  po::value<additional_mem_pool_constraint>(&innobase_additional_mem_pool_size)->default_value(8*1024*1024L),
9346  "Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.");
9347  context("autoextend-increment",
9348  po::value<autoextend_constraint>(&innodb_auto_extend_increment)->default_value(64L),
9349  "Data file autoextend increment in megabytes");
9350  context("buffer-pool-size",
9351  po::value<buffer_pool_constraint>(&innobase_buffer_pool_size)->default_value(128*1024*1024L),
9352  "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.");
9353  context("buffer-pool-instances",
9354  po::value<buffer_pool_instances_constraint>(&innobase_buffer_pool_instances)->default_value(1),
9355  "Number of buffer pool instances, set to higher value on high-end machines to increase scalability");
9356 
9357  context("commit-concurrency",
9358  po::value<concurrency_constraint>(&innobase_commit_concurrency)->default_value(0),
9359  "Helps in performance tuning in heavily concurrent environments.");
9360  context("concurrency-tickets",
9361  po::value<uint32_nonzero_constraint>(&innodb_concurrency_tickets)->default_value(500L),
9362  "Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket");
9363  context("read-io-threads",
9364  po::value<io_threads_constraint>(&innobase_read_io_threads)->default_value(4),
9365  "Number of background read I/O threads in InnoDB.");
9366  context("write-io-threads",
9367  po::value<io_threads_constraint>(&innobase_write_io_threads)->default_value(4),
9368  "Number of background write I/O threads in InnoDB.");
9369  context("force-recovery",
9370  po::value<force_recovery_constraint>(&innobase_force_recovery)->default_value(0),
9371  "Helps to save your data in case the disk image of the database becomes corrupt.");
9372  context("log-buffer-size",
9373  po::value<log_buffer_constraint>(&innobase_log_buffer_size)->default_value(8*1024*1024L),
9374  "The size of the buffer which InnoDB uses to write log to the log files on disk.");
9375  context("log-file-size",
9376  po::value<log_file_constraint>(&innobase_log_file_size)->default_value(20*1024*1024L),
9377  "The size of the buffer which InnoDB uses to write log to the log files on disk.");
9378  context("page-size",
9379  po::value<page_size_constraint>(&innobase_page_size)->default_value(1 << 14),
9380  "###EXPERIMENTAL###: The universal page size of the database. Changing for created database is not supported. Use on your own risk!");
9381  context("log-block-size",
9382  po::value<log_block_size_constraint>(&innobase_log_block_size)->default_value(1 << 9),
9383  "###EXPERIMENTAL###: The log block size of the transaction log file. Changing for created log file is not supported. Use on your own risk!");
9384  context("log-files-in-group",
9385  po::value<log_files_in_group_constraint>(&innobase_log_files_in_group)->default_value(2),
9386  "Number of log files in the log group. InnoDB writes to the files in a circular fashion.");
9387  context("mirrored-log-groups",
9388  po::value<mirrored_log_groups_constraint>(&innobase_mirrored_log_groups)->default_value(1),
9389  "Number of identical copies of log groups we keep for the database. Currently this should be set to 1.");
9390  context("open-files",
9391  po::value<open_files_constraint>(&innobase_open_files)->default_value(300L),
9392  "How many files at the maximum InnoDB keeps open at the same time.");
9393  context("sync-spin-loops",
9394  po::value<uint32_constraint>(&innodb_sync_spin_loops)->default_value(30L),
9395  "Count of spin-loop rounds in InnoDB mutexes (30 by default)");
9396  context("spin-wait-delay",
9397  po::value<uint32_constraint>(&innodb_spin_wait_delay)->default_value(6L),
9398  "Maximum delay between polling for a spin lock (6 by default)");
9399  context("thread-concurrency",
9400  po::value<concurrency_constraint>(&innobase_thread_concurrency)->default_value(0),
9401  "Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.");
9402  context("thread-sleep-delay",
9403  po::value<uint32_constraint>(&innodb_thread_sleep_delay)->default_value(10000L),
9404  "Time of innodb thread sleeping before joining InnoDB queue (usec). Value 0 disable a sleep");
9405  context("data-file-path",
9406  po::value<string>(),
9407  "Path to individual files and their sizes.");
9408  context("version",
9409  po::value<string>()->default_value(INNODB_VERSION_STR),
9410  "InnoDB version");
9411  context("use-internal-malloc",
9412  "Use InnoDB's internal memory allocator instal of the OS memory allocator.");
9413  context("disable-native-aio",
9414  _("Do not use Native AIO library for IO, even if available"));
9415  context("change-buffering",
9416  po::value<string>(&innobase_change_buffering),
9417  "Buffer changes to reduce random access: OFF, ON, inserting, deleting, changing, or purging.");
9418  context("read-ahead-threshold",
9419  po::value<read_ahead_threshold_constraint>(&innodb_read_ahead_threshold)->default_value(56),
9420  "Number of pages that must be accessed sequentially for InnoDB to trigger a readahead.");
9421  context("auto-lru-dump",
9422  po::value<uint32_constraint>(&buffer_pool_restore_at_startup)->default_value(0),
9423  "Time in seconds between automatic buffer pool dumps. "
9424  "0 (the default) disables automatic dumps.");
9425  context("ibuf-max-size",
9426  po::value<uint64_constraint>(&ibuf_max_size)->default_value(UINT64_MAX),
9427  "The maximum size of the insert buffer (in bytes).");
9428  context("ibuf-active-contract",
9429  po::value<binary_constraint>(&ibuf_active_contract)->default_value(1),
9430  "Enable/Disable active_contract of insert buffer. 0:disable 1:enable");
9431  context("ibuf-accel-rate",
9432  po::value<ibuf_accel_rate_constraint>(&ibuf_accel_rate)->default_value(100),
9433  "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)");
9434  context("checkpoint-age-target",
9435  po::value<uint32_constraint>(&checkpoint_age_target)->default_value(0),
9436  "Control soft limit of checkpoint age. (0 : not control)");
9437  context("flush-neighbor-pages",
9438  po::value<binary_constraint>(&flush_neighbor_pages)->default_value(1),
9439  "Enable/Disable flushing also neighbor pages. 0:disable 1:enable");
9440  context("read-ahead",
9441  po::value<string>(&read_ahead)->default_value("linear"),
9442  "Control read ahead activity (none, random, [linear], both). [from 1.0.5: random read ahead is ignored]");
9443  context("adaptive-flushing-method",
9444  po::value<string>(&adaptive_flushing_method)->default_value("estimate"),
9445  "Choose method of innodb_adaptive_flushing. (native, [estimate], keep_average)");
9446  context("disable-xa",
9447  "Disable InnoDB support for the XA two-phase commit");
9448  context("disable-table-locks",
9449  "Disable InnoDB locking in LOCK TABLES");
9450  context("strict-mode",
9451  po::value<bool>(&strict_mode)->default_value(false)->zero_tokens(),
9452  "Use strict mode when evaluating create options.");
9453  context("replication-log",
9454  po::value<bool>(&innobase_use_replication_log)->default_value(false)->zero_tokens(),
9455  _("Enable internal replication log."));
9456  context("use-replicator",
9457  po::value<string>(&sysvar_transaction_log_use_replicator)->default_value(DEFAULT_USE_REPLICATOR),
9458  _("Name of the replicator plugin to use (default='default_replicator')"));
9459  context("lock-wait-timeout",
9460  po::value<lock_wait_constraint>(&lock_wait_timeout)->default_value(50),
9461  _("Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout."));
9462  context("old-blocks-pct",
9463  po::value<old_blocks_constraint>(&innobase_old_blocks_pct)->default_value(100 * 3 / 8),
9464  _("Percentage of the buffer pool to reserve for 'old' blocks."));
9465  context("old-blocks-time",
9466  po::value<uint32_t>(&buf_LRU_old_threshold_ms)->default_value(0),
9467  _("ove blocks to the 'new' end of the buffer pool if the first access"
9468  " was at least this many milliseconds ago."
9469  " The timeout is disabled if 0 (the default)."));
9470 }
9471 
9472 
9473 
/* Plugin descriptor for the InnoDB storage engine: plugin name, version
string, author, description, licence, the initialisation function and the
function that registers the plugin's options. */
9474 DRIZZLE_DECLARE_PLUGIN
9475 {
9476  DRIZZLE_VERSION_ID,
9477  "innodb",
9478  INNODB_VERSION_STR,
9479  "Innobase Oy",
9480  "InnoDB storage engine: transactional, row-level locking, foreign keys",
9481  PLUGIN_LICENSE_GPL,
9482  innobase_init, /* Plugin Init */
9483  NULL, /* depends */
9484  init_options /* option registration function (init_options() above) */
9485 }
9486 DRIZZLE_DECLARE_PLUGIN_END;
9487 
9489  const key_range *end_key,
9490  bool eq_range_arg,
9491  bool sorted)
9492 {
/* NOTE(review): the opening line of this definition (name and first
parameter, listing line 9488) is absent from this listing.
The body forwards start_key, end_key, eq_range_arg and sorted unchanged to
Cursor::read_range_first() and returns its result. The commented-out
statements are disabled in_range_read bookkeeping. */
9493  int res;
9494  //if (!eq_range_arg)
9495  //in_range_read= TRUE;
9496  res= Cursor::read_range_first(start_key, end_key, eq_range_arg, sorted);
9497  //if (res)
9498  // in_range_read= FALSE;
9499  return res;
9500 }
9501 
9502 
9504 {
/* NOTE(review): the header line of this definition (listing line 9503)
is absent from this listing.
The body returns the result of Cursor::read_range_next() unchanged. The
commented-out statements are disabled in_range_read bookkeeping. */
9505  int res= Cursor::read_range_next();
9506  //if (res)
9507  // in_range_read= FALSE;
9508  return res;
9509 }
9510 
9511 /***********************************************************************
9512 This function checks each index name for a table against reserved
9513 system default primary index name 'GEN_CLUST_INDEX'. If a name matches,
9514 this function pushes an warning message to the client, and returns true. */
9515 UNIV_INTERN
9516 bool
9517 innobase_index_name_is_reserved(
9518 /*============================*/
9519  /* out: true if an index name
9520  matches the reserved name */
9521  const trx_t* trx, /* in: InnoDB transaction handle */
9522  const KeyInfo* key_info, /* in: Indexes to be created */
9523  ulint num_of_keys) /* in: Number of indexes to
9524  be created. */
9525 {
9526  const KeyInfo* key;
9527  uint key_num; /* index number */
9528 
9529  for (key_num = 0; key_num < num_of_keys; key_num++) {
9530  key = &key_info[key_num];
9531 
9532  if (innobase_strcasecmp(key->name,
9533  innobase_index_reserve_name) == 0) {
9534  /* Push warning to drizzle */
9535  push_warning_printf(trx->mysql_thd,
9536  DRIZZLE_ERROR::WARN_LEVEL_WARN,
9537  ER_WRONG_NAME_FOR_INDEX,
9538  "Cannot Create Index with name "
9539  "'%s'. The name is reserved "
9540  "for the system default primary "
9541  "index.",
9542  innobase_index_reserve_name);
9543 
9544  my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
9545  innobase_index_reserve_name);
9546 
9547  return(true);
9548  }
9549  }
9550 
9551  return(false);
9552 }
9553 
9554 #ifdef UNIV_COMPILE_TEST_FUNCS
9555 
9556 typedef struct innobase_convert_name_test_struct {
9557  char* buf;
9558  ulint buflen;
9559  const char* id;
9560  ulint idlen;
9561  drizzled::Session *session;
9562  ibool file_id;
9563 
9564  const char* expected;
9565 } innobase_convert_name_test_t;
9566 
9567 void
9568 test_innobase_convert_name()
9569 {
9570  char buf[1024];
9571  ulint i;
9572 
9573  innobase_convert_name_test_t test_input[] = {
9574  {buf, sizeof(buf), "abcd", 4, NULL, TRUE, "\"abcd\""},
9575  {buf, 7, "abcd", 4, NULL, TRUE, "\"abcd\""},
9576  {buf, 6, "abcd", 4, NULL, TRUE, "\"abcd\""},
9577  {buf, 5, "abcd", 4, NULL, TRUE, "\"abc\""},
9578  {buf, 4, "abcd", 4, NULL, TRUE, "\"ab\""},
9579 
9580  {buf, sizeof(buf), "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
9581  {buf, 9, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
9582  {buf, 8, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
9583  {buf, 7, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
9584  {buf, 6, "ab@0060cd", 9, NULL, TRUE, "\"ab`c\""},
9585  {buf, 5, "ab@0060cd", 9, NULL, TRUE, "\"ab`\""},
9586  {buf, 4, "ab@0060cd", 9, NULL, TRUE, "\"ab\""},
9587 
9588  {buf, sizeof(buf), "ab\"cd", 5, NULL, TRUE,
9589  "\"#mysql50#ab\"\"cd\""},
9590  {buf, 17, "ab\"cd", 5, NULL, TRUE,
9591  "\"#mysql50#ab\"\"cd\""},
9592  {buf, 16, "ab\"cd", 5, NULL, TRUE,
9593  "\"#mysql50#ab\"\"c\""},
9594  {buf, 15, "ab\"cd", 5, NULL, TRUE,
9595  "\"#mysql50#ab\"\"\""},
9596  {buf, 14, "ab\"cd", 5, NULL, TRUE,
9597  "\"#mysql50#ab\""},
9598  {buf, 13, "ab\"cd", 5, NULL, TRUE,
9599  "\"#mysql50#ab\""},
9600  {buf, 12, "ab\"cd", 5, NULL, TRUE,
9601  "\"#mysql50#a\""},
9602  {buf, 11, "ab\"cd", 5, NULL, TRUE,
9603  "\"#mysql50#\""},
9604  {buf, 10, "ab\"cd", 5, NULL, TRUE,
9605  "\"#mysql50\""},
9606 
9607  {buf, sizeof(buf), "ab/cd", 5, NULL, TRUE, "\"ab\".\"cd\""},
9608  {buf, 9, "ab/cd", 5, NULL, TRUE, "\"ab\".\"cd\""},
9609  {buf, 8, "ab/cd", 5, NULL, TRUE, "\"ab\".\"c\""},
9610  {buf, 7, "ab/cd", 5, NULL, TRUE, "\"ab\".\"\""},
9611  {buf, 6, "ab/cd", 5, NULL, TRUE, "\"ab\"."},
9612  {buf, 5, "ab/cd", 5, NULL, TRUE, "\"ab\"."},
9613  {buf, 4, "ab/cd", 5, NULL, TRUE, "\"ab\""},
9614  {buf, 3, "ab/cd", 5, NULL, TRUE, "\"a\""},
9615  {buf, 2, "ab/cd", 5, NULL, TRUE, "\"\""},
9616  /* XXX probably "" is a better result in this case
9617  {buf, 1, "ab/cd", 5, NULL, TRUE, "."},
9618  */
9619  {buf, 0, "ab/cd", 5, NULL, TRUE, ""},
9620  };
9621 
9622  for (i = 0; i < sizeof(test_input) / sizeof(test_input[0]); i++) {
9623 
9624  char* end;
9625  ibool ok = TRUE;
9626  size_t res_len;
9627 
9628  fprintf(stderr, "TESTING %lu, %s, %lu, %s\n",
9629  test_input[i].buflen,
9630  test_input[i].id,
9631  test_input[i].idlen,
9632  test_input[i].expected);
9633 
9634  end = innobase_convert_name(
9635  test_input[i].buf,
9636  test_input[i].buflen,
9637  test_input[i].id,
9638  test_input[i].idlen,
9639  test_input[i].session,
9640  test_input[i].file_id);
9641 
9642  res_len = (size_t) (end - test_input[i].buf);
9643 
9644  if (res_len != strlen(test_input[i].expected)) {
9645 
9646  fprintf(stderr, "unexpected len of the result: %u, "
9647  "expected: %u\n", (unsigned) res_len,
9648  (unsigned) strlen(test_input[i].expected));
9649  ok = FALSE;
9650  }
9651 
9652  if (memcmp(test_input[i].buf,
9653  test_input[i].expected,
9654  strlen(test_input[i].expected)) != 0
9655  || !ok) {
9656 
9657  fprintf(stderr, "unexpected result: %.*s, "
9658  "expected: %s\n", (int) res_len,
9659  test_input[i].buf,
9660  test_input[i].expected);
9661  ok = FALSE;
9662  }
9663 
9664  if (ok) {
9665  fprintf(stderr, "OK: res: %.*s\n\n", (int) res_len,
9666  buf);
9667  } else {
9668  fprintf(stderr, "FAILED\n\n");
9669  return;
9670  }
9671  }
9672 }
9673 
9674 #endif /* UNIV_COMPILE_TEST_FUNCS */