Drizzled Public API Documentation

trx0trx.cc
1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15 St, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 *****************************************************************************/
18 
19 /**************************************************/
26 #include "trx0trx.h"
27 
28 #ifdef UNIV_NONINL
29 #include "trx0trx.ic"
30 #endif
31 
32 #include "trx0undo.h"
33 #include "trx0rseg.h"
34 #include "log0log.h"
35 #include "que0que.h"
36 #include "lock0lock.h"
37 #include "trx0roll.h"
38 #include "usr0sess.h"
39 #include "read0read.h"
40 #include "srv0srv.h"
41 #include "btr0sea.h"
42 #include "os0proc.h"
43 #include "trx0xa.h"
44 #include "trx0purge.h"
45 #include "ha_prototypes.h"
46 
/* Session object handed to trx_create() by the allocation and recovery
code in this file; it starts out as NULL. */
48 UNIV_INTERN sess_t* trx_dummy_sess = NULL;
49 
/* Counter initialised to zero. NOTE(review): presumably the number of
transactions currently allocated for MySQL; the statements that update it
are not visible in this listing - confirm against the original source. */
52 UNIV_INTERN ulint trx_n_mysql_transactions = 0;
53 
54 #ifdef UNIV_PFS_MUTEX
55 /* Key to register the mutex with performance schema */
56 UNIV_INTERN mysql_pfs_key_t trx_undo_mutex_key;
57 #endif /* UNIV_PFS_MUTEX */
58 
59 /*************************************************************/
/* Copies the string msg into trx->detailed_error with ut_strlcpy(), bounded
by the size of the destination array.
NOTE(review): the line that carries the function name (listing line 63) is
absent from this listing; restore it from the original source before
compiling. */
61 UNIV_INTERN
62 void
64 /*===================*/
65  trx_t* trx,
66  const char* msg)
67 {
68  ut_strlcpy(trx->detailed_error, msg, sizeof(trx->detailed_error));
69 }
70 
71 /*************************************************************/
/* Takes a transaction and an open FILE*. The only surviving fragment of the
body passes sizeof(trx->detailed_error) as the last argument of a call.
NOTE(review): presumably this fills trx->detailed_error from the contents of
file - confirm. The function-name line (listing line 76) and the first line
of the call (listing line 81) are absent from this listing. */
74 UNIV_INTERN
75 void
77 /*=============================*/
78  trx_t* trx,
79  FILE* file)
80 {
82  sizeof(trx->detailed_error));
83 }
84 
85 /****************************************************************/
/* Allocates a trx_t with mem_alloc(), sets its fields to their initial
values and returns it. The new object is in state TRX_NOT_STARTED with
id 0, no == IB_ULONGLONG_MAX and isolation level TRX_ISO_REPEATABLE_READ.
The caller must hold kernel_mutex and pass a non-NULL sess (both checked
with ut_ad).
Resources created here: trx->undo_mutex, trx->global_read_view_heap and the
trx->autoinc_locks vector, which gets a heap of its own.
NOTE(review): the function-name line (listing line 90) and listing lines
123, 155, 163 and 166 are absent from this listing, so some field
initialisations are not visible here; restore them from the original
source. */
88 UNIV_INTERN
89 trx_t*
91 /*=======*/
92  sess_t* sess)
93 {
94  trx_t* trx;
95 
96  ut_ad(mutex_own(&kernel_mutex));
97  ut_ad(sess);
98 
99  trx = static_cast<trx_t *>(mem_alloc(sizeof(trx_t)));
100 
101  trx->magic_n = TRX_MAGIC_N;
102 
103  trx->op_info = "";
104 
105  trx->is_purge = 0;
106  trx->is_recovered = 0;
107  trx->conc_state = TRX_NOT_STARTED;
108  trx->start_time = time(NULL);
109 
110  trx->isolation_level = TRX_ISO_REPEATABLE_READ;
111 
112  trx->id = 0;
113  trx->no = IB_ULONGLONG_MAX;
114 
115  trx->support_xa = TRUE;
116 
117  trx->check_foreigns = TRUE;
118  trx->check_unique_secondary = TRUE;
119 
120  trx->flush_log_later = FALSE;
121  trx->must_flush_log_later = FALSE;
122 
124  trx->table_id = 0;
125 
126  trx->mysql_thd = NULL;
127  trx->duplicates = 0;
128 
129  trx->mysql_n_tables_locked = 0;
130 
131  trx->mysql_log_file_name = NULL;
132  trx->mysql_log_offset = 0;
133 
134  mutex_create(trx_undo_mutex_key, &trx->undo_mutex, SYNC_TRX_UNDO);
135 
136  trx->rseg = NULL;
137 
138  trx->undo_no = 0;
139  trx->last_sql_stat_start.least_undo_no = 0;
140  trx->insert_undo = NULL;
141  trx->update_undo = NULL;
142  trx->undo_no_arr = NULL;
143 
144  trx->error_state = DB_SUCCESS;
145  trx->error_key_num = 0;
146  trx->detailed_error[0] = '\0';
147 
148  trx->sess = sess;
149  trx->que_state = TRX_QUE_RUNNING;
150  trx->n_active_thrs = 0;
151 
152  trx->handling_signals = FALSE;
153 
154  UT_LIST_INIT(trx->signals);
156 
157  trx->graph = NULL;
158 
159  trx->wait_lock = NULL;
160  trx->was_chosen_as_deadlock_victim = FALSE;
161  UT_LIST_INIT(trx->wait_thrs);
162 
164  UT_LIST_INIT(trx->trx_locks);
165 
167 
168  trx->dict_operation_lock_mode = 0;
169  trx->has_search_latch = FALSE;
170  trx->search_latch_timeout = BTR_SEA_TIMEOUT;
171 
172  trx->declared_to_be_inside_innodb = FALSE;
173  trx->n_tickets_to_enter_innodb = 0;
174 
175  trx->global_read_view_heap = mem_heap_create(256);
176  trx->global_read_view = NULL;
177  trx->read_view = NULL;
178 
179  /* Set X/Open XA transaction identification to NULL */
180  memset(&trx->xid, 0, sizeof(trx->xid));
181  trx->xid.formatID = -1;
182 
183  trx->n_autoinc_rows = 0;
184 
185  /* Remember to free the vector explicitly. */
186  trx->autoinc_locks = ib_vector_create(
187  mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 4), 4);
188 
189  trx->log_commit_id= FALSE;
190 
191  return(trx);
192 }
193 
194 /********************************************************************/
/* Creates a transaction object with trx_create(trx_dummy_sess) while holding
kernel_mutex and links it at the head of trx_sys->mysql_trx_list. After the
mutex is released, the current OS thread id and process number are stored
in the object, which is then returned.
NOTE(review): the function-name line (listing line 199) and listing line
208 are absent from this listing. */
197 UNIV_INTERN
198 trx_t*
200 /*========================*/
201 {
202  trx_t* trx;
203 
204  mutex_enter(&kernel_mutex);
205 
206  trx = trx_create(trx_dummy_sess);
207 
209 
210  UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx);
211 
212  mutex_exit(&kernel_mutex);
213 
214  trx->mysql_thread_id = os_thread_get_curr_id();
215 
216  trx->mysql_process_no = os_proc_get_number();
217 
218  return(trx);
219 }
220 
221 /********************************************************************/
/* Creates a transaction object with trx_create(trx_dummy_sess) while holding
kernel_mutex and returns it. Unlike the MySQL variant above it in this file,
the object is not linked into trx_sys->mysql_trx_list.
NOTE(review): the function-name line (listing line 226) is absent from
this listing. */
224 UNIV_INTERN
225 trx_t*
227 /*=============================*/
228 {
229  trx_t* trx;
230 
231  mutex_enter(&kernel_mutex);
232 
233  trx = trx_create(trx_dummy_sess);
234 
235  mutex_exit(&kernel_mutex);
236 
237  return(trx);
238 }
239 
240 /********************************************************************/
/* If trx->has_search_latch is set, releases the shared lock on
btr_search_latch and clears the flag; otherwise does nothing.
NOTE(review): the function-name line (listing line 244) is absent from
this listing. */
242 UNIV_INTERN
243 void
245 /*=================================*/
246  trx_t* trx)
247 {
248  if (trx->has_search_latch) {
249  rw_lock_s_unlock(&btr_search_latch);
250 
251  trx->has_search_latch = FALSE;
252  }
253 }
254 
255 /********************************************************************/
/* Releases a transaction object. The caller must hold kernel_mutex (ut_ad).
Two inconsistent conditions are reported on stderr but do not abort:
the trx still being declared inside InnoDB, and a non-zero
trx->mysql_n_tables_locked. After that the function asserts (ut_a) that the
object is idle: valid magic number, state TRX_NOT_STARTED, no undo logs, no
queued signals, no lock wait, no search latch, no dictionary lock and no
locks. It then frees the undo mutex, the lock heap and read view heap (when
set), the autoinc lock vector, and finally the object itself.
NOTE(review): the function-name line (listing line 259) and listing lines
275 and 304 are absent from this listing, so the bodies belonging to the
"declared inside InnoDB" branch and to the undo_no_arr branch are
incomplete here. */
257 UNIV_INTERN
258 void
260 /*=====*/
261  trx_t* trx)
262 {
263  ut_ad(mutex_own(&kernel_mutex));
264 
265  if (trx->declared_to_be_inside_innodb) {
266  ut_print_timestamp(stderr);
267  fputs(" InnoDB: Error: Freeing a trx which is declared"
268  " to be processing\n"
269  "InnoDB: inside InnoDB.\n", stderr);
270  trx_print(stderr, trx, 600);
271  putc('\n', stderr);
272 
273  /* This is an error but not a fatal error. We must keep
274  the counters like srv_conc_n_threads accurate. */
276  }
277 
278  if (trx->mysql_n_tables_locked != 0) {
279 
280  ut_print_timestamp(stderr);
281  fprintf(stderr,
282  " InnoDB: Error: MySQL is freeing a thd\n"
283  "InnoDB: and trx->mysql_n_tables_locked is %lu.\n",
284  (ulong)trx->mysql_n_tables_locked);
285 
286  trx_print(stderr, trx, 600);
287 
288  ut_print_buf(stderr, trx, sizeof(trx_t));
289  putc('\n', stderr);
290  }
291 
292  ut_a(trx->magic_n == TRX_MAGIC_N);
293 
/* Overwrite the magic number so that a later use of the freed object
fails the check above. */
294  trx->magic_n = 11112222;
295 
296  ut_a(trx->conc_state == TRX_NOT_STARTED);
297 
298  mutex_free(&(trx->undo_mutex));
299 
300  ut_a(trx->insert_undo == NULL);
301  ut_a(trx->update_undo == NULL);
302 
303  if (trx->undo_no_arr) {
305  }
306 
307  ut_a(UT_LIST_GET_LEN(trx->signals) == 0);
308  ut_a(UT_LIST_GET_LEN(trx->reply_signals) == 0);
309 
310  ut_a(trx->wait_lock == NULL);
311  ut_a(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
312 
313  ut_a(!trx->has_search_latch);
314 
315  ut_a(trx->dict_operation_lock_mode == 0);
316 
317  if (trx->lock_heap) {
318  mem_heap_free(trx->lock_heap);
319  }
320 
321  ut_a(UT_LIST_GET_LEN(trx->trx_locks) == 0);
322 
323  if (trx->global_read_view_heap) {
324  mem_heap_free(trx->global_read_view_heap);
325  }
326 
327  trx->global_read_view = NULL;
328 
329  ut_a(trx->read_view == NULL);
330 
331  ut_a(ib_vector_is_empty(trx->autoinc_locks));
332  /* We allocated a dedicated heap for the vector. */
333  ib_vector_free(trx->autoinc_locks);
334 
335  mem_free(trx);
336 }
337 
338 /********************************************************************/
/* Under kernel_mutex, unlinks trx from trx_sys->mysql_trx_list and releases
it with trx_free().
NOTE(review): the function-name line (listing line 342) and listing lines
352 and 354 are absent from this listing, so further statements executed
before the mutex is released are not visible here. */
340 UNIV_INTERN
341 void
343 /*===============*/
344  trx_t* trx)
345 {
346  mutex_enter(&kernel_mutex);
347 
348  UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx);
349 
350  trx_free(trx);
351 
353 
355 
356  mutex_exit(&kernel_mutex);
357 }
358 
359 /********************************************************************/
/* Releases trx with trx_free() while holding kernel_mutex. No list
unlinking is done here.
NOTE(review): the function-name line (listing line 363) is absent from
this listing. */
361 UNIV_INTERN
362 void
364 /*====================*/
365  trx_t* trx)
366 {
367  mutex_enter(&kernel_mutex);
368 
369  trx_free(trx);
370 
371  mutex_exit(&kernel_mutex);
372 }
373 
374 /****************************************************************/
379 static
380 void
381 trx_list_insert_ordered(
382 /*====================*/
383  trx_t* trx)
384 {
385  trx_t* trx2;
386 
387  ut_ad(mutex_own(&kernel_mutex));
388 
389  trx2 = UT_LIST_GET_FIRST(trx_sys->trx_list);
390 
391  while (trx2 != NULL) {
392  if (trx->id >= trx2->id) {
393 
394  ut_ad(trx->id > trx2->id);
395  break;
396  }
397  trx2 = UT_LIST_GET_NEXT(trx_list, trx2);
398  }
399 
400  if (trx2 != NULL) {
401  trx2 = UT_LIST_GET_PREV(trx_list, trx2);
402 
403  if (trx2 == NULL) {
404  UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx);
405  } else {
406  UT_LIST_INSERT_AFTER(trx_list, trx_sys->trx_list,
407  trx2, trx);
408  }
409  } else {
410  UT_LIST_ADD_LAST(trx_list, trx_sys->trx_list, trx);
411  }
412 }
413 
414 /****************************************************************/
/* Initialises trx_sys->trx_list and repopulates it from the undo logs that
are attached to the rollback segments in trx_sys->rseg_list. The caller
must hold kernel_mutex (ut_ad).
For each rollback segment:
 - every insert undo log yields a new transaction object marked
   is_recovered;
 - every update undo log is attached to the transaction with the same id,
   looked up with trx_get_on_id(); a new recovered object is created when
   none exists.
State mapping for a created object:
 - TRX_UNDO_ACTIVE: TRX_ACTIVE, trx->no == IB_ULONGLONG_MAX;
 - TRX_UNDO_PREPARED: TRX_PREPARED, or TRX_ACTIVE when srv_force_recovery
   is non-zero; trx->no is set to trx->id;
 - any other state: TRX_COMMITTED_IN_MEMORY, trx->no set to trx->id.
trx->undo_no ends up one above the highest top_undo_no of the non-empty
undo logs attached to the transaction.
NOTE(review): the function-name line (listing line 422) and listing lines
501 and 578 (the beginning of the call that ends in
"trx, TRX_DICT_OP_TABLE);") are absent from this listing. */
420 UNIV_INTERN
421 void
423 /*============================*/
424 {
425  trx_rseg_t* rseg;
426  trx_undo_t* undo;
427  trx_t* trx;
428 
429  ut_ad(mutex_own(&kernel_mutex));
430  UT_LIST_INIT(trx_sys->trx_list);
431 
432  /* Look from the rollback segments if there exist undo logs for
433  transactions */
434 
435  rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
436 
437  while (rseg != NULL) {
438  undo = UT_LIST_GET_FIRST(rseg->insert_undo_list);
439 
440  while (undo != NULL) {
441 
442  trx = trx_create(trx_dummy_sess);
443 
444  trx->is_recovered = TRUE;
445  trx->id = undo->trx_id;
446  trx->xid = undo->xid;
447  trx->insert_undo = undo;
448  trx->rseg = rseg;
449 
450  if (undo->state != TRX_UNDO_ACTIVE) {
451 
452  /* Prepared transactions are left in
453  the prepared state waiting for a
454  commit or abort decision from MySQL */
455 
456  if (undo->state == TRX_UNDO_PREPARED) {
457 
458  fprintf(stderr,
459  "InnoDB: Transaction "
460  TRX_ID_FMT
461  " was in the"
462  " XA prepared state.\n",
463  trx->id);
464 
465  if (srv_force_recovery == 0) {
466 
467  trx->conc_state = TRX_PREPARED;
468  } else {
469  fprintf(stderr,
470  "InnoDB: Since"
471  " innodb_force_recovery"
472  " > 0, we will"
473  " rollback it"
474  " anyway.\n");
475 
476  trx->conc_state = TRX_ACTIVE;
477  }
478  } else {
479  trx->conc_state
480  = TRX_COMMITTED_IN_MEMORY;
481  }
482 
483  /* We give a dummy value for the trx no;
484  this should have no relevance since purge
485  is not interested in committed transaction
486  numbers, unless they are in the history
487  list, in which case it looks the number
488  from the disk based undo log structure */
489 
490  trx->no = trx->id;
491  } else {
492  trx->conc_state = TRX_ACTIVE;
493 
494  /* A running transaction always has the number
495  field inited to IB_ULONGLONG_MAX */
496 
497  trx->no = IB_ULONGLONG_MAX;
498  }
499 
500  if (undo->dict_operation) {
502  trx, TRX_DICT_OP_TABLE);
503  trx->table_id = undo->table_id;
504  }
505 
506  if (!undo->empty) {
507  trx->undo_no = undo->top_undo_no + 1;
508  }
509 
510  trx_list_insert_ordered(trx);
511 
512  undo = UT_LIST_GET_NEXT(undo_list, undo);
513  }
514 
515  undo = UT_LIST_GET_FIRST(rseg->update_undo_list);
516 
517  while (undo != NULL) {
518  trx = trx_get_on_id(undo->trx_id);
519 
520  if (NULL == trx) {
521  trx = trx_create(trx_dummy_sess);
522 
523  trx->is_recovered = TRUE;
524  trx->id = undo->trx_id;
525  trx->xid = undo->xid;
526 
527  if (undo->state != TRX_UNDO_ACTIVE) {
528 
529  /* Prepared transactions are left in
530  the prepared state waiting for a
531  commit or abort decision from MySQL */
532 
533  if (undo->state == TRX_UNDO_PREPARED) {
534  fprintf(stderr,
535  "InnoDB: Transaction "
536  TRX_ID_FMT " was in the"
537  " XA prepared state.\n",
538  trx->id);
539 
540  if (srv_force_recovery == 0) {
541 
542  trx->conc_state
543  = TRX_PREPARED;
544  } else {
545  fprintf(stderr,
546  "InnoDB: Since"
547  " innodb_force_recovery"
548  " > 0, we will"
549  " rollback it"
550  " anyway.\n");
551 
552  trx->conc_state
553  = TRX_ACTIVE;
554  }
555  } else {
556  trx->conc_state
557  = TRX_COMMITTED_IN_MEMORY;
558  }
559 
560  /* We give a dummy value for the trx
561  number */
562 
563  trx->no = trx->id;
564  } else {
565  trx->conc_state = TRX_ACTIVE;
566 
567  /* A running transaction always has
568  the number field inited to
569  IB_ULONGLONG_MAX */
570 
571  trx->no = IB_ULONGLONG_MAX;
572  }
573 
574  trx->rseg = rseg;
575  trx_list_insert_ordered(trx);
576 
577  if (undo->dict_operation) {
579  trx, TRX_DICT_OP_TABLE);
580  trx->table_id = undo->table_id;
581  }
582  }
583 
584  trx->update_undo = undo;
585 
586  if ((!undo->empty)
587  && undo->top_undo_no >= trx->undo_no) {
588 
589  trx->undo_no = undo->top_undo_no + 1;
590  }
591 
592  undo = UT_LIST_GET_NEXT(undo_list, undo);
593  }
594 
595  rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
596  }
597 }
598 
599 /******************************************************************/
602 UNIV_INLINE
603 trx_rseg_t*
604 trx_assign_rseg(
605 /*============*/
606  ulint max_undo_logs)
607 {
608  trx_rseg_t* rseg = trx_sys->latest_rseg;
609 
610  ut_ad(mutex_own(&kernel_mutex));
611 
612  rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
613 
614  if (rseg == NULL || rseg->id == max_undo_logs - 1) {
615  rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
616  }
617 
618  trx_sys->latest_rseg = rseg;
619 
620  return(rseg);
621 }
622 
623 /****************************************************************/
/* Moves a transaction into the TRX_ACTIVE state. The caller must hold
kernel_mutex, and trx->rseg must be NULL on entry (both ut_ad).
A purge transaction (trx->is_purge) only gets id 0, the active state and a
fresh start time; it is given no rollback segment and is not linked into
trx_sys->trx_list.
Any other transaction gets a rollback segment from
trx_assign_rseg(srv_rollback_segments), a new id, trx->no reset to
IB_ULONGLONG_MAX and a fresh start time, and is linked at the head of
trx_sys->trx_list. rseg_id must be ULINT_UNDEFINED on this path (ut_a).
Every path visible here returns TRUE.
NOTE(review): the function-name line (listing line 628) is absent from
this listing. */
626 UNIV_INTERN
627 ibool
629 /*==========*/
630  trx_t* trx,
631  ulint rseg_id)
634 {
635  trx_rseg_t* rseg;
636 
637  ut_ad(mutex_own(&kernel_mutex));
638  ut_ad(trx->rseg == NULL);
639 
640  if (trx->is_purge) {
641  trx->id = 0;
642  trx->conc_state = TRX_ACTIVE;
643  trx->start_time = time(NULL);
644 
645  return(TRUE);
646  }
647 
648  ut_ad(trx->conc_state != TRX_ACTIVE);
649 
650  ut_a(rseg_id == ULINT_UNDEFINED);
651 
652  rseg = trx_assign_rseg(srv_rollback_segments);
653 
654  trx->id = trx_sys_get_new_trx_id();
655 
656  /* The initial value for trx->no: IB_ULONGLONG_MAX is used in
657  read_view_open_now: */
658 
659  trx->no = IB_ULONGLONG_MAX;
660 
661  trx->rseg = rseg;
662 
663  trx->conc_state = TRX_ACTIVE;
664  trx->start_time = time(NULL);
665 
666  UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx);
667 
668  return(TRUE);
669 }
670 
671 /****************************************************************/
/* Refreshes trx->support_xa from thd_supports_xa(trx->mysql_thd) and then
starts the transaction by calling trx_start_low(trx, rseg_id) while holding
kernel_mutex. Returns whatever trx_start_low() returned.
NOTE(review): the function-name line (listing line 676) is absent from
this listing. */
674 UNIV_INTERN
675 ibool
677 /*======*/
678  trx_t* trx,
679  ulint rseg_id)
682 {
683  ibool ret;
684 
685  /* Update the info whether we should skip XA steps that eat CPU time
686  For the duration of the transaction trx->support_xa is not reread
687  from thd so any changes in the value take effect in the next
688  transaction. This is to avoid a scenario where some undo
689  generated by a transaction, has XA stuff, and other undo,
690  generated by the same transaction, doesn't. */
691  trx->support_xa = thd_supports_xa(trx->mysql_thd);
692 
693  mutex_enter(&kernel_mutex);
694 
695  ret = trx_start_low(trx, rseg_id);
696 
697  mutex_exit(&kernel_mutex);
698 
699  return(ret);
700 }
701 
702 /****************************************************************/
/* Assigns trx->no from trx_sys_get_new_trx_id() while holding kernel_mutex.
If the transaction's rollback segment has no last page
(rseg->last_page_no == FIL_NULL), an entry pairing the segment with the new
number is also pushed onto purge_sys->ib_bh under purge_sys->bh_mutex.
The caller must hold trx->rseg->mutex (ut_ad). kernel_mutex is acquired and
released inside this function on every path. */
704 static
705 void
706 trx_serialisation_number_get(
707 /*=========================*/
708  trx_t* trx)
709 {
710  trx_rseg_t* rseg;
711 
712  rseg = trx->rseg;
713 
714  ut_ad(mutex_own(&rseg->mutex));
715 
716  mutex_enter(&kernel_mutex);
717 
718  trx->no = trx_sys_get_new_trx_id();
719 
720  /* If the rollback segment is not empty then the
721  new trx_t::no can't be less than any trx_t::no
722  already in the rollback segment. User threads only
723  produce events when a rollback segment is empty. */
724 
725  if (rseg->last_page_no == FIL_NULL) {
726  void* ptr;
727  rseg_queue_t rseg_queue;
728 
729  rseg_queue.rseg = rseg;
730  rseg_queue.trx_no = trx->no;
731 
/* bh_mutex is taken before kernel_mutex is dropped, so the two
critical sections overlap. */
732  mutex_enter(&purge_sys->bh_mutex);
733 
734  /* This is to reduce the pressure on the kernel mutex,
735  though in reality it should make very little (read no)
736  difference because this code path is only taken when the
737  rbs is empty. */
738 
739  mutex_exit(&kernel_mutex);
740 
741  ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
/* A NULL result from the push is treated as fatal. */
742  ut_a(ptr);
743 
744  mutex_exit(&purge_sys->bh_mutex);
745  } else {
746  mutex_exit(&kernel_mutex);
747  }
748 }
749 
750 /****************************************************************/
/* Performs the file-based part of a commit inside one mini-transaction and
returns mtr.end_lsn after that mini-transaction has been committed. Must be
called without kernel_mutex (ut_ad).
The rollback segment mutex is taken on both branches and released after the
undo logs have been handled. When an update undo log exists, the
transaction also receives its serialisation number and the update undo log
is finished and cleaned up. When trx_log_commit_id(trx) is true, the commit
id is flushed under commit_id_mutex as part of the same mini-transaction.
NOTE(review): listing line 802 (the body of the insert_undo branch) and
listing line 813 (one argument of trx_sys_flush_commit_id) are absent from
this listing; restore them from the original source. */
754 static
755 ib_uint64_t
756 trx_write_serialisation_history(
757 /*============================*/
758  trx_t* trx)
759 {
760  mtr_t mtr;
761  trx_rseg_t* rseg;
762 
763  ut_ad(!mutex_own(&kernel_mutex));
764 
765  rseg = trx->rseg;
766  mtr_start(&mtr);
767 
768  /* Change the undo log segment states from TRX_UNDO_ACTIVE
769  to some other state: these modifications to the file data
770  structure define the transaction as committed in the file
771  based domain, at the serialization point of the log sequence
772  number lsn obtained below. */
773 
774  if (trx->update_undo != NULL) {
775  page_t* undo_hdr_page;
776  trx_undo_t* undo = trx->update_undo;
777 
778  /* We have to hold the rseg mutex because update
779  log headers have to be put to the history list in the
780  (serialisation) order of the UNDO trx number. This is
781  required for the purge in-memory data structures too. */
782 
783  mutex_enter(&rseg->mutex);
784 
785  /* Assign the transaction serialisation number and also
786  update the purge min binary heap if this is the first
787  UNDO log being written to the assigned rollback segment. */
788 
789  trx_serialisation_number_get(trx);
790  /* It is not necessary to obtain trx->undo_mutex here
791  because only a single OS thread is allowed to do the
792  transaction commit for this transaction. */
793 
794  undo_hdr_page = trx_undo_set_state_at_finish(undo, &mtr);
795 
796  trx_undo_update_cleanup(trx, undo_hdr_page, &mtr);
797  } else {
798  mutex_enter(&rseg->mutex);
799  }
800 
801  if (trx->insert_undo != NULL) {
803  }
804 
805  mutex_exit(&rseg->mutex);
806 
807 
808  /* Update the highest commit id currently in the system */
809  if (trx_log_commit_id(trx))
810  {
811  mutex_enter(&commit_id_mutex);
812  trx_sys_flush_commit_id(trx_sys_commit_id,
814  &mtr);
815  mutex_exit(&commit_id_mutex);
816  }
817 
818  /* The following call commits the mini-transaction, making the
819  whole transaction committed in the file-based world, at this
820  log sequence number. The transaction becomes 'durable' when
821  we write the log to disk, but in the logical sense the commit
822  in the file-based data structures (undo logs etc.) happens
823  here.
824 
825  NOTE that transaction numbers, which are assigned only to
826  transactions with an update undo log, do not necessarily come
827  in exactly the same order as commit lsn's, if the transactions
828  have different rollback segments. To get exactly the same
829  order we should hold the kernel mutex up to this point,
830  adding to the contention of the kernel mutex. However, if
831  a transaction T2 is able to see modifications made by
832  a transaction T1, T2 will always get a bigger transaction
833  number and a bigger commit lsn than T1. */
834 
835  /*--------------*/
836  mtr_commit(&mtr);
837  /*--------------*/
838 
839  return(mtr.end_lsn);
840 }
841 
842 /****************************************************************/
/* Commits a transaction. The caller must hold kernel_mutex on entry; the
mutex is released and re-acquired around the undo log write and around the
redo log write, and is held again on return.
Steps visible here:
 1. If the transaction has an insert or update undo log, the serialisation
    history is written and the resulting lsn is kept; otherwise lsn is 0.
 2. The state becomes TRX_COMMITTED_IN_MEMORY, is_recovered is cleared and
    any global read view is closed.
 3. If lsn is non-zero, the redo log is written/flushed according to
    trx->flush_log_later and srv_flush_log_at_trx_commit (0: nothing;
    1: write, and flush unless the flush method is SRV_UNIX_NOSYNC;
    2: write only; anything else is a fatal error), and trx->commit_lsn is
    set.
 4. Savepoints are freed, the state returns to TRX_NOT_STARTED and the
    transaction is unlinked from trx_sys->trx_list.
NOTE(review): the function-name line (listing line 846) and listing lines
904 and 920 (the latter being the body of the insert_undo branch) are
absent from this listing. */
844 UNIV_INTERN
845 void
847 /*==================*/
848  trx_t* trx)
849 {
850  ib_uint64_t lsn;
851 
852  ut_ad(mutex_own(&kernel_mutex));
853 
854  trx->must_flush_log_later = FALSE;
855 
856  /* If the transaction made any updates then we need to write the
857  UNDO logs for the updates to the assigned rollback segment. */
858 
859  if (trx->insert_undo != NULL || trx->update_undo != NULL) {
860  mutex_exit(&kernel_mutex);
861 
862  lsn = trx_write_serialisation_history(trx);
863 
864  mutex_enter(&kernel_mutex);
865  } else {
866  lsn = 0;
867  }
868 
869  ut_ad(trx->conc_state == TRX_ACTIVE || trx->conc_state == TRX_PREPARED);
870  ut_ad(mutex_own(&kernel_mutex));
871 
872  /* The following assignment makes the transaction committed in memory
873  and makes its changes to data visible to other transactions.
874  NOTE that there is a small discrepancy from the strict formal
875  visibility rules here: a human user of the database can see
876  modifications made by another transaction T even before the necessary
877  log segment has been flushed to the disk. If the database happens to
878  crash before the flush, the user has seen modifications from T which
879  will never be a committed transaction. However, any transaction T2
880  which sees the modifications of the committing transaction T, and
881  which also itself makes modifications to the database, will get an lsn
882  larger than the committing transaction T. In the case where the log
883  flush fails, and T never gets committed, also T2 will never get
884  committed. */
885 
886  /*--------------------------------------*/
887  trx->conc_state = TRX_COMMITTED_IN_MEMORY;
888  /*--------------------------------------*/
889 
890  /* If we release kernel_mutex below and we are still doing
891  recovery i.e.: background rollback thread is still active
892  then there is a chance that the rollback thread may see
893  this trx as COMMITTED_IN_MEMORY and goes ahead to clean it
894  up calling trx_cleanup_at_db_startup(). This can happen
895  in the case we are committing a trx here that is left in
896  PREPARED state during the crash. Note that commit of the
897  rollback of a PREPARED trx happens in the recovery thread
898  while the rollback of other transactions happen in the
899  background thread. To avoid this race we unconditionally
900  unset the is_recovered flag from the trx. */
901 
902  trx->is_recovered = FALSE;
903 
905 
906  if (trx->global_read_view) {
907  read_view_close(trx->global_read_view);
908  mem_heap_empty(trx->global_read_view_heap);
909  trx->global_read_view = NULL;
910  }
911 
912  trx->read_view = NULL;
913 
914  if (lsn) {
915 
916  mutex_exit(&kernel_mutex);
917 
918  if (trx->insert_undo != NULL) {
919 
921  }
922 
923  /* NOTE that we could possibly make a group commit more
924  efficient here: call os_thread_yield here to allow also other
925  trxs to come to commit! */
926 
927  /*-------------------------------------*/
928 
929  /* Depending on the my.cnf options, we may now write the log
930  buffer to the log files, making the transaction durable if
931  the OS does not crash. We may also flush the log files to
932  disk, making the transaction durable also at an OS crash or a
933  power outage.
934 
935  The idea in InnoDB's group commit is that a group of
936  transactions gather behind a trx doing a physical disk write
937  to log files, and when that physical write has been completed,
938  one of those transactions does a write which commits the whole
939  group. Note that this group commit will only bring benefit if
940  there are > 2 users in the database. Then at least 2 users can
941  gather behind one doing the physical log write to disk.
942 
943  If we are calling trx_commit() under prepare_commit_mutex, we
944  will delay possible log write and flush to a separate function
945  trx_commit_complete_for_mysql(), which is only called when the
946  thread has released the mutex. This is to make the
947  group commit algorithm to work. Otherwise, the prepare_commit
948  mutex would serialize all commits and prevent a group of
949  transactions from gathering. */
950 
951  if (trx->flush_log_later) {
952  /* Do nothing yet */
953  trx->must_flush_log_later = TRUE;
954  } else if (srv_flush_log_at_trx_commit == 0) {
955  /* Do nothing */
956  } else if (srv_flush_log_at_trx_commit == 1) {
957  if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
958  /* Write the log but do not flush it to disk */
959 
960  log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
961  FALSE);
962  } else {
963  /* Write the log to the log files AND flush
964  them to disk */
965 
966  log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
967  }
968  } else if (srv_flush_log_at_trx_commit == 2) {
969 
970  /* Write the log but do not flush it to disk */
971 
972  log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
973  } else {
974  ut_error;
975  }
976 
977  trx->commit_lsn = lsn;
978 
979  /*-------------------------------------*/
980 
981  mutex_enter(&kernel_mutex);
982  }
983 
984  /* Free all savepoints */
985  trx_roll_free_all_savepoints(trx);
986 
987  trx->conc_state = TRX_NOT_STARTED;
988  trx->rseg = NULL;
989  trx->undo_no = 0;
990  trx->last_sql_stat_start.least_undo_no = 0;
991 
992  ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
993  ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0);
994 
995  UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
996 }
997 
998 /****************************************************************/
/* Resets a transaction to TRX_NOT_STARTED, clears its rollback segment
pointer and undo numbers, and unlinks it from trx_sys->trx_list.
NOTE(review): no mutex is acquired or asserted here; presumably the caller
holds kernel_mutex while the list is modified - confirm. The function-name
line (listing line 1004) and listing line 1010 (the body of the
insert_undo branch) are absent from this listing. */
1002 UNIV_INTERN
1003 void
1005 /*======================*/
1006  trx_t* trx)
1007 {
1008  if (trx->insert_undo != NULL) {
1009 
1011  }
1012 
1013  trx->conc_state = TRX_NOT_STARTED;
1014  trx->rseg = NULL;
1015  trx->undo_no = 0;
1016  trx->last_sql_stat_start.least_undo_no = 0;
1017 
1018  UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
1019 }
1020 
1021 /********************************************************************/
/* Returns the read view of an active transaction (state checked with
ut_ad). An existing trx->read_view is returned immediately without taking
any mutex. Otherwise kernel_mutex is taken, the view is checked again, and
when still unset it is assigned by the statement whose tail is visible
below; the new view is also stored in trx->global_read_view.
NOTE(review): the function-name line (listing line 1028) and listing line
1041 (the beginning of the call that takes trx->id and
trx->global_read_view_heap) are absent from this listing. */
1026 UNIV_INTERN
1027 read_view_t*
1029 /*=================*/
1030  trx_t* trx)
1031 {
1032  ut_ad(trx->conc_state == TRX_ACTIVE);
1033 
1034  if (trx->read_view) {
1035  return(trx->read_view);
1036  }
1037 
1038  mutex_enter(&kernel_mutex);
1039 
1040  if (!trx->read_view) {
1042  trx->id, trx->global_read_view_heap);
1043  trx->global_read_view = trx->read_view;
1044  }
1045 
1046  mutex_exit(&kernel_mutex);
1047 
1048  return(trx->read_view);
1049 }
1050 
1051 /****************************************************************/
1053 static
1054 void
1055 trx_handle_commit_sig_off_kernel(
1056 /*=============================*/
1057  trx_t* trx,
1058  que_thr_t** next_thr)
1063 {
1064  trx_sig_t* sig;
1065  trx_sig_t* next_sig;
1066 
1067  ut_ad(mutex_own(&kernel_mutex));
1068 
1069  trx->que_state = TRX_QUE_COMMITTING;
1070 
1071  trx_commit_off_kernel(trx);
1072 
1073  ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
1074 
1075  /* Remove all TRX_SIG_COMMIT signals from the signal queue and send
1076  reply messages to them */
1077 
1078  sig = UT_LIST_GET_FIRST(trx->signals);
1079 
1080  while (sig != NULL) {
1081  next_sig = UT_LIST_GET_NEXT(signals, sig);
1082 
1083  if (sig->type == TRX_SIG_COMMIT) {
1084 
1085  trx_sig_reply(sig, next_thr);
1086  trx_sig_remove(trx, sig);
1087  }
1088 
1089  sig = next_sig;
1090  }
1091 
1092  trx->que_state = TRX_QUE_RUNNING;
1093 }
1094 
1095 /***********************************************************/
/* Empties trx->wait_thrs and puts the transaction back into the
TRX_QUE_RUNNING state. The caller must hold kernel_mutex and the
transaction must be in TRX_QUE_LOCK_WAIT (both ut_ad).
NOTE(review): the function-name line (listing line 1101) and listing line
1113 (a statement executed for each thread before it is unlinked) are
absent from this listing. */
1099 UNIV_INTERN
1100 void
1102 /*==============*/
1103  trx_t* trx)
1104 {
1105  que_thr_t* thr;
1106 
1107  ut_ad(mutex_own(&kernel_mutex));
1108  ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
1109 
1110  thr = UT_LIST_GET_FIRST(trx->wait_thrs);
1111 
1112  while (thr != NULL) {
1114 
1115  UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr);
1116 
1117  thr = UT_LIST_GET_FIRST(trx->wait_thrs);
1118  }
1119 
1120  trx->que_state = TRX_QUE_RUNNING;
1121 }
1122 
1123 /***********************************************************/
1126 static
1127 void
1128 trx_lock_wait_to_suspended(
1129 /*=======================*/
1130  trx_t* trx)
1131 {
1132  que_thr_t* thr;
1133 
1134  ut_ad(mutex_own(&kernel_mutex));
1135  ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
1136 
1137  thr = UT_LIST_GET_FIRST(trx->wait_thrs);
1138 
1139  while (thr != NULL) {
1140  thr->state = QUE_THR_SUSPENDED;
1141 
1142  UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr);
1143 
1144  thr = UT_LIST_GET_FIRST(trx->wait_thrs);
1145  }
1146 
1147  trx->que_state = TRX_QUE_RUNNING;
1148 }
1149 
1150 /***********************************************************/
1153 static
1154 void
1155 trx_sig_reply_wait_to_suspended(
1156 /*============================*/
1157  trx_t* trx)
1158 {
1159  trx_sig_t* sig;
1160  que_thr_t* thr;
1161 
1162  ut_ad(mutex_own(&kernel_mutex));
1163 
1164  sig = UT_LIST_GET_FIRST(trx->reply_signals);
1165 
1166  while (sig != NULL) {
1167  thr = sig->receiver;
1168 
1169  ut_ad(thr->state == QUE_THR_SIG_REPLY_WAIT);
1170 
1171  thr->state = QUE_THR_SUSPENDED;
1172 
1173  sig->receiver = NULL;
1174 
1175  UT_LIST_REMOVE(reply_signals, trx->reply_signals, sig);
1176 
1177  sig = UT_LIST_GET_FIRST(trx->reply_signals);
1178  }
1179 }
1180 
1181 /*****************************************************************/
1185 static
1186 ibool
1187 trx_sig_is_compatible(
1188 /*==================*/
1189  trx_t* trx,
1190  ulint type,
1191  ulint sender)
1192 {
1193  trx_sig_t* sig;
1194 
1195  ut_ad(mutex_own(&kernel_mutex));
1196 
1197  if (UT_LIST_GET_LEN(trx->signals) == 0) {
1198 
1199  return(TRUE);
1200  }
1201 
1202  if (sender == TRX_SIG_SELF) {
1203  if (type == TRX_SIG_ERROR_OCCURRED) {
1204 
1205  return(TRUE);
1206 
1207  } else if (type == TRX_SIG_BREAK_EXECUTION) {
1208 
1209  return(TRUE);
1210  } else {
1211  return(FALSE);
1212  }
1213  }
1214 
1215  ut_ad(sender == TRX_SIG_OTHER_SESS);
1216 
1217  sig = UT_LIST_GET_FIRST(trx->signals);
1218 
1219  if (type == TRX_SIG_COMMIT) {
1220  while (sig != NULL) {
1221 
1222  if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
1223 
1224  return(FALSE);
1225  }
1226 
1227  sig = UT_LIST_GET_NEXT(signals, sig);
1228  }
1229 
1230  return(TRUE);
1231 
1232  } else if (type == TRX_SIG_TOTAL_ROLLBACK) {
1233  while (sig != NULL) {
1234 
1235  if (sig->type == TRX_SIG_COMMIT) {
1236 
1237  return(FALSE);
1238  }
1239 
1240  sig = UT_LIST_GET_NEXT(signals, sig);
1241  }
1242 
1243  return(TRUE);
1244 
1245  } else if (type == TRX_SIG_BREAK_EXECUTION) {
1246 
1247  return(TRUE);
1248  } else {
1249  ut_error;
1250 
1251  return(FALSE);
1252  }
1253 }
1254 
1255 /****************************************************************/
/* Queues a signal on a transaction. The caller must hold kernel_mutex.
An incompatible signal (per trx_sig_is_compatible()) is a fatal error.
The signal object is the embedded trx->sig slot when the queue is empty and
a freshly allocated trx_sig_t otherwise. It is appended to trx->signals
and, when receiver_thr is given, also to the reply_signals list of the
receiver's transaction. When savept is non-NULL its contents are copied
into the signal.
As written here, any sender other than TRX_SIG_SELF, and any signal of type
TRX_SIG_BREAK_EXECUTION, triggers ut_error after the signal has been
queued.
If the new signal is first in the queue, handling starts at once through
trx_sig_start_handle().
NOTE(review): the function-name line (listing line 1259) is absent from
this listing. */
1257 UNIV_INTERN
1258 void
1260 /*=========*/
1261  trx_t* trx,
1262  ulint type,
1263  ulint sender,
1265  que_thr_t* receiver_thr,
1268  trx_savept_t* savept,
1270  que_thr_t** next_thr)
1276 {
1277  trx_sig_t* sig;
1278  trx_t* receiver_trx;
1279 
1280  ut_ad(trx);
1281  ut_ad(mutex_own(&kernel_mutex));
1282 
1283  if (!trx_sig_is_compatible(trx, type, sender)) {
1284  /* The signal is not compatible with the other signals in
1285  the queue: die */
1286 
1287  ut_error;
1288  }
1289 
1290  /* Queue the signal object */
1291 
1292  if (UT_LIST_GET_LEN(trx->signals) == 0) {
1293 
1294  /* The signal list is empty: the 'sig' slot must be unused
1295  (we improve performance a bit by avoiding mem_alloc) */
1296  sig = &(trx->sig);
1297  } else {
1298  /* It might be that the 'sig' slot is unused also in this
1299  case, but we choose the easy way of using mem_alloc */
1300 
1301  sig = static_cast<trx_sig_t *>(mem_alloc(sizeof(trx_sig_t)));
1302  }
1303 
1304  UT_LIST_ADD_LAST(signals, trx->signals, sig);
1305 
1306  sig->type = type;
1307  sig->sender = sender;
1308  sig->receiver = receiver_thr;
1309 
1310  if (savept) {
1311  sig->savept = *savept;
1312  }
1313 
1314  if (receiver_thr) {
1315  receiver_trx = thr_get_trx(receiver_thr);
1316 
1317  UT_LIST_ADD_LAST(reply_signals, receiver_trx->reply_signals,
1318  sig);
1319  }
1320 
1321  if (trx->sess->state == SESS_ERROR) {
1322 
1323  trx_sig_reply_wait_to_suspended(trx);
1324  }
1325 
1326  if ((sender != TRX_SIG_SELF) || (type == TRX_SIG_BREAK_EXECUTION)) {
1327  ut_error;
1328  }
1329 
1330  /* If there were no other signals ahead in the queue, try to start
1331  handling of the signal */
1332 
1333  if (UT_LIST_GET_FIRST(trx->signals) == sig) {
1334 
1335  trx_sig_start_handle(trx, next_thr);
1336  }
1337 }
1338 
1339 /****************************************************************/
/* Ends signal handling for a transaction: clears trx->handling_signals,
restores trx->graph from trx->graph_before_signal_handling and, if a graph
exists and the session is in the SESS_ERROR state, calls
que_fork_error_handle() on it.
NOTE(review): the function-name line is missing from this listing
(presumably trx_end_signal_handling) - confirm against the original source.
Parameter:
 trx	transaction; the debug assertions require kernel_mutex to be owned
	and trx->handling_signals to be TRUE on entry */
1344 UNIV_INTERN
1345 void
1347 /*====================*/
1348  trx_t* trx)
1349 {
1350  ut_ad(mutex_own(&kernel_mutex));
1351  ut_ad(trx->handling_signals == TRUE);
1352 
1353  trx->handling_signals = FALSE;
1354 
1355  trx->graph = trx->graph_before_signal_handling;
1356 
1357  if (trx->graph && (trx->sess->state == SESS_ERROR)) {
1358 
1359  que_fork_error_handle(trx, trx->graph);
1360  }
1361 }
1362 
1363 /****************************************************************/
/* Processes the signals queued on a transaction, one at a time from the
head of trx->signals, for as long as they can be handled immediately.
NOTE(review): the function-name line is missing from this listing; the call
made when a signal is queued suggests the name is trx_sig_start_handle() -
confirm against the original source.
Parameters:
 trx		transaction whose signal queue is processed
 next_thr	passed on to the commit, rollback and reply handlers
The debug assertions require trx to be non-NULL and kernel_mutex to be
owned by the caller. */
1365 UNIV_INTERN
1366 void
1368 /*=================*/
1369  trx_t* trx,
1370  que_thr_t** next_thr)
1376 {
1377  trx_sig_t* sig;
1378  ulint type;
1379 loop:
1380  /* We loop in this function body as long as there are queued signals
1381  we can process immediately */
1382 
1383  ut_ad(trx);
1384  ut_ad(mutex_own(&kernel_mutex));
1385 
1386  if (trx->handling_signals && (UT_LIST_GET_LEN(trx->signals) == 0)) {
1387 
 /* NOTE(review): listing line 1388 is missing at this point, so any
 statement that preceded the return is not visible here */
1389 
1390  return;
1391  }
1392 
1393  if (trx->conc_state == TRX_NOT_STARTED) {
1394 
1395  trx_start_low(trx, ULINT_UNDEFINED);
1396  }
1397 
1398  /* If the trx is in a lock wait state, moves the waiting query threads
1399  to the suspended state */
1400 
1401  if (trx->que_state == TRX_QUE_LOCK_WAIT) {
1402 
1403  trx_lock_wait_to_suspended(trx);
1404  }
1405 
1406  /* If the session is in the error state and this trx has threads
1407  waiting for reply from signals, moves these threads to the suspended
1408  state, canceling wait reservations; note that if the transaction has
1409  sent a commit or rollback signal to itself, and its session is not in
1410  the error state, then nothing is done here. */
1411 
1412  if (trx->sess->state == SESS_ERROR) {
1413  trx_sig_reply_wait_to_suspended(trx);
1414  }
1415 
1416  /* If there are no running query threads, we can start processing of a
1417  signal, otherwise we have to wait until all query threads of this
1418  transaction are aware of the arrival of the signal. */
1419 
1420  if (trx->n_active_thrs > 0) {
1421 
1422  return;
1423  }
1424 
 /* On the first pass, remember the current graph before marking the
 transaction as handling signals */
1425  if (trx->handling_signals == FALSE) {
1426  trx->graph_before_signal_handling = trx->graph;
1427 
1428  trx->handling_signals = TRUE;
1429  }
1430 
 /* The head of the queue is dereferenced without a NULL check, so the
 queue is expected to be non-empty when this point is reached */
1431  sig = UT_LIST_GET_FIRST(trx->signals);
1432  type = sig->type;
1433 
1434  if (type == TRX_SIG_COMMIT) {
1435 
1436  trx_handle_commit_sig_off_kernel(trx, next_thr);
1437 
1438  } else if ((type == TRX_SIG_TOTAL_ROLLBACK)
1439  || (type == TRX_SIG_ROLLBACK_TO_SAVEPT)) {
1440 
1441  trx_rollback(trx, sig, next_thr);
1442 
1443  /* No further signals can be handled until the rollback
1444  completes, therefore we return */
1445 
1446  return;
1447 
1448  } else if (type == TRX_SIG_ERROR_OCCURRED) {
1449 
1450  trx_rollback(trx, sig, next_thr);
1451 
1452  /* No further signals can be handled until the rollback
1453  completes, therefore we return */
1454 
1455  return;
1456 
1457  } else if (type == TRX_SIG_BREAK_EXECUTION) {
1458 
1459  trx_sig_reply(sig, next_thr);
1460  trx_sig_remove(trx, sig);
1461  } else {
1462  ut_error;
1463  }
1464 
1465  goto loop;
1466 }
1467 
1468 /****************************************************************/
/* Replies to a signal: if the signal has a receiver thread, unlinks the
signal from the reply list of the receiver's transaction, ends the
receiver's wait via que_thr_end_wait() and clears sig->receiver. Does
nothing when sig->receiver is NULL.
NOTE(review): the function-name line is missing from this listing; the
two-argument call in the signal-handling loop suggests the name is
trx_sig_reply() - confirm against the original source.
Parameters:
 sig		signal being replied to
 next_thr	passed through to que_thr_end_wait()
The debug assertions require sig to be non-NULL and kernel_mutex to be
owned by the caller. */
1471 UNIV_INTERN
1472 void
1474 /*==========*/
1475  trx_sig_t* sig,
1476  que_thr_t** next_thr)
1481 {
1482  trx_t* receiver_trx;
1483 
1484  ut_ad(sig);
1485  ut_ad(mutex_own(&kernel_mutex));
1486 
1487  if (sig->receiver != NULL) {
1488  ut_ad((sig->receiver)->state == QUE_THR_SIG_REPLY_WAIT);
1489 
1490  receiver_trx = thr_get_trx(sig->receiver);
1491 
1492  UT_LIST_REMOVE(reply_signals, receiver_trx->reply_signals,
1493  sig);
1494  ut_ad(receiver_trx->sess->state != SESS_ERROR);
1495 
1496  que_thr_end_wait(sig->receiver, next_thr);
1497 
1498  sig->receiver = NULL;
1499 
1500  }
1501 }
1502 
1503 /****************************************************************/
/* Removes a signal from the transaction's signal queue and releases its
storage unless it is the slot embedded in the transaction object.
NOTE(review): the function-name line is missing from this listing; the
two-argument call in the signal-handling loop suggests the name is
trx_sig_remove() - confirm against the original source.
Parameters:
 trx	transaction owning the signal queue
 sig	signal to remove; the debug assertion requires sig->receiver to
	be NULL already
The debug assertions also require kernel_mutex to be owned. */
1505 UNIV_INTERN
1506 void
1508 /*===========*/
1509  trx_t* trx,
1510  trx_sig_t* sig)
1511 {
1512  ut_ad(trx && sig);
1513  ut_ad(mutex_own(&kernel_mutex));
1514 
1515  ut_ad(sig->receiver == NULL);
1516 
1517  UT_LIST_REMOVE(signals, trx->signals, sig);
1518  sig->type = 0; /* reset the field to catch possible bugs */
1519 
 /* Only signal objects other than the embedded trx->sig slot are freed */
1520  if (sig != &(trx->sig)) {
1521  mem_free(sig);
1522  }
1523 }
1524 
1525 /*********************************************************************/
/* Allocates a commit node from the given memory heap and initializes it:
common.type is set to QUE_NODE_COMMIT and state to COMMIT_NODE_SEND.
NOTE(review): the return-type and function-name lines are missing from this
listing (presumably commit_node_t* trx_commit_node_create) - confirm
against the original source.
Parameter:
 heap	memory heap the node is allocated from with mem_heap_alloc()
Returns the newly allocated node. */
1528 UNIV_INTERN
1531 /*===============*/
1532  mem_heap_t* heap)
1533 {
1534  commit_node_t* node;
1535 
1536  node = static_cast<commit_node_t *>(mem_heap_alloc(heap, sizeof(commit_node_t)));
1537  node->common.type = QUE_NODE_COMMIT;
1538  node->state = COMMIT_NODE_SEND;
1539 
1540  return(node);
1541 }
1542 
1543 /***********************************************************/
/* Executes one step of a commit node in a query graph.
In the COMMIT_NODE_SEND state the thread is put into the
QUE_THR_SIG_REPLY_WAIT state and a TRX_SIG_COMMIT signal is sent to the
thread's own transaction while holding kernel_mutex. In the
COMMIT_NODE_WAIT state the node is reset to COMMIT_NODE_SEND and control
moves to the parent node.
NOTE(review): the function-name line is missing from this listing
(presumably trx_commit_step) - confirm against the original source.
Parameter:
 thr	query thread whose run_node is the commit node
Returns the value left in next_thr by the signal send (initialized to NULL
before the call) on the send path, or thr itself on the wait path. */
1546 UNIV_INTERN
1547 que_thr_t*
1549 /*============*/
1550  que_thr_t* thr)
1551 {
1552  commit_node_t* node;
1553  que_thr_t* next_thr;
1554 
1555  node = static_cast<commit_node_t *>(thr->run_node);
1556 
1557  ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT);
1558 
 /* Arriving from the parent node restarts the node in the send state */
1559  if (thr->prev_node == que_node_get_parent(node)) {
1560  node->state = COMMIT_NODE_SEND;
1561  }
1562 
1563  if (node->state == COMMIT_NODE_SEND) {
1564  mutex_enter(&kernel_mutex);
1565 
1566  node->state = COMMIT_NODE_WAIT;
1567 
1568  next_thr = NULL;
1569 
1570  thr->state = QUE_THR_SIG_REPLY_WAIT;
1571 
1572  /* Send the commit signal to the transaction */
1573 
1574  trx_sig_send(thr_get_trx(thr), TRX_SIG_COMMIT, TRX_SIG_SELF,
1575  thr, NULL, &next_thr);
1576 
1577  mutex_exit(&kernel_mutex);
1578 
1579  return(next_thr);
1580  }
1581 
1582  ut_ad(node->state == COMMIT_NODE_WAIT);
1583 
1584  node->state = COMMIT_NODE_SEND;
1585 
1586  thr->run_node = que_node_get_parent(node);
1587 
1588  return(thr);
1589 }
1590 
1591 /**********************************************************************/
/* Commits a transaction directly, without going through the signal queue:
sets trx->op_info to "committing", calls trx_commit_off_kernel() while
holding kernel_mutex, then resets trx->op_info to "".
NOTE(review): the function-name line is missing from this listing
(presumably trx_commit_for_mysql) - confirm against the original source.
Parameter:
 trx	transaction to commit; must not be NULL (checked with ut_a)
Returns DB_SUCCESS unconditionally. */
1594 UNIV_INTERN
1595 ulint
1597 /*=================*/
1598  trx_t* trx)
1599 {
1600  /* Because we do not do the commit by sending an Innobase
1601  sig to the transaction, we must here make sure that trx has been
1602  started. */
1603 
1604  ut_a(trx);
1605 
 /* NOTE(review): listing line 1606 is missing at this point; the
 statement that makes sure the transaction is started (see the comment
 above) is not visible in this listing */
1607 
1608  trx->op_info = "committing";
1609 
1610  mutex_enter(&kernel_mutex);
1611 
1612  trx_commit_off_kernel(trx);
1613 
1614  mutex_exit(&kernel_mutex);
1615 
1616  trx->op_info = "";
1617 
1618  return(DB_SUCCESS);
1619 }
1620 
1621 /**********************************************************************/
/* Completes a commit whose log write was postponed: when
trx->must_flush_log_later is set, writes the log up to trx->commit_lsn
according to srv_flush_log_at_trx_commit:
 0	nothing is written here
 1	log_write_up_to() is called with the last argument TRUE, or FALSE
	when srv_unix_file_flush_method is SRV_UNIX_NOSYNC
 2	log_write_up_to() is called with the last argument FALSE
Any other setting is a fatal error (ut_error). The
trx->must_flush_log_later flag is always cleared before returning.
NOTE(review): the function-name line is missing from this listing
(presumably trx_commit_complete_for_mysql) - confirm against the original
source.
Parameter:
 trx	transaction whose commit is completed; must not be NULL
Returns 0 unconditionally. */
1625 UNIV_INTERN
1626 ulint
1628 /*==========================*/
1629  trx_t* trx)
1630 {
1631  ib_uint64_t lsn;
1632 
 /* Assert before the first dereference: reading trx->commit_lsn in the
 declaration would make this check unreachable for a NULL trx */
1633  ut_a(trx);
1634  lsn = trx->commit_lsn;
1635  trx->op_info = "flushing log";
1636 
1637  if (!trx->must_flush_log_later) {
1638  /* Do nothing */
1639  } else if (srv_flush_log_at_trx_commit == 0) {
1640  /* Do nothing */
1641  } else if (srv_flush_log_at_trx_commit == 1) {
1642  if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1643  /* Write the log but do not flush it to disk */
1644 
1645  log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
1646  } else {
1647  /* Write the log to the log files AND flush them to
1648  disk */
1649 
1650  log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1651  }
1652  } else if (srv_flush_log_at_trx_commit == 2) {
1653 
1654  /* Write the log but do not flush it to disk */
1655 
1656  log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
1657  } else {
1658  ut_error;
1659  }
1660 
1661  trx->must_flush_log_later = FALSE;
1662 
1663  trx->op_info = "";
1664 
1665  return(0);
1666 }
1667 
1668 /**********************************************************************/
/* Records the current undo number as the start point of the next SQL
statement: trx->last_sql_stat_start.least_undo_no is set to trx->undo_no.
For a transaction in the TRX_NOT_STARTED state, trx->undo_no is first
reset to 0.
NOTE(review): the function-name line is missing from this listing
(presumably trx_mark_sql_stat_end) - confirm against the original source.
Parameter:
 trx	transaction to update; must not be NULL (checked with ut_a) */
1670 UNIV_INTERN
1671 void
1673 /*==================*/
1674  trx_t* trx)
1675 {
1676  ut_a(trx);
1677 
1678  if (trx->conc_state == TRX_NOT_STARTED) {
1679  trx->undo_no = 0;
1680  }
1681 
1682  trx->last_sql_stat_start.least_undo_no = trx->undo_no;
1683 }
1684 
1685 /**********************************************************************/
/* Prints a human-readable description of a transaction to a stdio stream:
its id and concurrency state, the OS thread id, the current op_info, the
recovered/purge flags, the query state, lock and undo statistics and,
when trx->mysql_thd is set, the output of innobase_mysql_print_thd().
NOTE(review): the function-name line is missing from this listing
(presumably trx_print) - confirm against the original source.
Parameters:
 f		output stream
 trx		transaction to describe
 max_query_len	passed through to innobase_mysql_print_thd() */
1688 UNIV_INTERN
1689 void
1691 /*======*/
1692  FILE* f,
1693  trx_t* trx,
1694  ulint max_query_len)
1696 {
 /* Tracks whether the second output line has received any text and
 therefore needs a terminating newline */
1697  ibool newline;
1698 
1699  fprintf(f, "TRANSACTION " TRX_ID_FMT, trx->id);
1700 
1701  switch (trx->conc_state) {
1702  case TRX_NOT_STARTED:
1703  fputs(", not started", f);
1704  break;
1705  case TRX_ACTIVE:
1706  fprintf(f, ", ACTIVE %lu sec",
1707  (ulong)difftime(time(NULL), trx->start_time));
1708  break;
1709  case TRX_PREPARED:
1710  fprintf(f, ", ACTIVE (PREPARED) %lu sec",
1711  (ulong)difftime(time(NULL), trx->start_time));
1712  break;
1713  case TRX_COMMITTED_IN_MEMORY:
1714  fputs(", COMMITTED IN MEMORY", f);
1715  break;
1716  default:
1717  fprintf(f, " state %lu", (ulong) trx->conc_state);
1718  }
1719 
1720 #ifdef UNIV_LINUX
1721  fprintf(f, ", process no %lu", trx->mysql_process_no);
1722 #endif
1723  fprintf(f, ", OS thread id %lu",
1724  (ulong) os_thread_pf(trx->mysql_thread_id));
1725 
1726  if (*trx->op_info) {
1727  putc(' ', f);
1728  fputs(trx->op_info, f);
1729  }
1730 
1731  if (trx->is_recovered) {
1732  fputs(" recovered trx", f);
1733  }
1734 
1735  if (trx->is_purge) {
1736  fputs(" purge trx", f);
1737  }
1738 
1739  if (trx->declared_to_be_inside_innodb) {
1740  fprintf(f, ", thread declared inside InnoDB %lu",
1741  (ulong) trx->n_tickets_to_enter_innodb);
1742  }
1743 
1744  putc('\n', f);
1745 
1746  if (trx->mysql_n_tables_locked > 0) {
1747  fprintf(f, "mysql tables in locked %lu\n",
1748  (ulong) trx->mysql_n_tables_locked);
1749  }
1750 
1751  newline = TRUE;
1752 
 /* Every state except TRX_QUE_RUNNING prints a prefix on the second
 line; for a running transaction the line stays empty so far */
1753  switch (trx->que_state) {
1754  case TRX_QUE_RUNNING:
1755  newline = FALSE; break;
1756  case TRX_QUE_LOCK_WAIT:
1757  fputs("LOCK WAIT ", f); break;
1758  case TRX_QUE_ROLLING_BACK:
1759  fputs("ROLLING BACK ", f); break;
1760  case TRX_QUE_COMMITTING:
1761  fputs("COMMITTING ", f); break;
1762  default:
1763  fprintf(f, "que state %lu ", (ulong) trx->que_state);
1764  }
1765 
 /* Lock statistics are printed when the transaction holds lock structs
 or its lock heap is larger than 400 bytes */
1766  if (0 < UT_LIST_GET_LEN(trx->trx_locks)
1767  || mem_heap_get_size(trx->lock_heap) > 400) {
1768  newline = TRUE;
1769 
1770  fprintf(f, "%lu lock struct(s), heap size %lu,"
1771  " %lu row lock(s)",
1772  (ulong) UT_LIST_GET_LEN(trx->trx_locks),
1773  (ulong) mem_heap_get_size(trx->lock_heap),
1774  (ulong) lock_number_of_rows_locked(trx));
1775  }
1776 
1777  if (trx->has_search_latch) {
1778  newline = TRUE;
1779  fputs(", holds adaptive hash latch", f);
1780  }
1781 
1782  if (trx->undo_no != 0) {
1783  newline = TRUE;
1784  fprintf(f, ", undo log entries %llu",
1785  (ullint) trx->undo_no);
1786  }
1787 
1788  if (newline) {
1789  putc('\n', f);
1790  }
1791 
1792  if (trx->mysql_thd != NULL) {
1793  innobase_mysql_print_thd(f, trx->mysql_thd, max_query_len);
1794  }
1795 }
1796 
1797 /*******************************************************************/
/* Compares the "weight" of two transactions. A transaction flagged as
having edited non-transactional tables outweighs one that is not; when the
flags are equal the result is TRX_WEIGHT(a) >= TRX_WEIGHT(b).
NOTE(review): the function-name line is missing from this listing
(presumably trx_weight_ge) - confirm against the original source.
NOTE(review): listing lines 1816, 1819 and 1834-1835 are missing, so the
second operand of both flag initializers and the arguments of the disabled
debug fprintf are not visible here.
Parameters:
 a	first transaction
 b	second transaction
Returns TRUE when a is considered at least as heavy as b. */
1802 UNIV_INTERN
1803 ibool
1805 /*==========*/
1806  const trx_t* a,
1807  const trx_t* b)
1808 {
1809  ibool a_notrans_edit;
1810  ibool b_notrans_edit;
1811 
1812  /* If mysql_thd is NULL for a transaction we assume that it has
1813  not edited non-transactional tables. */
1814 
1815  a_notrans_edit = a->mysql_thd != NULL
1817 
1818  b_notrans_edit = b->mysql_thd != NULL
1820 
 /* When exactly one of the two has the flag set, that one is the
 heavier: returning a's flag yields TRUE iff it is a */
1821  if (a_notrans_edit != b_notrans_edit) {
1822 
1823  return(a_notrans_edit);
1824  }
1825 
1826  /* Either both had edited non-transactional tables or both had
1827  not, we fall back to comparing the number of altered/locked
1828  rows. */
1829 
1830 #if 0
1831  fprintf(stderr,
1832  "%s TRX_WEIGHT(a): %lld+%lu, TRX_WEIGHT(b): %lld+%lu\n",
1833  __func__,
1836 #endif
1837 
1838  return(TRX_WEIGHT(a) >= TRX_WEIGHT(b));
1839 }
1840 
1841 /****************************************************************/
/* Moves a transaction to the TRX_PREPARED state.
If the transaction has an insert or update undo log, kernel_mutex is
released while a mini-transaction updates the undo log state under the
rollback segment mutex; the end LSN of that mini-transaction is then used
to write (and possibly flush) the log according to
srv_flush_log_at_trx_commit. kernel_mutex is owned on entry and again on
return, but is released temporarily in both phases.
NOTE(review): listing lines 1876 and 1881 are missing, so the beginning of
the two calls that take &mtr as their last argument is not visible here.
Parameter:
 trx	transaction to prepare */
1843 UNIV_INLINE
1844 void
1845 trx_prepare_off_kernel(
1846 /*===================*/
1847  trx_t* trx)
1848 {
1849  trx_rseg_t* rseg;
 /* Stays 0 when no undo log exists, which skips the log write below */
1850  ib_uint64_t lsn = 0;
1851  mtr_t mtr;
1852 
1853  ut_ad(mutex_own(&kernel_mutex));
1854 
1855  rseg = trx->rseg;
1856 
1857  if (trx->insert_undo != NULL || trx->update_undo != NULL) {
1858 
1859  mutex_exit(&kernel_mutex);
1860 
1861  mtr_start(&mtr);
1862 
1863  /* Change the undo log segment states from TRX_UNDO_ACTIVE
1864  to TRX_UNDO_PREPARED: these modifications to the file data
1865  structure define the transaction as prepared in the
1866  file-based world, at the serialization point of lsn. */
1867 
1868  mutex_enter(&(rseg->mutex));
1869 
1870  if (trx->insert_undo != NULL) {
1871 
1872  /* It is not necessary to obtain trx->undo_mutex here
1873  because only a single OS thread is allowed to do the
1874  transaction prepare for this transaction. */
1875 
1877  &mtr);
1878  }
1879 
1880  if (trx->update_undo) {
1882  trx, trx->update_undo, &mtr);
1883  }
1884 
1885  mutex_exit(&(rseg->mutex));
1886 
1887  /*--------------*/
1888  mtr_commit(&mtr); /* This mtr commit makes the
1889  transaction prepared in the file-based
1890  world */
1891  /*--------------*/
1892  lsn = mtr.end_lsn;
1893 
1894  mutex_enter(&kernel_mutex);
1895  }
1896 
1897  ut_ad(mutex_own(&kernel_mutex));
1898 
1899  /*--------------------------------------*/
1900  trx->conc_state = TRX_PREPARED;
1901  /*--------------------------------------*/
1902 
1903  if (lsn) {
1904  /* Depending on the my.cnf options, we may now write the log
1905  buffer to the log files, making the prepared state of the
1906  transaction durable if the OS does not crash. We may also
1907  flush the log files to disk, making the prepared state of the
1908  transaction durable also at an OS crash or a power outage.
1909 
1910  The idea in InnoDB's group prepare is that a group of
1911  transactions gather behind a trx doing a physical disk write
1912  to log files, and when that physical write has been completed,
1913  one of those transactions does a write which prepares the whole
1914  group. Note that this group prepare will only bring benefit if
1915  there are > 2 users in the database. Then at least 2 users can
1916  gather behind one doing the physical log write to disk.
1917 
1918  TODO: find out if MySQL holds some mutex when calling this.
1919  That would spoil our group prepare algorithm. */
1920 
1921  mutex_exit(&kernel_mutex);
1922 
1923  if (srv_flush_log_at_trx_commit == 0) {
1924  /* Do nothing */
1925  } else if (srv_flush_log_at_trx_commit == 1) {
1926  if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1927  /* Write the log but do not flush it to disk */
1928 
1929  log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
1930  FALSE);
1931  } else {
1932  /* Write the log to the log files AND flush
1933  them to disk */
1934 
1935  log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1936  }
1937  } else if (srv_flush_log_at_trx_commit == 2) {
1938 
1939  /* Write the log but do not flush it to disk */
1940 
1941  log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
1942  } else {
1943  ut_error;
1944  }
1945 
1946  mutex_enter(&kernel_mutex);
1947  }
1948 }
1949 
1950 /**********************************************************************/
/* Prepares a transaction directly, without going through the signal
queue: sets trx->op_info to "preparing", calls trx_prepare_off_kernel()
while holding kernel_mutex, then resets trx->op_info to "".
NOTE(review): the function-name line is missing from this listing
(presumably trx_prepare_for_mysql) - confirm against the original source.
Parameter:
 trx	transaction to prepare; must not be NULL (checked with ut_a)
Returns 0 unconditionally. */
1953 UNIV_INTERN
1954 ulint
1956 /*==================*/
1957  trx_t* trx)
1958 {
1959  /* Because we do not do the prepare by sending an Innobase
1960  sig to the transaction, we must here make sure that trx has been
1961  started. */
1962 
1963  ut_a(trx);
1964 
1965  trx->op_info = "preparing";
1966 
 /* NOTE(review): listing line 1967 is missing at this point; the
 statement that makes sure the transaction is started (see the comment
 above) is not visible in this listing */
1968 
1969  mutex_enter(&kernel_mutex);
1970 
1971  trx_prepare_off_kernel(trx);
1972 
1973  mutex_exit(&kernel_mutex);
1974 
1975  trx->op_info = "";
1976 
1977  return(0);
1978 }
1979 
1980 /**********************************************************************/
/* Collects the XIDs of the transactions that are in the TRX_PREPARED
state. Walks trx_sys->trx_list under kernel_mutex, copies the XID of each
prepared transaction into xid_list and logs a message about it to stderr.
The walk stops once len entries have been stored.
NOTE(review): the function-name line is missing from this listing
(presumably trx_recover_for_mysql) - confirm against the original source.
Parameters:
 xid_list	output array receiving the XIDs; must not be NULL
 len		capacity of xid_list in entries; must be nonzero
Returns the number of XIDs stored in xid_list. */
1984 UNIV_INTERN
1985 int
1987 /*==================*/
1988  XID* xid_list,
1989  ulint len)
1990 {
1991  trx_t* trx;
1992  ulint count = 0;
1993 
 /* NOTE(review): these are ut_ad debug assertions; the capacity test
 in the loop runs only after an entry has been stored, so a zero len
 would not stop the first store - presumably callers guarantee len > 0,
 verify against callers */
1994  ut_ad(xid_list);
1995  ut_ad(len);
1996 
1997  /* We should set those transactions which are in the prepared state
1998  to the xid_list */
1999 
2000  mutex_enter(&kernel_mutex);
2001 
2002  trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
2003 
2004  while (trx) {
2005  if (trx->conc_state == TRX_PREPARED) {
2006  xid_list[count] = trx->xid;
2007 
 /* The banner is printed once, before the first prepared
 transaction is reported */
2008  if (count == 0) {
2009  ut_print_timestamp(stderr);
2010  fprintf(stderr,
2011  " InnoDB: Starting recovery for"
2012  " XA transactions...\n");
2013  }
2014 
2015  ut_print_timestamp(stderr);
2016  fprintf(stderr,
2017  " InnoDB: Transaction " TRX_ID_FMT " in"
2018  " prepared state after recovery\n",
2019  trx->id);
2020 
2021  ut_print_timestamp(stderr);
2022  fprintf(stderr,
2023  " InnoDB: Transaction contains changes"
2024  " to %llu rows\n",
2025  (ullint) trx->undo_no);
2026 
2027  count++;
2028 
 /* Stop as soon as the output array is full */
2029  if (count == len) {
2030  break;
2031  }
2032  }
2033 
2034  trx = UT_LIST_GET_NEXT(trx_list, trx);
2035  }
2036 
2037  mutex_exit(&kernel_mutex);
2038 
2039  if (count > 0){
2040  ut_print_timestamp(stderr);
2041  fprintf(stderr,
2042  " InnoDB: %lu transactions in prepared state"
2043  " after recovery\n",
2044  (ulong) count);
2045  }
2046 
2047  return ((int) count);
2048 }
2049 
2050 /*******************************************************************/
/* Looks up a prepared transaction by its X/Open XA transaction id.
Walks trx_sys->trx_list under kernel_mutex and stops at the first
transaction in the TRX_PREPARED state whose gtrid_length, bqual_length and
first gtrid_length + bqual_length data bytes equal those of xid. The XID
stored in the matching transaction is then invalidated (zeroed, formatID
set to -1) so that later lookups will not find it again.
NOTE(review): the function-name line is missing from this listing
(presumably trx_get_trx_by_xid) - confirm against the original source.
Parameter:
 xid	XA transaction id to search for; may be NULL
Returns the matching transaction, or NULL when xid is NULL or nothing
matches. */
2054 UNIV_INTERN
2055 trx_t*
2057 /*===============*/
2058  const XID* xid)
2059 {
2060  trx_t* trx;
2061 
2062  if (xid == NULL) {
2063 
2064  return(NULL);
2065  }
2066 
2067  mutex_enter(&kernel_mutex);
2068 
2069  trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
2070 
 /* When the list is exhausted without a match, trx ends up NULL and
 that is what gets returned */
2071  while (trx) {
2072  /* Compare two X/Open XA transaction id's: their
2073  length should be the same and binary comparison
2074  of gtrid_length+bqual_length bytes should be
2075  the same */
2076 
2077  if (trx->conc_state == TRX_PREPARED
2078  && xid->gtrid_length == trx->xid.gtrid_length
2079  && xid->bqual_length == trx->xid.bqual_length
2080  && memcmp(xid->data, trx->xid.data,
2081  xid->gtrid_length + xid->bqual_length) == 0) {
2082 
2083  /* Invalidate the XID, so that subsequent calls
2084  will not find it. */
2085  memset(&trx->xid, 0, sizeof(trx->xid));
2086  trx->xid.formatID = -1;
2087  break;
2088  }
2089 
2090  trx = UT_LIST_GET_NEXT(trx_list, trx);
2091  }
2092 
2093  mutex_exit(&kernel_mutex);
2094 
2095  return(trx);
2096 }