Drizzled Public API Documentation

btr0btr.cc — InnoDB B-tree operations: root access, index page allocation and freeing, node-pointer maintenance, and BLOB reference debugging (UNIV_BLOB_DEBUG).
1 /*****************************************************************************
2 
3 Copyright (C) 1994, 2010, Innobase Oy. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15 St, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 *****************************************************************************/
18 
19 /**************************************************/
26 #include "btr0btr.h"
27 
28 #ifdef UNIV_NONINL
29 #include "btr0btr.ic"
30 #endif
31 
32 #include "fsp0fsp.h"
33 #include "page0page.h"
34 #include "xtrabackup_api.h"
35 
36 #ifndef UNIV_HOTBACKUP
37 #include "btr0cur.h"
38 #include "btr0sea.h"
39 #include "btr0pcur.h"
40 #include "rem0cmp.h"
41 #include "lock0lock.h"
42 #include "ibuf0ibuf.h"
43 #include "trx0trx.h"
44 #include "page0zip.h"
45 
46 #ifdef UNIV_BLOB_DEBUG
47 # include "srv0srv.h"
48 # include "ut0rbt.h"
49 
51 static ibool btr_blob_dbg_msg;
52 
57 #define btr_blob_dbg_msg_issue(op, b, ctx) \
58  fprintf(stderr, op " %u:%u:%u->%u %s(%u,%u,%u)\n", \
59  (b)->ref_page_no, (b)->ref_heap_no, \
60  (b)->ref_field_no, (b)->blob_page_no, ctx, \
61  (b)->owner, (b)->always_owner, (b)->del)
62 
/**************************************************************//**
Register a reference to an off-page (BLOB) column in the index->blobs
red-black tree (UNIV_BLOB_DEBUG bookkeeping). */
UNIV_INTERN
void
btr_blob_dbg_rbt_insert(
/*====================*/
	dict_index_t*		index,	/*!< in/out: index whose blobs
					rb-tree is updated */
	const btr_blob_dbg_t*	b,	/*!< in: the reference to insert */
	const char*		ctx)	/*!< in: context string for the
					optional diagnostic message */
{
	if (btr_blob_dbg_msg) {
		btr_blob_dbg_msg_issue("insert", b, ctx);
	}
	/* index->blobs is protected by index->blobs_mutex */
	mutex_enter(&index->blobs_mutex);
	rbt_insert(index->blobs, b, b);
	mutex_exit(&index->blobs_mutex);
}
82 
/**************************************************************//**
Remove a reference to an off-page (BLOB) column from the index->blobs
red-black tree. The reference must exist (asserted). */
UNIV_INTERN
void
btr_blob_dbg_rbt_delete(
/*====================*/
	dict_index_t*		index,	/*!< in/out: index whose blobs
					rb-tree is updated */
	const btr_blob_dbg_t*	b,	/*!< in: the reference to delete */
	const char*		ctx)	/*!< in: context string for the
					optional diagnostic message */
{
	if (btr_blob_dbg_msg) {
		btr_blob_dbg_msg_issue("delete", b, ctx);
	}
	/* index->blobs is protected by index->blobs_mutex */
	mutex_enter(&index->blobs_mutex);
	/* The deletion must succeed: the reference must be present. */
	ut_a(rbt_delete(index->blobs, b));
	mutex_exit(&index->blobs_mutex);
}
102 
103 /**************************************************************/
107 static
108 int
109 btr_blob_dbg_cmp(
110 /*=============*/
111  const void* a,
112  const void* b)
113 {
114  const btr_blob_dbg_t* aa = a;
115  const btr_blob_dbg_t* bb = b;
116 
117  ut_ad(aa != NULL);
118  ut_ad(bb != NULL);
119 
120  if (aa->ref_page_no != bb->ref_page_no) {
121  return(aa->ref_page_no < bb->ref_page_no ? -1 : 1);
122  }
123  if (aa->ref_heap_no != bb->ref_heap_no) {
124  return(aa->ref_heap_no < bb->ref_heap_no ? -1 : 1);
125  }
126  if (aa->ref_field_no != bb->ref_field_no) {
127  return(aa->ref_field_no < bb->ref_field_no ? -1 : 1);
128  }
129  return(0);
130 }
131 
132 /**************************************************************/
/**************************************************************//**
Add a single freshly-stored BLOB page reference to index->blobs.
The owning record must not be delete-marked (asserted). */
UNIV_INTERN
void
btr_blob_dbg_add_blob(
/*==================*/
	const rec_t*	rec,		/*!< in: clustered index record
					owning the BLOB */
	ulint		field_no,	/*!< in: BLOB field number in rec */
	ulint		page_no,	/*!< in: page number of the first
					BLOB page */
	dict_index_t*	index,		/*!< in/out: index */
	const char*	ctx)		/*!< in: context (for logging) */
{
	btr_blob_dbg_t	b;
	const page_t*	page	= page_align(rec);

	ut_a(index->blobs);

	b.blob_page_no = page_no;
	b.ref_page_no = page_get_page_no(page);
	b.ref_heap_no = page_rec_get_heap_no(rec);
	b.ref_field_no = field_no;
	/* BLOB columns cannot be part of the unique prefix of the key. */
	ut_a(b.ref_field_no >= index->n_uniq);
	/* A newly stored BLOB is owned by this record. */
	b.always_owner = b.owner = TRUE;
	b.del = FALSE;
	ut_a(!rec_get_deleted_flag(rec, page_is_comp(page)));
	btr_blob_dbg_rbt_insert(index, &b, ctx);
}
159 
160 /**************************************************************/
163 UNIV_INTERN
164 ulint
165 btr_blob_dbg_add_rec(
166 /*=================*/
167  const rec_t* rec,
168  dict_index_t* index,
169  const ulint* offsets,
170  const char* ctx)
171 {
172  ulint count = 0;
173  ulint i;
174  btr_blob_dbg_t b;
175  ibool del;
176 
177  ut_ad(rec_offs_validate(rec, index, offsets));
178 
179  if (!rec_offs_any_extern(offsets)) {
180  return(0);
181  }
182 
183  b.ref_page_no = page_get_page_no(page_align(rec));
184  b.ref_heap_no = page_rec_get_heap_no(rec);
185  del = (rec_get_deleted_flag(rec, rec_offs_comp(offsets)) != 0);
186 
187  for (i = 0; i < rec_offs_n_fields(offsets); i++) {
188  if (rec_offs_nth_extern(offsets, i)) {
189  ulint len;
190  const byte* field_ref = rec_get_nth_field(
191  rec, offsets, i, &len);
192 
193  ut_a(len != UNIV_SQL_NULL);
195  field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;
196 
197  if (!memcmp(field_ref, field_ref_zero,
198  BTR_EXTERN_FIELD_REF_SIZE)) {
199  /* the column has not been stored yet */
200  continue;
201  }
202 
203  b.ref_field_no = i;
204  b.blob_page_no = mach_read_from_4(
205  field_ref + BTR_EXTERN_PAGE_NO);
206  ut_a(b.ref_field_no >= index->n_uniq);
207  b.always_owner = b.owner
208  = !(field_ref[BTR_EXTERN_LEN]
210  b.del = del;
211 
212  btr_blob_dbg_rbt_insert(index, &b, ctx);
213  count++;
214  }
215  }
216 
217  return(count);
218 }
219 
220 /**************************************************************/
/**************************************************************//**
Print all tracked BLOB references of an index to stderr. Intended to be
invoked from a debugger; the caller is responsible for holding
index->blobs_mutex (not acquired here on purpose). */
UNIV_INTERN
void
btr_blob_dbg_print(
/*===============*/
	const dict_index_t*	index)	/*!< in: index whose BLOB
					references to print */
{
	const ib_rbt_node_t*	node;

	if (!index->blobs) {
		return;
	}

	/* We intentionally do not acquire index->blobs_mutex here.
	This function is to be called from a debugger, and the caller
	should make sure that the index->blobs_mutex is held. */

	for (node = rbt_first(index->blobs);
	     node != NULL; node = rbt_next(index->blobs, node)) {
		const btr_blob_dbg_t*	b
			= rbt_value(btr_blob_dbg_t, node);
		fprintf(stderr, "%u:%u:%u->%u%s%s%s\n",
			b->ref_page_no, b->ref_heap_no, b->ref_field_no,
			b->blob_page_no,
			b->owner ? "" : "(disowned)",
			b->always_owner ? "" : "(has disowned)",
			b->del ? "(deleted)" : "");
	}
}
252 
253 /**************************************************************/
/**************************************************************//**
Remove from index->blobs the references to off-page columns of a record.
References that were never stored (all-zero pointer, page 0) or already
freed (FIL_NULL) are skipped.
@return	number of references removed */
UNIV_INTERN
ulint
btr_blob_dbg_remove_rec(
/*====================*/
	const rec_t*	rec,	/*!< in: record */
	dict_index_t*	index,	/*!< in/out: index */
	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
	const char*	ctx)	/*!< in: context (for logging) */
{
	ulint		i;
	ulint		count	= 0;
	btr_blob_dbg_t	b;

	ut_ad(rec_offs_validate(rec, index, offsets));

	if (!rec_offs_any_extern(offsets)) {
		return(0);
	}

	b.ref_page_no = page_get_page_no(page_align(rec));
	b.ref_heap_no = page_rec_get_heap_no(rec);

	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
		if (rec_offs_nth_extern(offsets, i)) {
			ulint		len;
			const byte*	field_ref = rec_get_nth_field(
				rec, offsets, i, &len);

			ut_a(len != UNIV_SQL_NULL);
			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
			/* The BLOB pointer occupies the last
			BTR_EXTERN_FIELD_REF_SIZE bytes of the field. */
			field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;

			b.ref_field_no = i;
			b.blob_page_no = mach_read_from_4(
				field_ref + BTR_EXTERN_PAGE_NO);

			switch (b.blob_page_no) {
			case 0:
				/* The column has not been stored yet.
				The BLOB pointer must be all zero.
				There cannot be a BLOB starting at
				page 0, because page 0 is reserved for
				the tablespace header. */
				ut_a(!memcmp(field_ref, field_ref_zero,
					     BTR_EXTERN_FIELD_REF_SIZE));
				/* fall through */
			case FIL_NULL:
				/* the column has been freed already */
				continue;
			}

			btr_blob_dbg_rbt_delete(index, &b, ctx);
			count++;
		}
	}

	return(count);
}
314 
315 /**************************************************************/
/**************************************************************//**
Check that there are no references to off-page columns from or to the
given page. Invoked when a page is freed or reused.
@return	TRUE if no such references exist (or tracking is disabled) */
UNIV_INTERN
ibool
btr_blob_dbg_is_empty(
/*==================*/
	dict_index_t*	index,		/*!< in: index */
	ulint		page_no)	/*!< in: page number to check */
{
	const ib_rbt_node_t*	node;
	ibool			success	= TRUE;

	if (!index->blobs) {
		return(success);
	}

	mutex_enter(&index->blobs_mutex);

	for (node = rbt_first(index->blobs);
	     node != NULL; node = rbt_next(index->blobs, node)) {
		const btr_blob_dbg_t*	b
			= rbt_value(btr_blob_dbg_t, node);

		/* Only references that involve page_no (as either the
		referring page or the BLOB page) are of interest. */
		if (b->ref_page_no != page_no && b->blob_page_no != page_no) {
			continue;
		}

		fprintf(stderr,
			"InnoDB: orphan BLOB ref%s%s%s %u:%u:%u->%u\n",
			b->owner ? "" : "(disowned)",
			b->always_owner ? "" : "(has disowned)",
			b->del ? "(deleted)" : "",
			b->ref_page_no, b->ref_heap_no, b->ref_field_no,
			b->blob_page_no);

		/* A disowned, delete-marked reference to the BLOB page
		itself is tolerated; anything else is a failure. */
		if (b->blob_page_no != page_no || b->owner || !b->del) {
			success = FALSE;
		}
	}

	mutex_exit(&index->blobs_mutex);
	return(success);
}
360 
361 /**************************************************************/
/**************************************************************//**
Apply an operation (such as btr_blob_dbg_add_rec or
btr_blob_dbg_remove_rec) to every user record on a clustered index leaf
page, starting from rec or from the beginning of the page.
@return	total count returned by the per-record operation */
UNIV_INTERN
ulint
btr_blob_dbg_op(
/*============*/
	const page_t*		page,	/*!< in: B-tree leaf page */
	const rec_t*		rec,	/*!< in: record to start from, or
					NULL to process the whole page */
	dict_index_t*		index,	/*!< in/out: index */
	const char*		ctx,	/*!< in: context (for logging) */
	const btr_blob_dbg_op_f	op)	/*!< in: operation to apply to
					each record */
{
	ulint		count	= 0;
	mem_heap_t*	heap	= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets	= offsets_;
	rec_offs_init(offsets_);

	ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
	ut_a(!rec || page_align(rec) == page);

	/* Only clustered-index leaf pages can carry off-page columns;
	anything else has nothing to do. */
	if (!index->blobs || !page_is_leaf(page)
	    || !dict_index_is_clust(index)) {
		return(0);
	}

	if (rec == NULL) {
		rec = page_get_infimum_rec(page);
	}

	do {
		offsets = rec_get_offsets(rec, index, offsets,
					  ULINT_UNDEFINED, &heap);
		count += op(rec, index, offsets, ctx);
		rec = page_rec_get_next_const(rec);
	} while (!page_rec_is_supremum(rec));

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	return(count);
}
406 
407 /**************************************************************/
/**************************************************************//**
Count and register the off-page column references on a page. The page
must not yet have any registered references (asserted).
@return	number of references added */
UNIV_INTERN
ulint
btr_blob_dbg_add(
/*=============*/
	const page_t*	page,	/*!< in: B-tree leaf page */
	dict_index_t*	index,	/*!< in/out: index */
	const char*	ctx)	/*!< in: context (for logging) */
{
	btr_blob_dbg_assert_empty(index, page_get_page_no(page));

	return(btr_blob_dbg_op(page, NULL, index, ctx, btr_blob_dbg_add_rec));
}
423 
424 /**************************************************************/
/**************************************************************//**
Count and deregister the off-page column references on a page.
Afterwards the page must have no registered references (asserted).
@return	number of references removed */
UNIV_INTERN
ulint
btr_blob_dbg_remove(
/*================*/
	const page_t*	page,	/*!< in: B-tree leaf page */
	dict_index_t*	index,	/*!< in/out: index */
	const char*	ctx)	/*!< in: context (for logging) */
{
	ulint	count;

	count = btr_blob_dbg_op(page, NULL, index, ctx,
				btr_blob_dbg_remove_rec);

	/* Check that no references exist. */
	btr_blob_dbg_assert_empty(index, page_get_page_no(page));

	return(count);
}
447 
448 /**************************************************************/
/**************************************************************//**
Restore the BLOB reference bookkeeping after a page has been replaced by
a copy: remove the references registered for npage and re-register those
of page. Both pages must have the same page number and space id, and the
counts must match (asserted). */
UNIV_INTERN
void
btr_blob_dbg_restore(
/*=================*/
	const page_t*	npage,	/*!< in: page whose references to drop */
	const page_t*	page,	/*!< in: page whose references to add */
	dict_index_t*	index,	/*!< in/out: index */
	const char*	ctx)	/*!< in: context (for logging) */
{
	ulint	removed;
	ulint	added;

	ut_a(page_get_page_no(npage) == page_get_page_no(page));
	ut_a(page_get_space_id(npage) == page_get_space_id(page));

	removed = btr_blob_dbg_remove(npage, index, ctx);
	added = btr_blob_dbg_add(page, index, ctx);
	ut_a(added == removed);
}
470 
471 /**************************************************************/
/**************************************************************//**
Update the delete-marked flag of all off-page column references of a
record in index->blobs. Every referenced BLOB must already be stored
and tracked (asserted). */
UNIV_INTERN
void
btr_blob_dbg_set_deleted_flag(
/*==========================*/
	const rec_t*	rec,	/*!< in: clustered index record */
	dict_index_t*	index,	/*!< in/out: index */
	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
	ibool		del)	/*!< in: new delete-mark flag
				(FALSE=0 or TRUE=1 only) */
{
	const ib_rbt_node_t*	node;
	btr_blob_dbg_t		b;
	btr_blob_dbg_t*		c;
	ulint			i;

	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_a(dict_index_is_clust(index));
	ut_a(del == !!del);/* must be FALSE==0 or TRUE==1 */

	if (!rec_offs_any_extern(offsets) || !index->blobs) {

		return;
	}

	b.ref_page_no = page_get_page_no(page_align(rec));
	b.ref_heap_no = page_rec_get_heap_no(rec);

	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
		if (rec_offs_nth_extern(offsets, i)) {
			ulint		len;
			const byte*	field_ref = rec_get_nth_field(
				rec, offsets, i, &len);

			ut_a(len != UNIV_SQL_NULL);
			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
			/* The BLOB pointer occupies the last
			BTR_EXTERN_FIELD_REF_SIZE bytes of the field. */
			field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;

			b.ref_field_no = i;
			b.blob_page_no = mach_read_from_4(
				field_ref + BTR_EXTERN_PAGE_NO);

			switch (b.blob_page_no) {
			case 0:
				/* Page 0 would mean an unstored
				column; the pointer must then NOT be
				all zero, and either way this state is
				invalid here. */
				ut_a(memcmp(field_ref, field_ref_zero,
					    BTR_EXTERN_FIELD_REF_SIZE));
				/* page number 0 is for the
				page allocation bitmap */
				/* fall through */
			case FIL_NULL:
				/* the column has been freed already */
				ut_error;
			}

			mutex_enter(&index->blobs_mutex);
			node = rbt_lookup(index->blobs, &b);
			ut_a(node);

			c = rbt_value(btr_blob_dbg_t, node);
			/* The flag should be modified. */
			c->del = del;
			if (btr_blob_dbg_msg) {
				/* Copy out before releasing the mutex
				so the message uses a stable snapshot. */
				b = *c;
				mutex_exit(&index->blobs_mutex);
				btr_blob_dbg_msg_issue("del_mk", &b, "");
			} else {
				mutex_exit(&index->blobs_mutex);
			}
		}
	}
}
541 
542 /**************************************************************/
/**************************************************************//**
Verify and update the ownership flag of an off-page column reference in
index->blobs, after the flag has been changed in the record itself. */
UNIV_INTERN
void
btr_blob_dbg_owner(
/*===============*/
	const rec_t*	rec,	/*!< in: clustered index record */
	dict_index_t*	index,	/*!< in/out: index */
	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
	ulint		i,	/*!< in: field number of the BLOB column */
	ibool		own)	/*!< in: new ownership status of the
				reference */
{
	const ib_rbt_node_t*	node;
	btr_blob_dbg_t		b;
	const byte*		field_ref;
	ulint			len;

	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_a(rec_offs_nth_extern(offsets, i));

	field_ref = rec_get_nth_field(rec, offsets, i, &len);
	ut_a(len != UNIV_SQL_NULL);
	ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
	/* The BLOB pointer occupies the last
	BTR_EXTERN_FIELD_REF_SIZE bytes of the field. */
	field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;

	b.ref_page_no = page_get_page_no(page_align(rec));
	b.ref_heap_no = page_rec_get_heap_no(rec);
	b.ref_field_no = i;
	/* Ownership is the inverse of BTR_EXTERN_OWNER_FLAG in the
	length byte of the stored reference. */
	b.owner = !(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG);
	b.blob_page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);

	/* The record must already reflect the requested ownership. */
	ut_a(b.owner == own);

	mutex_enter(&index->blobs_mutex);
	node = rbt_lookup(index->blobs, &b);
	/* row_ins_clust_index_entry_by_modify() invokes
	btr_cur_unmark_extern_fields() also for the newly inserted
	references, which are all zero bytes until the columns are stored.
	The node lookup must fail if and only if that is the case. */
	ut_a(!memcmp(field_ref, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE)
	     == !node);

	if (node) {
		btr_blob_dbg_t*	c = rbt_value(btr_blob_dbg_t, node);
		/* Some code sets ownership from TRUE to TRUE.
		We do not allow changing ownership from FALSE to FALSE. */
		ut_a(own || c->owner);

		c->owner = own;
		if (!own) {
			c->always_owner = FALSE;
		}
	}

	mutex_exit(&index->blobs_mutex);
}
598 #endif /* UNIV_BLOB_DEBUG */
599 
600 /*
601 Latching strategy of the InnoDB B-tree
602 --------------------------------------
603 A tree latch protects all non-leaf nodes of the tree. Each node of a tree
604 also has a latch of its own.
605 
606 A B-tree operation normally first acquires an S-latch on the tree. It
607 searches down the tree and releases the tree latch when it has the
608 leaf node latch. To save CPU time we do not acquire any latch on
609 non-leaf nodes of the tree during a search, those pages are only bufferfixed.
610 
611 If an operation needs to restructure the tree, it acquires an X-latch on
612 the tree before searching to a leaf node. If it needs, for example, to
613 split a leaf,
614 (1) InnoDB decides the split point in the leaf,
615 (2) allocates a new page,
616 (3) inserts the appropriate node pointer to the first non-leaf level,
617 (4) releases the tree X-latch,
618 (5) and then moves records from the leaf to the new allocated page.
619 
620 Node pointers
621 -------------
622 Leaf pages of a B-tree contain the index records stored in the
623 tree. On levels n > 0 we store 'node pointers' to pages on level
624 n - 1. For each page there is exactly one node pointer stored:
625 thus the our tree is an ordinary B-tree, not a B-link tree.
626 
627 A node pointer contains a prefix P of an index record. The prefix
628 is long enough so that it determines an index record uniquely.
629 The file page number of the child page is added as the last
630 field. To the child page we can store node pointers or index records
631 which are >= P in the alphabetical order, but < P1 if there is
632 a next node pointer on the level, and P1 is its prefix.
633 
634 If a node pointer with a prefix P points to a non-leaf child,
635 then the leftmost record in the child must have the same
636 prefix P. If it points to a leaf node, the child is not required
637 to contain any record with a prefix equal to P. The leaf case
638 is decided this way to allow arbitrary deletions in a leaf node
639 without touching upper levels of the tree.
640 
641 We have predefined a special minimum record which we
642 define as the smallest record in any alphabetical order.
643 A minimum record is denoted by setting a bit in the record
644 header. A minimum record acts as the prefix of a node pointer
645 which points to a leftmost node on any level of the tree.
646 
647 File page allocation
648 --------------------
649 In the root node of a B-tree there are two file segment headers.
650 The leaf pages of a tree are allocated from one file segment, to
651 make them consecutive on disk if possible. From the other file segment
652 we allocate pages for the non-leaf levels of the tree.
653 */
654 
655 #ifdef UNIV_BTR_DEBUG
656 /**************************************************************/
659 static
660 ibool
661 btr_root_fseg_validate(
662 /*===================*/
663  const fseg_header_t* seg_header,
664  ulint space)
665 {
666  ulint offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET);
667 
668  ut_a(mach_read_from_4(seg_header + FSEG_HDR_SPACE) == space);
669  ut_a(offset >= FIL_PAGE_DATA);
670  ut_a(offset <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END);
671  return(TRUE);
672 }
673 #endif /* UNIV_BTR_DEBUG */
674 
675 /**************************************************************/
679 btr_root_block_get(
680 /*===============*/
681  dict_index_t* index,
682  mtr_t* mtr)
683 {
684  ulint space;
685  ulint zip_size;
686  ulint root_page_no;
687  buf_block_t* block;
688 
689  space = dict_index_get_space(index);
690  zip_size = dict_table_zip_size(index->table);
691  root_page_no = dict_index_get_page(index);
692 
693  block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
694  ut_a((ibool)!!page_is_comp(buf_block_get_frame(block))
695  == dict_table_is_comp(index->table));
696 #ifdef UNIV_BTR_DEBUG
697  if (!dict_index_is_ibuf(index)) {
698  const page_t* root = buf_block_get_frame(block);
699 
700  ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
701  + root, space));
702  ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
703  + root, space));
704  }
705 #endif /* UNIV_BTR_DEBUG */
706 
707  return(block);
708 }
709 
710 /**************************************************************/
713 UNIV_INTERN
714 page_t*
715 btr_root_get(
716 /*=========*/
717  dict_index_t* index,
718  mtr_t* mtr)
719 {
720  return(buf_block_get_frame(btr_root_block_get(index, mtr)));
721 }
722 
723 /*************************************************************/
/*************************************************************//**
Gets pointer to the previous user record in the tree, crossing to the
previous page if necessary. The caller must hold the appropriate
latches on the page and, if needed, on the previous page.
@return	previous user record, or NULL if rec is the first in the tree */
UNIV_INTERN
rec_t*
btr_get_prev_user_rec(
/*==================*/
	rec_t*	rec,	/*!< in: record on a leaf page */
	mtr_t*	mtr)	/*!< in: mtr holding a latch on the page, and if
			needed, also on the previous page */
{
	page_t*	page;
	page_t*	prev_page;
	ulint	prev_page_no;

	/* First try within the same page. */
	if (!page_rec_is_infimum(rec)) {

		rec_t*	prev_rec = page_rec_get_prev(rec);

		if (!page_rec_is_infimum(prev_rec)) {

			return(prev_rec);
		}
	}

	page = page_align(rec);
	prev_page_no = btr_page_get_prev(page, mtr);

	if (prev_page_no != FIL_NULL) {

		ulint		space;
		ulint		zip_size;
		buf_block_t*	prev_block;

		space = page_get_space_id(page);
		zip_size = fil_space_get_zip_size(space);

		prev_block = buf_page_get_with_no_latch(space, zip_size,
							prev_page_no, mtr);
		prev_page = buf_block_get_frame(prev_block);
		/* The caller must already have a latch to the brother */
		ut_ad(mtr_memo_contains(mtr, prev_block,
					MTR_MEMO_PAGE_S_FIX)
		      || mtr_memo_contains(mtr, prev_block,
					   MTR_MEMO_PAGE_X_FIX));
#ifdef UNIV_BTR_DEBUG
		ut_a(page_is_comp(prev_page) == page_is_comp(page));
		ut_a(btr_page_get_next(prev_page, mtr)
		     == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */

		/* Return the last user record of the previous page. */
		return(page_rec_get_prev(page_get_supremum_rec(prev_page)));
	}

	return(NULL);
}
780 
781 /*************************************************************/
/*************************************************************//**
Gets pointer to the next user record in the tree, crossing to the next
page if necessary. The caller must hold the appropriate latches on the
page and, if needed, on the next page.
@return	next user record, or NULL if rec is the last in the tree */
UNIV_INTERN
rec_t*
btr_get_next_user_rec(
/*==================*/
	rec_t*	rec,	/*!< in: record on a leaf page */
	mtr_t*	mtr)	/*!< in: mtr holding a latch on the page, and if
			needed, also on the next page */
{
	page_t*	page;
	page_t*	next_page;
	ulint	next_page_no;

	/* First try within the same page. */
	if (!page_rec_is_supremum(rec)) {

		rec_t*	next_rec = page_rec_get_next(rec);

		if (!page_rec_is_supremum(next_rec)) {

			return(next_rec);
		}
	}

	page = page_align(rec);
	next_page_no = btr_page_get_next(page, mtr);

	if (next_page_no != FIL_NULL) {
		ulint		space;
		ulint		zip_size;
		buf_block_t*	next_block;

		space = page_get_space_id(page);
		zip_size = fil_space_get_zip_size(space);

		next_block = buf_page_get_with_no_latch(space, zip_size,
							next_page_no, mtr);
		next_page = buf_block_get_frame(next_block);
		/* The caller must already have a latch to the brother */
		ut_ad(mtr_memo_contains(mtr, next_block, MTR_MEMO_PAGE_S_FIX)
		      || mtr_memo_contains(mtr, next_block,
					   MTR_MEMO_PAGE_X_FIX));
#ifdef UNIV_BTR_DEBUG
		ut_a(page_is_comp(next_page) == page_is_comp(page));
		ut_a(btr_page_get_prev(next_page, mtr)
		     == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */

		/* Return the first user record of the next page. */
		return(page_rec_get_next(page_get_infimum_rec(next_page)));
	}

	return(NULL);
}
836 
837 /**************************************************************/
/**************************************************************//**
Creates (initializes) a new index page at the given B-tree level and
stamps it with the index id. */
static
void
btr_page_create(
/*============*/
	buf_block_t*	block,	/*!< in/out: page to be created,
				x-latched */
	page_zip_des_t*	page_zip,/*!< in/out: compressed page descriptor,
				or NULL for an uncompressed page */
	dict_index_t*	index,	/*!< in: index the page belongs to */
	ulint		level,	/*!< in: B-tree level of the page */
	mtr_t*		mtr)	/*!< in: mtr */
{
	page_t*		page = buf_block_get_frame(block);

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block));

	if (UNIV_LIKELY_NULL(page_zip)) {
		/* page_create_zip() also records the level. */
		page_create_zip(block, index, level, mtr);
	} else {
		page_create(block, mtr, dict_table_is_comp(index->table));
		/* Set the level of the new index page */
		btr_page_set_level(page, NULL, level, mtr);
	}

	block->check_index_page_at_flush = TRUE;

	btr_page_set_index_id(page, page_zip, index->id, mtr);
}
867 
868 /**************************************************************/
872 static
874 btr_page_alloc_for_ibuf(
875 /*====================*/
876  dict_index_t* index,
877  mtr_t* mtr)
878 {
879  fil_addr_t node_addr;
880  page_t* root;
881  page_t* new_page;
882  buf_block_t* new_block;
883 
884  root = btr_root_get(index, mtr);
885 
886  node_addr = flst_get_first(root + PAGE_HEADER
887  + PAGE_BTR_IBUF_FREE_LIST, mtr);
888  ut_a(node_addr.page != FIL_NULL);
889 
890  new_block = buf_page_get(dict_index_get_space(index),
891  dict_table_zip_size(index->table),
892  node_addr.page, RW_X_LATCH, mtr);
893  new_page = buf_block_get_frame(new_block);
894  buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
895 
896  flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
897  new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
898  mtr);
899  ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
900  mtr));
901 
902  return(new_block);
903 }
904 
905 /**************************************************************/
909 UNIV_INTERN
911 btr_page_alloc(
912 /*===========*/
913  dict_index_t* index,
914  ulint hint_page_no,
915  byte file_direction,
917  ulint level,
919  mtr_t* mtr)
920 {
921  fseg_header_t* seg_header;
922  page_t* root;
923  buf_block_t* new_block;
924  ulint new_page_no;
925 
926  if (dict_index_is_ibuf(index)) {
927 
928  return(btr_page_alloc_for_ibuf(index, mtr));
929  }
930 
931  root = btr_root_get(index, mtr);
932 
933  if (level == 0) {
934  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
935  } else {
936  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
937  }
938 
939  /* Parameter TRUE below states that the caller has made the
940  reservation for free extents, and thus we know that a page can
941  be allocated: */
942 
943  new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
944  file_direction, TRUE, mtr);
945  if (new_page_no == FIL_NULL) {
946 
947  return(NULL);
948  }
949 
950  new_block = buf_page_get(dict_index_get_space(index),
951  dict_table_zip_size(index->table),
952  new_page_no, RW_X_LATCH, mtr);
953  buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
954 
955  return(new_block);
956 }
957 
958 /**************************************************************/
961 UNIV_INTERN
962 ulint
963 btr_get_size(
964 /*=========*/
965  dict_index_t* index,
966  ulint flag)
967 {
968  fseg_header_t* seg_header;
969  page_t* root;
970  ulint n;
971  ulint dummy;
972  mtr_t mtr;
973 
974  mtr_start(&mtr);
975 
976  mtr_s_lock(dict_index_get_lock(index), &mtr);
977 
978  root = btr_root_get(index, &mtr);
979 
980  if (flag == BTR_N_LEAF_PAGES) {
981  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
982 
983  fseg_n_reserved_pages(seg_header, &n, &mtr);
984 
985  } else if (flag == BTR_TOTAL_SIZE) {
986  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
987 
988  n = fseg_n_reserved_pages(seg_header, &dummy, &mtr);
989 
990  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
991 
992  n += fseg_n_reserved_pages(seg_header, &dummy, &mtr);
993  } else {
994  ut_error;
995  }
996 
997  mtr_commit(&mtr);
998 
999  return(n);
1000 }
1001 
1002 /**************************************************************/
/**************************************************************//**
Frees a page used in an ibuf tree by adding it to the head of the free
list kept in the tree root page. */
static
void
btr_page_free_for_ibuf(
/*===================*/
	dict_index_t*	index,	/*!< in: index tree */
	buf_block_t*	block,	/*!< in: block to be freed, x-latched */
	mtr_t*		mtr)	/*!< in: mtr */
{
	page_t*		root;

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	root = btr_root_get(index, mtr);

	flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
		       buf_block_get_frame(block)
		       + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);

	ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
			    mtr));
}
1025 
1026 /**************************************************************/
1030 UNIV_INTERN
1031 void
1032 btr_page_free_low(
1033 /*==============*/
1034  dict_index_t* index,
1035  buf_block_t* block,
1036  ulint level,
1037  mtr_t* mtr)
1038 {
1039  fseg_header_t* seg_header;
1040  page_t* root;
1041 
1042  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
1043  /* The page gets invalid for optimistic searches: increment the frame
1044  modify clock */
1045 
1047  btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block));
1048 
1049  if (dict_index_is_ibuf(index)) {
1050 
1051  btr_page_free_for_ibuf(index, block, mtr);
1052 
1053  return;
1054  }
1055 
1056  root = btr_root_get(index, mtr);
1057 
1058  if (level == 0) {
1059  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
1060  } else {
1061  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
1062  }
1063 
1064  fseg_free_page(seg_header,
1065  buf_block_get_space(block),
1066  buf_block_get_page_no(block), mtr);
1067 }
1068 
1069 /**************************************************************/
1072 UNIV_INTERN
1073 void
1074 btr_page_free(
1075 /*==========*/
1076  dict_index_t* index,
1077  buf_block_t* block,
1078  mtr_t* mtr)
1079 {
1080  ulint level;
1081 
1082  level = btr_page_get_level(buf_block_get_frame(block), mtr);
1083 
1084  btr_page_free_low(index, block, level, mtr);
1085 }
1086 
1087 /**************************************************************/
/**************************************************************//**
Sets the child page number in a node pointer record (stored in the last
field of the record), logging the change in the mtr. */
UNIV_INLINE
void
btr_node_ptr_set_child_page_no(
/*===========================*/
	rec_t*		rec,	/*!< in: node pointer record */
	page_zip_des_t*	page_zip,/*!< in/out: compressed page descriptor,
				or NULL for an uncompressed page */
	const ulint*	offsets,/*!< in: rec_get_offsets(rec) */
	ulint		page_no,/*!< in: child page number to store */
	mtr_t*		mtr)	/*!< in: mtr */
{
	byte*	field;
	ulint	len;

	ut_ad(rec_offs_validate(rec, NULL, offsets));
	ut_ad(!page_is_leaf(page_align(rec)));
	ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));

	/* The child address is in the last field */
	field = rec_get_nth_field(rec, offsets,
				  rec_offs_n_fields(offsets) - 1, &len);

	ut_ad(len == REC_NODE_PTR_SIZE);

	if (UNIV_LIKELY_NULL(page_zip)) {
		/* Compressed pages go through the page_zip API so that
		the compressed image stays in sync. */
		page_zip_write_node_ptr(page_zip, rec,
					rec_offs_data_size(offsets),
					page_no, mtr);
	} else {
		mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
	}
}
1121 
1122 /************************************************************/
/************************************************************//**
Returns the child page block of a node pointer, x-latched.
@return	child block, x-latched */
buf_block_t*
btr_node_ptr_get_child(
/*===================*/
	const rec_t*	node_ptr,/*!< in: node pointer record */
	dict_index_t*	index,	/*!< in: index */
	const ulint*	offsets,/*!< in: rec_get_offsets(node_ptr) */
	mtr_t*		mtr)	/*!< in: mtr */
{
	ulint	page_no;
	ulint	space;

	ut_ad(rec_offs_validate(node_ptr, index, offsets));
	/* The child page is in the same tablespace as the parent. */
	space = page_get_space_id(page_align(node_ptr));
	page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);

	return(btr_block_get(space, dict_table_zip_size(index->table),
			     page_no, RW_X_LATCH, mtr));
}
1143 
1144 /************************************************************/
/************************************************************//**
Searches the parent level for the node pointer record pointing to the
page on which cursor is positioned, and repositions cursor on that node
pointer. The tree must be x-latched by the mtr. If the node pointer's
child page number does not match, the tree is corrupt and the server is
intentionally crashed after dumping diagnostics.
@return	rec_get_offsets() of the node pointer record */
static
ulint*
btr_page_get_father_node_ptr_func(
/*==============================*/
	ulint*		offsets,/*!< in: work area for the return value */
	mem_heap_t*	heap,	/*!< in: memory heap to use */
	btr_cur_t*	cursor,	/*!< in/out: in: cursor on a page of the
				tree; out: cursor on the node pointer */
	const char*	file,	/*!< in: file name of the caller */
	ulint		line,	/*!< in: line number of the caller */
	mtr_t*		mtr)	/*!< in: mtr */
{
	dtuple_t*	tuple;
	rec_t*		user_rec;
	rec_t*		node_ptr;
	ulint		level;
	ulint		page_no;
	dict_index_t*	index;

	page_no = buf_block_get_page_no(btr_cur_get_block(cursor));
	index = btr_cur_get_index(cursor);

	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
				MTR_MEMO_X_LOCK));

	/* The root has no father. */
	ut_ad(dict_index_get_page(index) != page_no);

	level = btr_page_get_level(btr_cur_get_page(cursor), mtr);

	user_rec = btr_cur_get_rec(cursor);
	ut_a(page_rec_is_user_rec(user_rec));
	/* Build a node pointer tuple from the first user record and
	search for it one level up. */
	tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level);

	btr_cur_search_to_nth_level(index, level + 1, tuple, PAGE_CUR_LE,
				    BTR_CONT_MODIFY_TREE, cursor, 0,
				    file, line, mtr);

	node_ptr = btr_cur_get_rec(cursor);
	ut_ad(!page_rec_is_comp(node_ptr)
	      || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
	offsets = rec_get_offsets(node_ptr, index, offsets,
				  ULINT_UNDEFINED, &heap);

	if (UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr, offsets)
			  != page_no)) {
		/* Corruption: the node pointer found does not point
		back to the child page. Dump both pages and abort. */
		rec_t*	print_rec;
		fputs("InnoDB: Dump of the child page:\n", stderr);
		buf_page_print(page_align(user_rec), 0);
		fputs("InnoDB: Dump of the parent page:\n", stderr);
		buf_page_print(page_align(node_ptr), 0);

		fputs("InnoDB: Corruption of an index tree: table ", stderr);
		ut_print_name(stderr, NULL, TRUE, index->table_name);
		fputs(", index ", stderr);
		ut_print_name(stderr, NULL, FALSE, index->name);
		fprintf(stderr, ",\n"
			"InnoDB: father ptr page no %lu, child page no %lu\n",
			(ulong)
			btr_node_ptr_get_child_page_no(node_ptr, offsets),
			(ulong) page_no);
		print_rec = page_rec_get_next(
			page_get_infimum_rec(page_align(user_rec)));
		offsets = rec_get_offsets(print_rec, index,
					  offsets, ULINT_UNDEFINED, &heap);
		page_rec_print(print_rec, offsets);
		offsets = rec_get_offsets(node_ptr, index, offsets,
					  ULINT_UNDEFINED, &heap);
		page_rec_print(node_ptr, offsets);

		fputs("InnoDB: You should dump + drop + reimport the table"
		      " to fix the\n"
		      "InnoDB: corruption. If the crash happens at "
		      "the database startup, see\n"
		      "InnoDB: " REFMAN "forcing-innodb-recovery.html about\n"
		      "InnoDB: forcing recovery. "
		      "Then dump + drop + reimport.\n", stderr);

		ut_error;
	}

	return(offsets);
}
1231 
/** Looks up the node pointer pointing to the page on which @a cur is
positioned, passing the call site's __FILE__ and __LINE__ through to
btr_page_get_father_node_ptr_func(). */
#define btr_page_get_father_node_ptr(of,heap,cur,mtr) \
	btr_page_get_father_node_ptr_func(of,heap,cur,__FILE__,__LINE__,mtr)
1234 
1235 /************************************************************/
1239 static
1240 ulint*
1241 btr_page_get_father_block(
1242 /*======================*/
1243  ulint* offsets,
1244  mem_heap_t* heap,
1245  dict_index_t* index,
1246  buf_block_t* block,
1247  mtr_t* mtr,
1248  btr_cur_t* cursor)
1250 {
1251  rec_t* rec
1252  = page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
1253  block)));
1254  btr_cur_position(index, rec, block, cursor);
1255  return(btr_page_get_father_node_ptr(offsets, heap, cursor, mtr));
1256 }
1257 
1258 /************************************************************/
1261 static
1262 void
1263 btr_page_get_father(
1264 /*================*/
1265  dict_index_t* index,
1266  buf_block_t* block,
1267  mtr_t* mtr,
1268  btr_cur_t* cursor)
1270 {
1271  mem_heap_t* heap;
1272  rec_t* rec
1273  = page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
1274  block)));
1275  btr_cur_position(index, rec, block, cursor);
1276 
1277  heap = mem_heap_create(100);
1278  btr_page_get_father_node_ptr(NULL, heap, cursor, mtr);
1279  mem_heap_free(heap);
1280 }
1281 
1282 /************************************************************/
1285 UNIV_INTERN
1286 ulint
1287 btr_create(
1288 /*=======*/
1289  ulint type,
1290  ulint space,
1291  ulint zip_size,
1293  index_id_t index_id,
1294  dict_index_t* index,
1295  mtr_t* mtr)
1296 {
1297  ulint page_no;
1298  buf_block_t* block;
1299  buf_frame_t* frame;
1300  page_t* page;
1301  page_zip_des_t* page_zip;
1302 
1303  /* Create the two new segments (one, in the case of an ibuf tree) for
1304  the index tree; the segment headers are put on the allocated root page
1305  (for an ibuf tree, not in the root, but on a separate ibuf header
1306  page) */
1307 
1308  if (type & DICT_IBUF) {
1309  /* Allocate first the ibuf header page */
1310  buf_block_t* ibuf_hdr_block = fseg_create(
1311  space, 0,
1312  IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr);
1313 
1314  buf_block_dbg_add_level(ibuf_hdr_block, SYNC_TREE_NODE_NEW);
1315 
1316  ut_ad(buf_block_get_page_no(ibuf_hdr_block)
1317  == IBUF_HEADER_PAGE_NO);
1318  /* Allocate then the next page to the segment: it will be the
1319  tree root page */
1320 
1321  page_no = fseg_alloc_free_page(buf_block_get_frame(
1322  ibuf_hdr_block)
1323  + IBUF_HEADER
1324  + IBUF_TREE_SEG_HEADER,
1325  IBUF_TREE_ROOT_PAGE_NO,
1326  FSP_UP, mtr);
1327  ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
1328 
1329  block = buf_page_get(space, zip_size, page_no,
1330  RW_X_LATCH, mtr);
1331  } else {
1332 #ifdef UNIV_BLOB_DEBUG
1333  if ((type & DICT_CLUSTERED) && !index->blobs) {
1334  mutex_create(PFS_NOT_INSTRUMENTED,
1335  &index->blobs_mutex, SYNC_ANY_LATCH);
1336  index->blobs = rbt_create(sizeof(btr_blob_dbg_t),
1337  btr_blob_dbg_cmp);
1338  }
1339 #endif /* UNIV_BLOB_DEBUG */
1340  block = fseg_create(space, 0,
1341  PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
1342  }
1343 
1344  if (block == NULL) {
1345 
1346  return(FIL_NULL);
1347  }
1348 
1349  page_no = buf_block_get_page_no(block);
1350  frame = buf_block_get_frame(block);
1351 
1352  buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
1353 
1354  if (type & DICT_IBUF) {
1355  /* It is an insert buffer tree: initialize the free list */
1356 
1357  ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);
1358 
1359  flst_init(frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr);
1360  } else {
1361  /* It is a non-ibuf tree: create a file segment for leaf
1362  pages */
1363  if (!fseg_create(space, page_no,
1364  PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr)) {
1365  /* Not enough space for new segment, free root
1366  segment before return. */
1367  btr_free_root(space, zip_size, page_no, mtr);
1368 
1369  return(FIL_NULL);
1370  }
1371 
1372  /* The fseg create acquires a second latch on the page,
1373  therefore we must declare it: */
1374  buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
1375  }
1376 
1377  /* Create a new index page on the the allocated segment page */
1378  page_zip = buf_block_get_page_zip(block);
1379 
1380  if (UNIV_LIKELY_NULL(page_zip)) {
1381  page = page_create_zip(block, index, 0, mtr);
1382  } else {
1383  page = page_create(block, mtr,
1384  dict_table_is_comp(index->table));
1385  /* Set the level of the new index page */
1386  btr_page_set_level(page, NULL, 0, mtr);
1387  }
1388 
1389  block->check_index_page_at_flush = TRUE;
1390 
1391  /* Set the index id of the page */
1392  btr_page_set_index_id(page, page_zip, index_id, mtr);
1393 
1394  /* Set the next node and previous node fields */
1395  btr_page_set_next(page, page_zip, FIL_NULL, mtr);
1396  btr_page_set_prev(page, page_zip, FIL_NULL, mtr);
1397 
1398  /* We reset the free bits for the page to allow creation of several
1399  trees in the same mtr, otherwise the latch on a bitmap page would
1400  prevent it because of the latching order */
1401 
1402  if (!(type & DICT_CLUSTERED)) {
1403  ibuf_reset_free_bits(block);
1404  }
1405 
1406  /* In the following assertion we test that two records of maximum
1407  allowed size fit on the root page: this fact is needed to ensure
1408  correctness of split algorithms */
1409 
1411 
1412  return(page_no);
1413 }
1414 
1415 /************************************************************/
/************************************************************//**
Frees a B-tree except the root page.  The root page MUST be freed after
this with btr_free_root(): the segment headers stored on the root must
stay valid while the two segments below are being freed. */
UNIV_INTERN
void
btr_free_but_not_root(
/*==================*/
	ulint	space,		/*!< in: space where created */
	ulint	zip_size,	/*!< in: compressed page size in bytes
				or 0 for uncompressed pages */
	ulint	root_page_no)	/*!< in: root page number */
{
	ibool	finished;
	page_t*	root;
	mtr_t	mtr;

leaf_loop:
	/* Free the leaf-page segment one step at a time, each step in its
	own mini-transaction (presumably to limit how long latches are
	held per mtr -- NOTE(review): confirm against fseg_free_step docs). */
	mtr_start(&mtr);

	root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
#ifdef UNIV_BTR_DEBUG
	ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
				    + root, space));
	ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
				    + root, space));
#endif /* UNIV_BTR_DEBUG */

	/* NOTE: page hash indexes are dropped when a page is freed inside
	fsp0fsp. */

	finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_LEAF,
				  &mtr);
	mtr_commit(&mtr);

	if (!finished) {

		goto leaf_loop;
	}
top_loop:
	/* Then free the non-leaf segment, skipping its header page
	(the root itself), again one step per mini-transaction. */
	mtr_start(&mtr);

	root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
#ifdef UNIV_BTR_DEBUG
	ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
				    + root, space));
#endif /* UNIV_BTR_DEBUG */

	finished = fseg_free_step_not_header(
		root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr);
	mtr_commit(&mtr);

	if (!finished) {

		goto top_loop;
	}
}
1471 
1472 /************************************************************/
1474 UNIV_INTERN
1475 void
1476 btr_free_root(
1477 /*==========*/
1478  ulint space,
1479  ulint zip_size,
1481  ulint root_page_no,
1482  mtr_t* mtr)
1484 {
1485  buf_block_t* block;
1486  fseg_header_t* header;
1487 
1488  block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
1489 
1490  btr_search_drop_page_hash_index(block);
1491 
1492  header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP;
1493 #ifdef UNIV_BTR_DEBUG
1494  ut_a(btr_root_fseg_validate(header, space));
1495 #endif /* UNIV_BTR_DEBUG */
1496 
1497  while (!fseg_free_step(header, mtr)) {};
1498 }
1499 #endif /* !UNIV_HOTBACKUP */
1500 
1501 /*************************************************************/
1503 static
1504 ibool
1505 btr_page_reorganize_low(
1506 /*====================*/
1507  ibool recovery,
1512  buf_block_t* block,
1513  dict_index_t* index,
1514  mtr_t* mtr)
1515 {
1516  buf_pool_t* buf_pool = buf_pool_from_bpage(&block->page);
1517  page_t* page = buf_block_get_frame(block);
1518  page_zip_des_t* page_zip = buf_block_get_page_zip(block);
1519  buf_block_t* temp_block;
1520  page_t* temp_page;
1521  ulint log_mode;
1522  ulint data_size1;
1523  ulint data_size2;
1524  ulint max_ins_size1;
1525  ulint max_ins_size2;
1526  ibool success = FALSE;
1527 
1528  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
1529  ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
1530 #ifdef UNIV_ZIP_DEBUG
1531  ut_a(!page_zip || page_zip_validate(page_zip, page));
1532 #endif /* UNIV_ZIP_DEBUG */
1533  data_size1 = page_get_data_size(page);
1534  max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
1535 
1536 #ifndef UNIV_HOTBACKUP
1537  /* Write the log record */
1538  mlog_open_and_write_index(mtr, page, index, page_is_comp(page)
1540  : MLOG_PAGE_REORGANIZE, 0);
1541 #endif /* !UNIV_HOTBACKUP */
1542 
1543  /* Turn logging off */
1544  log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
1545 
1546 #ifndef UNIV_HOTBACKUP
1547  temp_block = buf_block_alloc(buf_pool);
1548 #else /* !UNIV_HOTBACKUP */
1549  ut_ad(block == back_block1);
1550  temp_block = back_block2;
1551 #endif /* !UNIV_HOTBACKUP */
1552  temp_page = temp_block->frame;
1553 
1554  /* Copy the old page to temporary space */
1555  buf_frame_copy(temp_page, page);
1556 
1557 #ifndef UNIV_HOTBACKUP
1558  if (UNIV_LIKELY(!recovery)) {
1559  btr_search_drop_page_hash_index(block);
1560  }
1561 
1562  block->check_index_page_at_flush = TRUE;
1563 #endif /* !UNIV_HOTBACKUP */
1564  btr_blob_dbg_remove(page, index, "btr_page_reorganize");
1565 
1566  /* Recreate the page: note that global data on page (possible
1567  segment headers, next page-field, etc.) is preserved intact */
1568 
1569  page_create(block, mtr, dict_table_is_comp(index->table));
1570 
1571  /* Copy the records from the temporary space to the recreated page;
1572  do not copy the lock bits yet */
1573 
1574  page_copy_rec_list_end_no_locks(block, temp_block,
1575  page_get_infimum_rec(temp_page),
1576  index, mtr);
1577 
1578  if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
1579  /* Copy max trx id to recreated page */
1580  trx_id_t max_trx_id = page_get_max_trx_id(temp_page);
1581  page_set_max_trx_id(block, NULL, max_trx_id, mtr);
1582  /* In crash recovery, dict_index_is_sec_or_ibuf() always
1583  returns TRUE, even for clustered indexes. max_trx_id is
1584  unused in clustered index pages. */
1585  ut_ad(max_trx_id != 0 || recovery);
1586  }
1587 
1588  if (UNIV_LIKELY_NULL(page_zip)
1589  && UNIV_UNLIKELY
1590  (!page_zip_compress(page_zip, page, index, NULL))) {
1591 
1592  /* Restore the old page and exit. */
1593  btr_blob_dbg_restore(page, temp_page, index,
1594  "btr_page_reorganize_compress_fail");
1595 
1596 #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
1597  /* Check that the bytes that we skip are identical. */
1598  ut_a(!memcmp(page, temp_page, PAGE_HEADER));
1599  ut_a(!memcmp(PAGE_HEADER + PAGE_N_RECS + page,
1600  PAGE_HEADER + PAGE_N_RECS + temp_page,
1601  PAGE_DATA - (PAGE_HEADER + PAGE_N_RECS)));
1602  ut_a(!memcmp(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page,
1603  UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + temp_page,
1605 #endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
1606 
1607  memcpy(PAGE_HEADER + page, PAGE_HEADER + temp_page,
1608  PAGE_N_RECS - PAGE_N_DIR_SLOTS);
1609  memcpy(PAGE_DATA + page, PAGE_DATA + temp_page,
1610  UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
1611 
1612 #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
1613  ut_a(!memcmp(page, temp_page, UNIV_PAGE_SIZE));
1614 #endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
1615 
1616  goto func_exit;
1617  }
1618 
1619 #ifndef UNIV_HOTBACKUP
1620  if (UNIV_LIKELY(!recovery)) {
1621  /* Update the record lock bitmaps */
1622  lock_move_reorganize_page(block, temp_block);
1623  }
1624 #endif /* !UNIV_HOTBACKUP */
1625 
1626  data_size2 = page_get_data_size(page);
1627  max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
1628 
1629  if (UNIV_UNLIKELY(data_size1 != data_size2)
1630  || UNIV_UNLIKELY(max_ins_size1 != max_ins_size2)) {
1631  buf_page_print(page, 0);
1632  buf_page_print(temp_page, 0);
1633  fprintf(stderr,
1634  "InnoDB: Error: page old data size %lu"
1635  " new data size %lu\n"
1636  "InnoDB: Error: page old max ins size %lu"
1637  " new max ins size %lu\n"
1638  "InnoDB: Submit a detailed bug report"
1639  " to http://bugs.mysql.com\n",
1640  (unsigned long) data_size1, (unsigned long) data_size2,
1641  (unsigned long) max_ins_size1,
1642  (unsigned long) max_ins_size2);
1643  } else {
1644  success = TRUE;
1645  }
1646 
1647 func_exit:
1648 #ifdef UNIV_ZIP_DEBUG
1649  ut_a(!page_zip || page_zip_validate(page_zip, page));
1650 #endif /* UNIV_ZIP_DEBUG */
1651 #ifndef UNIV_HOTBACKUP
1652  buf_block_free(temp_block);
1653 #endif /* !UNIV_HOTBACKUP */
1654 
1655  /* Restore logging mode */
1656  mtr_set_log_mode(mtr, log_mode);
1657 
1658  return(success);
1659 }
1660 
1661 #ifndef UNIV_HOTBACKUP
1662 /*************************************************************/
1669 UNIV_INTERN
1670 ibool
1671 btr_page_reorganize(
1672 /*================*/
1673  buf_block_t* block,
1674  dict_index_t* index,
1675  mtr_t* mtr)
1676 {
1677  return(btr_page_reorganize_low(FALSE, block, index, mtr));
1678 }
1679 #endif /* !UNIV_HOTBACKUP */
1680 
1681 /***********************************************************/
1684 UNIV_INTERN
1685 byte*
1686 btr_parse_page_reorganize(
1687 /*======================*/
1688  byte* ptr,
1689  byte* /*end_ptr __attribute__((unused))*/,
1691  dict_index_t* index,
1692  buf_block_t* block,
1693  mtr_t* mtr)
1694 {
1695  ut_ad(ptr && end_ptr);
1696 
1697  /* The record is empty, except for the record initial part */
1698 
1699  if (UNIV_LIKELY(block != NULL)) {
1700  btr_page_reorganize_low(TRUE, block, index, mtr);
1701  }
1702 
1703  return(ptr);
1704 }
1705 
1706 #ifndef UNIV_HOTBACKUP
1707 /*************************************************************/
/*************************************************************//**
Empties an index page: drops the page's adaptive hash index entries and
recreates an empty page at the given B-tree level.  Page-global data
(segment headers, next/prev pointers, etc.) is preserved. */
static
void
btr_page_empty(
/*===========*/
	buf_block_t*	block,	/*!< in: page to be emptied */
	page_zip_des_t*	page_zip,/*!< out: compressed page, or NULL */
	dict_index_t*	index,	/*!< in: index of the page */
	ulint		level,	/*!< in: B-tree level to assign to the page */
	mtr_t*		mtr)	/*!< in: mtr */
{
	page_t*	page = buf_block_get_frame(block);

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	ut_ad(page_zip == buf_block_get_page_zip(block));
#ifdef UNIV_ZIP_DEBUG
	ut_a(!page_zip || page_zip_validate(page_zip, page));
#endif /* UNIV_ZIP_DEBUG */

	/* Drop hash-index entries before the records go away. */
	btr_search_drop_page_hash_index(block);
	btr_blob_dbg_remove(page, index, "btr_page_empty");

	/* Recreate the page: note that global data on page (possible
	segment headers, next page-field, etc.) is preserved intact */

	if (UNIV_LIKELY_NULL(page_zip)) {
		/* The level is passed directly to page_create_zip(). */
		page_create_zip(block, index, level, mtr);
	} else {
		page_create(block, mtr, dict_table_is_comp(index->table));
		/* For uncompressed pages the level is set separately. */
		btr_page_set_level(page, NULL, level, mtr);
	}

	block->check_index_page_at_flush = TRUE;
}
1742 
1743 /*************************************************************/
/*************************************************************//**
Makes the tree one level higher by splitting the root, and inserts
the tuple.  It is assumed that mtr contains an x-latch on the tree.
NOTE that the operation of this function must always succeed: enough
free disk space must be guaranteed to be available before this
function is called.
@return	inserted record */
UNIV_INTERN
rec_t*
btr_root_raise_and_insert(
/*======================*/
	btr_cur_t*	cursor,	/*!< in: cursor at which to insert: must be
				on the root page */
	const dtuple_t*	tuple,	/*!< in: tuple to insert */
	ulint		n_ext,	/*!< in: number of externally stored columns */
	mtr_t*		mtr)	/*!< in: mtr */
{
	dict_index_t*	index;
	page_t*		root;
	page_t*		new_page;
	ulint		new_page_no;
	rec_t*		rec;
	mem_heap_t*	heap;
	dtuple_t*	node_ptr;
	ulint		level;
	rec_t*		node_ptr_rec;
	page_cur_t*	page_cursor;
	page_zip_des_t*	root_page_zip;
	page_zip_des_t*	new_page_zip;
	buf_block_t*	root_block;
	buf_block_t*	new_block;

	root = btr_cur_get_page(cursor);
	root_block = btr_cur_get_block(cursor);
	root_page_zip = buf_block_get_page_zip(root_block);
#ifdef UNIV_ZIP_DEBUG
	ut_a(!root_page_zip || page_zip_validate(root_page_zip, root));
#endif /* UNIV_ZIP_DEBUG */
	index = btr_cur_get_index(cursor);
#ifdef UNIV_BTR_DEBUG
	if (!dict_index_is_ibuf(index)) {
		ulint	space = dict_index_get_space(index);

		ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
					    + root, space));
		ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
					    + root, space));
	}

	ut_a(dict_index_get_page(index) == page_get_page_no(root));
#endif /* UNIV_BTR_DEBUG */
	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
				MTR_MEMO_X_LOCK));
	ut_ad(mtr_memo_contains(mtr, root_block, MTR_MEMO_PAGE_X_FIX));

	/* Allocate a new page to the tree. Root splitting is done by first
	moving the root records to the new page, emptying the root, putting
	a node pointer to the new page, and then splitting the new page. */

	level = btr_page_get_level(root, mtr);

	new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr);
	new_page = buf_block_get_frame(new_block);
	new_page_zip = buf_block_get_page_zip(new_block);
	/* Root and new page must agree on compression: both compressed
	(with equal zip size) or both uncompressed. */
	ut_a(!new_page_zip == !root_page_zip);
	ut_a(!new_page_zip
	     || page_zip_get_size(new_page_zip)
	     == page_zip_get_size(root_page_zip));

	btr_page_create(new_block, new_page_zip, index, level, mtr);

	/* Set the next node and previous node fields of new page */
	btr_page_set_next(new_page, new_page_zip, FIL_NULL, mtr);
	btr_page_set_prev(new_page, new_page_zip, FIL_NULL, mtr);

	/* Copy the records from root to the new page one by one. */

	if (0
#ifdef UNIV_ZIP_COPY
	    || new_page_zip
#endif /* UNIV_ZIP_COPY */
	    || UNIV_UNLIKELY
	    (!page_copy_rec_list_end(new_block, root_block,
				     page_get_infimum_rec(root),
				     index, mtr))) {
		/* The record-by-record copy can only fail for a
		compressed page; fall back to a byte copy. */
		ut_a(new_page_zip);

		/* Copy the page byte for byte. */
		page_zip_copy_recs(new_page_zip, new_page,
				   root_page_zip, root, index, mtr);

		/* Update the lock table and possible hash index. */

		lock_move_rec_list_end(new_block, root_block,
				       page_get_infimum_rec(root));

		btr_search_move_or_delete_hash_entries(new_block, root_block,
						       index);
	}

	/* If this is a pessimistic insert which is actually done to
	perform a pessimistic update then we have stored the lock
	information of the record to be inserted on the infimum of the
	root page: we cannot discard the lock structs on the root page */

	lock_update_root_raise(new_block, root_block);

	/* Create a memory heap where the node pointer is stored */
	heap = mem_heap_create(100);

	rec = page_rec_get_next(page_get_infimum_rec(new_page));
	new_page_no = buf_block_get_page_no(new_block);

	/* Build the node pointer (= node key and page address) for the
	child */

	node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
					     level);
	/* The node pointer must be marked as the predefined minimum record,
	as there is no lower alphabetical limit to records in the leftmost
	node of a level: */
	dtuple_set_info_bits(node_ptr,
			     dtuple_get_info_bits(node_ptr)
			     | REC_INFO_MIN_REC_FLAG);

	/* Rebuild the root page to get free space */
	btr_page_empty(root_block, root_page_zip, index, level + 1, mtr);

	/* Set the next node and previous node fields, although
	they should already have been set.  The previous node field
	must be FIL_NULL if root_page_zip != NULL, because the
	REC_INFO_MIN_REC_FLAG (of the first user record) will be
	set if and only if btr_page_get_prev() == FIL_NULL. */
	btr_page_set_next(root, root_page_zip, FIL_NULL, mtr);
	btr_page_set_prev(root, root_page_zip, FIL_NULL, mtr);

	page_cursor = btr_cur_get_page_cur(cursor);

	/* Insert node pointer to the root */

	page_cur_set_before_first(root_block, page_cursor);

	node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
					     index, 0, mtr);

	/* The root page should only contain the node pointer
	to new_page at this point.  Thus, the data should fit. */
	ut_a(node_ptr_rec);

	/* Free the memory heap */
	mem_heap_free(heap);

	/* We play safe and reset the free bits for the new page */

#if 0
	fprintf(stderr, "Root raise new page no %lu\n", new_page_no);
#endif

	if (!dict_index_is_clust(index)) {
		ibuf_reset_free_bits(new_block);
	}

	/* Reposition the cursor to the child node */
	page_cur_search(new_block, index, tuple,
			PAGE_CUR_LE, page_cursor);

	/* Split the child and insert tuple */
	return(btr_page_split_and_insert(cursor, tuple, n_ext, mtr));
}
1914 
1915 /*************************************************************/
1919 UNIV_INTERN
1920 ibool
1921 btr_page_get_split_rec_to_left(
1922 /*===========================*/
1923  btr_cur_t* cursor,
1924  rec_t** split_rec)
1928 {
1929  page_t* page;
1930  rec_t* insert_point;
1931  rec_t* infimum;
1932 
1933  page = btr_cur_get_page(cursor);
1934  insert_point = btr_cur_get_rec(cursor);
1935 
1936  if (page_header_get_ptr(page, PAGE_LAST_INSERT)
1937  == page_rec_get_next(insert_point)) {
1938 
1939  infimum = page_get_infimum_rec(page);
1940 
1941  /* If the convergence is in the middle of a page, include also
1942  the record immediately before the new insert to the upper
1943  page. Otherwise, we could repeatedly move from page to page
1944  lots of records smaller than the convergence point. */
1945 
1946  if (infimum != insert_point
1947  && page_rec_get_next(infimum) != insert_point) {
1948 
1949  *split_rec = insert_point;
1950  } else {
1951  *split_rec = page_rec_get_next(insert_point);
1952  }
1953 
1954  return(TRUE);
1955  }
1956 
1957  return(FALSE);
1958 }
1959 
1960 /*************************************************************/
1964 UNIV_INTERN
1965 ibool
1966 btr_page_get_split_rec_to_right(
1967 /*============================*/
1968  btr_cur_t* cursor,
1969  rec_t** split_rec)
1973 {
1974  page_t* page;
1975  rec_t* insert_point;
1976 
1977  page = btr_cur_get_page(cursor);
1978  insert_point = btr_cur_get_rec(cursor);
1979 
1980  /* We use eager heuristics: if the new insert would be right after
1981  the previous insert on the same page, we assume that there is a
1982  pattern of sequential inserts here. */
1983 
1984  if (UNIV_LIKELY(page_header_get_ptr(page, PAGE_LAST_INSERT)
1985  == insert_point)) {
1986 
1987  rec_t* next_rec;
1988 
1989  next_rec = page_rec_get_next(insert_point);
1990 
1991  if (page_rec_is_supremum(next_rec)) {
1992 split_at_new:
1993  /* Split at the new record to insert */
1994  *split_rec = NULL;
1995  } else {
1996  rec_t* next_next_rec = page_rec_get_next(next_rec);
1997  if (page_rec_is_supremum(next_next_rec)) {
1998 
1999  goto split_at_new;
2000  }
2001 
2002  /* If there are >= 2 user records up from the insert
2003  point, split all but 1 off. We want to keep one because
2004  then sequential inserts can use the adaptive hash
2005  index, as they can do the necessary checks of the right
2006  search position just by looking at the records on this
2007  page. */
2008 
2009  *split_rec = next_next_rec;
2010  }
2011 
2012  return(TRUE);
2013  }
2014 
2015  return(FALSE);
2016 }
2017 
2018 /*************************************************************/
/*************************************************************//**
Calculates a split record such that the tuple will certainly fit on
its half-page when the split is performed.  Records are accumulated
from the left until they reserve at least half of the total space the
page would need after the insert.
@return	split record, or NULL if the tuple itself should be the first
record on the upper half-page */
static
rec_t*
btr_page_get_split_rec(
/*===================*/
	btr_cur_t*	cursor,	/*!< in: cursor at which insert should be made */
	const dtuple_t*	tuple,	/*!< in: tuple to insert */
	ulint		n_ext)	/*!< in: number of externally stored columns */
{
	page_t*		page;
	page_zip_des_t*	page_zip;
	ulint		insert_size;
	ulint		free_space;
	ulint		total_data;
	ulint		total_n_recs;
	ulint		total_space;
	ulint		incl_data;
	rec_t*		ins_rec;
	rec_t*		rec;	/* NULL is used as a sentinel meaning
				"the tuple to be inserted" */
	rec_t*		next_rec;
	ulint		n;
	mem_heap_t*	heap;
	ulint*		offsets;

	page = btr_cur_get_page(cursor);

	insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
	free_space = page_get_free_space_of_empty(page_is_comp(page));

	page_zip = btr_cur_get_page_zip(cursor);
	if (UNIV_LIKELY_NULL(page_zip)) {
		/* Estimate the free space of an empty compressed page. */
		ulint	free_space_zip = page_zip_empty_size(
			cursor->index->n_fields,
			page_zip_get_size(page_zip));

		if (UNIV_LIKELY(free_space > (ulint) free_space_zip)) {
			free_space = (ulint) free_space_zip;
		}
	}

	/* free_space is now the free space of a created new page */

	total_data   = page_get_data_size(page) + insert_size;
	total_n_recs = page_get_n_recs(page) + 1;
	ut_ad(total_n_recs >= 2);
	total_space  = total_data + page_dir_calc_reserved_space(total_n_recs);

	n = 0;
	incl_data = 0;
	ins_rec = btr_cur_get_rec(cursor);
	rec = page_get_infimum_rec(page);

	heap = NULL;
	offsets = NULL;

	/* We start to include records to the left half, and when the
	space reserved by them exceeds half of total_space, then if
	the included records fit on the left page, they will be put there
	if something was left over also for the right page,
	otherwise the last included record will be the first on the right
	half page */

	do {
		/* Decide the next record to include */
		if (rec == ins_rec) {
			rec = NULL;	/* NULL denotes that tuple is
					now included */
		} else if (rec == NULL) {
			/* The tuple was just included; continue with the
			record that follows the insert point. */
			rec = page_rec_get_next(ins_rec);
		} else {
			rec = page_rec_get_next(rec);
		}

		if (rec == NULL) {
			/* Include tuple */
			incl_data += insert_size;
		} else {
			offsets = rec_get_offsets(rec, cursor->index,
						  offsets, ULINT_UNDEFINED,
						  &heap);
			incl_data += rec_offs_size(offsets);
		}

		n++;
	} while (incl_data + page_dir_calc_reserved_space(n)
		 < total_space / 2);

	if (incl_data + page_dir_calc_reserved_space(n) <= free_space) {
		/* The next record will be the first on
		the right half page if it is not the
		supremum record of page */

		if (rec == ins_rec) {
			rec = NULL;

			goto func_exit;
		} else if (rec == NULL) {
			next_rec = page_rec_get_next(ins_rec);
		} else {
			next_rec = page_rec_get_next(rec);
		}
		ut_ad(next_rec);
		if (!page_rec_is_supremum(next_rec)) {
			rec = next_rec;
		}
	}

func_exit:
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	return(rec);
}
2137 
2138 /*************************************************************/
/*************************************************************//**
Returns TRUE if the insert fits on the appropriate half-page with the
chosen split_rec: first by an optimistic whole-page space check, then by
subtracting, one by one, the records that would move to the other
half-page.
@return	TRUE if the tuple fits on its half-page */
static
ibool
btr_page_insert_fits(
/*=================*/
	btr_cur_t*	cursor,	/*!< in: cursor at which insert
				should be made */
	const rec_t*	split_rec,/*!< in: suggested first record on the
				upper half-page, or NULL if the tuple
				to be inserted should be first */
	const ulint*	offsets,/*!< in: rec_get_offsets(split_rec,
				cursor->index), or NULL */
	const dtuple_t*	tuple,	/*!< in: tuple to insert */
	ulint		n_ext,	/*!< in: number of externally stored columns */
	mem_heap_t*	heap)	/*!< in: temporary memory heap */
{
	page_t*		page;
	ulint		insert_size;
	ulint		free_space;
	ulint		total_data;
	ulint		total_n_recs;
	const rec_t*	rec;
	const rec_t*	end_rec;
	ulint*		offs;

	page = btr_cur_get_page(cursor);

	/* split_rec and offsets must be supplied together. */
	ut_ad(!split_rec == !offsets);
	ut_ad(!offsets
	      || !page_is_comp(page) == !rec_offs_comp(offsets));
	ut_ad(!offsets
	      || rec_offs_validate(split_rec, cursor->index, offsets));

	insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
	free_space = page_get_free_space_of_empty(page_is_comp(page));

	/* free_space is now the free space of a created new page */

	total_data   = page_get_data_size(page) + insert_size;
	total_n_recs = page_get_n_recs(page) + 1;

	/* We determine which records (from rec to end_rec, not including
	end_rec) will end up on the other half page from tuple when it is
	inserted. */

	if (split_rec == NULL) {
		rec = page_rec_get_next(page_get_infimum_rec(page));
		end_rec = page_rec_get_next(btr_cur_get_rec(cursor));

	} else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) {

		rec = page_rec_get_next(page_get_infimum_rec(page));
		end_rec = split_rec;
	} else {
		rec = split_rec;
		end_rec = page_get_supremum_rec(page);
	}

	if (total_data + page_dir_calc_reserved_space(total_n_recs)
	    <= free_space) {

		/* Ok, there will be enough available space on the
		half page where the tuple is inserted */

		return(TRUE);
	}

	offs = NULL;

	while (rec != end_rec) {
		/* In this loop we calculate the amount of reserved
		space after rec is removed from page. */

		offs = rec_get_offsets(rec, cursor->index, offs,
				       ULINT_UNDEFINED, &heap);

		total_data -= rec_offs_size(offs);
		total_n_recs--;

		if (total_data + page_dir_calc_reserved_space(total_n_recs)
		    <= free_space) {

			/* Ok, there will be enough available space on the
			half page where the tuple is inserted */

			return(TRUE);
		}

		rec = page_rec_get_next_const(rec);
	}

	return(FALSE);
}
2234 
2235 /*******************************************************/
2238 UNIV_INTERN
2239 void
2240 btr_insert_on_non_leaf_level_func(
2241 /*==============================*/
2242  dict_index_t* index,
2243  ulint level,
2244  dtuple_t* tuple,
2245  const char* file,
2246  ulint line,
2247  mtr_t* mtr)
2248 {
2249  big_rec_t* dummy_big_rec;
2250  btr_cur_t cursor;
2251  ulint err;
2252  rec_t* rec;
2253 
2254  ut_ad(level > 0);
2255 
2256  btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE,
2258  &cursor, 0, file, line, mtr);
2259 
2260  err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
2261  | BTR_KEEP_SYS_FLAG
2262  | BTR_NO_UNDO_LOG_FLAG,
2263  &cursor, tuple, &rec,
2264  &dummy_big_rec, 0, NULL, mtr);
2265  ut_a(err == DB_SUCCESS);
2266 }
2267 
2268 /**************************************************************/
2271 static
2272 void
2273 btr_attach_half_pages(
2274 /*==================*/
2275  dict_index_t* index,
2276  buf_block_t* block,
2277  rec_t* split_rec,
2279  buf_block_t* new_block,
2280  ulint direction,
2281  mtr_t* mtr)
2282 {
2283  ulint space;
2284  ulint zip_size;
2285  ulint prev_page_no;
2286  ulint next_page_no;
2287  ulint level;
2288  page_t* page = buf_block_get_frame(block);
2289  page_t* lower_page;
2290  page_t* upper_page;
2291  ulint lower_page_no;
2292  ulint upper_page_no;
2293  page_zip_des_t* lower_page_zip;
2294  page_zip_des_t* upper_page_zip;
2295  dtuple_t* node_ptr_upper;
2296  mem_heap_t* heap;
2297 
2298  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
2299  ut_ad(mtr_memo_contains(mtr, new_block, MTR_MEMO_PAGE_X_FIX));
2300 
2301  /* Create a memory heap where the data tuple is stored */
2302  heap = mem_heap_create(1024);
2303 
2304  /* Based on split direction, decide upper and lower pages */
2305  if (direction == FSP_DOWN) {
2306 
2307  btr_cur_t cursor;
2308  ulint* offsets;
2309 
2310  lower_page = buf_block_get_frame(new_block);
2311  lower_page_no = buf_block_get_page_no(new_block);
2312  lower_page_zip = buf_block_get_page_zip(new_block);
2313  upper_page = buf_block_get_frame(block);
2314  upper_page_no = buf_block_get_page_no(block);
2315  upper_page_zip = buf_block_get_page_zip(block);
2316 
2317  /* Look up the index for the node pointer to page */
2318  offsets = btr_page_get_father_block(NULL, heap, index,
2319  block, mtr, &cursor);
2320 
2321  /* Replace the address of the old child node (= page) with the
2322  address of the new lower half */
2323 
2324  btr_node_ptr_set_child_page_no(
2325  btr_cur_get_rec(&cursor),
2326  btr_cur_get_page_zip(&cursor),
2327  offsets, lower_page_no, mtr);
2328  mem_heap_empty(heap);
2329  } else {
2330  lower_page = buf_block_get_frame(block);
2331  lower_page_no = buf_block_get_page_no(block);
2332  lower_page_zip = buf_block_get_page_zip(block);
2333  upper_page = buf_block_get_frame(new_block);
2334  upper_page_no = buf_block_get_page_no(new_block);
2335  upper_page_zip = buf_block_get_page_zip(new_block);
2336  }
2337 
2338  /* Get the level of the split pages */
2339  level = btr_page_get_level(buf_block_get_frame(block), mtr);
2340  ut_ad(level
2341  == btr_page_get_level(buf_block_get_frame(new_block), mtr));
2342 
2343  /* Build the node pointer (= node key and page address) for the upper
2344  half */
2345 
2346  node_ptr_upper = dict_index_build_node_ptr(index, split_rec,
2347  upper_page_no, heap, level);
2348 
2349  /* Insert it next to the pointer to the lower half. Note that this
2350  may generate recursion leading to a split on the higher level. */
2351 
2352  btr_insert_on_non_leaf_level(index, level + 1, node_ptr_upper, mtr);
2353 
2354  /* Free the memory heap */
2355  mem_heap_free(heap);
2356 
2357  /* Get the previous and next pages of page */
2358 
2359  prev_page_no = btr_page_get_prev(page, mtr);
2360  next_page_no = btr_page_get_next(page, mtr);
2361  space = buf_block_get_space(block);
2362  zip_size = buf_block_get_zip_size(block);
2363 
2364  /* Update page links of the level */
2365 
2366  if (prev_page_no != FIL_NULL) {
2367  buf_block_t* prev_block = btr_block_get(space, zip_size,
2368  prev_page_no,
2369  RW_X_LATCH, mtr);
2370 #ifdef UNIV_BTR_DEBUG
2371  ut_a(page_is_comp(prev_block->frame) == page_is_comp(page));
2372  ut_a(btr_page_get_next(prev_block->frame, mtr)
2373  == buf_block_get_page_no(block));
2374 #endif /* UNIV_BTR_DEBUG */
2375 
2376  btr_page_set_next(buf_block_get_frame(prev_block),
2377  buf_block_get_page_zip(prev_block),
2378  lower_page_no, mtr);
2379  }
2380 
2381  if (next_page_no != FIL_NULL) {
2382  buf_block_t* next_block = btr_block_get(space, zip_size,
2383  next_page_no,
2384  RW_X_LATCH, mtr);
2385 #ifdef UNIV_BTR_DEBUG
2386  ut_a(page_is_comp(next_block->frame) == page_is_comp(page));
2387  ut_a(btr_page_get_prev(next_block->frame, mtr)
2388  == page_get_page_no(page));
2389 #endif /* UNIV_BTR_DEBUG */
2390 
2391  btr_page_set_prev(buf_block_get_frame(next_block),
2392  buf_block_get_page_zip(next_block),
2393  upper_page_no, mtr);
2394  }
2395 
2396  btr_page_set_prev(lower_page, lower_page_zip, prev_page_no, mtr);
2397  btr_page_set_next(lower_page, lower_page_zip, upper_page_no, mtr);
2398 
2399  btr_page_set_prev(upper_page, upper_page_zip, lower_page_no, mtr);
2400  btr_page_set_next(upper_page, upper_page_zip, next_page_no, mtr);
2401 }
2402 
2403 /*************************************************************/
2406 static
2407 ibool
2408 btr_page_tuple_smaller(
2409 /*===================*/
2410  btr_cur_t* cursor,
2411  const dtuple_t* tuple,
2412  ulint* offsets,
2413  ulint n_uniq,
2415  mem_heap_t** heap)
2416 {
2417  buf_block_t* block;
2418  const rec_t* first_rec;
2419  page_cur_t pcur;
2420 
2421  /* Read the first user record in the page. */
2422  block = btr_cur_get_block(cursor);
2423  page_cur_set_before_first(block, &pcur);
2424  page_cur_move_to_next(&pcur);
2425  first_rec = page_cur_get_rec(&pcur);
2426 
2427  offsets = rec_get_offsets(
2428  first_rec, cursor->index, offsets,
2429  n_uniq, heap);
2430 
2431  return(cmp_dtuple_rec(tuple, first_rec, offsets) < 0);
2432 }
2433 
2434 /*************************************************************/
2443 UNIV_INTERN
2444 rec_t*
2445 btr_page_split_and_insert(
2446 /*======================*/
2447  btr_cur_t* cursor,
2450  const dtuple_t* tuple,
2451  ulint n_ext,
2452  mtr_t* mtr)
2453 {
2454  buf_block_t* block;
2455  page_t* page;
2456  page_zip_des_t* page_zip;
2457  ulint page_no;
2458  byte direction;
2459  ulint hint_page_no;
2460  buf_block_t* new_block;
2461  page_t* new_page;
2462  page_zip_des_t* new_page_zip;
2463  rec_t* split_rec;
2464  buf_block_t* left_block;
2465  buf_block_t* right_block;
2466  buf_block_t* insert_block;
2467  page_cur_t* page_cursor;
2468  rec_t* first_rec;
2469  byte* buf = 0; /* remove warning */
2470  rec_t* move_limit;
2471  ibool insert_will_fit;
2472  ibool insert_left;
2473  ulint n_iterations = 0;
2474  rec_t* rec;
2475  mem_heap_t* heap;
2476  ulint n_uniq;
2477  ulint* offsets;
2478 
2479  heap = mem_heap_create(1024);
2480  n_uniq = dict_index_get_n_unique_in_tree(cursor->index);
2481 func_start:
2482  mem_heap_empty(heap);
2483  offsets = NULL;
2484 
2485  ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
2486  MTR_MEMO_X_LOCK));
2487 #ifdef UNIV_SYNC_DEBUG
2488  ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
2489 #endif /* UNIV_SYNC_DEBUG */
2490 
2491  block = btr_cur_get_block(cursor);
2492  page = buf_block_get_frame(block);
2493  page_zip = buf_block_get_page_zip(block);
2494 
2495  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
2496  ut_ad(page_get_n_recs(page) >= 1);
2497 
2498  page_no = buf_block_get_page_no(block);
2499 
2500  /* 1. Decide the split record; split_rec == NULL means that the
2501  tuple to be inserted should be the first record on the upper
2502  half-page */
2503  insert_left = FALSE;
2504 
2505  if (n_iterations > 0) {
2506  direction = FSP_UP;
2507  hint_page_no = page_no + 1;
2508  split_rec = btr_page_get_split_rec(cursor, tuple, n_ext);
2509 
2510  if (UNIV_UNLIKELY(split_rec == NULL)) {
2511  insert_left = btr_page_tuple_smaller(
2512  cursor, tuple, offsets, n_uniq, &heap);
2513  }
2514  } else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
2515  direction = FSP_UP;
2516  hint_page_no = page_no + 1;
2517  } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
2518  direction = FSP_DOWN;
2519  hint_page_no = page_no - 1;
2520  ut_ad(split_rec);
2521  } else {
2522  direction = FSP_UP;
2523  hint_page_no = page_no + 1;
2524  /* If there is only one record in the index page, we
2525  can't split the node in the middle by default. We need
2526  to determine whether the new record will be inserted
2527  to the left or right. */
2528 
2529  if (page_get_n_recs(page) > 1) {
2530  split_rec = page_get_middle_rec(page);
2531  } else if (btr_page_tuple_smaller(cursor, tuple,
2532  offsets, n_uniq, &heap)) {
2533  split_rec = page_rec_get_next(
2534  page_get_infimum_rec(page));
2535  } else {
2536  split_rec = NULL;
2537  }
2538  }
2539 
2540  /* 2. Allocate a new page to the index */
2541  new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
2542  btr_page_get_level(page, mtr), mtr);
2543  new_page = buf_block_get_frame(new_block);
2544  new_page_zip = buf_block_get_page_zip(new_block);
2545  btr_page_create(new_block, new_page_zip, cursor->index,
2546  btr_page_get_level(page, mtr), mtr);
2547 
2548  /* 3. Calculate the first record on the upper half-page, and the
2549  first record (move_limit) on original page which ends up on the
2550  upper half */
2551 
2552  if (split_rec) {
2553  first_rec = move_limit = split_rec;
2554 
2555  offsets = rec_get_offsets(split_rec, cursor->index, offsets,
2556  n_uniq, &heap);
2557 
2558  insert_left = cmp_dtuple_rec(tuple, split_rec, offsets) < 0;
2559 
2560  if (UNIV_UNLIKELY(!insert_left && new_page_zip
2561  && n_iterations > 0)) {
2562  /* If a compressed page has already been split,
2563  avoid further splits by inserting the record
2564  to an empty page. */
2565  split_rec = NULL;
2566  goto insert_empty;
2567  }
2568  } else if (UNIV_UNLIKELY(insert_left)) {
2569  ut_a(n_iterations > 0);
2570  first_rec = page_rec_get_next(page_get_infimum_rec(page));
2571  move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
2572  } else {
2573 insert_empty:
2574  ut_ad(!split_rec);
2575  ut_ad(!insert_left);
2576  buf = (unsigned char *)mem_alloc(rec_get_converted_size(cursor->index,
2577  tuple, n_ext));
2578 
2579  first_rec = rec_convert_dtuple_to_rec(buf, cursor->index,
2580  tuple, n_ext);
2581  move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
2582  }
2583 
2584  /* 4. Do first the modifications in the tree structure */
2585 
2586  btr_attach_half_pages(cursor->index, block,
2587  first_rec, new_block, direction, mtr);
2588 
2589  /* If the split is made on the leaf level and the insert will fit
2590  on the appropriate half-page, we may release the tree x-latch.
2591  We can then move the records after releasing the tree latch,
2592  thus reducing the tree latch contention. */
2593 
2594  if (split_rec) {
2595  insert_will_fit = !new_page_zip
2596  && btr_page_insert_fits(cursor, split_rec,
2597  offsets, tuple, n_ext, heap);
2598  } else {
2599  if (!insert_left) {
2600  mem_free(buf);
2601  buf = NULL;
2602  }
2603 
2604  insert_will_fit = !new_page_zip
2605  && btr_page_insert_fits(cursor, NULL,
2606  NULL, tuple, n_ext, heap);
2607  }
2608 
2609  if (insert_will_fit && page_is_leaf(page)) {
2610 
2612  MTR_MEMO_X_LOCK);
2613  }
2614 
2615  /* 5. Move then the records to the new page */
2616  if (direction == FSP_DOWN) {
2617  /* fputs("Split left\n", stderr); */
2618 
2619  if (0
2620 #ifdef UNIV_ZIP_COPY
2621  || page_zip
2622 #endif /* UNIV_ZIP_COPY */
2623  || UNIV_UNLIKELY
2624  (!page_move_rec_list_start(new_block, block, move_limit,
2625  cursor->index, mtr))) {
2626  /* For some reason, compressing new_page failed,
2627  even though it should contain fewer records than
2628  the original page. Copy the page byte for byte
2629  and then delete the records from both pages
2630  as appropriate. Deleting will always succeed. */
2631  ut_a(new_page_zip);
2632 
2633  page_zip_copy_recs(new_page_zip, new_page,
2634  page_zip, page, cursor->index, mtr);
2635  page_delete_rec_list_end(move_limit - page + new_page,
2636  new_block, cursor->index,
2637  ULINT_UNDEFINED,
2638  ULINT_UNDEFINED, mtr);
2639 
2640  /* Update the lock table and possible hash index. */
2641 
2643  new_block, block, move_limit,
2644  new_page + PAGE_NEW_INFIMUM);
2645 
2646  btr_search_move_or_delete_hash_entries(
2647  new_block, block, cursor->index);
2648 
2649  /* Delete the records from the source page. */
2650 
2651  page_delete_rec_list_start(move_limit, block,
2652  cursor->index, mtr);
2653  }
2654 
2655  left_block = new_block;
2656  right_block = block;
2657 
2658  lock_update_split_left(right_block, left_block);
2659  } else {
2660  /* fputs("Split right\n", stderr); */
2661 
2662  if (0
2663 #ifdef UNIV_ZIP_COPY
2664  || page_zip
2665 #endif /* UNIV_ZIP_COPY */
2666  || UNIV_UNLIKELY
2667  (!page_move_rec_list_end(new_block, block, move_limit,
2668  cursor->index, mtr))) {
2669  /* For some reason, compressing new_page failed,
2670  even though it should contain fewer records than
2671  the original page. Copy the page byte for byte
2672  and then delete the records from both pages
2673  as appropriate. Deleting will always succeed. */
2674  ut_a(new_page_zip);
2675 
2676  page_zip_copy_recs(new_page_zip, new_page,
2677  page_zip, page, cursor->index, mtr);
2678  page_delete_rec_list_start(move_limit - page
2679  + new_page, new_block,
2680  cursor->index, mtr);
2681 
2682  /* Update the lock table and possible hash index. */
2683 
2684  lock_move_rec_list_end(new_block, block, move_limit);
2685 
2686  btr_search_move_or_delete_hash_entries(
2687  new_block, block, cursor->index);
2688 
2689  /* Delete the records from the source page. */
2690 
2691  page_delete_rec_list_end(move_limit, block,
2692  cursor->index,
2693  ULINT_UNDEFINED,
2694  ULINT_UNDEFINED, mtr);
2695  }
2696 
2697  left_block = block;
2698  right_block = new_block;
2699 
2700  lock_update_split_right(right_block, left_block);
2701  }
2702 
2703 #ifdef UNIV_ZIP_DEBUG
2704  if (UNIV_LIKELY_NULL(page_zip)) {
2705  ut_a(page_zip_validate(page_zip, page));
2706  ut_a(page_zip_validate(new_page_zip, new_page));
2707  }
2708 #endif /* UNIV_ZIP_DEBUG */
2709 
2710  /* At this point, split_rec, move_limit and first_rec may point
2711  to garbage on the old page. */
2712 
2713  /* 6. The split and the tree modification is now completed. Decide the
2714  page where the tuple should be inserted */
2715 
2716  if (insert_left) {
2717  insert_block = left_block;
2718  } else {
2719  insert_block = right_block;
2720  }
2721 
2722  /* 7. Reposition the cursor for insert and try insertion */
2723  page_cursor = btr_cur_get_page_cur(cursor);
2724 
2725  page_cur_search(insert_block, cursor->index, tuple,
2726  PAGE_CUR_LE, page_cursor);
2727 
2728  rec = page_cur_tuple_insert(page_cursor, tuple,
2729  cursor->index, n_ext, mtr);
2730 
2731 #ifdef UNIV_ZIP_DEBUG
2732  {
2733  page_t* insert_page
2734  = buf_block_get_frame(insert_block);
2735 
2736  page_zip_des_t* insert_page_zip
2737  = buf_block_get_page_zip(insert_block);
2738 
2739  ut_a(!insert_page_zip
2740  || page_zip_validate(insert_page_zip, insert_page));
2741  }
2742 #endif /* UNIV_ZIP_DEBUG */
2743 
2744  if (UNIV_LIKELY(rec != NULL)) {
2745 
2746  goto func_exit;
2747  }
2748 
2749  /* 8. If insert did not fit, try page reorganization */
2750 
2751  if (UNIV_UNLIKELY
2752  (!btr_page_reorganize(insert_block, cursor->index, mtr))) {
2753 
2754  goto insert_failed;
2755  }
2756 
2757  page_cur_search(insert_block, cursor->index, tuple,
2758  PAGE_CUR_LE, page_cursor);
2759  rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
2760  n_ext, mtr);
2761 
2762  if (UNIV_UNLIKELY(rec == NULL)) {
2763  /* The insert did not fit on the page: loop back to the
2764  start of the function for a new split */
2765 insert_failed:
2766  /* We play safe and reset the free bits for new_page */
2767  if (!dict_index_is_clust(cursor->index)) {
2768  ibuf_reset_free_bits(new_block);
2769  }
2770 
2771  /* fprintf(stderr, "Split second round %lu\n",
2772  page_get_page_no(page)); */
2773  n_iterations++;
2774  ut_ad(n_iterations < 2
2775  || buf_block_get_page_zip(insert_block));
2776  ut_ad(!insert_will_fit);
2777 
2778  goto func_start;
2779  }
2780 
2781 func_exit:
2782  /* Insert fit on the page: update the free bits for the
2783  left and right pages in the same mtr */
2784 
2785  if (!dict_index_is_clust(cursor->index) && page_is_leaf(page)) {
2786  ibuf_update_free_bits_for_two_pages_low(
2787  buf_block_get_zip_size(left_block),
2788  left_block, right_block, mtr);
2789  }
2790 
2791 #if 0
2792  fprintf(stderr, "Split and insert done %lu %lu\n",
2793  buf_block_get_page_no(left_block),
2794  buf_block_get_page_no(right_block));
2795 #endif
2796 
2797  ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
2798  ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));
2799 
2800  mem_heap_free(heap);
2801  return(rec);
2802 }
2803 
2804 /*************************************************************/
2806 static
2807 void
2808 btr_level_list_remove(
2809 /*==================*/
2810  ulint space,
2811  ulint zip_size,
2813  page_t* page,
2814  mtr_t* mtr)
2815 {
2816  ulint prev_page_no;
2817  ulint next_page_no;
2818 
2819  ut_ad(page && mtr);
2820  ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
2821  ut_ad(space == page_get_space_id(page));
2822  /* Get the previous and next page numbers of page */
2823 
2824  prev_page_no = btr_page_get_prev(page, mtr);
2825  next_page_no = btr_page_get_next(page, mtr);
2826 
2827  /* Update page links of the level */
2828 
2829  if (prev_page_no != FIL_NULL) {
2830  buf_block_t* prev_block
2831  = btr_block_get(space, zip_size, prev_page_no,
2832  RW_X_LATCH, mtr);
2833  page_t* prev_page
2834  = buf_block_get_frame(prev_block);
2835 #ifdef UNIV_BTR_DEBUG
2836  ut_a(page_is_comp(prev_page) == page_is_comp(page));
2837  ut_a(btr_page_get_next(prev_page, mtr)
2838  == page_get_page_no(page));
2839 #endif /* UNIV_BTR_DEBUG */
2840 
2841  btr_page_set_next(prev_page,
2842  buf_block_get_page_zip(prev_block),
2843  next_page_no, mtr);
2844  }
2845 
2846  if (next_page_no != FIL_NULL) {
2847  buf_block_t* next_block
2848  = btr_block_get(space, zip_size, next_page_no,
2849  RW_X_LATCH, mtr);
2850  page_t* next_page
2851  = buf_block_get_frame(next_block);
2852 #ifdef UNIV_BTR_DEBUG
2853  ut_a(page_is_comp(next_page) == page_is_comp(page));
2854  ut_a(btr_page_get_prev(next_page, mtr)
2855  == page_get_page_no(page));
2856 #endif /* UNIV_BTR_DEBUG */
2857 
2858  btr_page_set_prev(next_page,
2859  buf_block_get_page_zip(next_block),
2860  prev_page_no, mtr);
2861  }
2862 }
2863 
2864 /****************************************************************/
2867 UNIV_INLINE
2868 void
2869 btr_set_min_rec_mark_log(
2870 /*=====================*/
2871  rec_t* rec,
2872  byte type,
2873  mtr_t* mtr)
2874 {
2875  mlog_write_initial_log_record(rec, type, mtr);
2876 
2877  /* Write rec offset as a 2-byte ulint */
2879 }
2880 #else /* !UNIV_HOTBACKUP */
2881 # define btr_set_min_rec_mark_log(rec,comp,mtr) ((void) 0)
2882 #endif /* !UNIV_HOTBACKUP */
2883 
2884 /****************************************************************/
2888 UNIV_INTERN
2889 byte*
2890 btr_parse_set_min_rec_mark(
2891 /*=======================*/
2892  byte* ptr,
2893  byte* end_ptr,
2894  ulint comp,
2895  page_t* page,
2896  mtr_t* mtr)
2897 {
2898  rec_t* rec;
2899 
2900  if (end_ptr < ptr + 2) {
2901 
2902  return(NULL);
2903  }
2904 
2905  if (page) {
2906  ut_a(!page_is_comp(page) == !comp);
2907 
2908  rec = page + mach_read_from_2(ptr);
2909 
2910  btr_set_min_rec_mark(rec, mtr);
2911  }
2912 
2913  return(ptr + 2);
2914 }
2915 
2916 /****************************************************************/
2918 UNIV_INTERN
2919 void
2920 btr_set_min_rec_mark(
2921 /*=================*/
2922  rec_t* rec,
2923  mtr_t* mtr)
2924 {
2925  ulint info_bits;
2926 
2927  if (UNIV_LIKELY(page_rec_is_comp(rec))) {
2928  info_bits = rec_get_info_bits(rec, TRUE);
2929 
2930  rec_set_info_bits_new(rec, info_bits | REC_INFO_MIN_REC_FLAG);
2931 
2932  btr_set_min_rec_mark_log(rec, MLOG_COMP_REC_MIN_MARK, mtr);
2933  } else {
2934  info_bits = rec_get_info_bits(rec, FALSE);
2935 
2936  rec_set_info_bits_old(rec, info_bits | REC_INFO_MIN_REC_FLAG);
2937 
2938  btr_set_min_rec_mark_log(rec, MLOG_REC_MIN_MARK, mtr);
2939  }
2940 }
2941 
2942 #ifndef UNIV_HOTBACKUP
2943 /*************************************************************/
2945 UNIV_INTERN
2946 void
2947 btr_node_ptr_delete(
2948 /*================*/
2949  dict_index_t* index,
2950  buf_block_t* block,
2951  mtr_t* mtr)
2952 {
2953  btr_cur_t cursor;
2954  ibool compressed;
2955  ulint err;
2956 
2957  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
2958 
2959  /* Delete node pointer on father page */
2960  btr_page_get_father(index, block, mtr, &cursor);
2961 
2962  compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, RB_NONE,
2963  mtr);
2964  ut_a(err == DB_SUCCESS);
2965 
2966  if (!compressed) {
2967  btr_cur_compress_if_useful(&cursor, mtr);
2968  }
2969 }
2970 
2971 /*************************************************************/
2974 static
2975 void
2976 btr_lift_page_up(
2977 /*=============*/
2978  dict_index_t* index,
2979  buf_block_t* block,
2983  mtr_t* mtr)
2984 {
2985  buf_block_t* father_block;
2986  page_t* father_page;
2987  ulint page_level;
2988  page_zip_des_t* father_page_zip;
2989  page_t* page = buf_block_get_frame(block);
2990  ulint root_page_no;
2991  buf_block_t* blocks[BTR_MAX_LEVELS];
2992  ulint n_blocks;
2993  ulint i;
2994 
2995  ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
2996  ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
2997  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
2998 
2999  page_level = btr_page_get_level(page, mtr);
3000  root_page_no = dict_index_get_page(index);
3001 
3002  {
3003  btr_cur_t cursor;
3004  mem_heap_t* heap = mem_heap_create(100);
3005  ulint* offsets;
3006  buf_block_t* b;
3007 
3008  offsets = btr_page_get_father_block(NULL, heap, index,
3009  block, mtr, &cursor);
3010  father_block = btr_cur_get_block(&cursor);
3011  father_page_zip = buf_block_get_page_zip(father_block);
3012  father_page = buf_block_get_frame(father_block);
3013 
3014  n_blocks = 0;
3015 
3016  /* Store all ancestor pages so we can reset their
3017  levels later on. We have to do all the searches on
3018  the tree now because later on, after we've replaced
3019  the first level, the tree is in an inconsistent state
3020  and can not be searched. */
3021  for (b = father_block;
3022  buf_block_get_page_no(b) != root_page_no; ) {
3023  ut_a(n_blocks < BTR_MAX_LEVELS);
3024 
3025  offsets = btr_page_get_father_block(offsets, heap,
3026  index, b,
3027  mtr, &cursor);
3028 
3029  blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
3030  }
3031 
3032  mem_heap_free(heap);
3033  }
3034 
3035  btr_search_drop_page_hash_index(block);
3036 
3037  /* Make the father empty */
3038  btr_page_empty(father_block, father_page_zip, index, page_level, mtr);
3039 
3040  /* Copy the records to the father page one by one. */
3041  if (0
3042 #ifdef UNIV_ZIP_COPY
3043  || father_page_zip
3044 #endif /* UNIV_ZIP_COPY */
3045  || UNIV_UNLIKELY
3046  (!page_copy_rec_list_end(father_block, block,
3047  page_get_infimum_rec(page),
3048  index, mtr))) {
3049  const page_zip_des_t* page_zip
3050  = buf_block_get_page_zip(block);
3051  ut_a(father_page_zip);
3052  ut_a(page_zip);
3053 
3054  /* Copy the page byte for byte. */
3055  page_zip_copy_recs(father_page_zip, father_page,
3056  page_zip, page, index, mtr);
3057 
3058  /* Update the lock table and possible hash index. */
3059 
3060  lock_move_rec_list_end(father_block, block,
3061  page_get_infimum_rec(page));
3062 
3063  btr_search_move_or_delete_hash_entries(father_block, block,
3064  index);
3065  }
3066 
3067  btr_blob_dbg_remove(page, index, "btr_lift_page_up");
3068  lock_update_copy_and_discard(father_block, block);
3069 
3070  /* Go upward to root page, decrementing levels by one. */
3071  for (i = 0; i < n_blocks; i++, page_level++) {
3072  page_t* inner_page = buf_block_get_frame(blocks[i]);
3073  page_zip_des_t* page_zip= buf_block_get_page_zip(blocks[i]);
3074 
3075  ut_ad(btr_page_get_level(page, mtr) == page_level + 1);
3076 
3077  btr_page_set_level(inner_page, page_zip, page_level, mtr);
3078 #ifdef UNIV_ZIP_DEBUG
3079  ut_a(!page_zip || page_zip_validate(page_zip, inner_page));
3080 #endif /* UNIV_ZIP_DEBUG */
3081  }
3082 
3083  /* Free the file page */
3084  btr_page_free(index, block, mtr);
3085 
3086  /* We play it safe and reset the free bits for the father */
3087  if (!dict_index_is_clust(index)) {
3088  ibuf_reset_free_bits(father_block);
3089  }
3090  ut_ad(page_validate(father_page, index));
3091  ut_ad(btr_check_node_ptr(index, father_block, mtr));
3092 }
3093 
3094 /*************************************************************/
3104 UNIV_INTERN
3105 ibool
3106 btr_compress(
3107 /*=========*/
3108  btr_cur_t* cursor,
3112  mtr_t* mtr)
3113 {
3114  dict_index_t* index;
3115  ulint space;
3116  ulint zip_size;
3117  ulint left_page_no;
3118  ulint right_page_no;
3119  buf_block_t* merge_block;
3120  page_t* merge_page;
3121  page_zip_des_t* merge_page_zip;
3122  ibool is_left;
3123  buf_block_t* block;
3124  page_t* page;
3125  btr_cur_t father_cursor;
3126  mem_heap_t* heap;
3127  ulint* offsets;
3128  ulint data_size;
3129  ulint n_recs;
3130  ulint max_ins_size;
3131  ulint max_ins_size_reorg;
3132 
3133  block = btr_cur_get_block(cursor);
3134  page = btr_cur_get_page(cursor);
3135  index = btr_cur_get_index(cursor);
3136  ut_a((ibool) !!page_is_comp(page) == dict_table_is_comp(index->table));
3137 
3138  ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
3139  MTR_MEMO_X_LOCK));
3140  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
3141  space = dict_index_get_space(index);
3142  zip_size = dict_table_zip_size(index->table);
3143 
3144  left_page_no = btr_page_get_prev(page, mtr);
3145  right_page_no = btr_page_get_next(page, mtr);
3146 
3147 #if 0
3148  fprintf(stderr, "Merge left page %lu right %lu \n",
3149  left_page_no, right_page_no);
3150 #endif
3151 
3152  heap = mem_heap_create(100);
3153  offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
3154  &father_cursor);
3155 
3156  /* Decide the page to which we try to merge and which will inherit
3157  the locks */
3158 
3159  is_left = left_page_no != FIL_NULL;
3160 
3161  if (is_left) {
3162 
3163  merge_block = btr_block_get(space, zip_size, left_page_no,
3164  RW_X_LATCH, mtr);
3165  merge_page = buf_block_get_frame(merge_block);
3166 #ifdef UNIV_BTR_DEBUG
3167  ut_a(btr_page_get_next(merge_page, mtr)
3168  == buf_block_get_page_no(block));
3169 #endif /* UNIV_BTR_DEBUG */
3170  } else if (right_page_no != FIL_NULL) {
3171 
3172  merge_block = btr_block_get(space, zip_size, right_page_no,
3173  RW_X_LATCH, mtr);
3174  merge_page = buf_block_get_frame(merge_block);
3175 #ifdef UNIV_BTR_DEBUG
3176  ut_a(btr_page_get_prev(merge_page, mtr)
3177  == buf_block_get_page_no(block));
3178 #endif /* UNIV_BTR_DEBUG */
3179  } else {
3180  /* The page is the only one on the level, lift the records
3181  to the father */
3182  btr_lift_page_up(index, block, mtr);
3183  mem_heap_free(heap);
3184  return(TRUE);
3185  }
3186 
3187  n_recs = page_get_n_recs(page);
3188  data_size = page_get_data_size(page);
3189 #ifdef UNIV_BTR_DEBUG
3190  ut_a(page_is_comp(merge_page) == page_is_comp(page));
3191 #endif /* UNIV_BTR_DEBUG */
3192 
3193  max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
3194  merge_page, n_recs);
3195  if (data_size > max_ins_size_reorg) {
3196 
3197  /* No space for merge */
3198 err_exit:
3199  /* We play it safe and reset the free bits. */
3200  if (zip_size
3201  && page_is_leaf(merge_page)
3202  && !dict_index_is_clust(index)) {
3203  ibuf_reset_free_bits(merge_block);
3204  }
3205 
3206  mem_heap_free(heap);
3207  return(FALSE);
3208  }
3209 
3210  ut_ad(page_validate(merge_page, index));
3211 
3212  max_ins_size = page_get_max_insert_size(merge_page, n_recs);
3213 
3214  if (UNIV_UNLIKELY(data_size > max_ins_size)) {
3215 
3216  /* We have to reorganize merge_page */
3217 
3218  if (UNIV_UNLIKELY(!btr_page_reorganize(merge_block,
3219  index, mtr))) {
3220 
3221  goto err_exit;
3222  }
3223 
3224  max_ins_size = page_get_max_insert_size(merge_page, n_recs);
3225 
3226  ut_ad(page_validate(merge_page, index));
3227  ut_ad(max_ins_size == max_ins_size_reorg);
3228 
3229  if (UNIV_UNLIKELY(data_size > max_ins_size)) {
3230 
3231  /* Add fault tolerance, though this should
3232  never happen */
3233 
3234  goto err_exit;
3235  }
3236  }
3237 
3238  merge_page_zip = buf_block_get_page_zip(merge_block);
3239 #ifdef UNIV_ZIP_DEBUG
3240  if (UNIV_LIKELY_NULL(merge_page_zip)) {
3241  const page_zip_des_t* page_zip
3242  = buf_block_get_page_zip(block);
3243  ut_a(page_zip);
3244  ut_a(page_zip_validate(merge_page_zip, merge_page));
3245  ut_a(page_zip_validate(page_zip, page));
3246  }
3247 #endif /* UNIV_ZIP_DEBUG */
3248 
3249  /* Move records to the merge page */
3250  if (is_left) {
3251  rec_t* orig_pred = page_copy_rec_list_start(
3252  merge_block, block, page_get_supremum_rec(page),
3253  index, mtr);
3254 
3255  if (UNIV_UNLIKELY(!orig_pred)) {
3256  goto err_exit;
3257  }
3258 
3259  btr_search_drop_page_hash_index(block);
3260 
3261  /* Remove the page from the level list */
3262  btr_level_list_remove(space, zip_size, page, mtr);
3263 
3264  btr_node_ptr_delete(index, block, mtr);
3265  lock_update_merge_left(merge_block, orig_pred, block);
3266  } else {
3267  rec_t* orig_succ;
3268 #ifdef UNIV_BTR_DEBUG
3269  byte fil_page_prev[4];
3270 #endif /* UNIV_BTR_DEBUG */
3271 
3272  if (UNIV_LIKELY_NULL(merge_page_zip)) {
3273  /* The function page_zip_compress(), which will be
3274  invoked by page_copy_rec_list_end() below,
3275  requires that FIL_PAGE_PREV be FIL_NULL.
3276  Clear the field, but prepare to restore it. */
3277 #ifdef UNIV_BTR_DEBUG
3278  memcpy(fil_page_prev, merge_page + FIL_PAGE_PREV, 4);
3279 #endif /* UNIV_BTR_DEBUG */
3280 #if FIL_NULL != 0xffffffff
3281 # error "FIL_NULL != 0xffffffff"
3282 #endif
3283  memset(merge_page + FIL_PAGE_PREV, 0xff, 4);
3284  }
3285 
3286  orig_succ = page_copy_rec_list_end(merge_block, block,
3287  page_get_infimum_rec(page),
3288  cursor->index, mtr);
3289 
3290  if (UNIV_UNLIKELY(!orig_succ)) {
3291  ut_a(merge_page_zip);
3292 #ifdef UNIV_BTR_DEBUG
3293  /* FIL_PAGE_PREV was restored from merge_page_zip. */
3294  ut_a(!memcmp(fil_page_prev,
3295  merge_page + FIL_PAGE_PREV, 4));
3296 #endif /* UNIV_BTR_DEBUG */
3297  goto err_exit;
3298  }
3299 
3300  btr_search_drop_page_hash_index(block);
3301 
3302 #ifdef UNIV_BTR_DEBUG
3303  if (UNIV_LIKELY_NULL(merge_page_zip)) {
3304  /* Restore FIL_PAGE_PREV in order to avoid an assertion
3305  failure in btr_level_list_remove(), which will set
3306  the field again to FIL_NULL. Even though this makes
3307  merge_page and merge_page_zip inconsistent for a
3308  split second, it is harmless, because the pages
3309  are X-latched. */
3310  memcpy(merge_page + FIL_PAGE_PREV, fil_page_prev, 4);
3311  }
3312 #endif /* UNIV_BTR_DEBUG */
3313 
3314  /* Remove the page from the level list */
3315  btr_level_list_remove(space, zip_size, page, mtr);
3316 
3317  /* Replace the address of the old child node (= page) with the
3318  address of the merge page to the right */
3319 
3320  btr_node_ptr_set_child_page_no(
3321  btr_cur_get_rec(&father_cursor),
3322  btr_cur_get_page_zip(&father_cursor),
3323  offsets, right_page_no, mtr);
3324  btr_node_ptr_delete(index, merge_block, mtr);
3325 
3326  lock_update_merge_right(merge_block, orig_succ, block);
3327  }
3328 
3329  btr_blob_dbg_remove(page, index, "btr_compress");
3330  mem_heap_free(heap);
3331 
3332  if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) {
3333  /* Update the free bits of the B-tree page in the
3334  insert buffer bitmap. This has to be done in a
3335  separate mini-transaction that is committed before the
3336  main mini-transaction. We cannot update the insert
3337  buffer bitmap in this mini-transaction, because
3338  btr_compress() can be invoked recursively without
3339  committing the mini-transaction in between. Since
3340  insert buffer bitmap pages have a lower rank than
3341  B-tree pages, we must not access other pages in the
3342  same mini-transaction after accessing an insert buffer
3343  bitmap page. */
3344 
3345  /* The free bits in the insert buffer bitmap must
3346  never exceed the free space on a page. It is safe to
3347  decrement or reset the bits in the bitmap in a
3348  mini-transaction that is committed before the
3349  mini-transaction that affects the free space. */
3350 
3351  /* It is unsafe to increment the bits in a separately
3352  committed mini-transaction, because in crash recovery,
3353  the free bits could momentarily be set too high. */
3354 
3355  if (zip_size) {
3356  /* Because the free bits may be incremented
3357  and we cannot update the insert buffer bitmap
3358  in the same mini-transaction, the only safe
3359  thing we can do here is the pessimistic
3360  approach: reset the free bits. */
3361  ibuf_reset_free_bits(merge_block);
3362  } else {
3363  /* On uncompressed pages, the free bits will
3364  never increase here. Thus, it is safe to
3365  write the bits accurately in a separate
3366  mini-transaction. */
3367  ibuf_update_free_bits_if_full(merge_block,
3368  UNIV_PAGE_SIZE,
3369  ULINT_UNDEFINED);
3370  }
3371  }
3372 
3373  ut_ad(page_validate(merge_page, index));
3374 #ifdef UNIV_ZIP_DEBUG
3375  ut_a(!merge_page_zip || page_zip_validate(merge_page_zip, merge_page));
3376 #endif /* UNIV_ZIP_DEBUG */
3377 
3378  /* Free the file page */
3379  btr_page_free(index, block, mtr);
3380 
3381  ut_ad(btr_check_node_ptr(index, merge_block, mtr));
3382  return(TRUE);
3383 }
3384 
/*************************************************************//**
Discards a page that is the only page on its level.  Repeatedly frees
single-record pages walking up towards the root, then empties the root,
leaving the tree with just an empty root (leaf) page.  Called from
btr_discard_page() when the page to discard has no siblings. */
static
void
btr_discard_only_page_on_level(
/*===========================*/
	dict_index_t*	index,	/*!< in: index tree */
	buf_block_t*	block,	/*!< in: page which is the only page on
				its level */
	mtr_t*		mtr)	/*!< in: mini-transaction */
{
	ulint		page_level = 0;
	trx_id_t	max_trx_id;

	/* Save the PAGE_MAX_TRX_ID from the leaf page. */
	max_trx_id = page_get_max_trx_id(buf_block_get_frame(block));

	/* Free each page on the path up to (but excluding) the root.
	Every page on this path must hold exactly one record (the node
	pointer to the level below) and have no siblings. */
	while (buf_block_get_page_no(block) != dict_index_get_page(index)) {
		btr_cur_t	cursor;
		buf_block_t*	father;
		const page_t*	page	= buf_block_get_frame(block);

		ut_a(page_get_n_recs(page) == 1);
		ut_a(page_level == btr_page_get_level(page, mtr));
		ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
		ut_a(btr_page_get_next(page, mtr) == FIL_NULL);

		ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
		/* Drop any adaptive hash index entries pointing into the
		page before it is freed. */
		btr_search_drop_page_hash_index(block);

		btr_page_get_father(index, block, mtr, &cursor);
		father = btr_cur_get_block(&cursor);

		/* Move any explicit record locks to the supremum of the
		father page. */
		lock_update_discard(father, PAGE_HEAP_NO_SUPREMUM, block);

		/* Free the file page */
		btr_page_free(index, block, mtr);

		block = father;
		page_level++;
	}

	/* block is the root page, which must be empty, except
	for the node pointer to the (now discarded) block(s). */

#ifdef UNIV_BTR_DEBUG
	if (!dict_index_is_ibuf(index)) {
		const page_t*	root	= buf_block_get_frame(block);
		const ulint	space	= dict_index_get_space(index);
		ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
					    + root, space));
		ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
					    + root, space));
	}
#endif /* UNIV_BTR_DEBUG */

	/* Make the root an empty leaf page (level 0). */
	btr_page_empty(block, buf_block_get_page_zip(block), index, 0, mtr);

	if (!dict_index_is_clust(index)) {
		/* We play it safe and reset the free bits for the root */
		ibuf_reset_free_bits(block);

		/* The root is now the leaf: carry over the saved
		PAGE_MAX_TRX_ID, which secondary index leaf pages must
		have set. */
		if (page_is_leaf(buf_block_get_frame(block))) {
			ut_a(max_trx_id);
			page_set_max_trx_id(block,
					    buf_block_get_page_zip(block),
					    max_trx_id, mtr);
		}
	}
}
3457 
3458 /*************************************************************/
3462 UNIV_INTERN
3463 void
3464 btr_discard_page(
3465 /*=============*/
3466  btr_cur_t* cursor,
3468  mtr_t* mtr)
3469 {
3470  dict_index_t* index;
3471  ulint space;
3472  ulint zip_size;
3473  ulint left_page_no;
3474  ulint right_page_no;
3475  buf_block_t* merge_block;
3476  page_t* merge_page;
3477  buf_block_t* block;
3478  page_t* page;
3479  rec_t* node_ptr;
3480 
3481  block = btr_cur_get_block(cursor);
3482  index = btr_cur_get_index(cursor);
3483 
3485  ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
3486  MTR_MEMO_X_LOCK));
3487  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
3488  space = dict_index_get_space(index);
3489  zip_size = dict_table_zip_size(index->table);
3490 
3491  /* Decide the page which will inherit the locks */
3492 
3493  left_page_no = btr_page_get_prev(buf_block_get_frame(block), mtr);
3494  right_page_no = btr_page_get_next(buf_block_get_frame(block), mtr);
3495 
3496  if (left_page_no != FIL_NULL) {
3497  merge_block = btr_block_get(space, zip_size, left_page_no,
3498  RW_X_LATCH, mtr);
3499  merge_page = buf_block_get_frame(merge_block);
3500 #ifdef UNIV_BTR_DEBUG
3501  ut_a(btr_page_get_next(merge_page, mtr)
3502  == buf_block_get_page_no(block));
3503 #endif /* UNIV_BTR_DEBUG */
3504  } else if (right_page_no != FIL_NULL) {
3505  merge_block = btr_block_get(space, zip_size, right_page_no,
3506  RW_X_LATCH, mtr);
3507  merge_page = buf_block_get_frame(merge_block);
3508 #ifdef UNIV_BTR_DEBUG
3509  ut_a(btr_page_get_prev(merge_page, mtr)
3510  == buf_block_get_page_no(block));
3511 #endif /* UNIV_BTR_DEBUG */
3512  } else {
3513  btr_discard_only_page_on_level(index, block, mtr);
3514 
3515  return;
3516  }
3517 
3518  page = buf_block_get_frame(block);
3519  ut_a(page_is_comp(merge_page) == page_is_comp(page));
3520  btr_search_drop_page_hash_index(block);
3521 
3522  if (left_page_no == FIL_NULL && !page_is_leaf(page)) {
3523 
3524  /* We have to mark the leftmost node pointer on the right
3525  side page as the predefined minimum record */
3526  node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page));
3527 
3528  ut_ad(page_rec_is_user_rec(node_ptr));
3529 
3530  /* This will make page_zip_validate() fail on merge_page
3531  until btr_level_list_remove() completes. This is harmless,
3532  because everything will take place within a single
3533  mini-transaction and because writing to the redo log
3534  is an atomic operation (performed by mtr_commit()). */
3535  btr_set_min_rec_mark(node_ptr, mtr);
3536  }
3537 
3538  btr_node_ptr_delete(index, block, mtr);
3539 
3540  /* Remove the page from the level list */
3541  btr_level_list_remove(space, zip_size, page, mtr);
3542 #ifdef UNIV_ZIP_DEBUG
3543  {
3544  page_zip_des_t* merge_page_zip
3545  = buf_block_get_page_zip(merge_block);
3546  ut_a(!merge_page_zip
3547  || page_zip_validate(merge_page_zip, merge_page));
3548  }
3549 #endif /* UNIV_ZIP_DEBUG */
3550 
3551  if (left_page_no != FIL_NULL) {
3552  lock_update_discard(merge_block, PAGE_HEAP_NO_SUPREMUM,
3553  block);
3554  } else {
3555  lock_update_discard(merge_block,
3556  lock_get_min_heap_no(merge_block),
3557  block);
3558  }
3559 
3560  btr_blob_dbg_remove(page, index, "btr_discard_page");
3561 
3562  /* Free the file page */
3563  btr_page_free(index, block, mtr);
3564 
3565  ut_ad(btr_check_node_ptr(index, merge_block, mtr));
3566 }
3567 
3568 #ifdef UNIV_BTR_PRINT
3569 /*************************************************************/
3571 UNIV_INTERN
3572 void
3573 btr_print_size(
3574 /*===========*/
3575  dict_index_t* index)
3576 {
3577  page_t* root;
3578  fseg_header_t* seg;
3579  mtr_t mtr;
3580 
3581  if (dict_index_is_ibuf(index)) {
3582  fputs("Sorry, cannot print info of an ibuf tree:"
3583  " use ibuf functions\n", stderr);
3584 
3585  return;
3586  }
3587 
3588  mtr_start(&mtr);
3589 
3590  root = btr_root_get(index, &mtr);
3591 
3592  seg = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
3593 
3594  fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr);
3595  fseg_print(seg, &mtr);
3596 
3597  if (!(index->type & DICT_UNIVERSAL)) {
3598 
3599  seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
3600 
3601  fputs("INFO OF THE LEAF PAGE SEGMENT\n", stderr);
3602  fseg_print(seg, &mtr);
3603  }
3604 
3605  mtr_commit(&mtr);
3606 }
3607 
3608 /************************************************************/
3610 static
3611 void
3612 btr_print_recursive(
3613 /*================*/
3614  dict_index_t* index,
3615  buf_block_t* block,
3616  ulint width,
3618  mem_heap_t** heap,
3619  ulint** offsets,
3620  mtr_t* mtr)
3621 {
3622  const page_t* page = buf_block_get_frame(block);
3623  page_cur_t cursor;
3624  ulint n_recs;
3625  ulint i = 0;
3626  mtr_t mtr2;
3627 
3628  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
3629  fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n",
3630  (ulong) btr_page_get_level(page, mtr),
3631  (ulong) buf_block_get_page_no(block));
3632 
3633  page_print(block, index, width, width);
3634 
3635  n_recs = page_get_n_recs(page);
3636 
3637  page_cur_set_before_first(block, &cursor);
3638  page_cur_move_to_next(&cursor);
3639 
3640  while (!page_cur_is_after_last(&cursor)) {
3641 
3642  if (page_is_leaf(page)) {
3643 
3644  /* If this is the leaf level, do nothing */
3645 
3646  } else if ((i <= width) || (i >= n_recs - width)) {
3647 
3648  const rec_t* node_ptr;
3649 
3650  mtr_start(&mtr2);
3651 
3652  node_ptr = page_cur_get_rec(&cursor);
3653 
3654  *offsets = rec_get_offsets(node_ptr, index, *offsets,
3655  ULINT_UNDEFINED, heap);
3656  btr_print_recursive(index,
3657  btr_node_ptr_get_child(node_ptr,
3658  index,
3659  *offsets,
3660  &mtr2),
3661  width, heap, offsets, &mtr2);
3662  mtr_commit(&mtr2);
3663  }
3664 
3665  page_cur_move_to_next(&cursor);
3666  i++;
3667  }
3668 }
3669 
3670 /**************************************************************/
3672 UNIV_INTERN
3673 void
3674 btr_print_index(
3675 /*============*/
3676  dict_index_t* index,
3677  ulint width)
3679 {
3680  mtr_t mtr;
3681  buf_block_t* root;
3682  mem_heap_t* heap = NULL;
3683  ulint offsets_[REC_OFFS_NORMAL_SIZE];
3684  ulint* offsets = offsets_;
3685  rec_offs_init(offsets_);
3686 
3687  fputs("--------------------------\n"
3688  "INDEX TREE PRINT\n", stderr);
3689 
3690  mtr_start(&mtr);
3691 
3692  root = btr_root_block_get(index, &mtr);
3693 
3694  btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
3695  if (UNIV_LIKELY_NULL(heap)) {
3696  mem_heap_free(heap);
3697  }
3698 
3699  mtr_commit(&mtr);
3700 
3701  btr_validate_index(index, NULL);
3702 }
3703 #endif /* UNIV_BTR_PRINT */
3704 
3705 #ifdef UNIV_DEBUG
/************************************************************//**
Checks that the node pointer to a page in the father page agrees with
the page: the father record must point to this page, and for non-leaf
pages the node pointer tuple built from the page's first record must
compare equal to the father record.  Debug builds only; failures fire
assertions, so a normal return is always TRUE.
@return	TRUE */
UNIV_INTERN
ibool
btr_check_node_ptr(
/*===============*/
	dict_index_t*	index,	/*!< in: index tree */
	buf_block_t*	block,	/*!< in: index page */
	mtr_t*		mtr)	/*!< in: mtr */
{
	mem_heap_t*	heap;
	dtuple_t*	tuple;
	ulint*		offsets;
	btr_cur_t	cursor;
	page_t*		page = buf_block_get_frame(block);

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	/* The root has no father page: nothing to check. */
	if (dict_index_get_page(index) == buf_block_get_page_no(block)) {

		return(TRUE);
	}

	/* Locating the father already asserts that a matching node
	pointer exists. */
	heap = mem_heap_create(256);
	offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
					    &cursor);

	if (page_is_leaf(page)) {

		/* For leaf pages only the father lookup above is
		verified; the tuple comparison below applies to node
		pointer pages. */
		goto func_exit;
	}

	/* Build a node pointer from the first user record of the page
	and require it to match the father record exactly. */
	tuple = dict_index_build_node_ptr(
		index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap,
		btr_page_get_level(page, mtr));

	ut_a(!cmp_dtuple_rec(tuple, btr_cur_get_rec(&cursor), offsets));
func_exit:
	mem_heap_free(heap);

	return(TRUE);
}
3748 #endif /* UNIV_DEBUG */
3749 
3750 /************************************************************/
3752 static
3753 void
3754 btr_index_rec_validate_report(
3755 /*==========================*/
3756  const page_t* page,
3757  const rec_t* rec,
3758  const dict_index_t* index)
3759 {
3760  fputs("InnoDB: Record in ", stderr);
3761  dict_index_name_print(stderr, NULL, index);
3762  fprintf(stderr, ", page %lu, at offset %lu\n",
3763  page_get_page_no(page), (ulint) page_offset(rec));
3764 }
3765 
/************************************************************//**
Checks the size and number of fields in a record based on the index
definition.  Each field must either be SQL NULL, match the fixed
length of its column, or (for a prefix index field) not exceed the
prefix length.
@return	TRUE if ok */
UNIV_INTERN
ibool
btr_index_rec_validate(
/*===================*/
	const rec_t*		rec,		/*!< in: index record */
	const dict_index_t*	index,		/*!< in: index */
	ibool			dump_on_error)	/*!< in: TRUE if the
						function should print hex
						dump of record and page
						on error */
{
	ulint		len;
	ulint		n;
	ulint		i;
	const page_t*	page;
	mem_heap_t*	heap	= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets	= offsets_;
	rec_offs_init(offsets_);

	page = page_align(rec);

	if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
		/* The insert buffer index tree can contain records from any
		other index: we cannot check the number of fields or
		their length */

		return(TRUE);
	}

	/* The page's compact-format flag must agree with the table. */
	if (UNIV_UNLIKELY((ibool)!!page_is_comp(page)
			  != dict_table_is_comp(index->table))) {
		btr_index_rec_validate_report(page, rec, index);
		fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n",
			(ulong) !!page_is_comp(page),
			(ulong) dict_table_is_comp(index->table));

		return(FALSE);
	}

	n = dict_index_get_n_fields(index);

	/* In the old (redundant) format the field count is stored in
	the record itself and can be checked directly. */
	if (!page_is_comp(page)
	    && UNIV_UNLIKELY(rec_get_n_fields_old(rec) != n)) {
		btr_index_rec_validate_report(page, rec, index);
		fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n",
			(ulong) rec_get_n_fields_old(rec), (ulong) n);

		if (dump_on_error) {
			buf_page_print(page, 0);

			fputs("InnoDB: corrupt record ", stderr);
			rec_print_old(stderr, rec);
			putc('\n', stderr);
		}
		return(FALSE);
	}

	/* rec_get_offsets() may allocate 'heap'; every return path
	below this point must free it. */
	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);

	for (i = 0; i < n; i++) {
		ulint	fixed_size = dict_col_get_fixed_size(
			dict_index_get_nth_col(index, i), page_is_comp(page));

		rec_get_nth_field_offs(offsets, i, &len);

		/* Note that if fixed_size != 0, it equals the
		length of a fixed-size column in the clustered index.
		A prefix index of the column is of fixed, but different
		length.  When fixed_size == 0, prefix_len is the maximum
		length of the prefix index column. */

		if ((dict_index_get_nth_field(index, i)->prefix_len == 0
		     && len != UNIV_SQL_NULL && fixed_size
		     && len != fixed_size)
		    || (dict_index_get_nth_field(index, i)->prefix_len > 0
			&& len != UNIV_SQL_NULL
			&& len
			> dict_index_get_nth_field(index, i)->prefix_len)) {

			btr_index_rec_validate_report(page, rec, index);
			fprintf(stderr,
				"InnoDB: field %lu len is %lu,"
				" should be %lu\n",
				(ulong) i, (ulong) len, (ulong) fixed_size);

			if (dump_on_error) {
				buf_page_print(page, 0);

				fputs("InnoDB: corrupt record ", stderr);
				rec_print_new(stderr, rec, offsets);
				putc('\n', stderr);
			}
			if (UNIV_LIKELY_NULL(heap)) {
				mem_heap_free(heap);
			}
			return(FALSE);
		}
	}

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	return(TRUE);
}
3874 
3875 /************************************************************/
3879 static
3880 ibool
3881 btr_index_page_validate(
3882 /*====================*/
3883  buf_block_t* block,
3884  dict_index_t* index)
3885 {
3886  page_cur_t cur;
3887  ibool ret = TRUE;
3888 
3889  page_cur_set_before_first(block, &cur);
3890  page_cur_move_to_next(&cur);
3891 
3892  for (;;) {
3893  if (page_cur_is_after_last(&cur)) {
3894 
3895  break;
3896  }
3897 
3898  if (!btr_index_rec_validate(cur.rec, index, TRUE)) {
3899 
3900  return(FALSE);
3901  }
3902 
3903  page_cur_move_to_next(&cur);
3904  }
3905 
3906  return(ret);
3907 }
3908 
3909 /************************************************************/
3911 static
3912 void
3913 btr_validate_report1(
3914 /*=================*/
3915  dict_index_t* index,
3916  ulint level,
3917  const buf_block_t* block)
3918 {
3919  fprintf(stderr, "InnoDB: Error in page %lu of ",
3920  buf_block_get_page_no(block));
3921  dict_index_name_print(stderr, NULL, index);
3922  if (level) {
3923  fprintf(stderr, ", index tree level %lu", level);
3924  }
3925  putc('\n', stderr);
3926 }
3927 
3928 /************************************************************/
3930 static
3931 void
3932 btr_validate_report2(
3933 /*=================*/
3934  const dict_index_t* index,
3935  ulint level,
3936  const buf_block_t* block1,
3937  const buf_block_t* block2)
3938 {
3939  fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ",
3940  buf_block_get_page_no(block1),
3941  buf_block_get_page_no(block2));
3942  dict_index_name_print(stderr, NULL, index);
3943  if (level) {
3944  fprintf(stderr, ", index tree level %lu", level);
3945  }
3946  putc('\n', stderr);
3947 }
3948 
3949 /************************************************************/
3952 static
3953 ibool
3954 btr_validate_level(
3955 /*===============*/
3956  dict_index_t* index,
3957  trx_t* trx,
3958  ulint level)
3959 {
3960  ulint space;
3961  ulint zip_size;
3962  buf_block_t* block;
3963  page_t* page;
3964  buf_block_t* right_block = 0; /* remove warning */
3965  page_t* right_page = 0; /* remove warning */
3966  page_t* father_page;
3967  btr_cur_t node_cur;
3968  btr_cur_t right_node_cur;
3969  rec_t* rec;
3970  ulint right_page_no;
3971  ulint left_page_no;
3972  page_cur_t cursor;
3973  dtuple_t* node_ptr_tuple;
3974  ibool ret = TRUE;
3975  mtr_t mtr;
3976  mem_heap_t* heap = mem_heap_create(256);
3977  ulint* offsets = NULL;
3978  ulint* offsets2= NULL;
3979 #ifdef UNIV_ZIP_DEBUG
3980  page_zip_des_t* page_zip;
3981 #endif /* UNIV_ZIP_DEBUG */
3982 
3983  mtr_start(&mtr);
3984 
3985  mtr_x_lock(dict_index_get_lock(index), &mtr);
3986 
3987  block = btr_root_block_get(index, &mtr);
3988  page = buf_block_get_frame(block);
3989 
3990  space = dict_index_get_space(index);
3991  zip_size = dict_table_zip_size(index->table);
3992 
3993  while (level != btr_page_get_level(page, &mtr)) {
3994  const rec_t* node_ptr;
3995 
3996  ut_a(space == buf_block_get_space(block));
3997  ut_a(space == page_get_space_id(page));
3998 #ifdef UNIV_ZIP_DEBUG
3999  page_zip = buf_block_get_page_zip(block);
4000  ut_a(!page_zip || page_zip_validate(page_zip, page));
4001 #endif /* UNIV_ZIP_DEBUG */
4002  ut_a(!page_is_leaf(page));
4003 
4004  page_cur_set_before_first(block, &cursor);
4005  page_cur_move_to_next(&cursor);
4006 
4007  node_ptr = page_cur_get_rec(&cursor);
4008  offsets = rec_get_offsets(node_ptr, index, offsets,
4009  ULINT_UNDEFINED, &heap);
4010  block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr);
4011  page = buf_block_get_frame(block);
4012  }
4013 
4014  /* Now we are on the desired level. Loop through the pages on that
4015  level. */
4016 loop:
4017  if (trx_is_interrupted(trx)) {
4018  mtr_commit(&mtr);
4019  mem_heap_free(heap);
4020  return(ret);
4021  }
4022  mem_heap_empty(heap);
4023  offsets = offsets2 = NULL;
4024  mtr_x_lock(dict_index_get_lock(index), &mtr);
4025 
4026 #ifdef UNIV_ZIP_DEBUG
4027  page_zip = buf_block_get_page_zip(block);
4028  ut_a(!page_zip || page_zip_validate(page_zip, page));
4029 #endif /* UNIV_ZIP_DEBUG */
4030 
4031  /* Check ordering etc. of records */
4032 
4033  if (!page_validate(page, index)) {
4034  btr_validate_report1(index, level, block);
4035 
4036  ret = FALSE;
4037  } else if (level == 0) {
4038  /* We are on level 0. Check that the records have the right
4039  number of fields, and field lengths are right. */
4040 
4041  if (!btr_index_page_validate(block, index)) {
4042 
4043  ret = FALSE;
4044  }
4045  }
4046 
4047  ut_a(btr_page_get_level(page, &mtr) == level);
4048 
4049  right_page_no = btr_page_get_next(page, &mtr);
4050  left_page_no = btr_page_get_prev(page, &mtr);
4051 
4052  ut_a(page_get_n_recs(page) > 0 || (level == 0
4053  && page_get_page_no(page)
4054  == dict_index_get_page(index)));
4055 
4056  if (right_page_no != FIL_NULL) {
4057  const rec_t* right_rec;
4058  right_block = btr_block_get(space, zip_size, right_page_no,
4059  RW_X_LATCH, &mtr);
4060  right_page = buf_block_get_frame(right_block);
4061  if (UNIV_UNLIKELY(btr_page_get_prev(right_page, &mtr)
4062  != page_get_page_no(page))) {
4063  btr_validate_report2(index, level, block, right_block);
4064  fputs("InnoDB: broken FIL_PAGE_NEXT"
4065  " or FIL_PAGE_PREV links\n", stderr);
4066  buf_page_print(page, 0);
4067  buf_page_print(right_page, 0);
4068 
4069  ret = FALSE;
4070  }
4071 
4072  if (UNIV_UNLIKELY(page_is_comp(right_page)
4073  != page_is_comp(page))) {
4074  btr_validate_report2(index, level, block, right_block);
4075  fputs("InnoDB: 'compact' flag mismatch\n", stderr);
4076  buf_page_print(page, 0);
4077  buf_page_print(right_page, 0);
4078 
4079  ret = FALSE;
4080 
4081  goto node_ptr_fails;
4082  }
4083 
4084  rec = page_rec_get_prev(page_get_supremum_rec(page));
4085  right_rec = page_rec_get_next(page_get_infimum_rec(
4086  right_page));
4087  offsets = rec_get_offsets(rec, index,
4088  offsets, ULINT_UNDEFINED, &heap);
4089  offsets2 = rec_get_offsets(right_rec, index,
4090  offsets2, ULINT_UNDEFINED, &heap);
4091  if (UNIV_UNLIKELY(cmp_rec_rec(rec, right_rec,
4092  offsets, offsets2,
4093  index) >= 0)) {
4094 
4095  btr_validate_report2(index, level, block, right_block);
4096 
4097  fputs("InnoDB: records in wrong order"
4098  " on adjacent pages\n", stderr);
4099 
4100  buf_page_print(page, 0);
4101  buf_page_print(right_page, 0);
4102 
4103  fputs("InnoDB: record ", stderr);
4104  rec = page_rec_get_prev(page_get_supremum_rec(page));
4105  rec_print(stderr, rec, index);
4106  putc('\n', stderr);
4107  fputs("InnoDB: record ", stderr);
4108  rec = page_rec_get_next(
4109  page_get_infimum_rec(right_page));
4110  rec_print(stderr, rec, index);
4111  putc('\n', stderr);
4112 
4113  ret = FALSE;
4114  }
4115  }
4116 
4117  if (level > 0 && left_page_no == FIL_NULL) {
4118  ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
4119  page_rec_get_next(page_get_infimum_rec(page)),
4120  page_is_comp(page)));
4121  }
4122 
4123  if (buf_block_get_page_no(block) != dict_index_get_page(index)) {
4124 
4125  /* Check father node pointers */
4126 
4127  rec_t* node_ptr;
4128 
4129  offsets = btr_page_get_father_block(offsets, heap, index,
4130  block, &mtr, &node_cur);
4131  father_page = btr_cur_get_page(&node_cur);
4132  node_ptr = btr_cur_get_rec(&node_cur);
4133 
4135  index, page_rec_get_prev(page_get_supremum_rec(page)),
4136  block, &node_cur);
4137  offsets = btr_page_get_father_node_ptr(offsets, heap,
4138  &node_cur, &mtr);
4139 
4140  if (UNIV_UNLIKELY(node_ptr != btr_cur_get_rec(&node_cur))
4141  || UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr,
4142  offsets)
4143  != buf_block_get_page_no(block))) {
4144 
4145  btr_validate_report1(index, level, block);
4146 
4147  fputs("InnoDB: node pointer to the page is wrong\n",
4148  stderr);
4149 
4150  buf_page_print(father_page, 0);
4151  buf_page_print(page, 0);
4152 
4153  fputs("InnoDB: node ptr ", stderr);
4154  rec_print(stderr, node_ptr, index);
4155 
4156  rec = btr_cur_get_rec(&node_cur);
4157  fprintf(stderr, "\n"
4158  "InnoDB: node ptr child page n:o %lu\n",
4160  rec, offsets));
4161 
4162  fputs("InnoDB: record on page ", stderr);
4163  rec_print_new(stderr, rec, offsets);
4164  putc('\n', stderr);
4165  ret = FALSE;
4166 
4167  goto node_ptr_fails;
4168  }
4169 
4170  if (!page_is_leaf(page)) {
4171  node_ptr_tuple = dict_index_build_node_ptr(
4172  index,
4173  page_rec_get_next(page_get_infimum_rec(page)),
4174  0, heap, btr_page_get_level(page, &mtr));
4175 
4176  if (cmp_dtuple_rec(node_ptr_tuple, node_ptr,
4177  offsets)) {
4178  const rec_t* first_rec = page_rec_get_next(
4179  page_get_infimum_rec(page));
4180 
4181  btr_validate_report1(index, level, block);
4182 
4183  buf_page_print(father_page, 0);
4184  buf_page_print(page, 0);
4185 
4186  fputs("InnoDB: Error: node ptrs differ"
4187  " on levels > 0\n"
4188  "InnoDB: node ptr ", stderr);
4189  rec_print_new(stderr, node_ptr, offsets);
4190  fputs("InnoDB: first rec ", stderr);
4191  rec_print(stderr, first_rec, index);
4192  putc('\n', stderr);
4193  ret = FALSE;
4194 
4195  goto node_ptr_fails;
4196  }
4197  }
4198 
4199  if (left_page_no == FIL_NULL) {
4200  ut_a(node_ptr == page_rec_get_next(
4201  page_get_infimum_rec(father_page)));
4202  ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL);
4203  }
4204 
4205  if (right_page_no == FIL_NULL) {
4206  ut_a(node_ptr == page_rec_get_prev(
4207  page_get_supremum_rec(father_page)));
4208  ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL);
4209  } else {
4210  const rec_t* right_node_ptr
4211  = page_rec_get_next(node_ptr);
4212 
4213  offsets = btr_page_get_father_block(
4214  offsets, heap, index, right_block,
4215  &mtr, &right_node_cur);
4216  if (right_node_ptr
4217  != page_get_supremum_rec(father_page)) {
4218 
4219  if (btr_cur_get_rec(&right_node_cur)
4220  != right_node_ptr) {
4221  ret = FALSE;
4222  fputs("InnoDB: node pointer to"
4223  " the right page is wrong\n",
4224  stderr);
4225 
4226  btr_validate_report1(index, level,
4227  block);
4228 
4229  buf_page_print(father_page, 0);
4230  buf_page_print(page, 0);
4231  buf_page_print(right_page, 0);
4232  }
4233  } else {
4234  page_t* right_father_page
4235  = btr_cur_get_page(&right_node_cur);
4236 
4237  if (btr_cur_get_rec(&right_node_cur)
4238  != page_rec_get_next(
4239  page_get_infimum_rec(
4240  right_father_page))) {
4241  ret = FALSE;
4242  fputs("InnoDB: node pointer 2 to"
4243  " the right page is wrong\n",
4244  stderr);
4245 
4246  btr_validate_report1(index, level,
4247  block);
4248 
4249  buf_page_print(father_page, 0);
4250  buf_page_print(right_father_page, 0);
4251  buf_page_print(page, 0);
4252  buf_page_print(right_page, 0);
4253  }
4254 
4255  if (page_get_page_no(right_father_page)
4256  != btr_page_get_next(father_page, &mtr)) {
4257 
4258  ret = FALSE;
4259  fputs("InnoDB: node pointer 3 to"
4260  " the right page is wrong\n",
4261  stderr);
4262 
4263  btr_validate_report1(index, level,
4264  block);
4265 
4266  buf_page_print(father_page, 0);
4267  buf_page_print(right_father_page, 0);
4268  buf_page_print(page, 0);
4269  buf_page_print(right_page, 0);
4270  }
4271  }
4272  }
4273  }
4274 
4275 node_ptr_fails:
4276  /* Commit the mini-transaction to release the latch on 'page'.
4277  Re-acquire the latch on right_page, which will become 'page'
4278  on the next loop. The page has already been checked. */
4279  mtr_commit(&mtr);
4280 
4281  if (right_page_no != FIL_NULL) {
4282  mtr_start(&mtr);
4283 
4284  block = btr_block_get(space, zip_size, right_page_no,
4285  RW_X_LATCH, &mtr);
4286  page = buf_block_get_frame(block);
4287 
4288  goto loop;
4289  }
4290 
4291  mem_heap_free(heap);
4292  return(ret);
4293 }
4294 
4295 /**************************************************************/
4298 UNIV_INTERN
4299 ibool
4300 btr_validate_index(
4301 /*===============*/
4302  dict_index_t* index,
4303  trx_t* trx)
4304 {
4305  mtr_t mtr;
4306  page_t* root;
4307  ulint i;
4308  ulint n;
4309 
4310  mtr_start(&mtr);
4311  mtr_x_lock(dict_index_get_lock(index), &mtr);
4312 
4313  root = btr_root_get(index, &mtr);
4314  n = btr_page_get_level(root, &mtr);
4315 
4316  for (i = 0; i <= n && !trx_is_interrupted(trx); i++) {
4317  if (!btr_validate_level(index, trx, n - i)) {
4318 
4319  mtr_commit(&mtr);
4320 
4321  return(FALSE);
4322  }
4323  }
4324 
4325  mtr_commit(&mtr);
4326 
4327  return(TRUE);
4328 }
4329 #endif /* !UNIV_HOTBACKUP */