Drizzled Public API Documentation

buf0flu.cc
1 /*****************************************************************************
2 
3 Copyright (C) 1995, 2010, Innobase Oy. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15 St, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 *****************************************************************************/
18 
19 /**************************************************//**
20 @file buf/buf0flu.cc
21 The database buffer buf_pool flush algorithm
22 
23 Created 11/11/1995 Heikki Tuuri
24 *******************************************************/
25 
26 #include "buf0flu.h"
27 
28 #ifdef UNIV_NONINL
29 #include "buf0flu.ic"
30 #endif
31 
32 #include "buf0buf.h"
33 #include "srv0srv.h"
34 #include "page0zip.h"
35 #ifndef UNIV_HOTBACKUP
36 #include "ut0byte.h"
37 #include "ut0lst.h"
38 #include "page0page.h"
39 #include "fil0fil.h"
40 #include "buf0lru.h"
41 #include "buf0rea.h"
42 #include "ibuf0ibuf.h"
43 #include "log0log.h"
44 #include "os0file.h"
45 #include "trx0sys.h"
46 
47 /**********************************************************************
48 These statistics are generated for heuristics used in estimating the
49 rate at which we should flush the dirty blocks to avoid bursty IO
50 activity. Note that the rate of flushing not only depends on how many
51 dirty pages we have in the buffer pool but it is also a function of
52 how much redo the workload is generating and at what rate. */
53 /* @{ */
54 
58 #define BUF_FLUSH_STAT_N_INTERVAL 20
59 
62 static buf_flush_stat_t buf_flush_stat_arr[BUF_FLUSH_STAT_N_INTERVAL];
63 
65 static ulint buf_flush_stat_arr_ind;
66 
69 static buf_flush_stat_t buf_flush_stat_cur;
70 
73 static buf_flush_stat_t buf_flush_stat_sum;
74 
76 static ulint buf_lru_flush_page_count = 0;
77 
78 /* @} */
79 
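These counters form a fixed-size circular window: buf_flush_stat_update() (later in this file) adds each interval's delta to a running sum while subtracting the slot it overwrites. A minimal standalone sketch of that update pattern, with illustrative names that are not part of buf0flu.cc:

#include <stdio.h>

#define N_INTERVAL 20

typedef struct { unsigned long redo; unsigned long n_flushed; } stat_t;

static stat_t arr[N_INTERVAL]; /* one slot per past interval */
static stat_t sum;             /* running sum over the whole window */
static unsigned long ind;      /* next slot to overwrite */

/* Record one interval's deltas and keep the window sum current. */
static void stat_update(unsigned long redo_delta, unsigned long flush_delta)
{
	stat_t *item = &arr[ind];

	/* Add the new deltas, subtract the slot being evicted. */
	sum.redo += redo_delta - item->redo;
	sum.n_flushed += flush_delta - item->n_flushed;

	item->redo = redo_delta;
	item->n_flushed = flush_delta;

	ind = (ind + 1) % N_INTERVAL;
}

int main(void)
{
	for (int i = 0; i < 25; i++) {
		stat_update(100 + i, 10);
	}
	/* Average redo per interval over the last N_INTERVAL samples. */
	printf("avg redo = %lu\n", sum.redo / N_INTERVAL);
	return 0;
}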
80 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
81 /******************************************************************/
84 static
85 ibool
86 buf_flush_validate_low(
87 /*===================*/
88  buf_pool_t* buf_pool);
90 /******************************************************************/
93 static
94 ibool
95 buf_flush_validate_skip(
96 /*====================*/
97  buf_pool_t* buf_pool)
98 {
100 # define BUF_FLUSH_VALIDATE_SKIP 23
101 
104  static int buf_flush_validate_count = BUF_FLUSH_VALIDATE_SKIP;
105 
106  /* There is a race condition below, but it does not matter,
107  because this call is only for heuristic purposes. We want to
108  reduce the call frequency of the costly buf_flush_validate_low()
109  check in debug builds. */
110  if (--buf_flush_validate_count > 0) {
111  return(TRUE);
112  }
113 
114  buf_flush_validate_count = BUF_FLUSH_VALIDATE_SKIP;
115  return(buf_flush_validate_low(buf_pool));
116 }
117 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
118 
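buf_flush_validate_skip() above amortizes the costly buf_flush_validate_low() by running it only once per BUF_FLUSH_VALIDATE_SKIP calls; the unsynchronized static counter is tolerable precisely because the check is heuristic. The same sampling pattern in isolation (a standalone sketch, not InnoDB code):

#include <stdio.h>

#define SKIP 23

static int n_expensive_runs;

/* Stand-in for the costly invariant check. */
static int expensive_validate(void)
{
	n_expensive_runs++;
	return 1;
}

/* Run the expensive check only on every SKIP-th call. */
static int validate_sampled(void)
{
	static int countdown = SKIP;

	if (--countdown > 0) {
		return 1;               /* skipped: assume OK */
	}
	countdown = SKIP;
	return expensive_validate();
}

int main(void)
{
	for (int i = 0; i < 100; i++) {
		validate_sampled();
	}
	/* Fires on calls 23, 46, 69 and 92: 4 runs out of 100. */
	printf("expensive check ran %d times\n", n_expensive_runs);
	return 0;
}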
119 /******************************************************************/
124 static
125 buf_page_t*
126 buf_flush_insert_in_flush_rbt(
127 /*==========================*/
128  buf_page_t* bpage)
129 {
130  const ib_rbt_node_t* c_node;
131  const ib_rbt_node_t* p_node;
132  buf_page_t* prev = NULL;
133  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
134 
135  ut_ad(buf_flush_list_mutex_own(buf_pool));
136 
137  /* Insert this buffer into the rbt. */
138  c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
139  ut_a(c_node != NULL);
140 
141  /* Get the predecessor. */
142  p_node = rbt_prev(buf_pool->flush_rbt, c_node);
143 
144  if (p_node != NULL) {
145  buf_page_t** value;
146  value = rbt_value(buf_page_t*, p_node);
147  prev = *value;
148  ut_a(prev != NULL);
149  }
150 
151  return(prev);
152 }
153 
154 /*********************************************************/
156 static
157 void
158 buf_flush_delete_from_flush_rbt(
159 /*============================*/
160  buf_page_t* bpage)
161 {
162 #ifdef UNIV_DEBUG
163  ibool ret = FALSE;
164 #endif /* UNIV_DEBUG */
165  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
166 
167  ut_ad(buf_flush_list_mutex_own(buf_pool));
168 
169 #ifdef UNIV_DEBUG
170  ret =
171 #endif /* UNIV_DEBUG */
172  rbt_delete(buf_pool->flush_rbt, &bpage);
173  ut_ad(ret);
174 }
175 
176 /*****************************************************************/
186 static
187 int
188 buf_flush_block_cmp(
189 /*================*/
190  const void* p1,
191  const void* p2)
192 {
193  int ret;
194  const buf_page_t* b1 = *(const buf_page_t**) p1;
195  const buf_page_t* b2 = *(const buf_page_t**) p2;
196 #ifdef UNIV_DEBUG
197  buf_pool_t* buf_pool = buf_pool_from_bpage(b1);
198 #endif /* UNIV_DEBUG */
199 
200  ut_ad(b1 != NULL);
201  ut_ad(b2 != NULL);
202 
203  ut_ad(buf_flush_list_mutex_own(buf_pool));
204 
205  ut_ad(b1->in_flush_list);
206  ut_ad(b2->in_flush_list);
207 
208  if (b2->oldest_modification > b1->oldest_modification) {
209  return(1);
210  } else if (b2->oldest_modification < b1->oldest_modification) {
211  return(-1);
212  }
213 
214  /* If oldest_modification is same then decide on the space. */
215  ret = (int)(b2->space - b1->space);
216 
217  /* Or else decide ordering on the offset field. */
218  return(ret ? ret : (int)(b2->offset - b1->offset));
219 }
220 
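buf_flush_block_cmp() keys on oldest_modification first (newest first, matching the flush list order) and falls back to space id and then page offset so that equal-LSN blocks still have a total order, which the red-black tree requires. The same three-level comparison, usable with qsort() on a simplified page struct (illustrative names only):

#include <stdio.h>
#include <stdlib.h>

typedef struct {
	unsigned long long oldest_modification; /* LSN of oldest change */
	unsigned long space;                    /* tablespace id */
	unsigned long offset;                   /* page number */
} page_t;

static int page_cmp(const void *p1, const void *p2)
{
	const page_t *b1 = (const page_t *) p1;
	const page_t *b2 = (const page_t *) p2;

	if (b2->oldest_modification > b1->oldest_modification) {
		return 1;
	} else if (b2->oldest_modification < b1->oldest_modification) {
		return -1;
	}
	/* Equal LSNs: break the tie on space id, then page offset,
	   avoiding the truncating subtraction the original uses. */
	if (b1->space != b2->space) {
		return (b2->space > b1->space) ? 1 : -1;
	}
	if (b1->offset != b2->offset) {
		return (b2->offset > b1->offset) ? 1 : -1;
	}
	return 0;
}

int main(void)
{
	page_t pages[] = {
		{100, 0, 7}, {300, 1, 4}, {100, 0, 2}, {200, 2, 9},
	};
	qsort(pages, 4, sizeof(pages[0]), page_cmp);

	for (int i = 0; i < 4; i++) {   /* prints LSNs 300, 200, 100, 100 */
		printf("lsn=%llu space=%lu page=%lu\n",
		       pages[i].oldest_modification,
		       pages[i].space, pages[i].offset);
	}
	return 0;
}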
221 /********************************************************************/
225 UNIV_INTERN
226 void
227 buf_flush_init_flush_rbt(void)
228 /*==========================*/
229 {
230  ulint i;
231 
232  for (i = 0; i < srv_buf_pool_instances; i++) {
233  buf_pool_t* buf_pool;
234 
235  buf_pool = buf_pool_from_array(i);
236 
237  buf_flush_list_mutex_enter(buf_pool);
238 
239  /* Create red black tree for speedy insertions in flush list. */
240  buf_pool->flush_rbt = rbt_create(
241  sizeof(buf_page_t*), buf_flush_block_cmp);
242 
243  buf_flush_list_mutex_exit(buf_pool);
244  }
245 }
246 
247 /********************************************************************/
249 UNIV_INTERN
250 void
251 buf_flush_free_flush_rbt(void)
252 /*==========================*/
253 {
254  ulint i;
255 
256  for (i = 0; i < srv_buf_pool_instances; i++) {
257  buf_pool_t* buf_pool;
258 
259  buf_pool = buf_pool_from_array(i);
260 
261  buf_flush_list_mutex_enter(buf_pool);
262 
274 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
275  ut_a(buf_flush_validate_low(buf_pool));
276 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
277 
278  rbt_free(buf_pool->flush_rbt);
279  buf_pool->flush_rbt = NULL;
280 
281  buf_flush_list_mutex_exit(buf_pool);
282  }
283 }
284 
285 /********************************************************************/
287 UNIV_INTERN
288 void
289 buf_flush_insert_into_flush_list(
290 /*=============================*/
291  buf_pool_t* buf_pool,
292  buf_block_t* block,
293  ib_uint64_t lsn)
294 {
295  ut_ad(!buf_pool_mutex_own(buf_pool));
296  ut_ad(log_flush_order_mutex_own());
297  ut_ad(mutex_own(&block->mutex));
298 
299  buf_flush_list_mutex_enter(buf_pool);
300 
301  ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
302  || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
303  <= lsn));
304 
305  /* If we are in the recovery then we need to update the flush
306  red-black tree as well. */
307  if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
308  buf_flush_list_mutex_exit(buf_pool);
309  buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
310  return;
311  }
312 
313  ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
314  ut_ad(!block->page.in_flush_list);
315 
316  ut_d(block->page.in_flush_list = TRUE);
317  block->page.oldest_modification = lsn;
318  UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
319 
320 #ifdef UNIV_DEBUG_VALGRIND
321  {
322  ulint zip_size = buf_block_get_zip_size(block);
323 
324  if (UNIV_UNLIKELY(zip_size)) {
325  UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
326  } else {
327  UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
328  }
329  }
330 #endif /* UNIV_DEBUG_VALGRIND */
331 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
332  ut_a(buf_flush_validate_skip(buf_pool));
333 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
334 
335  buf_flush_list_mutex_exit(buf_pool);
336 }
337 
338 /********************************************************************/
342 UNIV_INTERN
343 void
344 buf_flush_insert_sorted_into_flush_list(
345 /*====================================*/
346  buf_pool_t* buf_pool,
347  buf_block_t* block,
348  ib_uint64_t lsn)
349 {
350  buf_page_t* prev_b;
351  buf_page_t* b;
352 
353  ut_ad(!buf_pool_mutex_own(buf_pool));
354  ut_ad(log_flush_order_mutex_own());
355  ut_ad(mutex_own(&block->mutex));
356  ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
357 
358  buf_flush_list_mutex_enter(buf_pool);
359 
360  /* The field in_LRU_list is protected by buf_pool->mutex, which
361  we are not holding. However, while a block is in the flush
362  list, it is dirty and cannot be discarded, neither from the
363  page_hash nor from the LRU list. At most, the uncompressed
364  page frame of a compressed block may be discarded or created
365  (copying the block->page to or from a buf_page_t that is
366  dynamically allocated from buf_buddy_alloc()). Because those
367  transitions hold block->mutex and the flush list mutex (via
368  buf_flush_relocate_on_flush_list()), there is no possibility
369  of a race condition in the assertions below. */
370  ut_ad(block->page.in_LRU_list);
371  ut_ad(block->page.in_page_hash);
372  /* buf_buddy_block_register() will take a block in the
373  BUF_BLOCK_MEMORY state, not a file page. */
374  ut_ad(!block->page.in_zip_hash);
375 
376  ut_ad(!block->page.in_flush_list);
377  ut_d(block->page.in_flush_list = TRUE);
378  block->page.oldest_modification = lsn;
379 
380 #ifdef UNIV_DEBUG_VALGRIND
381  {
382  ulint zip_size = buf_block_get_zip_size(block);
383 
384  if (UNIV_UNLIKELY(zip_size)) {
385  UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
386  } else {
387  UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
388  }
389  }
390 #endif /* UNIV_DEBUG_VALGRIND */
391 
404  prev_b = NULL;
405 
406  /* For the most part when this function is called the flush_rbt
407  should not be NULL. In a very rare boundary case it is possible
408  that the flush_rbt has already been freed by the recovery thread
409  before the last page was hooked up in the flush_list by the
410  io-handler thread. In that case we'll just do a simple
411  linear search in the else block. */
412  if (buf_pool->flush_rbt) {
413 
414  prev_b = buf_flush_insert_in_flush_rbt(&block->page);
415 
416  } else {
417 
418  b = UT_LIST_GET_FIRST(buf_pool->flush_list);
419 
420  while (b && b->oldest_modification
421  > block->page.oldest_modification) {
422  ut_ad(b->in_flush_list);
423  prev_b = b;
424  b = UT_LIST_GET_NEXT(list, b);
425  }
426  }
427 
428  if (prev_b == NULL) {
429  UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
430  } else {
431  UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
432  prev_b, &block->page);
433  }
434 
435 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
436  ut_a(buf_flush_validate_low(buf_pool));
437 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
438 
439  buf_flush_list_mutex_exit(buf_pool);
440 }
441 
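In the rare case where the flush_rbt has already been freed, the code above degrades to a linear walk from the list head until it has passed every block with a larger LSN. A standalone miniature of that fallback over a plain singly linked list (illustrative, not the UT_LIST implementation):

#include <stdio.h>

typedef struct node {
	unsigned long long lsn;  /* oldest_modification */
	struct node *next;
} node_t;

/* Insert keeping the list sorted by lsn, descending from the head. */
static node_t *sorted_insert(node_t *head, node_t *n)
{
	node_t *prev = NULL;
	node_t *b = head;

	/* Walk past every node whose lsn is larger than ours. */
	while (b && b->lsn > n->lsn) {
		prev = b;
		b = b->next;
	}

	if (prev == NULL) {          /* becomes the new head */
		n->next = head;
		return n;
	}
	n->next = prev->next;        /* insert after prev */
	prev->next = n;
	return head;
}

int main(void)
{
	node_t a = {300, NULL}, b = {100, NULL}, c = {200, NULL};
	node_t *head = NULL;

	head = sorted_insert(head, &a);
	head = sorted_insert(head, &b);
	head = sorted_insert(head, &c);

	for (node_t *p = head; p; p = p->next) {
		printf("%llu\n", p->lsn); /* prints 300 200 100 */
	}
	return 0;
}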
442 /********************************************************************/
446 UNIV_INTERN
447 ibool
448 buf_flush_ready_for_replace(
449 /*========================*/
450  buf_page_t* bpage)
452 {
453 #ifdef UNIV_DEBUG
454  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
455  ut_ad(buf_pool_mutex_own(buf_pool));
456 #endif
457  ut_ad(mutex_own(buf_page_get_mutex(bpage)));
458  ut_ad(bpage->in_LRU_list);
459 
460  if (UNIV_LIKELY(buf_page_in_file(bpage))) {
461 
462  return(bpage->oldest_modification == 0
463  && buf_page_get_io_fix(bpage) == BUF_IO_NONE
464  && bpage->buf_fix_count == 0);
465  }
466 
467  ut_print_timestamp(stderr);
468  fprintf(stderr,
469  " InnoDB: Error: buffer block state %lu"
470  " in the LRU list!\n",
471  (ulong) buf_page_get_state(bpage));
472  ut_print_buf(stderr, bpage, sizeof(buf_page_t));
473  putc('\n', stderr);
474 
475  return(FALSE);
476 }
477 
478 /********************************************************************/
481 UNIV_INLINE
482 ibool
483 buf_flush_ready_for_flush(
484 /*======================*/
485  buf_page_t* bpage,
487  enum buf_flush flush_type)
488 {
489 #ifdef UNIV_DEBUG
490  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
491  ut_ad(buf_pool_mutex_own(buf_pool));
492 #endif
493  ut_a(buf_page_in_file(bpage));
494  ut_ad(mutex_own(buf_page_get_mutex(bpage)));
495  ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
496 
497  if (bpage->oldest_modification != 0
498  && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
499  ut_ad(bpage->in_flush_list);
500 
501  if (flush_type != BUF_FLUSH_LRU) {
502 
503  return(TRUE);
504 
505  } else if (bpage->buf_fix_count == 0) {
506 
507  /* If we are flushing the LRU list, to avoid deadlocks
508  we require the block not to be bufferfixed, and hence
509  not latched. */
510 
511  return(TRUE);
512  }
513  }
514 
515  return(FALSE);
516 }
517 
518 /********************************************************************/
520 UNIV_INTERN
521 void
522 buf_flush_remove(
523 /*=============*/
524  buf_page_t* bpage)
525 {
526  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
527 
528  ut_ad(buf_pool_mutex_own(buf_pool));
529  ut_ad(mutex_own(buf_page_get_mutex(bpage)));
530  ut_ad(bpage->in_flush_list);
531 
532  buf_flush_list_mutex_enter(buf_pool);
533 
534  switch (buf_page_get_state(bpage)) {
535  case BUF_BLOCK_ZIP_PAGE:
536  /* Clean compressed pages should not be on the flush list */
537  case BUF_BLOCK_ZIP_FREE:
538  case BUF_BLOCK_NOT_USED:
539  case BUF_BLOCK_READY_FOR_USE:
540  case BUF_BLOCK_MEMORY:
541  case BUF_BLOCK_REMOVE_HASH:
542  ut_error;
543  return;
544  case BUF_BLOCK_ZIP_DIRTY:
545  buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
546  UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
547  buf_LRU_insert_zip_clean(bpage);
548  break;
549  case BUF_BLOCK_FILE_PAGE:
550  UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
551  break;
552  }
553 
554  /* If the flush_rbt is active then delete from there as well. */
555  if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
556  buf_flush_delete_from_flush_rbt(bpage);
557  }
558 
559  /* Must be done after we have removed it from the flush_rbt
560  because we assert on in_flush_list in comparison function. */
561  ut_d(bpage->in_flush_list = FALSE);
562 
563  bpage->oldest_modification = 0;
564 
565 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
566  ut_a(buf_flush_validate_skip(buf_pool));
567 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
568 
569  buf_flush_list_mutex_exit(buf_pool);
570 }
571 
572 /*******************************************************************/
583 UNIV_INTERN
584 void
585 buf_flush_relocate_on_flush_list(
586 /*=============================*/
587  buf_page_t* bpage,
588  buf_page_t* dpage)
589 {
590  buf_page_t* prev;
591  buf_page_t* prev_b = NULL;
592  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
593 
594  ut_ad(buf_pool_mutex_own(buf_pool));
595  /* Must reside in the same buffer pool. */
596  ut_ad(buf_pool == buf_pool_from_bpage(dpage));
597 
598  ut_ad(mutex_own(buf_page_get_mutex(bpage)));
599 
600  buf_flush_list_mutex_enter(buf_pool);
601 
602  /* FIXME: At this point we have both buf_pool and flush_list
603  mutexes. Theoretically removal of a block from flush list is
604  only covered by flush_list mutex but currently we do
605  have buf_pool mutex in buf_flush_remove() therefore this block
606  is guaranteed to be in the flush list. We need to check if
607  this will work without the assumption of block removing code
608  having the buf_pool mutex. */
609  ut_ad(bpage->in_flush_list);
610  ut_ad(dpage->in_flush_list);
611 
612  /* If recovery is active we must swap the control blocks in
613  the flush_rbt as well. */
614  if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
615  buf_flush_delete_from_flush_rbt(bpage);
616  prev_b = buf_flush_insert_in_flush_rbt(dpage);
617  }
618 
619  /* Must be done after we have removed it from the flush_rbt
620  because we assert on in_flush_list in comparison function. */
621  ut_d(bpage->in_flush_list = FALSE);
622 
623  prev = UT_LIST_GET_PREV(list, bpage);
624  UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
625 
626  if (prev) {
627  ut_ad(prev->in_flush_list);
628  UT_LIST_INSERT_AFTER(
629  list,
630  buf_pool->flush_list,
631  prev, dpage);
632  } else {
633  UT_LIST_ADD_FIRST(
634  list,
635  buf_pool->flush_list,
636  dpage);
637  }
638 
639  /* Just an extra check. Previous in flush_list
640  should be the same control block as in flush_rbt. */
641  ut_a(!buf_pool->flush_rbt || prev_b == prev);
642 
643 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
644  ut_a(buf_flush_validate_low(buf_pool));
645 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
646 
647  buf_flush_list_mutex_exit(buf_pool);
648 }
649 
650 /********************************************************************/
652 UNIV_INTERN
653 void
654 buf_flush_write_complete(
655 /*=====================*/
656  buf_page_t* bpage)
657 {
658  enum buf_flush flush_type;
659  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
660 
661  ut_ad(bpage);
662 
663  buf_flush_remove(bpage);
664 
665  flush_type = buf_page_get_flush_type(bpage);
666  buf_pool->n_flush[flush_type]--;
667 
668  if (flush_type == BUF_FLUSH_LRU) {
669  /* Put the block to the end of the LRU list to wait to be
670  moved to the free list */
671 
672  buf_LRU_make_block_old(bpage);
673 
674  buf_pool->LRU_flush_ended++;
675  }
676 
677  /* fprintf(stderr, "n pending flush %lu\n",
678  buf_pool->n_flush[flush_type]); */
679 
680  if (buf_pool->n_flush[flush_type] == 0
681  && buf_pool->init_flush[flush_type] == FALSE) {
682 
683  /* The running flush batch has ended */
684 
685  os_event_set(buf_pool->no_flush[flush_type]);
686  }
687 }
688 
689 /********************************************************************/
692 static
693 void
694 buf_flush_sync_datafiles(void)
695 /*==========================*/
696 {
697  /* Wake possible simulated aio thread to actually post the
698  writes to the operating system */
699  os_aio_simulated_wake_handler_threads();
700 
701  /* Wait that all async writes to tablespaces have been posted to
702  the OS */
703  os_aio_wait_until_no_pending_writes();
704 
705  /* Now we flush the data to disk (for example, with fsync) */
706  fil_flush_file_spaces(FIL_TABLESPACE);
707 
708  return;
709 }
710 
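The sequence above (wake the simulated AIO handlers, wait for pending writes to reach the OS, then flush) exists because a completed write() only guarantees the data is in the OS cache. A POSIX sketch of the final flush step (a simplified stand-in; InnoDB routes this through fil_flush_file_spaces()):

#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

/* Write a buffer and make it durable before the caller reuses it. */
static int write_durably(int fd, const void *buf, size_t len, off_t off)
{
	if (pwrite(fd, buf, len, off) != (ssize_t) len) {
		return -1;              /* short write or error */
	}
	/* Without this, the data may still sit in the OS page cache. */
	return fsync(fd);
}

int main(void)
{
	char page[512] = {0};
	int fd = open("demo.dat", O_CREAT | O_WRONLY, 0644);

	if (fd < 0 || write_durably(fd, page, sizeof(page), 0) != 0) {
		perror("write_durably");
		return 1;
	}
	return close(fd);
}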
711 /********************************************************************/
717 static
718 void
719 buf_flush_buffered_writes(void)
720 /*===========================*/
721 {
722  byte* write_buf;
723  ulint len;
724  ulint len2;
725  ulint i;
726 
727  if (!srv_use_doublewrite_buf || trx_doublewrite == NULL) {
728  /* Sync the writes to the disk. */
729  buf_flush_sync_datafiles();
730  return;
731  }
732 
733  mutex_enter(&(trx_doublewrite->mutex));
734 
735  /* Write first to doublewrite buffer blocks. We use synchronous
736  aio and thus know that file write has been completed when the
737  control returns. */
738 
739  if (trx_doublewrite->first_free == 0) {
740 
741  mutex_exit(&(trx_doublewrite->mutex));
742 
743  return;
744  }
745 
746  for (i = 0; i < trx_doublewrite->first_free; i++) {
747 
748  const buf_block_t* block;
749 
750  block = (buf_block_t*) trx_doublewrite->buf_block_arr[i];
751 
752  if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE
753  || block->page.zip.data) {
754  /* No simple validate for compressed pages exists. */
755  continue;
756  }
757 
758  if (UNIV_UNLIKELY
759  (memcmp(block->frame + (FIL_PAGE_LSN + 4),
760  block->frame + (UNIV_PAGE_SIZE
761  - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
762  4))) {
763  ut_print_timestamp(stderr);
764  fprintf(stderr,
765  " InnoDB: ERROR: The page to be written"
766  " seems corrupt!\n"
767  "InnoDB: The lsn fields do not match!"
768  " Noticed in the buffer pool\n"
769  "InnoDB: before posting to the"
770  " doublewrite buffer.\n");
771  }
772 
773  if (!block->check_index_page_at_flush) {
774  } else if (page_is_comp(block->frame)) {
775  if (UNIV_UNLIKELY
776  (!page_simple_validate_new(block->frame))) {
777 corrupted_page:
778  buf_page_print(block->frame, 0);
779 
780  ut_print_timestamp(stderr);
781  fprintf(stderr,
782  " InnoDB: Apparent corruption of an"
783  " index page n:o %lu in space %lu\n"
784  "InnoDB: to be written to data file."
785  " We intentionally crash server\n"
786  "InnoDB: to prevent corrupt data"
787  " from ending up in data\n"
788  "InnoDB: files.\n",
789  (ulong) buf_block_get_page_no(block),
790  (ulong) buf_block_get_space(block));
791 
792  ut_error;
793  }
794  } else if (UNIV_UNLIKELY
795  (!page_simple_validate_old(block->frame))) {
796 
797  goto corrupted_page;
798  }
799  }
800 
801  /* increment the doublewrite flushed pages counter */
802  srv_dblwr_pages_written+= trx_doublewrite->first_free;
803  srv_dblwr_writes++;
804 
805  len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
806  trx_doublewrite->first_free) * UNIV_PAGE_SIZE;
807 
808  write_buf = trx_doublewrite->write_buf;
809  i = 0;
810 
811  fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
812  trx_doublewrite->block1, 0, len,
813  (void*) write_buf, NULL);
814 
815  for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
816  len2 += UNIV_PAGE_SIZE, i++) {
817  const buf_block_t* block = (buf_block_t*)
818  trx_doublewrite->buf_block_arr[i];
819 
820  if (UNIV_LIKELY(!block->page.zip.data)
821  && UNIV_LIKELY(buf_block_get_state(block)
822  == BUF_BLOCK_FILE_PAGE)
823  && UNIV_UNLIKELY
824  (memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
825  write_buf + len2
826  + (UNIV_PAGE_SIZE
827  - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
828  ut_print_timestamp(stderr);
829  fprintf(stderr,
830  " InnoDB: ERROR: The page to be written"
831  " seems corrupt!\n"
832  "InnoDB: The lsn fields do not match!"
833  " Noticed in the doublewrite block1.\n");
834  }
835  }
836 
837  if (trx_doublewrite->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
838  goto flush;
839  }
840 
841  len = (trx_doublewrite->first_free - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
842  * UNIV_PAGE_SIZE;
843 
844  write_buf = trx_doublewrite->write_buf
845  + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
846  ut_ad(i == TRX_SYS_DOUBLEWRITE_BLOCK_SIZE);
847 
848  fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
849  trx_doublewrite->block2, 0, len,
850  (void*) write_buf, NULL);
851 
852  for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
853  len2 += UNIV_PAGE_SIZE, i++) {
854  const buf_block_t* block = (buf_block_t*)
855  trx_doublewrite->buf_block_arr[i];
856 
857  if (UNIV_LIKELY(!block->page.zip.data)
858  && UNIV_LIKELY(buf_block_get_state(block)
859  == BUF_BLOCK_FILE_PAGE)
860  && UNIV_UNLIKELY
861  (memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
862  write_buf + len2
863  + (UNIV_PAGE_SIZE
864  - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) {
865  ut_print_timestamp(stderr);
866  fprintf(stderr,
867  " InnoDB: ERROR: The page to be"
868  " written seems corrupt!\n"
869  "InnoDB: The lsn fields do not match!"
870  " Noticed in"
871  " the doublewrite block2.\n");
872  }
873  }
874 
875 flush:
876  /* Now flush the doublewrite buffer data to disk */
877 
878  fil_flush(TRX_SYS_SPACE);
879 
880  /* We know that the writes have been flushed to disk now
881  and in recovery we will find them in the doublewrite buffer
882  blocks. Next do the writes to the intended positions. */
883 
884  for (i = 0; i < trx_doublewrite->first_free; i++) {
885  const buf_block_t* block = (buf_block_t*)
886  trx_doublewrite->buf_block_arr[i];
887 
888  ut_a(buf_page_in_file(&block->page));
889  if (UNIV_LIKELY_NULL(block->page.zip.data)) {
890  fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
891  FALSE, buf_page_get_space(&block->page),
892  buf_page_get_zip_size(&block->page),
893  buf_page_get_page_no(&block->page), 0,
894  buf_page_get_zip_size(&block->page),
895  (void*)block->page.zip.data,
896  (void*)block);
897 
898  /* Increment the counter of I/O operations used
899  for selecting LRU policy. */
900  buf_LRU_stat_inc_io();
901 
902  continue;
903  }
904 
905  ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
906 
907  if (UNIV_UNLIKELY(memcmp(block->frame + (FIL_PAGE_LSN + 4),
908  block->frame
909  + (UNIV_PAGE_SIZE
910  - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
911  4))) {
912  ut_print_timestamp(stderr);
913  fprintf(stderr,
914  " InnoDB: ERROR: The page to be written"
915  " seems corrupt!\n"
916  "InnoDB: The lsn fields do not match!"
917  " Noticed in the buffer pool\n"
918  "InnoDB: after posting and flushing"
919  " the doublewrite buffer.\n"
920  "InnoDB: Page buf fix count %lu,"
921  " io fix %lu, state %lu\n",
922  (ulong)block->page.buf_fix_count,
923  (ulong)buf_block_get_io_fix(block),
924  (ulong)buf_block_get_state(block));
925  }
926 
927  fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
928  FALSE, buf_block_get_space(block), 0,
929  buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
930  (void*)block->frame, (void*)block);
931 
932  /* Increment the counter of I/O operations used
933  for selecting LRU policy. */
934  buf_LRU_stat_inc_io();
935  }
936 
937  /* Sync the writes to the disk. */
938  buf_flush_sync_datafiles();
939 
940  /* We can now reuse the doublewrite memory buffer: */
941  trx_doublewrite->first_free = 0;
942 
943  mutex_exit(&(trx_doublewrite->mutex));
944 }
945 
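The protocol implemented above is: (1) copy the batch of dirty pages sequentially into the doublewrite area and fsync it, (2) only then write each page to its home location, (3) fsync again and reuse the buffer. A torn home-page write can then always be repaired from the batch copy during recovery. A compressed POSIX sketch of the same protocol (single demo file with illustrative offsets; real InnoDB stages the area inside the system tablespace):

#include <fcntl.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

#define PAGE_SIZE  4096
#define DBLWR_SLOTS 8

static int doublewrite_flush(int fd, char pages[][PAGE_SIZE],
			     const off_t *home, int n, off_t dblwr_base)
{
	int i;

	/* Step 1: stage the whole batch in the doublewrite area. */
	for (i = 0; i < n && i < DBLWR_SLOTS; i++) {
		if (pwrite(fd, pages[i], PAGE_SIZE,
			   dblwr_base + (off_t) i * PAGE_SIZE) != PAGE_SIZE)
			return -1;
	}
	if (fsync(fd) != 0)             /* batch is now recoverable */
		return -1;

	/* Step 2: write the pages to their real locations. */
	for (i = 0; i < n && i < DBLWR_SLOTS; i++) {
		if (pwrite(fd, pages[i], PAGE_SIZE, home[i]) != PAGE_SIZE)
			return -1;
	}
	return fsync(fd);               /* step 3: durable, slots reusable */
}

int main(void)
{
	static char pages[2][PAGE_SIZE];
	off_t home[2] = { 64L * PAGE_SIZE, 65L * PAGE_SIZE };
	int fd = open("demo.ibd", O_CREAT | O_RDWR, 0644);

	memset(pages[0], 0xAA, PAGE_SIZE);
	memset(pages[1], 0xBB, PAGE_SIZE);

	/* Doublewrite area parked at the start of the demo file. */
	return fd < 0 ? 1 : doublewrite_flush(fd, pages, home, 2, 0);
}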
946 /********************************************************************/
950 static
951 void
952 buf_flush_post_to_doublewrite_buf(
953 /*==============================*/
954  buf_page_t* bpage)
955 {
956  ulint zip_size;
957 try_again:
958  mutex_enter(&(trx_doublewrite->mutex));
959 
960  ut_a(buf_page_in_file(bpage));
961 
962  if (trx_doublewrite->first_free
963  >= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
964  mutex_exit(&(trx_doublewrite->mutex));
965 
966  buf_flush_buffered_writes();
967 
968  goto try_again;
969  }
970 
971  zip_size = buf_page_get_zip_size(bpage);
972 
973  if (UNIV_UNLIKELY(zip_size)) {
974  UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size);
975  /* Copy the compressed page and clear the rest. */
976  memcpy(trx_doublewrite->write_buf
977  + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
978  bpage->zip.data, zip_size);
979  memset(trx_doublewrite->write_buf
980  + UNIV_PAGE_SIZE * trx_doublewrite->first_free
981  + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
982  } else {
983  ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
984  UNIV_MEM_ASSERT_RW(((buf_block_t*) bpage)->frame,
985  UNIV_PAGE_SIZE);
986 
987  memcpy(trx_doublewrite->write_buf
988  + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
989  ((buf_block_t*) bpage)->frame, UNIV_PAGE_SIZE);
990  }
991 
992  trx_doublewrite->buf_block_arr[trx_doublewrite->first_free] = bpage;
993 
994  trx_doublewrite->first_free++;
995 
996  if (trx_doublewrite->first_free
997  >= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
998  mutex_exit(&(trx_doublewrite->mutex));
999 
1000  buf_flush_buffered_writes();
1001 
1002  return;
1003  }
1004 
1005  mutex_exit(&(trx_doublewrite->mutex));
1006 }
1007 #endif /* !UNIV_HOTBACKUP */
1008 
1009 /********************************************************************/
1011 UNIV_INTERN
1012 void
1013 buf_flush_init_for_writing(
1014 /*=======================*/
1015  byte* page,
1016  void* page_zip_,
1017  ib_uint64_t newest_lsn)
1019 {
1020  ut_ad(page);
1021 
1022  if (page_zip_) {
1023  page_zip_des_t* page_zip = static_cast<page_zip_des_t *>(page_zip_);
1024  ulint zip_size = page_zip_get_size(page_zip);
1025  ut_ad(zip_size);
1026  ut_ad(ut_is_2pow(zip_size));
1027  ut_ad(zip_size <= UNIV_PAGE_SIZE);
1028 
1029  switch (UNIV_EXPECT(fil_page_get_type(page), FIL_PAGE_INDEX)) {
1030  case FIL_PAGE_TYPE_ALLOCATED:
1031  case FIL_PAGE_INODE:
1032  case FIL_PAGE_IBUF_BITMAP:
1033  case FIL_PAGE_TYPE_FSP_HDR:
1034  case FIL_PAGE_TYPE_XDES:
1035  /* These are essentially uncompressed pages. */
1036  memcpy(page_zip->data, page, zip_size);
1037  /* fall through */
1038  case FIL_PAGE_TYPE_ZBLOB:
1039  case FIL_PAGE_TYPE_ZBLOB2:
1040  case FIL_PAGE_INDEX:
1041  mach_write_to_8(page_zip->data
1042  + FIL_PAGE_LSN, newest_lsn);
1043  memset(page_zip->data + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
1044  mach_write_to_4(page_zip->data
1045  + FIL_PAGE_SPACE_OR_CHKSUM,
1046  srv_use_checksums
1047  ? page_zip_calc_checksum(
1048  page_zip->data, zip_size)
1049  : BUF_NO_CHECKSUM_MAGIC);
1050  return;
1051  }
1052 
1053  ut_print_timestamp(stderr);
1054  fputs(" InnoDB: ERROR: The compressed page to be written"
1055  " seems corrupt:", stderr);
1056  ut_print_buf(stderr, page, zip_size);
1057  fputs("\nInnoDB: Possibly older version of the page:", stderr);
1058  ut_print_buf(stderr, page_zip->data, zip_size);
1059  putc('\n', stderr);
1060  ut_error;
1061  }
1062 
1063  /* Write the newest modification lsn to the page header and trailer */
1064  mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
1065 
1066  mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
1067  newest_lsn);
1068 
1069  /* Store the new formula checksum */
1070 
1071  mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
1072  srv_use_checksums
1073  ? buf_calc_page_new_checksum(page)
1074  : BUF_NO_CHECKSUM_MAGIC);
1075 
1076  /* We overwrite the first 4 bytes of the end lsn field to store
1077  the old formula checksum. Since it depends also on the field
1078  FIL_PAGE_SPACE_OR_CHKSUM, it has to be calculated after storing the
1079  new formula checksum. */
1080 
1081  mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
1082  srv_use_checksums
1083  ? buf_calc_page_old_checksum(page)
1084  : BUF_NO_CHECKSUM_MAGIC);
1085 }
1086 
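For an uncompressed page the routine stamps newest_lsn into the header (FIL_PAGE_LSN) and into the last 8 bytes of the page, computes the new-formula checksum into the header slot, and then overwrites the first 4 trailer bytes with the old-formula checksum, which must come second because it covers the header checksum field. A byte-level sketch of that layout with stand-in checksum functions (offsets are simplified, not the real constants):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 16384
#define CHKSUM_OFFSET 0        /* header checksum slot */
#define LSN_OFFSET 16          /* header LSN, 8 bytes */
#define TRAILER 8              /* trailer: 4B old checksum + 4B low LSN */

static void write8(uint8_t *p, uint64_t v)
{
	for (int i = 0; i < 8; i++) p[i] = (uint8_t)(v >> (56 - 8 * i));
}

static void write4(uint8_t *p, uint32_t v)
{
	for (int i = 0; i < 4; i++) p[i] = (uint8_t)(v >> (24 - 8 * i));
}

/* Toy stand-ins for buf_calc_page_new_checksum()/_old_checksum(). */
static uint32_t toy_new_checksum(const uint8_t *page)
{
	return page[LSN_OFFSET] * 2654435761u;
}

static uint32_t toy_old_checksum(const uint8_t *page)
{
	/* Depends on the header checksum byte, so order matters. */
	return (page[CHKSUM_OFFSET] ^ page[LSN_OFFSET]) * 2246822519u;
}

static void init_for_writing(uint8_t *page, uint64_t newest_lsn)
{
	/* LSN in header and trailer: recovery compares the two copies
	   to detect a torn (partially written) page. */
	write8(page + LSN_OFFSET, newest_lsn);
	write8(page + PAGE_SIZE - TRAILER, newest_lsn);

	/* New-formula checksum first, then the old formula over the
	   first 4 trailer bytes (clobbering the high LSN half there,
	   which leaves the low 4 LSN bytes at the very end). */
	write4(page + CHKSUM_OFFSET, toy_new_checksum(page));
	write4(page + PAGE_SIZE - TRAILER, toy_old_checksum(page));
}

int main(void)
{
	static uint8_t page[PAGE_SIZE];
	init_for_writing(page, 123456789ULL);
	printf("header checksum byte = %u\n", page[CHKSUM_OFFSET]);
	return 0;
}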
1087 #ifndef UNIV_HOTBACKUP
1088 /********************************************************************/
1092 static
1093 void
1094 buf_flush_write_block_low(
1095 /*======================*/
1096  buf_page_t* bpage)
1097 {
1098  ulint zip_size = buf_page_get_zip_size(bpage);
1099  page_t* frame = NULL;
1100 
1101 #ifdef UNIV_DEBUG
1102  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1103  ut_ad(!buf_pool_mutex_own(buf_pool));
1104 #endif
1105 
1106 #ifdef UNIV_LOG_DEBUG
1107  static ibool univ_log_debug_warned;
1108 #endif /* UNIV_LOG_DEBUG */
1109 
1110  ut_ad(buf_page_in_file(bpage));
1111 
1112  /* We are not holding buf_pool->mutex or block_mutex here.
1113  Nevertheless, it is safe to access bpage, because it is
1114  io_fixed and oldest_modification != 0. Thus, it cannot be
1115  relocated in the buffer pool or removed from flush_list or
1116  LRU_list. */
1117  ut_ad(!buf_pool_mutex_own(buf_pool));
1118  ut_ad(!buf_flush_list_mutex_own(buf_pool));
1119  ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
1120  ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
1121  ut_ad(bpage->oldest_modification != 0);
1122 
1123 #ifdef UNIV_IBUF_COUNT_DEBUG
1124  ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
1125 #endif
1126  ut_ad(bpage->newest_modification != 0);
1127 
1128 #ifdef UNIV_LOG_DEBUG
1129  if (!univ_log_debug_warned) {
1130  univ_log_debug_warned = TRUE;
1131  fputs("Warning: cannot force log to disk if"
1132  " UNIV_LOG_DEBUG is defined!\n"
1133  "Crash recovery will not work!\n",
1134  stderr);
1135  }
1136 #else
1137  /* Force the log to the disk before writing the modified block */
1138  log_write_up_to(bpage->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
1139 #endif
1140  switch (buf_page_get_state(bpage)) {
1141  case BUF_BLOCK_ZIP_FREE:
1142  case BUF_BLOCK_ZIP_PAGE: /* The page should be dirty. */
1143  case BUF_BLOCK_NOT_USED:
1144  case BUF_BLOCK_READY_FOR_USE:
1145  case BUF_BLOCK_MEMORY:
1146  case BUF_BLOCK_REMOVE_HASH:
1147  ut_error;
1148  break;
1149  case BUF_BLOCK_ZIP_DIRTY:
1150  frame = bpage->zip.data;
1151  if (UNIV_LIKELY(srv_use_checksums)) {
1152  ut_a(mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM)
1153  == page_zip_calc_checksum(frame, zip_size));
1154  }
1155  mach_write_to_8(frame + FIL_PAGE_LSN,
1156  bpage->newest_modification);
1157  memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
1158  break;
1159  case BUF_BLOCK_FILE_PAGE:
1160  frame = bpage->zip.data;
1161  if (!frame) {
1162  frame = ((buf_block_t*) bpage)->frame;
1163  }
1164 
1165  buf_flush_init_for_writing(((buf_block_t*) bpage)->frame,
1166  bpage->zip.data
1167  ? &bpage->zip : NULL,
1168  bpage->newest_modification);
1169  break;
1170  }
1171 
1172  if (!srv_use_doublewrite_buf || !trx_doublewrite) {
1173  fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
1174  FALSE, buf_page_get_space(bpage), zip_size,
1175  buf_page_get_page_no(bpage), 0,
1176  zip_size ? zip_size : UNIV_PAGE_SIZE,
1177  frame, bpage);
1178  } else {
1179  buf_flush_post_to_doublewrite_buf(bpage);
1180  }
1181 }
1182 
1183 # if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
1184 /********************************************************************/
1190 UNIV_INTERN
1191 ibool
1192 buf_flush_page_try(
1193 /*===============*/
1194  buf_pool_t* buf_pool,
1195  buf_block_t* block)
1196 {
1197  ut_ad(buf_pool_mutex_own(buf_pool));
1198  ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1199  ut_ad(mutex_own(&block->mutex));
1200 
1201  if (!buf_flush_ready_for_flush(&block->page, BUF_FLUSH_LRU)) {
1202  return(FALSE);
1203  }
1204 
1205  if (buf_pool->n_flush[BUF_FLUSH_LRU] > 0
1206  || buf_pool->init_flush[BUF_FLUSH_LRU]) {
1207  /* There is already a flush batch of the same type running */
1208  return(FALSE);
1209  }
1210 
1211  buf_pool->init_flush[BUF_FLUSH_LRU] = TRUE;
1212 
1213  buf_page_set_io_fix(&block->page, BUF_IO_WRITE);
1214 
1215  buf_page_set_flush_type(&block->page, BUF_FLUSH_LRU);
1216 
1217  if (buf_pool->n_flush[BUF_FLUSH_LRU]++ == 0) {
1218 
1219  os_event_reset(buf_pool->no_flush[BUF_FLUSH_LRU]);
1220  }
1221 
1222  /* VERY IMPORTANT:
1223  Because any thread may call the LRU flush, even when owning
1224  locks on pages, to avoid deadlocks, we must make sure that the
1225  s-lock is acquired on the page without waiting: this is
1226  accomplished because buf_flush_ready_for_flush() must hold,
1227  and that requires the page not to be bufferfixed. */
1228 
1229  rw_lock_s_lock_gen(&block->lock, BUF_IO_WRITE);
1230 
1231  /* Note that the s-latch is acquired before releasing the
1232  buf_pool mutex: this ensures that the latch is acquired
1233  immediately. */
1234 
1235  mutex_exit(&block->mutex);
1236  buf_pool_mutex_exit(buf_pool);
1237 
1238  /* Even though block is not protected by any mutex at this
1239  point, it is safe to access block, because it is io_fixed and
1240  oldest_modification != 0. Thus, it cannot be relocated in the
1241  buffer pool or removed from flush_list or LRU_list. */
1242 
1243  buf_flush_write_block_low(&block->page);
1244 
1245  buf_pool_mutex_enter(buf_pool);
1246  buf_pool->init_flush[BUF_FLUSH_LRU] = FALSE;
1247 
1248  if (buf_pool->n_flush[BUF_FLUSH_LRU] == 0) {
1249  /* The running flush batch has ended */
1250  os_event_set(buf_pool->no_flush[BUF_FLUSH_LRU]);
1251  }
1252 
1253  buf_pool_mutex_exit(buf_pool);
1254  buf_flush_buffered_writes();
1255 
1256  return(TRUE);
1257 }
1258 # endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
1259 
1260 /********************************************************************/
1267 static
1268 void
1269 buf_flush_page(
1270 /*===========*/
1271  buf_pool_t* buf_pool,
1272  buf_page_t* bpage,
1273  enum buf_flush flush_type)
1275 {
1276  mutex_t* block_mutex;
1277  ibool is_uncompressed;
1278 
1279  ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
1280  ut_ad(buf_pool_mutex_own(buf_pool));
1281  ut_ad(buf_page_in_file(bpage));
1282 
1283  block_mutex = buf_page_get_mutex(bpage);
1284  ut_ad(mutex_own(block_mutex));
1285 
1286  ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
1287 
1288  buf_page_set_io_fix(bpage, BUF_IO_WRITE);
1289 
1290  buf_page_set_flush_type(bpage, flush_type);
1291 
1292  if (buf_pool->n_flush[flush_type] == 0) {
1293 
1294  os_event_reset(buf_pool->no_flush[flush_type]);
1295  }
1296 
1297  buf_pool->n_flush[flush_type]++;
1298 
1299  is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
1300  ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex));
1301 
1302  switch (flush_type) {
1303  ibool is_s_latched;
1304  case BUF_FLUSH_LIST:
1305  /* If the simulated aio thread is not running, we must
1306  not wait for any latch, as we may end up in a deadlock:
1307  if buf_fix_count == 0, then we know we need not wait */
1308 
1309  is_s_latched = (bpage->buf_fix_count == 0);
1310  if (is_s_latched && is_uncompressed) {
1311  rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
1312  BUF_IO_WRITE);
1313  }
1314 
1315  mutex_exit(block_mutex);
1316  buf_pool_mutex_exit(buf_pool);
1317 
1318  /* Even though bpage is not protected by any mutex at
1319  this point, it is safe to access bpage, because it is
1320  io_fixed and oldest_modification != 0. Thus, it
1321  cannot be relocated in the buffer pool or removed from
1322  flush_list or LRU_list. */
1323 
1324  if (!is_s_latched) {
1325  buf_flush_buffered_writes();
1326 
1327  if (is_uncompressed) {
1328  rw_lock_s_lock_gen(&((buf_block_t*) bpage)
1329  ->lock, BUF_IO_WRITE);
1330  }
1331  }
1332 
1333  break;
1334 
1335  case BUF_FLUSH_LRU:
1336  /* VERY IMPORTANT:
1337  Because any thread may call the LRU flush, even when owning
1338  locks on pages, to avoid deadlocks, we must make sure that the
1339  s-lock is acquired on the page without waiting: this is
1340  accomplished because buf_flush_ready_for_flush() must hold,
1341  and that requires the page not to be bufferfixed. */
1342 
1343  if (is_uncompressed) {
1344  rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock,
1345  BUF_IO_WRITE);
1346  }
1347 
1348  /* Note that the s-latch is acquired before releasing the
1349  buf_pool mutex: this ensures that the latch is acquired
1350  immediately. */
1351 
1352  mutex_exit(block_mutex);
1353  buf_pool_mutex_exit(buf_pool);
1354  break;
1355 
1356  default:
1357  ut_error;
1358  }
1359 
1360  /* Even though bpage is not protected by any mutex at this
1361  point, it is safe to access bpage, because it is io_fixed and
1362  oldest_modification != 0. Thus, it cannot be relocated in the
1363  buffer pool or removed from flush_list or LRU_list. */
1364 
1365 #ifdef UNIV_DEBUG
1366  if (buf_debug_prints) {
1367  fprintf(stderr,
1368  "Flushing %u space %u page %u\n",
1369  flush_type, bpage->space, bpage->offset);
1370  }
1371 #endif /* UNIV_DEBUG */
1372  buf_flush_write_block_low(bpage);
1373 }
1374 
1375 /***********************************************************/
1378 static
1379 ulint
1380 buf_flush_try_neighbors(
1381 /*====================*/
1382  ulint space,
1383  ulint offset,
1384  enum buf_flush flush_type,
1386  ulint n_flushed,
1388  ulint n_to_flush)
1390 {
1391  ulint i;
1392  ulint low;
1393  ulint high;
1394  ulint count = 0;
1395  buf_pool_t* buf_pool = buf_pool_get(space, offset);
1396 
1397  ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
1398 
1399  if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN
1400  || !srv_flush_neighbor_pages) {
1401  /* If there is little space, it is better not to flush
1402  any block except from the end of the LRU list */
1403 
1404  low = offset;
1405  high = offset + 1;
1406  } else {
1407  /* When flushed, dirty blocks are searched in
1408  neighborhoods of this size, and flushed along with the
1409  original page. */
1410 
1411  ulint buf_flush_area;
1412 
1413  buf_flush_area = ut_min(
1414  BUF_READ_AHEAD_AREA(buf_pool),
1415  buf_pool->curr_size / 16);
1416 
1417  low = (offset / buf_flush_area) * buf_flush_area;
1418  high = (offset / buf_flush_area + 1) * buf_flush_area;
1419  }
1420 
1421  /* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */
1422 
1423  if (high > fil_space_get_size(space)) {
1424  high = fil_space_get_size(space);
1425  }
1426 
1427  for (i = low; i < high; i++) {
1428 
1429  buf_page_t* bpage;
1430 
1431  if ((count + n_flushed) >= n_to_flush) {
1432 
1433  /* We have already flushed enough pages and
1434  should call it a day. There is, however, one
1435  exception. If the page whose neighbors we
1436  are flushing has not been flushed yet then
1437  we'll try to flush the victim that we
1438  selected originally. */
1439  if (i <= offset) {
1440  i = offset;
1441  } else {
1442  break;
1443  }
1444  }
1445 
1446  buf_pool = buf_pool_get(space, i);
1447 
1448  buf_pool_mutex_enter(buf_pool);
1449 
1450  /* We only want to flush pages from this buffer pool. */
1451  bpage = buf_page_hash_get(buf_pool, space, i);
1452 
1453  if (!bpage) {
1454 
1455  buf_pool_mutex_exit(buf_pool);
1456  continue;
1457  }
1458 
1459  ut_a(buf_page_in_file(bpage));
1460 
1461  /* We avoid flushing 'non-old' blocks in an LRU flush,
1462  because the flushed blocks are soon freed */
1463 
1464  if (flush_type != BUF_FLUSH_LRU
1465  || i == offset
1466  || buf_page_is_old(bpage)) {
1467  mutex_t* block_mutex = buf_page_get_mutex(bpage);
1468 
1469  mutex_enter(block_mutex);
1470 
1471  if (buf_flush_ready_for_flush(bpage, flush_type)
1472  && (i == offset || !bpage->buf_fix_count)) {
1473  /* We only try to flush those
1474  neighbors != offset where the buf fix
1475  count is zero, as we then know that we
1476  probably can latch the page without a
1477  semaphore wait. Semaphore waits are
1478  expensive because we must flush the
1479  doublewrite buffer before we start
1480  waiting. */
1481 
1482  buf_flush_page(buf_pool, bpage, flush_type);
1483  ut_ad(!mutex_own(block_mutex));
1484  ut_ad(!buf_pool_mutex_own(buf_pool));
1485  count++;
1486  continue;
1487  } else {
1488  mutex_exit(block_mutex);
1489  }
1490  }
1491  buf_pool_mutex_exit(buf_pool);
1492  }
1493 
1494  return(count);
1495 }
1496 
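The neighbor window computed above is simply the flush-area-aligned block of pages containing the target: round offset down to a multiple of the area size for low, take the next boundary for high, and clamp to the tablespace size. The same computation in isolation:

#include <stdio.h>

/* Compute the [low, high) page range flushed around `offset`. */
static void flush_neighborhood(unsigned long offset,
			       unsigned long flush_area,
			       unsigned long space_size,
			       unsigned long *low, unsigned long *high)
{
	*low = (offset / flush_area) * flush_area;
	*high = *low + flush_area;
	if (*high > space_size) {
		*high = space_size;
	}
}

int main(void)
{
	unsigned long low, high;

	/* e.g. an area of 64 pages: page 200 lives in [192, 256). */
	flush_neighborhood(200, 64, 1000, &low, &high);
	printf("low=%lu high=%lu\n", low, high);
	return 0;
}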
1497 /********************************************************************/
1504 static
1505 ibool
1506 buf_flush_page_and_try_neighbors(
1507 /*=============================*/
1508  buf_page_t* bpage,
1511  enum buf_flush flush_type,
1513  ulint n_to_flush,
1515  ulint* count)
1517 {
1518  mutex_t* block_mutex;
1519  ibool flushed = FALSE;
1520 #ifdef UNIV_DEBUG
1521  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1522 #endif /* UNIV_DEBUG */
1523 
1524  ut_ad(buf_pool_mutex_own(buf_pool));
1525 
1526  block_mutex = buf_page_get_mutex(bpage);
1527  mutex_enter(block_mutex);
1528 
1529  ut_a(buf_page_in_file(bpage));
1530 
1531  if (buf_flush_ready_for_flush(bpage, flush_type)) {
1532  ulint space;
1533  ulint offset;
1534  buf_pool_t* buf_pool;
1535 
1536  buf_pool = buf_pool_from_bpage(bpage);
1537 
1538  buf_pool_mutex_exit(buf_pool);
1539 
1540  /* These fields are protected by both the
1541  buffer pool mutex and block mutex. */
1542  space = buf_page_get_space(bpage);
1543  offset = buf_page_get_page_no(bpage);
1544 
1545  mutex_exit(block_mutex);
1546 
1547  /* Try to flush also all the neighbors */
1548  *count += buf_flush_try_neighbors(space,
1549  offset,
1550  flush_type,
1551  *count,
1552  n_to_flush);
1553 
1554  buf_pool_mutex_enter(buf_pool);
1555  flushed = TRUE;
1556  } else {
1557  mutex_exit(block_mutex);
1558  }
1559 
1560  ut_ad(buf_pool_mutex_own(buf_pool));
1561 
1562  return(flushed);
1563 }
1564 
1565 /*******************************************************************/
1571 static
1572 ulint
1573 buf_flush_LRU_list_batch(
1574 /*=====================*/
1575  buf_pool_t* buf_pool,
1576  ulint max)
1577 {
1578  buf_page_t* bpage;
1579  ulint count = 0;
1580 
1581  ut_ad(buf_pool_mutex_own(buf_pool));
1582 
1583  do {
1584  /* Start from the end of the list looking for a
1585  suitable block to be flushed. */
1586  bpage = UT_LIST_GET_LAST(buf_pool->LRU);
1587 
1588  /* Iterate backwards over the flush list till we find
1589  a page that isn't ready for flushing. */
1590  while (bpage != NULL
1591  && !buf_flush_page_and_try_neighbors(
1592  bpage, BUF_FLUSH_LRU, max, &count)) {
1593 
1594  bpage = UT_LIST_GET_PREV(LRU, bpage);
1595  }
1596  } while (bpage != NULL && count < max);
1597 
1598  /* We keep track of all flushes happening as part of LRU
1599  flush. When estimating the desired rate at which flush_list
1600  should be flushed, we factor in this value. */
1601  buf_lru_flush_page_count += count;
1602 
1603  ut_ad(buf_pool_mutex_own(buf_pool));
1604 
1605  return(count);
1606 }
1607 
1608 /*******************************************************************/
1614 static
1615 ulint
1616 buf_flush_flush_list_batch(
1617 /*=======================*/
1618  buf_pool_t* buf_pool,
1619  ulint min_n,
1623  ib_uint64_t lsn_limit)
1628 {
1629  ulint len;
1630  buf_page_t* bpage;
1631  ulint count = 0;
1632 
1633  ut_ad(buf_pool_mutex_own(buf_pool));
1634 
1635  /* If we have flushed enough, leave the loop */
1636  do {
1637  /* Start from the end of the list looking for a suitable
1638  block to be flushed. */
1639 
1640  buf_flush_list_mutex_enter(buf_pool);
1641 
1642  /* We use len here because theoretically insertions can
1643  happen in the flush_list below while we are traversing
1644  it for a suitable candidate for flushing. We'd like to
1645  set a limit on how far we are willing to traverse
1646  the list. */
1647  len = UT_LIST_GET_LEN(buf_pool->flush_list);
1648  bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
1649 
1650  if (bpage) {
1651  ut_a(bpage->oldest_modification > 0);
1652  }
1653 
1654  if (!bpage || bpage->oldest_modification >= lsn_limit) {
1655 
1656  /* We have flushed enough */
1657  buf_flush_list_mutex_exit(buf_pool);
1658  break;
1659  }
1660 
1661  ut_a(bpage->oldest_modification > 0);
1662 
1663  ut_ad(bpage->in_flush_list);
1664 
1665  buf_flush_list_mutex_exit(buf_pool);
1666 
1667  /* The list may change during the flushing and we cannot
1668  safely preserve within this function a pointer to a
1669  block in the list! */
1670  while (bpage != NULL
1671  && len > 0
1672  && !buf_flush_page_and_try_neighbors(
1673  bpage, BUF_FLUSH_LIST, min_n, &count)) {
1674 
1675  buf_flush_list_mutex_enter(buf_pool);
1676 
1677  /* If we are here that means that buf_pool->mutex
1678  was not released in buf_flush_page_and_try_neighbors()
1679  above and this guarantees that bpage didn't get
1680  relocated since we released the flush_list
1681  mutex above. There is a chance, however, that
1682  the bpage got removed from flush_list (not
1683  currently possible because flush_list_remove()
1684  also obtains buf_pool mutex but that may change
1685  in future). To avoid this scenario we check
1686  the oldest_modification and if it is zero
1687  we start all over again. */
1688  if (bpage->oldest_modification == 0) {
1689  buf_flush_list_mutex_exit(buf_pool);
1690  break;
1691  }
1692 
1693  bpage = UT_LIST_GET_PREV(list, bpage);
1694 
1695  ut_ad(!bpage || bpage->in_flush_list);
1696 
1697  buf_flush_list_mutex_exit(buf_pool);
1698 
1699  --len;
1700  }
1701 
1702  } while (count < min_n && bpage != NULL && len > 0);
1703 
1704  ut_ad(buf_pool_mutex_own(buf_pool));
1705 
1706  return(count);
1707 }
1708 
1709 /*******************************************************************/
1717 static
1718 ulint
1719 buf_flush_batch(
1720 /*============*/
1721  buf_pool_t* buf_pool,
1722  enum buf_flush flush_type,
1726  ulint min_n,
1729  ib_uint64_t lsn_limit)
1734 {
1735  ulint count = 0;
1736 
1737  ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
1738 #ifdef UNIV_SYNC_DEBUG
1739  ut_ad((flush_type != BUF_FLUSH_LIST)
1740  || sync_thread_levels_empty_gen(TRUE));
1741 #endif /* UNIV_SYNC_DEBUG */
1742 
1743  buf_pool_mutex_enter(buf_pool);
1744 
1745  /* Note: The buffer pool mutex is released and reacquired within
1746  the flush functions. */
1747  switch(flush_type) {
1748  case BUF_FLUSH_LRU:
1749  count = buf_flush_LRU_list_batch(buf_pool, min_n);
1750  break;
1751  case BUF_FLUSH_LIST:
1752  count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
1753  break;
1754  default:
1755  ut_error;
1756  }
1757 
1758  buf_pool_mutex_exit(buf_pool);
1759 
1760  buf_flush_buffered_writes();
1761 
1762 #ifdef UNIV_DEBUG
1763  if (buf_debug_prints && count > 0) {
1764  fprintf(stderr, flush_type == BUF_FLUSH_LRU
1765  ? "Flushed %lu pages in LRU flush\n"
1766  : "Flushed %lu pages in flush list flush\n",
1767  (ulong) count);
1768  }
1769 #endif /* UNIV_DEBUG */
1770 
1771  srv_buf_pool_flushed += count;
1772 
1773  return(count);
1774 }
1775 
1776 /******************************************************************/
1778 static
1779 void
1780 buf_flush_common(
1781 /*=============*/
1782  enum buf_flush flush_type,
1783  ulint page_count)
1784 {
1785  buf_flush_buffered_writes();
1786 
1787  ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
1788 
1789 #ifdef UNIV_DEBUG
1790  if (buf_debug_prints && page_count > 0) {
1791  fprintf(stderr, flush_type == BUF_FLUSH_LRU
1792  ? "Flushed %lu pages in LRU flush\n"
1793  : "Flushed %lu pages in flush list flush\n",
1794  (ulong) page_count);
1795  }
1796 #endif /* UNIV_DEBUG */
1797 
1798  srv_buf_pool_flushed += page_count;
1799 
1800  if (flush_type == BUF_FLUSH_LRU) {
1801  /* We keep track of all flushes happening as part of LRU
1802  flush. When estimating the desired rate at which flush_list
1803  should be flushed we factor in this value. */
1804  buf_lru_flush_page_count += page_count;
1805  }
1806 }
1807 
1808 /******************************************************************/
1810 static
1811 ibool
1812 buf_flush_start(
1813 /*============*/
1814  buf_pool_t* buf_pool,
1815  enum buf_flush flush_type)
1817 {
1818  buf_pool_mutex_enter(buf_pool);
1819 
1820  if (buf_pool->n_flush[flush_type] > 0
1821  || buf_pool->init_flush[flush_type] == TRUE) {
1822 
1823  /* There is already a flush batch of the same type running */
1824 
1825  buf_pool_mutex_exit(buf_pool);
1826 
1827  return(FALSE);
1828  }
1829 
1830  buf_pool->init_flush[flush_type] = TRUE;
1831 
1832  buf_pool_mutex_exit(buf_pool);
1833 
1834  return(TRUE);
1835 }
1836 
1837 /******************************************************************/
1839 static
1840 void
1841 buf_flush_end(
1842 /*==========*/
1843  buf_pool_t* buf_pool,
1844  enum buf_flush flush_type)
1846 {
1847  buf_pool_mutex_enter(buf_pool);
1848 
1849  buf_pool->init_flush[flush_type] = FALSE;
1850 
1851  if (buf_pool->n_flush[flush_type] == 0) {
1852 
1853  /* The running flush batch has ended */
1854 
1855  os_event_set(buf_pool->no_flush[flush_type]);
1856  }
1857 
1858  buf_pool_mutex_exit(buf_pool);
1859 }
1860 
1861 /******************************************************************/
1863 UNIV_INTERN
1864 void
1865 buf_flush_wait_batch_end(
1866 /*=====================*/
1867  buf_pool_t* buf_pool,
1868  enum buf_flush type)
1870 {
1871  ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST);
1872 
1873  if (buf_pool == NULL) {
1874  ulint i;
1875 
1876  for (i = 0; i < srv_buf_pool_instances; ++i) {
1877  buf_pool_t* i_buf_pool = buf_pool_from_array(i);
1878 
1879  os_event_wait(i_buf_pool->no_flush[type]);
1880  }
1881  } else {
1882  os_event_wait(buf_pool->no_flush[type]);
1883  }
1884 }
1885 
1886 /*******************************************************************/
1893 UNIV_INTERN
1894 ulint
1895 buf_flush_LRU(
1896 /*==========*/
1897  buf_pool_t* buf_pool,
1898  ulint min_n)
1901 {
1902  ulint page_count;
1903 
1904  if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
1905  return(ULINT_UNDEFINED);
1906  }
1907 
1908  page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0);
1909 
1910  buf_flush_end(buf_pool, BUF_FLUSH_LRU);
1911 
1912  buf_flush_common(BUF_FLUSH_LRU, page_count);
1913 
1914  return(page_count);
1915 }
1916 
1917 /*******************************************************************/
1923 UNIV_INTERN
1924 ulint
1925 buf_flush_list(
1926 /*===========*/
1927  ulint min_n,
1930  ib_uint64_t lsn_limit)
1935 {
1936  ulint i;
1937  ulint total_page_count = 0;
1938  ibool skipped = FALSE;
1939 
1940  if (min_n != ULINT_MAX) {
1941  /* Ensure that flushing is spread evenly amongst the
1942  buffer pool instances. When min_n is ULINT_MAX
1943  we need to flush everything up to the lsn limit
1944  so no limit here. */
1945  min_n = (min_n + srv_buf_pool_instances - 1)
1946  / srv_buf_pool_instances;
1947  }
1948 
1949  /* Flush to lsn_limit in all buffer pool instances */
1950  for (i = 0; i < srv_buf_pool_instances; i++) {
1951  buf_pool_t* buf_pool;
1952  ulint page_count = 0;
1953 
1954  buf_pool = buf_pool_from_array(i);
1955 
1956  if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) {
1957  /* We have two choices here. If lsn_limit was
1958  specified then skipping an instance of buffer
1959  pool means we cannot guarantee that all pages
1960  up to lsn_limit have been flushed. We can
1961  return right now with failure or we can try
1962  to flush remaining buffer pools up to the
1963  lsn_limit. We attempt to flush other buffer
1964  pools based on the assumption that it will
1965  help in the retry which will follow the
1966  failure. */
1967  skipped = TRUE;
1968 
1969  continue;
1970  }
1971 
1972  page_count = buf_flush_batch(
1973  buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit);
1974 
1975  buf_flush_end(buf_pool, BUF_FLUSH_LIST);
1976 
1977  buf_flush_common(BUF_FLUSH_LIST, page_count);
1978 
1979  total_page_count += page_count;
1980  }
1981 
1982  return(lsn_limit != IB_ULONGLONG_MAX && skipped
1983  ? ULINT_UNDEFINED : total_page_count);
1984 }
1985 
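The min_n adjustment inside buf_flush_list() is a ceiling division, so every buffer pool instance gets at least its proportional share and the per-instance quotas never sum to less than the caller's request. A worked example:

#include <stdio.h>

int main(void)
{
	unsigned long min_n = 100;
	unsigned long instances = 8;

	/* (100 + 8 - 1) / 8 = 13 pages per instance (8 * 13 = 104 >= 100),
	   where plain truncation would give only 12 (8 * 12 = 96 < 100). */
	unsigned long per_instance = (min_n + instances - 1) / instances;

	printf("%lu pages per instance\n", per_instance);
	return 0;
}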
1986 /******************************************************************/
1992 static
1993 ulint
1994 buf_flush_LRU_recommendation(
1995 /*=========================*/
1996  buf_pool_t* buf_pool)
1997 {
1998  buf_page_t* bpage;
1999  ulint n_replaceable;
2000  ulint distance = 0;
2001 
2002  buf_pool_mutex_enter(buf_pool);
2003 
2004  n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
2005 
2006  bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2007 
2008  while ((bpage != NULL)
2009  && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)
2010  + BUF_FLUSH_EXTRA_MARGIN(buf_pool))
2011  && (distance < BUF_LRU_FREE_SEARCH_LEN(buf_pool))) {
2012 
2013  mutex_t* block_mutex = buf_page_get_mutex(bpage);
2014 
2015  mutex_enter(block_mutex);
2016 
2017  if (buf_flush_ready_for_replace(bpage)) {
2018  n_replaceable++;
2019  }
2020 
2021  mutex_exit(block_mutex);
2022 
2023  distance++;
2024 
2025  bpage = UT_LIST_GET_PREV(LRU, bpage);
2026  }
2027 
2028  buf_pool_mutex_exit(buf_pool);
2029 
2030  if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
2031 
2032  return(0);
2033  }
2034 
2035  return(BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)
2036  + BUF_FLUSH_EXTRA_MARGIN(buf_pool)
2037  - n_replaceable);
2038 }
2039 
2040 /*********************************************************************/
2046 UNIV_INTERN
2047 void
2048 buf_flush_free_margin(
2049 /*==================*/
2050  buf_pool_t* buf_pool)
2051 {
2052  ulint n_to_flush;
2053 
2054  n_to_flush = buf_flush_LRU_recommendation(buf_pool);
2055 
2056  if (n_to_flush > 0) {
2057  ulint n_flushed;
2058 
2059  n_flushed = buf_flush_LRU(buf_pool, n_to_flush);
2060 
2061  if (n_flushed == ULINT_UNDEFINED) {
2062  /* There was an LRU type flush batch already running;
2063  let us wait for it to end */
2064 
2065  buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
2066  }
2067  }
2068 }
2069 
2070 /*********************************************************************/
2072 UNIV_INTERN
2073 void
2074 buf_flush_free_margins(void)
2075 /*========================*/
2076 {
2077  ulint i;
2078 
2079  for (i = 0; i < srv_buf_pool_instances; i++) {
2080  buf_pool_t* buf_pool;
2081 
2082  buf_pool = buf_pool_from_array(i);
2083 
2084  buf_flush_free_margin(buf_pool);
2085  }
2086 }
2087 
2088 /*********************************************************************
2089 Update the historical stats that we are collecting for flush rate
2090 heuristics at the end of each interval.
2091 Flush rate heuristic depends on (a) rate of redo log generation and
2092 (b) the rate at which LRU flush is happening. */
2093 UNIV_INTERN
2094 void
2095 buf_flush_stat_update(void)
2096 /*=======================*/
2097 {
2098  buf_flush_stat_t* item;
2099  ib_uint64_t lsn_diff;
2100  ib_uint64_t lsn;
2101  ulint n_flushed;
2102 
2103  lsn = log_get_lsn();
2104  if (buf_flush_stat_cur.redo == 0) {
2105  /* First time around. Just update the current LSN
2106  and return. */
2107  buf_flush_stat_cur.redo = lsn;
2108  return;
2109  }
2110 
2111  item = &buf_flush_stat_arr[buf_flush_stat_arr_ind];
2112 
2113  /* values for this interval */
2114  lsn_diff = lsn - buf_flush_stat_cur.redo;
2115  n_flushed = buf_lru_flush_page_count
2116  - buf_flush_stat_cur.n_flushed;
2117 
2118  /* add the current value and subtract the obsolete entry. */
2119  buf_flush_stat_sum.redo += lsn_diff - item->redo;
2120  buf_flush_stat_sum.n_flushed += n_flushed - item->n_flushed;
2121 
2122  /* put current entry in the array. */
2123  item->redo = lsn_diff;
2124  item->n_flushed = n_flushed;
2125 
2126  /* update the index */
2127  buf_flush_stat_arr_ind++;
2128  buf_flush_stat_arr_ind %= BUF_FLUSH_STAT_N_INTERVAL;
2129 
2130  /* reset the current entry. */
2131  buf_flush_stat_cur.redo = lsn;
2132  buf_flush_stat_cur.n_flushed = buf_lru_flush_page_count;
2133 }
2134 
2135 /*********************************************************************
2136 Determines the fraction of dirty pages that need to be flushed based
2137 on the speed at which we generate redo log. Note that if redo log
2138 is generated at a significant rate without corresponding increase
2139 in the number of dirty pages (for example, an in-memory workload)
2140 it can cause IO bursts of flushing. This function implements heuristics
2141 to avoid this burstiness.
2142 @return number of dirty pages to be flushed / second */
2143 UNIV_INTERN
2144 ulint
2145 buf_flush_get_desired_flush_rate(void)
2146 /*==================================*/
2147 {
2148  ulint i;
2149  lint rate;
2150  ulint redo_avg;
2151  ulint n_dirty = 0;
2152  ulint n_flush_req;
2153  ulint lru_flush_avg;
2154  ib_uint64_t lsn = log_get_lsn();
2155  ulint log_capacity = log_get_capacity();
2156 
2157  /* log_capacity should never be zero after the initialization
2158  of log subsystem. */
2159  ut_ad(log_capacity != 0);
2160 
2161  /* Get total number of dirty pages. It is OK to access
2162  flush_list without holding any mutex as we are using this
2163  only for heuristics. */
2164  for (i = 0; i < srv_buf_pool_instances; i++) {
2165  buf_pool_t* buf_pool;
2166 
2167  buf_pool = buf_pool_from_array(i);
2168  n_dirty += UT_LIST_GET_LEN(buf_pool->flush_list);
2169  }
2170 
2171  /* An overflow can happen if we generate more than 2^32 bytes
2172  of redo in this interval i.e.: 4G of redo in 1 second. We can
2173  safely consider this as infinity because if we ever come close
2174  to 4G we'll start a synchronous flush of dirty pages. */
2175  /* redo_avg below is the average rate at which redo was generated
2176  over the past BUF_FLUSH_STAT_N_INTERVAL intervals, plus the redo
2177  generated in the current interval. */
2178  redo_avg = (ulint) (buf_flush_stat_sum.redo
2179  / BUF_FLUSH_STAT_N_INTERVAL
2180  + (lsn - buf_flush_stat_cur.redo));
2181 
2182  /* An overflow can possibly happen if we flush more than 2^32
2183  pages in BUF_FLUSH_STAT_N_INTERVAL. This is an extremely
2184  unlikely scenario. Even if it happens, it only means that our
2185  flush rate estimate will be off the mark; it won't affect the
2186  correctness of any subsystem. */
2187  /* lru_flush_avg below is the rate at which pages were flushed
2188  via LRU flushing over the past BUF_FLUSH_STAT_N_INTERVAL intervals,
2189  plus the number of pages flushed in the current interval. */
2190  lru_flush_avg = buf_flush_stat_sum.n_flushed
2191  / BUF_FLUSH_STAT_N_INTERVAL
2192  + (buf_lru_flush_page_count
2193  - buf_flush_stat_cur.n_flushed);
2194 
2195  n_flush_req = (n_dirty * redo_avg) / log_capacity;
2196 
2197  /* The number of pages that we want to flush from the flush
2198  list is the difference between the required rate and the
2199  rate at which we have historically been flushing pages from
2200  the LRU list. */
2201  rate = n_flush_req - lru_flush_avg;
2202  return(rate > 0 ? (ulint) rate : 0);
2203 }
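To make the heuristic concrete, here is the same arithmetic with made-up numbers (all values are hypothetical): with n_dirty = 10000 pages, redo_avg = 2 MB/s, log_capacity = 100 MB and lru_flush_avg = 50 pages/s, the function would request (10000 * 2) / 100 - 50 = 150 pages/s from the flush list. A standalone sketch:

#include <stdio.h>

int
main(void)
{
	/* Hypothetical inputs, for illustration only. */
	unsigned long	n_dirty       = 10000;	/* dirty pages, all pools */
	unsigned long	redo_avg      = 2;	/* MB of redo per second */
	unsigned long	log_capacity  = 100;	/* MB of usable log space */
	unsigned long	lru_flush_avg = 50;	/* pages/s flushed via LRU */

	long	n_flush_req = (long) ((n_dirty * redo_avg) / log_capacity);
	long	rate	    = n_flush_req - (long) lru_flush_avg;

	/* Clamp at zero, as buf_flush_get_desired_flush_rate() does. */
	printf("desired flush rate: %ld pages/s\n", rate > 0 ? rate : 0);
	return(0);
}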
2204 
2205 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2206 /******************************************************************/
2209 static
2210 ibool
2211 buf_flush_validate_low(
2212 /*===================*/
2213  buf_pool_t* buf_pool)
2214 {
2215  buf_page_t* bpage;
2216  const ib_rbt_node_t* rnode = NULL;
2217 
2218  ut_ad(buf_flush_list_mutex_own(buf_pool));
2219 
2220  UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
2221  ut_ad(ut_list_node_313->in_flush_list));
2222 
2223  bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
2224 
2225  /* If we are in recovery mode i.e.: flush_rbt != NULL
2226  then each block in the flush_list must also be present
2227  in the flush_rbt. */
2228  if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
2229  rnode = rbt_first(buf_pool->flush_rbt);
2230  }
2231 
2232  while (bpage != NULL) {
2233  const ib_uint64_t om = bpage->oldest_modification;
2234 
2235  ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
2236 
2237  ut_ad(bpage->in_flush_list);
2238 
2239  /* A page in buf_pool->flush_list can be in
2240  BUF_BLOCK_REMOVE_HASH state. This happens when a page
2241  is in the middle of being relocated. In that case the
2242  original descriptor can have this state and still be
2243  in the flush list waiting to acquire the
2244  buf_pool->flush_list_mutex to complete the relocation. */
2245  ut_a(buf_page_in_file(bpage)
2246       || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
2247  ut_a(om > 0);
2248 
2249  if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
2250  buf_page_t** prpage;
2251 
2252  ut_a(rnode);
2253  prpage = rbt_value(buf_page_t*, rnode);
2254 
2255  ut_a(*prpage);
2256  ut_a(*prpage == bpage);
2257  rnode = rbt_next(buf_pool->flush_rbt, rnode);
2258  }
2259 
2260  bpage = UT_LIST_GET_NEXT(list, bpage);
2261 
2262  ut_a(!bpage || om >= bpage->oldest_modification);
2263  }
2264 
2265  /* By this time we must have exhausted the traversal of
2266  flush_rbt (if active) as well. */
2267  ut_a(rnode == NULL);
2268 
2269  return(TRUE);
2270 }
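The key invariant buf_flush_validate_low() enforces is that the flush list is kept in descending order of oldest_modification (each node's LSN is >= its successor's, and every LSN is positive), with the flush_rbt, when present, visiting the very same pages in the same order. A minimal sketch of the ordering check alone, over a plain array (hypothetical helper, not part of the real API):

#include <assert.h>

/* Assert that LSNs are positive and non-increasing, mirroring the
ut_a(om > 0) and ut_a(!bpage || om >= bpage->oldest_modification)
checks above. */
static void
validate_lsn_order(const unsigned long long* lsn, unsigned long n)
{
	unsigned long	i;

	for (i = 0; i < n; i++) {
		assert(lsn[i] > 0);
		if (i + 1 < n) {
			assert(lsn[i] >= lsn[i + 1]);
		}
	}
}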
2271 
2272 /******************************************************************/
2275 UNIV_INTERN
2276 ibool
2277 buf_flush_validate(
2278 /*===============*/
2279  buf_pool_t* buf_pool)
2280 {
2281  ibool ret;
2282 
2283  buf_flush_list_mutex_enter(buf_pool);
2284 
2285  ret = buf_flush_validate_low(buf_pool);
2286 
2287  buf_flush_list_mutex_exit(buf_pool);
2288 
2289  return(ret);
2290 }
2291 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2292 #endif /* !UNIV_HOTBACKUP */
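Validators like buf_flush_validate() are normally invoked from debug assertions so that production builds pay nothing; buf_flush_validate_skip() near the top of this file additionally throttles the check to every BUF_FLUSH_VALIDATE_SKIP-th call. A hedged usage sketch, assuming a buf_pool_t* buf_pool is in scope:

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	/* The surrounding #if keeps this check out of non-debug builds. */
	ut_a(buf_flush_validate(buf_pool));
#endif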