Drizzled Public API Documentation

buf0buf.cc
1 /*****************************************************************************
2 
3 Copyright (C) 1995, 2010, Innobase Oy. All Rights Reserved.
4 Copyright (C) 2008, Google Inc.
5 
6 Portions of this file contain modifications contributed and copyrighted by
7 Google, Inc. Those modifications are gratefully acknowledged and are described
8 briefly in the InnoDB documentation. The contributions by Google are
9 incorporated with their permission, and subject to the conditions contained in
10 the file COPYING.Google.
11 
12 This program is free software; you can redistribute it and/or modify it under
13 the terms of the GNU General Public License as published by the Free Software
14 Foundation; version 2 of the License.
15 
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
19 
20 You should have received a copy of the GNU General Public License along with
21 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22 St, Fifth Floor, Boston, MA 02110-1301 USA
23 
24 *****************************************************************************/
25 
26 /**************************************************/
33 #include "buf0buf.h"
34 
35 #ifdef UNIV_NONINL
36 #include "buf0buf.ic"
37 #endif
38 
39 #include "mem0mem.h"
40 #include "btr0btr.h"
41 #include "fil0fil.h"
42 #ifndef UNIV_HOTBACKUP
43 #include "buf0buddy.h"
44 #include "lock0lock.h"
45 #include "btr0sea.h"
46 #include "ibuf0ibuf.h"
47 #include "trx0undo.h"
48 #include "log0log.h"
49 #endif /* !UNIV_HOTBACKUP */
50 #include "srv0srv.h"
51 #include "dict0dict.h"
52 #include "log0recv.h"
53 #include "page0zip.h"
54 
55 #include <drizzled/errmsg_print.h>
56 
57 /*
58  IMPLEMENTATION OF THE BUFFER POOL
59  =================================
60 
61 Performance improvement:
62 ------------------------
63 Thread scheduling in NT may be so slow that the OS wait mechanism should
64 not be used even in waiting for disk reads to complete.
65 Rather, we should put waiting query threads to the queue of
66 waiting jobs, and let the OS thread do something useful while the i/o
67 is processed. In this way we could remove most OS thread switches in
68 an i/o-intensive benchmark like TPC-C.
69 
70 A possibility is to put a user space thread library between the database
71 and NT. User space thread libraries might be very fast.
72 
73 SQL Server 7.0 can be configured to use 'fibers' which are lightweight
74 threads in NT. These should be studied.
75 
76  Buffer frames and blocks
77  ------------------------
78 Following the terminology of Gray and Reuter, we call the memory
79 blocks where file pages are loaded buffer frames. For each buffer
80 frame there is a control block, or shortly, a block, in the buffer
81 control array. The control info which does not need to be stored
82 in the file along with the file page, resides in the control block.
83 
84  Buffer pool struct
85  ------------------
86 The buffer buf_pool contains a single mutex which protects all the
87 control data structures of the buf_pool. The content of a buffer frame is
88 protected by a separate read-write lock in its control block, though.
89 These locks can be locked and unlocked without owning the buf_pool->mutex.
90 The OS events in the buf_pool struct can be waited for without owning the
91 buf_pool->mutex.
92 
93 The buf_pool->mutex is a hot-spot in main memory, causing a lot of
94 memory bus traffic on multiprocessor systems when processors
95 alternately access the mutex. On our Pentium, the mutex is accessed
96 maybe every 10 microseconds. We gave up the solution to have mutexes
97 for each control block, for instance, because it seemed to be
98 complicated.
99 
100 A solution to reduce mutex contention of the buf_pool->mutex is to
101 create a separate mutex for the page hash table. On Pentium,
102 accessing the hash table takes 2 microseconds, about half
103 of the total buf_pool->mutex hold time.
104 
105  Control blocks
106  --------------
107 
108 The control block contains, for instance, the bufferfix count
109 which is incremented when a thread wants a file page to be fixed
110 in a buffer frame. The bufferfix operation does not lock the
111 contents of the frame, however. For this purpose, the control
112 block contains a read-write lock.
113 
114 The buffer frames have to be aligned so that the start memory
115 address of a frame is divisible by the universal page size, which
116 is a power of two.
117 
118 We intend to make the buffer buf_pool size on-line reconfigurable,
119 that is, the buf_pool size can be changed without closing the database.
120 Then the database administarator may adjust it to be bigger
121 at night, for example. The control block array must
122 contain enough control blocks for the maximum buffer buf_pool size
123 which is used in the particular database.
124 If the buf_pool size is cut, we exploit the virtual memory mechanism of
125 the OS, and just refrain from using frames at high addresses. Then the OS
126 can swap them to disk.
127 
128 The control blocks containing file pages are put to a hash table
129 according to the file address of the page.
130 We could speed up the access to an individual page by using
131 "pointer swizzling": we could replace the page references on
132 non-leaf index pages by direct pointers to the page, if it exists
133 in the buf_pool. We could make a separate hash table where we could
134 chain all the page references in non-leaf pages residing in the buf_pool,
135 using the page reference as the hash key,
136 and at the time of reading of a page update the pointers accordingly.
137 Drawbacks of this solution are added complexity and,
138 possibly, extra space required on non-leaf pages for memory pointers.
139 A simpler solution is just to speed up the hash table mechanism
140 in the database, using tables whose size is a power of 2.
141 
142  Lists of blocks
143  ---------------
144 
145 There are several lists of control blocks.
146 
147 The free list (buf_pool->free) contains blocks which are currently not
148 used.
149 
150 The common LRU list contains all the blocks holding a file page
151 except those for which the bufferfix count is non-zero.
152 The pages are in the LRU list roughly in the order of the last
153 access to the page, so that the oldest pages are at the end of the
154 list. We also keep a pointer to near the end of the LRU list,
155 which we can use when we want to artificially age a page in the
156 buf_pool. This is used if we know that some page is not needed
157 again for some time: we insert the block right after the pointer,
158 causing it to be replaced sooner than would normally be the case.
159 Currently this aging mechanism is used for read-ahead mechanism
160 of pages, and it can also be used when there is a scan of a full
161 table which cannot fit in the memory. Putting the pages near the
162 end of the LRU list, we make sure that most of the buf_pool stays
163 in the main memory, undisturbed.
164 
165 The unzip_LRU list contains a subset of the common LRU list. The
166 blocks on the unzip_LRU list hold a compressed file page and the
167 corresponding uncompressed page frame. A block is in unzip_LRU if and
168 only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
169 holds. The blocks in unzip_LRU will be in same order as they are in
170 the common LRU list. That is, each manipulation of the common LRU
171 list will result in the same manipulation of the unzip_LRU list.
172 
173 The chain of modified blocks (buf_pool->flush_list) contains the blocks
174 holding file pages that have been modified in the memory
175 but not written to disk yet. The block with the oldest modification
176 which has not yet been written to disk is at the end of the chain.
177 The access to this list is protected by buf_pool->flush_list_mutex.
178 
179 The chain of unmodified compressed blocks (buf_pool->zip_clean)
180 contains the control blocks (buf_page_t) of those compressed pages
181 that are not in buf_pool->flush_list and for which no uncompressed
182 page has been allocated in the buffer pool. The control blocks for
183 uncompressed pages are accessible via buf_block_t objects that are
184 reachable via buf_pool->chunks[].
185 
186 The chains of free memory blocks (buf_pool->zip_free[]) are used by
187 the buddy allocator (buf0buddy.c) to keep track of currently unused
188 memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2. These
189 blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
190 BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
191 pool. The buddy allocator is solely used for allocating control
192 blocks for compressed pages (buf_page_t) and compressed page frames.
193 
194  Loading a file page
195  -------------------
196 
197 First, a victim block for replacement has to be found in the
198 buf_pool. It is taken from the free list or searched for from the
199 end of the LRU-list. An exclusive lock is reserved for the frame,
200 the io_fix field is set in the block fixing the block in buf_pool,
201 and the io-operation for loading the page is queued. The io-handler thread
202 releases the X-lock on the frame and resets the io_fix field
203 when the io operation completes.
204 
205 A thread may request the above operation using the function
206 buf_page_get(). It may then continue to request a lock on the frame.
207 The lock is granted when the io-handler releases the x-lock.
208 
209  Read-ahead
210  ----------
211 
212 The read-ahead mechanism is intended to be intelligent and
213 isolated from the semantically higher levels of the database
214 index management. From the higher level we only need the
215 information if a file page has a natural successor or
216 predecessor page. On the leaf level of a B-tree index,
217 these are the next and previous pages in the natural
218 order of the pages.
219 
220 Let us first explain the read-ahead mechanism when the leafs
221 of a B-tree are scanned in an ascending or descending order.
222 When a read page is the first time referenced in the buf_pool,
223 the buffer manager checks if it is at the border of a so-called
224 linear read-ahead area. The tablespace is divided into these
225 areas of size 64 blocks, for example. So if the page is at the
226 border of such an area, the read-ahead mechanism checks if
227 all the other blocks in the area have been accessed in an
228 ascending or descending order. If this is the case, the system
229 looks at the natural successor or predecessor of the page,
230 checks if that is at the border of another area, and in this case
231 issues read-requests for all the pages in that area. Maybe
232 we could relax the condition that all the pages in the area
233 have to be accessed: if data is deleted from a table, there may
234 appear holes of unused pages in the area.
235 
236 A different read-ahead mechanism is used when there appears
237 to be a random access pattern to a file.
238 If a new page is referenced in the buf_pool, and several pages
239 of its random access area (for instance, 32 consecutive pages
240 in a tablespace) have recently been referenced, we may predict
241 that the whole area may be needed in the near future, and issue
242 the read requests for the whole area.
243 */
244 
245 #ifndef UNIV_HOTBACKUP
246 
247 static const int WAIT_FOR_READ = 5000;
249 static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
250 
252 UNIV_INTERN buf_pool_t* buf_pool_ptr;
253 
254 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
255 static ulint buf_dbg_counter = 0;
258 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
259 #ifdef UNIV_DEBUG
260 
262 UNIV_INTERN ibool buf_debug_prints = FALSE;
263 #endif /* UNIV_DEBUG */
264 
265 #ifdef UNIV_PFS_RWLOCK
266 /* Keys to register buffer block related rwlocks and mutexes with
267 performance schema */
268 UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
269 # ifdef UNIV_SYNC_DEBUG
270 UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
271 # endif /* UNIV_SYNC_DEBUG */
272 #endif /* UNIV_PFS_RWLOCK */
273 
274 #ifdef UNIV_PFS_MUTEX
275 UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
276 UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
277 UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
278 UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
279 #endif /* UNIV_PFS_MUTEX */
280 
281 #if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
282 # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
283 
284 /* Buffer block mutexes and rwlocks can be registered
285 in one group rather than individually. If PFS_GROUP_BUFFER_SYNC
286 is defined, register buffer block mutex and rwlock
287 in one group after their initialization. */
288 # define PFS_GROUP_BUFFER_SYNC
289 
290 /* This define caps the number of mutexes/rwlocks can
291 be registered with performance schema. Developers can
292 modify this define if necessary. Please note, this would
293 be effective only if PFS_GROUP_BUFFER_SYNC is defined. */
294 # define PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER ULINT_MAX
295 
296 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
297 #endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
298 
301  ulint mem_size;
302  ulint size;
303  void* mem;
306 };
307 #endif /* !UNIV_HOTBACKUP */
308 
309 /********************************************************************/
313 UNIV_INTERN
314 ib_uint64_t
315 buf_pool_get_oldest_modification(void)
316 /*==================================*/
317 {
318  ulint i;
319  buf_page_t* bpage;
320  ib_uint64_t lsn = 0;
321  ib_uint64_t oldest_lsn = 0;
322 
323  /* When we traverse all the flush lists we don't want another
324  thread to add a dirty page to any flush list. */
325  if (srv_buf_pool_instances > 1)
327 
328  for (i = 0; i < srv_buf_pool_instances; i++) {
329  buf_pool_t* buf_pool;
330 
331  buf_pool = buf_pool_from_array(i);
332 
333  buf_flush_list_mutex_enter(buf_pool);
334 
335  bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
336 
337  if (bpage != NULL) {
338  ut_ad(bpage->in_flush_list);
339  lsn = bpage->oldest_modification;
340  }
341 
342  buf_flush_list_mutex_exit(buf_pool);
343 
344  if (!oldest_lsn || oldest_lsn > lsn) {
345  oldest_lsn = lsn;
346  }
347  }
348 
350 
351  /* The returned answer may be out of date: the flush_list can
352  change after the mutex has been released. */
353 
354  return(oldest_lsn);
355 }
356 
357 /********************************************************************/
359 UNIV_INTERN
360 void
361 buf_get_total_list_len(
362 /*===================*/
363  ulint* LRU_len,
364  ulint* free_len,
365  ulint* flush_list_len)
366 {
367  ulint i;
368 
369  *LRU_len = 0;
370  *free_len = 0;
371  *flush_list_len = 0;
372 
373  for (i = 0; i < srv_buf_pool_instances; i++) {
374  buf_pool_t* buf_pool;
375 
376  buf_pool = buf_pool_from_array(i);
377  *LRU_len += UT_LIST_GET_LEN(buf_pool->LRU);
378  *free_len += UT_LIST_GET_LEN(buf_pool->free);
379  *flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list);
380  }
381 }
382 
383 /********************************************************************/
385 UNIV_INTERN
386 void
387 buf_get_total_stat(
388 /*===============*/
389  buf_pool_stat_t* tot_stat)
390 {
391  ulint i;
392 
393  memset(tot_stat, 0, sizeof(*tot_stat));
394 
395  for (i = 0; i < srv_buf_pool_instances; i++) {
396  buf_pool_stat_t*buf_stat;
397  buf_pool_t* buf_pool;
398 
399  buf_pool = buf_pool_from_array(i);
400 
401  buf_stat = &buf_pool->stat;
402  tot_stat->n_page_gets += buf_stat->n_page_gets;
403  tot_stat->n_pages_read += buf_stat->n_pages_read;
404  tot_stat->n_pages_written += buf_stat->n_pages_written;
405  tot_stat->n_pages_created += buf_stat->n_pages_created;
406  tot_stat->n_ra_pages_read += buf_stat->n_ra_pages_read;
407  tot_stat->n_ra_pages_evicted += buf_stat->n_ra_pages_evicted;
408  tot_stat->n_pages_made_young += buf_stat->n_pages_made_young;
409 
410  tot_stat->n_pages_not_made_young +=
411  buf_stat->n_pages_not_made_young;
412  }
413 }
414 
415 /********************************************************************/
418 UNIV_INTERN
420 buf_block_alloc(
421 /*============*/
422  buf_pool_t* buf_pool)
425 {
426  buf_block_t* block;
427  ulint index;
428  static ulint buf_pool_index;
429 
430  if (buf_pool == NULL) {
431  /* We are allocating memory from any buffer pool, ensure
432  we spread the grace on all buffer pool instances. */
433  index = buf_pool_index++ % srv_buf_pool_instances;
434  buf_pool = buf_pool_from_array(index);
435  }
436 
437  block = buf_LRU_get_free_block(buf_pool);
438 
440 
441  return(block);
442 }
443 
444 /********************************************************************/
449 UNIV_INTERN
450 ulint
451 buf_calc_page_new_checksum(
452 /*=======================*/
453  const byte* page)
454 {
455  ulint checksum;
456 
457  /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
458  ..._ARCH_LOG_NO, are written outside the buffer pool to the first
459  pages of data files, we have to skip them in the page checksum
460  calculation.
461  We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
462  checksum is stored, and also the last 8 bytes of page because
463  there we store the old formula checksum. */
464 
465  checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
467  + ut_fold_binary(page + FIL_PAGE_DATA,
468  UNIV_PAGE_SIZE - FIL_PAGE_DATA
470  checksum = checksum & 0xFFFFFFFFUL;
471 
472  return(checksum);
473 }
474 
475 /********************************************************************/
483 UNIV_INTERN
484 ulint
485 buf_calc_page_old_checksum(
486 /*=======================*/
487  const byte* page)
488 {
489  ulint checksum;
490 
491  checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
492 
493  checksum = checksum & 0xFFFFFFFFUL;
494 
495  return(checksum);
496 }
497 
498 /********************************************************************/
501 UNIV_INTERN
502 ibool
503 buf_page_is_corrupted(
504 /*==================*/
505  const byte* read_buf,
506  ulint zip_size)
508 {
509  ulint checksum_field;
510  ulint old_checksum_field;
511 
512  if (UNIV_LIKELY(!zip_size)
513  && memcmp(read_buf + FIL_PAGE_LSN + 4,
514  read_buf + UNIV_PAGE_SIZE
515  - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
516 
517  /* Stored log sequence numbers at the start and the end
518  of page do not match */
519 
520  return(TRUE);
521  }
522 
523 #ifndef UNIV_HOTBACKUP
524  if (recv_lsn_checks_on) {
525  ib_uint64_t current_lsn;
526 
527  if (log_peek_lsn(&current_lsn)
528  && UNIV_UNLIKELY
529  (current_lsn
530  < mach_read_from_8(read_buf + FIL_PAGE_LSN))) {
531  ut_print_timestamp(stderr);
532 
533  drizzled::errmsg_printf(drizzled::error::INFO,
534  "InnoDB: Error: page %lu log sequence number %"PRIu64". "
535  "InnoDB: is in the future! Current system log sequence number %"PRIu64". "
536  "Your database may be corrupt or you may have copied the InnoDB tablespace but not the InnoDB log files. See "
537  " " REFMAN "forcing-innodb-recovery.html for more information. ",
538  (ulong) mach_read_from_4(read_buf
539  + FIL_PAGE_OFFSET),
540  mach_read_from_8(read_buf + FIL_PAGE_LSN),
541  current_lsn);
542  }
543  }
544 #endif
545 
546  /* If we use checksums validation, make additional check before
547  returning TRUE to ensure that the checksum is not equal to
548  BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
549  disabled. Otherwise, skip checksum calculation and return FALSE */
550 
551  if (UNIV_LIKELY(srv_use_checksums)) {
552  checksum_field = mach_read_from_4(read_buf
554 
555  if (UNIV_UNLIKELY(zip_size)) {
556  return(checksum_field != BUF_NO_CHECKSUM_MAGIC
557  && checksum_field
558  != page_zip_calc_checksum(read_buf, zip_size));
559  }
560 
561  old_checksum_field = mach_read_from_4(
562  read_buf + UNIV_PAGE_SIZE
564 
565  /* There are 2 valid formulas for old_checksum_field:
566 
567  1. Very old versions of InnoDB only stored 8 byte lsn to the
568  start and the end of the page.
569 
570  2. Newer InnoDB versions store the old formula checksum
571  there. */
572 
573  if (old_checksum_field != mach_read_from_4(read_buf
574  + FIL_PAGE_LSN)
575  && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
576  && old_checksum_field
577  != buf_calc_page_old_checksum(read_buf)) {
578 
579  return(TRUE);
580  }
581 
582  /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
583  (always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */
584 
585  if (checksum_field != 0
586  && checksum_field != BUF_NO_CHECKSUM_MAGIC
587  && checksum_field
588  != buf_calc_page_new_checksum(read_buf)) {
589 
590  return(TRUE);
591  }
592  }
593 
594  return(FALSE);
595 }
596 
597 /********************************************************************/
599 UNIV_INTERN
600 void
601 buf_page_print(
602 /*===========*/
603  const byte* read_buf,
604  ulint zip_size)
606 {
607 #ifndef UNIV_HOTBACKUP
608  dict_index_t* index;
609 #endif /* !UNIV_HOTBACKUP */
610  ulint checksum;
611  ulint old_checksum;
612  ulint size = zip_size;
613 
614  if (!size) {
615  size = UNIV_PAGE_SIZE;
616  }
617 
618  ut_print_timestamp(stderr);
619  fprintf(stderr, " InnoDB: Page dump in ascii and hex (%lu bytes):\n",
620  (ulong) size);
621  ut_print_buf(stderr, read_buf, size);
622  fputs("\nInnoDB: End of page dump\n", stderr);
623 
624  if (zip_size) {
625  /* Print compressed page. */
626 
627  switch (fil_page_get_type(read_buf)) {
628  case FIL_PAGE_TYPE_ZBLOB:
630  checksum = srv_use_checksums
631  ? page_zip_calc_checksum(read_buf, zip_size)
633  ut_print_timestamp(stderr);
634  fprintf(stderr,
635  " InnoDB: Compressed BLOB page"
636  " checksum %lu, stored %lu\n"
637  "InnoDB: Page lsn %lu %lu\n"
638  "InnoDB: Page number (if stored"
639  " to page already) %lu,\n"
640  "InnoDB: space id (if stored"
641  " to page already) %lu\n",
642  (ulong) checksum,
643  (ulong) mach_read_from_4(
644  read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
645  (ulong) mach_read_from_4(
646  read_buf + FIL_PAGE_LSN),
647  (ulong) mach_read_from_4(
648  read_buf + (FIL_PAGE_LSN + 4)),
649  (ulong) mach_read_from_4(
650  read_buf + FIL_PAGE_OFFSET),
651  (ulong) mach_read_from_4(
652  read_buf
654  return;
655  default:
656  ut_print_timestamp(stderr);
657  fprintf(stderr,
658  " InnoDB: unknown page type %lu,"
659  " assuming FIL_PAGE_INDEX\n",
660  fil_page_get_type(read_buf));
661  /* fall through */
662  case FIL_PAGE_INDEX:
663  checksum = srv_use_checksums
664  ? page_zip_calc_checksum(read_buf, zip_size)
666 
667  ut_print_timestamp(stderr);
668  fprintf(stderr,
669  " InnoDB: Compressed page checksum %lu,"
670  " stored %lu\n"
671  "InnoDB: Page lsn %lu %lu\n"
672  "InnoDB: Page number (if stored"
673  " to page already) %lu,\n"
674  "InnoDB: space id (if stored"
675  " to page already) %lu\n",
676  (ulong) checksum,
677  (ulong) mach_read_from_4(
678  read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
679  (ulong) mach_read_from_4(
680  read_buf + FIL_PAGE_LSN),
681  (ulong) mach_read_from_4(
682  read_buf + (FIL_PAGE_LSN + 4)),
683  (ulong) mach_read_from_4(
684  read_buf + FIL_PAGE_OFFSET),
685  (ulong) mach_read_from_4(
686  read_buf
688  return;
689  case FIL_PAGE_TYPE_XDES:
690  /* This is an uncompressed page. */
691  break;
692  }
693  }
694 
695  checksum = srv_use_checksums
696  ? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
697  old_checksum = srv_use_checksums
698  ? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
699 
700  ut_print_timestamp(stderr);
701  fprintf(stderr,
702  " InnoDB: Page checksum %lu, prior-to-4.0.14-form"
703  " checksum %lu\n"
704  "InnoDB: stored checksum %lu, prior-to-4.0.14-form"
705  " stored checksum %lu\n"
706  "InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
707  " at page end %lu\n"
708  "InnoDB: Page number (if stored to page already) %lu,\n"
709  "InnoDB: space id (if created with >= MySQL-4.1.1"
710  " and stored already) %lu\n",
711  (ulong) checksum, (ulong) old_checksum,
712  (ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
713  (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
715  (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
716  (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
717  (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
719  (ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
720  (ulong) mach_read_from_4(read_buf
722 
723 #ifndef UNIV_HOTBACKUP
725  == TRX_UNDO_INSERT) {
726  fprintf(stderr,
727  "InnoDB: Page may be an insert undo log page\n");
728  } else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
730  == TRX_UNDO_UPDATE) {
731  fprintf(stderr,
732  "InnoDB: Page may be an update undo log page\n");
733  }
734 #endif /* !UNIV_HOTBACKUP */
735 
736  switch (fil_page_get_type(read_buf)) {
737  index_id_t index_id;
738  case FIL_PAGE_INDEX:
739  index_id = btr_page_get_index_id(read_buf);
740  fprintf(stderr,
741  "InnoDB: Page may be an index page where"
742  " index id is %llu\n",
743  (ullint) index_id);
744 #ifndef UNIV_HOTBACKUP
745  index = dict_index_find_on_id_low(index_id);
746  if (index) {
747  fputs("InnoDB: (", stderr);
748  dict_index_name_print(stderr, NULL, index);
749  fputs(")\n", stderr);
750  }
751 #endif /* !UNIV_HOTBACKUP */
752  break;
753  case FIL_PAGE_INODE:
754  fputs("InnoDB: Page may be an 'inode' page\n", stderr);
755  break;
757  fputs("InnoDB: Page may be an insert buffer free list page\n",
758  stderr);
759  break;
761  fputs("InnoDB: Page may be a freshly allocated page\n",
762  stderr);
763  break;
765  fputs("InnoDB: Page may be an insert buffer bitmap page\n",
766  stderr);
767  break;
768  case FIL_PAGE_TYPE_SYS:
769  fputs("InnoDB: Page may be a system page\n",
770  stderr);
771  break;
773  fputs("InnoDB: Page may be a transaction system page\n",
774  stderr);
775  break;
777  fputs("InnoDB: Page may be a file space header page\n",
778  stderr);
779  break;
780  case FIL_PAGE_TYPE_XDES:
781  fputs("InnoDB: Page may be an extent descriptor page\n",
782  stderr);
783  break;
784  case FIL_PAGE_TYPE_BLOB:
785  fputs("InnoDB: Page may be a BLOB page\n",
786  stderr);
787  break;
788  case FIL_PAGE_TYPE_ZBLOB:
790  fputs("InnoDB: Page may be a compressed BLOB page\n",
791  stderr);
792  break;
793  }
794 }
795 
796 #ifndef UNIV_HOTBACKUP
797 
798 # ifdef PFS_GROUP_BUFFER_SYNC
799 /********************************************************************/
805 static
806 void
807 pfs_register_buffer_block(
808 /*======================*/
809  buf_chunk_t* chunk)
810 {
811  ulint i;
812  ulint num_to_register;
813  buf_block_t* block;
814 
815  block = chunk->blocks;
816 
817  num_to_register = ut_min(chunk->size,
818  PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);
819 
820  for (i = 0; i < num_to_register; i++) {
821  mutex_t* mutex;
822  rw_lock_t* rwlock;
823 
824 # ifdef UNIV_PFS_MUTEX
825  mutex = &block->mutex;
826  ut_a(!mutex->pfs_psi);
827  mutex->pfs_psi = (PSI_server)
828  ? PSI_server->init_mutex(buffer_block_mutex_key, mutex)
829  : NULL;
830 # endif /* UNIV_PFS_MUTEX */
831 
832 # ifdef UNIV_PFS_RWLOCK
833  rwlock = &block->lock;
834  ut_a(!rwlock->pfs_psi);
835  rwlock->pfs_psi = (PSI_server)
836  ? PSI_server->init_rwlock(buf_block_lock_key, rwlock)
837  : NULL;
838 # endif /* UNIV_PFS_RWLOCK */
839  block++;
840  }
841 }
842 # endif /* PFS_GROUP_BUFFER_SYNC */
843 
844 /********************************************************************/
846 static
847 void
848 buf_block_init(
849 /*===========*/
850  buf_pool_t* buf_pool,
851  buf_block_t* block,
852  byte* frame)
853 {
854  UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);
855 
856  block->frame = frame;
857 
858  block->page.buf_pool_index = buf_pool_index(buf_pool);
859  block->page.state = BUF_BLOCK_NOT_USED;
860  block->page.buf_fix_count = 0;
861  block->page.io_fix = BUF_IO_NONE;
862 
863  block->modify_clock = 0;
864 
865 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
866  block->page.file_page_was_freed = FALSE;
867 #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
868 
869  block->check_index_page_at_flush = FALSE;
870  block->index = NULL;
871 
872  block->is_hashed = FALSE;
873 
874 #ifdef UNIV_DEBUG
875  block->page.in_page_hash = FALSE;
876  block->page.in_zip_hash = FALSE;
877  block->page.in_flush_list = FALSE;
878  block->page.in_free_list = FALSE;
879  block->page.in_LRU_list = FALSE;
880  block->in_unzip_LRU_list = FALSE;
881 #endif /* UNIV_DEBUG */
882 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
883  block->n_pointers = 0;
884 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
885  page_zip_des_init(&block->page.zip);
886 
887 #if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
888  /* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
889  of buffer block mutex/rwlock with performance schema. If
890  PFS_GROUP_BUFFER_SYNC is defined, skip the registration
891  since buffer block mutex/rwlock will be registered later in
892  pfs_register_buffer_block() */
893 
894  mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
895  rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
896 #else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
897  mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
898  rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
899 #endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
900 
901  ut_ad(rw_lock_validate(&(block->lock)));
902 
903 #ifdef UNIV_SYNC_DEBUG
904  rw_lock_create(buf_block_debug_latch_key,
905  &block->debug_latch, SYNC_NO_ORDER_CHECK);
906 #endif /* UNIV_SYNC_DEBUG */
907 }
908 
909 /********************************************************************/
912 static
914 buf_chunk_init(
915 /*===========*/
916  buf_pool_t* buf_pool,
917  buf_chunk_t* chunk,
918  ulint mem_size)
919 {
920  buf_block_t* block;
921  byte* frame;
922  ulint i;
923 
924  /* Round down to a multiple of page size,
925  although it already should be. */
926  mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
927  /* Reserve space for the block descriptors. */
928  mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
929  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
930 
931  chunk->mem_size = mem_size;
932  chunk->mem = os_mem_alloc_large(&chunk->mem_size);
933 
934  if (UNIV_UNLIKELY(chunk->mem == NULL)) {
935 
936  return(NULL);
937  }
938 
939  /* Allocate the block descriptors from
940  the start of the memory block. */
941  chunk->blocks = static_cast<buf_block_struct *>(chunk->mem);
942 
943  /* Align a pointer to the first frame. Note that when
944  os_large_page_size is smaller than UNIV_PAGE_SIZE,
945  we may allocate one fewer block than requested. When
946  it is bigger, we may allocate more blocks than requested. */
947 
948  frame = static_cast<unsigned char *>(ut_align(chunk->mem, UNIV_PAGE_SIZE));
949  chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
950  - (frame != chunk->mem);
951 
952  /* Subtract the space needed for block descriptors. */
953  {
954  ulint size = chunk->size;
955 
956  while (frame < (byte*) (chunk->blocks + size)) {
957  frame += UNIV_PAGE_SIZE;
958  size--;
959  }
960 
961  chunk->size = size;
962  }
963 
964  /* Init block structs and assign frames for them. Then we
965  assign the frames to the first blocks (we already mapped the
966  memory above). */
967 
968  block = chunk->blocks;
969 
970  for (i = chunk->size; i--; ) {
971 
972  buf_block_init(buf_pool, block, frame);
973 
974 #ifdef HAVE_VALGRIND
975  /* Wipe contents of frame to eliminate a Purify warning */
976  memset(block->frame, '\0', UNIV_PAGE_SIZE);
977 #endif
978  /* Add the block to the free list */
979  UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
980 
981  ut_d(block->page.in_free_list = TRUE);
982  ut_ad(buf_pool_from_block(block) == buf_pool);
983 
984  block++;
985  frame += UNIV_PAGE_SIZE;
986  }
987 
988 #ifdef PFS_GROUP_BUFFER_SYNC
989  pfs_register_buffer_block(chunk);
990 #endif
991  return(chunk);
992 }
993 
994 #ifdef UNIV_DEBUG
995 /*********************************************************************/
999 static
1000 buf_block_t*
1001 buf_chunk_contains_zip(
1002 /*===================*/
1003  buf_chunk_t* chunk,
1004  const void* data)
1005 {
1006  buf_block_t* block;
1007  ulint i;
1008 
1009  block = chunk->blocks;
1010 
1011  for (i = chunk->size; i--; block++) {
1012  if (block->page.zip.data == data) {
1013 
1014  return(block);
1015  }
1016  }
1017 
1018  return(NULL);
1019 }
1020 
1021 /*********************************************************************/
1025 UNIV_INTERN
1026 buf_block_t*
1027 buf_pool_contains_zip(
1028 /*==================*/
1029  buf_pool_t* buf_pool,
1030  const void* data)
1031 {
1032  ulint n;
1033  buf_chunk_t* chunk = buf_pool->chunks;
1034 
1035  ut_ad(buf_pool);
1036  ut_ad(buf_pool_mutex_own(buf_pool));
1037  for (n = buf_pool->n_chunks; n--; chunk++) {
1038 
1039  buf_block_t* block = buf_chunk_contains_zip(chunk, data);
1040 
1041  if (block) {
1042  return(block);
1043  }
1044  }
1045 
1046  return(NULL);
1047 }
1048 #endif /* UNIV_DEBUG */
1049 
1050 /*********************************************************************/
1053 static
1054 const buf_block_t*
1055 buf_chunk_not_freed(
1056 /*================*/
1057  buf_chunk_t* chunk)
1058 {
1059  buf_block_t* block;
1060  ulint i;
1061 
1062  block = chunk->blocks;
1063 
1064  for (i = chunk->size; i--; block++) {
1065  ibool ready;
1066 
1067  switch (buf_block_get_state(block)) {
1068  case BUF_BLOCK_ZIP_FREE:
1069  case BUF_BLOCK_ZIP_PAGE:
1070  case BUF_BLOCK_ZIP_DIRTY:
1071  /* The uncompressed buffer pool should never
1072  contain compressed block descriptors. */
1073  ut_error;
1074  break;
1075  case BUF_BLOCK_NOT_USED:
1077  case BUF_BLOCK_MEMORY:
1078  case BUF_BLOCK_REMOVE_HASH:
1079  /* Skip blocks that are not being used for
1080  file pages. */
1081  break;
1082  case BUF_BLOCK_FILE_PAGE:
1083  mutex_enter(&block->mutex);
1084  ready = buf_flush_ready_for_replace(&block->page);
1085  mutex_exit(&block->mutex);
1086 
1087  if (!ready) {
1088 
1089  return(block);
1090  }
1091 
1092  break;
1093  }
1094  }
1095 
1096  return(NULL);
1097 }
1098 
1099 /*********************************************************************/
1102 static
1103 ibool
1104 buf_chunk_all_free(
1105 /*===============*/
1106  const buf_chunk_t* chunk)
1107 {
1108  const buf_block_t* block;
1109  ulint i;
1110 
1111  block = chunk->blocks;
1112 
1113  for (i = chunk->size; i--; block++) {
1114 
1115  if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {
1116 
1117  return(FALSE);
1118  }
1119  }
1120 
1121  return(TRUE);
1122 }
1123 
1124 /********************************************************************/
1126 static
1127 void
1128 buf_chunk_free(
1129 /*===========*/
1130  buf_pool_t* buf_pool,
1131  buf_chunk_t* chunk)
1132 {
1133  buf_block_t* block;
1134  const buf_block_t* block_end;
1135 
1136  ut_ad(buf_pool_mutex_own(buf_pool));
1137 
1138  block_end = chunk->blocks + chunk->size;
1139 
1140  for (block = chunk->blocks; block < block_end; block++) {
1142  ut_a(!block->page.zip.data);
1143 
1144  ut_ad(!block->page.in_LRU_list);
1145  ut_ad(!block->in_unzip_LRU_list);
1146  ut_ad(!block->page.in_flush_list);
1147  /* Remove the block from the free list. */
1148  ut_ad(block->page.in_free_list);
1149  UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
1150 
1151  /* Free the latches. */
1152  mutex_free(&block->mutex);
1153  rw_lock_free(&block->lock);
1154 #ifdef UNIV_SYNC_DEBUG
1155  rw_lock_free(&block->debug_latch);
1156 #endif /* UNIV_SYNC_DEBUG */
1157  UNIV_MEM_UNDESC(block);
1158  }
1159 
1160  os_mem_free_large(chunk->mem, chunk->mem_size);
1161 }
1162 
1163 /********************************************************************/
1165 static
1166 void
1167 buf_pool_set_sizes(void)
1168 /*====================*/
1169 {
1170  ulint i;
1171  ulint curr_size = 0;
1172 
1174 
1175  for (i = 0; i < srv_buf_pool_instances; i++) {
1176  buf_pool_t* buf_pool;
1177 
1178  buf_pool = buf_pool_from_array(i);
1179  curr_size += buf_pool->curr_pool_size;
1180  }
1181 
1182  srv_buf_pool_curr_size = curr_size;
1184 
1186 }
1187 
1188 /********************************************************************/
1191 static
1192 ulint
1193 buf_pool_init_instance(
1194 /*===================*/
1195  buf_pool_t* buf_pool,
1196  ulint buf_pool_size,
1197  ulint instance_no)
1198 {
1199  ulint i;
1200  buf_chunk_t* chunk;
1201 
1202  /* 1. Initialize general fields
1203  ------------------------------- */
1204  mutex_create(buf_pool_mutex_key,
1205  &buf_pool->mutex, SYNC_BUF_POOL);
1206  mutex_create(buf_pool_zip_mutex_key,
1207  &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
1208 
1209  buf_pool_mutex_enter(buf_pool);
1210 
1211  if (buf_pool_size > 0) {
1212  buf_pool->n_chunks = 1;
1213  void *chunk_ptr= mem_zalloc((sizeof *chunk));
1214  buf_pool->chunks = chunk = static_cast<buf_chunk_t *>(chunk_ptr);
1215 
1216  UT_LIST_INIT(buf_pool->free);
1217 
1218  if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) {
1219  mem_free(chunk);
1220  mem_free(buf_pool);
1221 
1222  buf_pool_mutex_exit(buf_pool);
1223 
1224  return(DB_ERROR);
1225  }
1226 
1227  buf_pool->instance_no = instance_no;
1228  buf_pool->old_pool_size = buf_pool_size;
1229  buf_pool->curr_size = chunk->size;
1230  buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1231 
1232  buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
1233  buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
1234 
1235  buf_pool->last_printout_time = ut_time();
1236  }
1237  /* 2. Initialize flushing fields
1238  -------------------------------- */
1239 
1240  mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex,
1241  SYNC_BUF_FLUSH_LIST);
1242 
1243  for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
1244  buf_pool->no_flush[i] = os_event_create(NULL);
1245  }
1246 
1247  /* 3. Initialize LRU fields
1248  --------------------------- */
1249 
1250  /* All fields are initialized by mem_zalloc(). */
1251 
1252  buf_pool_mutex_exit(buf_pool);
1253 
1254  return(DB_SUCCESS);
1255 }
1256 
1257 /********************************************************************/
1259 static
1260 void
1261 buf_pool_free_instance(
1262 /*===================*/
1263  buf_pool_t* buf_pool) /* in,own: buffer pool instance
1264  to free */
1265 {
1266  buf_chunk_t* chunk;
1267  buf_chunk_t* chunks;
1268 
1269  chunks = buf_pool->chunks;
1270  chunk = chunks + buf_pool->n_chunks;
1271 
1272  while (--chunk >= chunks) {
1273  /* Bypass the checks of buf_chunk_free(), since they
1274  would fail at shutdown. */
1275  os_mem_free_large(chunk->mem, chunk->mem_size);
1276  }
1277 
1278  mem_free(buf_pool->chunks);
1279  hash_table_free(buf_pool->page_hash);
1280  hash_table_free(buf_pool->zip_hash);
1281 }
1282 
1283 /********************************************************************/
1286 UNIV_INTERN
1287 ulint
1288 buf_pool_init(
1289 /*==========*/
1290  ulint total_size,
1291  ulint n_instances)
1292 {
1293  ulint i;
1294  const ulint size = total_size / n_instances;
1295 
1296  ut_ad(n_instances > 0);
1297  ut_ad(n_instances <= MAX_BUFFER_POOLS);
1298  ut_ad(n_instances == srv_buf_pool_instances);
1299 
1300  /* We create an extra buffer pool instance, this instance is used
1301  for flushing the flush lists, to keep track of n_flush for all
1302  the buffer pools and also used as a waiting object during flushing. */
1303  void *buf_pool_void_ptr= mem_zalloc(n_instances * sizeof *buf_pool_ptr);
1304  buf_pool_ptr = static_cast<buf_pool_struct *>(buf_pool_void_ptr);
1305 
1306  for (i = 0; i < n_instances; i++) {
1307  buf_pool_t* ptr = &buf_pool_ptr[i];
1308 
1309  if (buf_pool_init_instance(ptr, size, i) != DB_SUCCESS) {
1310 
1311  /* Free all the instances created so far. */
1312  buf_pool_free(i);
1313 
1314  return(DB_ERROR);
1315  }
1316  }
1317 
1318  buf_pool_set_sizes();
1319  buf_LRU_old_ratio_update(100 * 3/ 8, FALSE);
1320 
1321  btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);
1322 
1323  return(DB_SUCCESS);
1324 }
1325 
1326 /********************************************************************/
1329 UNIV_INTERN
1330 void
1331 buf_pool_free(
1332 /*==========*/
1333  ulint n_instances)
1334 {
1335  ulint i;
1336 
1337  for (i = 0; i < n_instances; i++) {
1338  buf_pool_free_instance(buf_pool_from_array(i));
1339  }
1340 
1341  mem_free(buf_pool_ptr);
1342  buf_pool_ptr = NULL;
1343 }
1344 
1345 /********************************************************************/
1347 static
1348 void
1349 buf_pool_drop_hash_index_instance(
1350 /*==============================*/
1351  buf_pool_t* buf_pool,
1352  ibool* released_search_latch)
1355 {
1356  buf_chunk_t* chunks = buf_pool->chunks;
1357  buf_chunk_t* chunk = chunks + buf_pool->n_chunks;
1358 
1359  while (--chunk >= chunks) {
1360  ulint i;
1361  buf_block_t* block = chunk->blocks;
1362 
1363  for (i = chunk->size; i--; block++) {
1364  /* block->is_hashed cannot be modified
1365  when we have an x-latch on btr_search_latch;
1366  see the comment in buf0buf.h */
1367 
1368  if (!block->is_hashed) {
1369  continue;
1370  }
1371 
1372  /* To follow the latching order, we
1373  have to release btr_search_latch
1374  before acquiring block->latch. */
1375  rw_lock_x_unlock(&btr_search_latch);
1376  /* When we release the search latch,
1377  we must rescan all blocks, because
1378  some may become hashed again. */
1379  *released_search_latch = TRUE;
1380 
1381  rw_lock_x_lock(&block->lock);
1382 
1383  /* This should be guaranteed by the
1384  callers, which will be holding
1385  btr_search_enabled_mutex. */
1386  ut_ad(!btr_search_enabled);
1387 
1388  /* Because we did not buffer-fix the
1389  block by calling buf_block_get_gen(),
1390  it is possible that the block has been
1391  allocated for some other use after
1392  btr_search_latch was released above.
1393  We do not care which file page the
1394  block is mapped to. All we want to do
1395  is to drop any hash entries referring
1396  to the page. */
1397 
1398  /* It is possible that
1399  block->page.state != BUF_FILE_PAGE.
1400  Even that does not matter, because
1401  btr_search_drop_page_hash_index() will
1402  check block->is_hashed before doing
1403  anything. block->is_hashed can only
1404  be set on uncompressed file pages. */
1405 
1406  btr_search_drop_page_hash_index(block);
1407 
1408  rw_lock_x_unlock(&block->lock);
1409 
1410  rw_lock_x_lock(&btr_search_latch);
1411 
1412  ut_ad(!btr_search_enabled);
1413  }
1414  }
1415 }
1416 
1417 /********************************************************************/
1421 UNIV_INTERN
1422 void
1423 buf_pool_drop_hash_index(void)
1424 /*==========================*/
1425 {
1426  ibool released_search_latch;
1427 
1428 #ifdef UNIV_SYNC_DEBUG
1429  ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
1430 #endif /* UNIV_SYNC_DEBUG */
1431  ut_ad(!btr_search_enabled);
1432 
1433  do {
1434  ulint i;
1435 
1436  released_search_latch = FALSE;
1437 
1438  for (i = 0; i < srv_buf_pool_instances; i++) {
1439  buf_pool_t* buf_pool;
1440 
1441  buf_pool = buf_pool_from_array(i);
1442 
1443  buf_pool_drop_hash_index_instance(
1444  buf_pool, &released_search_latch);
1445  }
1446 
1447  } while (released_search_latch);
1448 }
1449 
1450 /********************************************************************/
1454 UNIV_INTERN
1455 void
1456 buf_relocate(
1457 /*=========*/
1458  buf_page_t* bpage,
1461  buf_page_t* dpage)
1462 {
1463  buf_page_t* b;
1464  ulint fold;
1465  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1466 
1467  ut_ad(buf_pool_mutex_own(buf_pool));
1468  ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1470  ut_a(bpage->buf_fix_count == 0);
1471  ut_ad(bpage->in_LRU_list);
1472  ut_ad(!bpage->in_zip_hash);
1473  ut_ad(bpage->in_page_hash);
1474  ut_ad(bpage == buf_page_hash_get(buf_pool,
1475  bpage->space, bpage->offset));
1476  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1477 #ifdef UNIV_DEBUG
1478  switch (buf_page_get_state(bpage)) {
1479  case BUF_BLOCK_ZIP_FREE:
1480  case BUF_BLOCK_NOT_USED:
1482  case BUF_BLOCK_FILE_PAGE:
1483  case BUF_BLOCK_MEMORY:
1484  case BUF_BLOCK_REMOVE_HASH:
1485  ut_error;
1486  case BUF_BLOCK_ZIP_DIRTY:
1487  case BUF_BLOCK_ZIP_PAGE:
1488  break;
1489  }
1490 #endif /* UNIV_DEBUG */
1491 
1492  memcpy(dpage, bpage, sizeof *dpage);
1493 
1494  ut_d(bpage->in_LRU_list = FALSE);
1495  ut_d(bpage->in_page_hash = FALSE);
1496 
1497  /* relocate buf_pool->LRU */
1498  b = UT_LIST_GET_PREV(LRU, bpage);
1499  UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
1500 
1501  if (b) {
1502  UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
1503  } else {
1504  UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
1505  }
1506 
1507  if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
1508  buf_pool->LRU_old = dpage;
1509 #ifdef UNIV_LRU_DEBUG
1510  /* buf_pool->LRU_old must be the first item in the LRU list
1511  whose "old" flag is set. */
1512  ut_a(buf_pool->LRU_old->old);
1513  ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
1514  || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
1515  ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
1516  || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
1517  } else {
1518  /* Check that the "old" flag is consistent in
1519  the block and its neighbours. */
1520  buf_page_set_old(dpage, buf_page_is_old(dpage));
1521 #endif /* UNIV_LRU_DEBUG */
1522  }
1523 
1524  ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
1525  ut_ad(ut_list_node_313->in_LRU_list)));
1526 
1527  /* relocate buf_pool->page_hash */
1528  fold = buf_page_address_fold(bpage->space, bpage->offset);
1529 
1530  HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
1531  HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
1532 }
1533 
1534 /********************************************************************/
1536 static
1537 void
1538 buf_pool_shrink_instance(
1539 /*=====================*/
1540  buf_pool_t* buf_pool,
1541  ulint chunk_size)
1542 {
1543  buf_chunk_t* chunks;
1544  buf_chunk_t* chunk;
1545  ulint max_size;
1546  ulint max_free_size;
1547  buf_chunk_t* max_chunk;
1548  buf_chunk_t* max_free_chunk;
1549 
1550  ut_ad(!buf_pool_mutex_own(buf_pool));
1551 
1552 try_again:
1553  btr_search_disable(); /* Empty the adaptive hash index again */
1554  buf_pool_mutex_enter(buf_pool);
1555 
1556 shrink_again:
1557  if (buf_pool->n_chunks <= 1) {
1558 
1559  /* Cannot shrink if there is only one chunk */
1560  goto func_done;
1561  }
1562 
1563  /* Search for the largest free chunk
1564  not larger than the size difference */
1565  chunks = buf_pool->chunks;
1566  chunk = chunks + buf_pool->n_chunks;
1567  max_size = max_free_size = 0;
1568  max_chunk = max_free_chunk = NULL;
1569 
1570  while (--chunk >= chunks) {
1571  if (chunk->size <= chunk_size
1572  && chunk->size > max_free_size) {
1573  if (chunk->size > max_size) {
1574  max_size = chunk->size;
1575  max_chunk = chunk;
1576  }
1577 
1578  if (buf_chunk_all_free(chunk)) {
1579  max_free_size = chunk->size;
1580  max_free_chunk = chunk;
1581  }
1582  }
1583  }
1584 
1585  if (!max_free_size) {
1586 
1587  ulint dirty = 0;
1588  ulint nonfree = 0;
1589  buf_block_t* block;
1590  buf_block_t* bend;
1591 
1592  /* Cannot shrink: try again later
1593  (do not assign srv_buf_pool_old_size) */
1594  if (!max_chunk) {
1595 
1596  goto func_exit;
1597  }
1598 
1599  block = max_chunk->blocks;
1600  bend = block + max_chunk->size;
1601 
1602  /* Move the blocks of chunk to the end of the
1603  LRU list and try to flush them. */
1604  for (; block < bend; block++) {
1605  switch (buf_block_get_state(block)) {
1606  case BUF_BLOCK_NOT_USED:
1607  continue;
1608  case BUF_BLOCK_FILE_PAGE:
1609  break;
1610  default:
1611  nonfree++;
1612  continue;
1613  }
1614 
1615  mutex_enter(&block->mutex);
1616  /* The following calls will temporarily
1617  release block->mutex and buf_pool->mutex.
1618  Therefore, we have to always retry,
1619  even if !dirty && !nonfree. */
1620 
1621  if (!buf_flush_ready_for_replace(&block->page)) {
1622 
1623  buf_LRU_make_block_old(&block->page);
1624  dirty++;
1625  } else if (buf_LRU_free_block(&block->page, TRUE)
1626  != BUF_LRU_FREED) {
1627  nonfree++;
1628  }
1629 
1630  mutex_exit(&block->mutex);
1631  }
1632 
1633  buf_pool_mutex_exit(buf_pool);
1634 
1635  /* Request for a flush of the chunk if it helps.
1636  Do not flush if there are non-free blocks, since
1637  flushing will not make the chunk freeable. */
1638  if (nonfree) {
1639  /* Avoid busy-waiting. */
1640  os_thread_sleep(100000);
1641  } else if (dirty
1642  && buf_flush_LRU(buf_pool, dirty)
1643  == ULINT_UNDEFINED) {
1644 
1645  buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
1646  }
1647 
1648  goto try_again;
1649  }
1650 
1651  max_size = max_free_size;
1652  max_chunk = max_free_chunk;
1653 
1654  buf_pool->old_pool_size = buf_pool->curr_pool_size;
1655 
1656  /* Rewrite buf_pool->chunks. Copy everything but max_chunk. */
1657  chunks = static_cast<buf_chunk_t *>(mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks));
1658  memcpy(chunks, buf_pool->chunks,
1659  (max_chunk - buf_pool->chunks) * sizeof *chunks);
1660  memcpy(chunks + (max_chunk - buf_pool->chunks),
1661  max_chunk + 1,
1662  buf_pool->chunks + buf_pool->n_chunks
1663  - (max_chunk + 1));
1664  ut_a(buf_pool->curr_size > max_chunk->size);
1665  buf_pool->curr_size -= max_chunk->size;
1666  buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1667  chunk_size -= max_chunk->size;
1668  buf_chunk_free(buf_pool, max_chunk);
1669  mem_free(buf_pool->chunks);
1670  buf_pool->chunks = chunks;
1671  buf_pool->n_chunks--;
1672 
1673  /* Allow a slack of one megabyte. */
1674  if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {
1675 
1676  goto shrink_again;
1677  }
1678  goto func_exit;
1679 
1680 func_done:
1681  buf_pool->old_pool_size = buf_pool->curr_pool_size;
1682 func_exit:
1683  buf_pool_mutex_exit(buf_pool);
1684  btr_search_enable();
1685 }
1686 
1687 /********************************************************************/
1689 static
1690 void
1691 buf_pool_shrink(
1692 /*============*/
1693  ulint chunk_size)
1694 {
1695  ulint i;
1696 
1697  for (i = 0; i < srv_buf_pool_instances; i++) {
1698  buf_pool_t* buf_pool;
1699  ulint instance_chunk_size;
1700 
1701  instance_chunk_size = chunk_size / srv_buf_pool_instances;
1702  buf_pool = buf_pool_from_array(i);
1703  buf_pool_shrink_instance(buf_pool, instance_chunk_size);
1704  }
1705 
1706  buf_pool_set_sizes();
1707 }
1708 
1709 /********************************************************************/
1711 static
1712 void
1713 buf_pool_page_hash_rebuild_instance(
1714 /*================================*/
1715  buf_pool_t* buf_pool)
1716 {
1717  ulint i;
1718  buf_page_t* b;
1719  buf_chunk_t* chunk;
1720  ulint n_chunks;
1721  hash_table_t* zip_hash;
1722  hash_table_t* page_hash;
1723 
1724  buf_pool_mutex_enter(buf_pool);
1725 
1726  /* Free, create, and populate the hash table. */
1727  hash_table_free(buf_pool->page_hash);
1728  buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
1729  zip_hash = hash_create(2 * buf_pool->curr_size);
1730 
1731  HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
1732  BUF_POOL_ZIP_FOLD_BPAGE);
1733 
1734  hash_table_free(buf_pool->zip_hash);
1735  buf_pool->zip_hash = zip_hash;
1736 
1737  /* Insert the uncompressed file pages to buf_pool->page_hash. */
1738 
1739  chunk = buf_pool->chunks;
1740  n_chunks = buf_pool->n_chunks;
1741 
1742  for (i = 0; i < n_chunks; i++, chunk++) {
1743  ulint j;
1744  buf_block_t* block = chunk->blocks;
1745 
1746  for (j = 0; j < chunk->size; j++, block++) {
1747  if (buf_block_get_state(block)
1748  == BUF_BLOCK_FILE_PAGE) {
1749  ut_ad(!block->page.in_zip_hash);
1750  ut_ad(block->page.in_page_hash);
1751 
1752  HASH_INSERT(buf_page_t, hash, page_hash,
1754  block->page.space,
1755  block->page.offset),
1756  &block->page);
1757  }
1758  }
1759  }
1760 
1761  /* Insert the compressed-only pages to buf_pool->page_hash.
1762  All such blocks are either in buf_pool->zip_clean or
1763  in buf_pool->flush_list. */
1764 
1765  for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1766  b = UT_LIST_GET_NEXT(list, b)) {
1768  ut_ad(!b->in_flush_list);
1769  ut_ad(b->in_LRU_list);
1770  ut_ad(b->in_page_hash);
1771  ut_ad(!b->in_zip_hash);
1772 
1773  HASH_INSERT(buf_page_t, hash, page_hash,
1774  buf_page_address_fold(b->space, b->offset), b);
1775  }
1776 
1777  buf_flush_list_mutex_enter(buf_pool);
1778  for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1779  b = UT_LIST_GET_NEXT(list, b)) {
1780  ut_ad(b->in_flush_list);
1781  ut_ad(b->in_LRU_list);
1782  ut_ad(b->in_page_hash);
1783  ut_ad(!b->in_zip_hash);
1784 
1785  switch (buf_page_get_state(b)) {
1786  case BUF_BLOCK_ZIP_DIRTY:
1787  HASH_INSERT(buf_page_t, hash, page_hash,
1789  b->offset), b);
1790  break;
1791  case BUF_BLOCK_FILE_PAGE:
1792  /* uncompressed page */
1793  break;
1794  case BUF_BLOCK_ZIP_FREE:
1795  case BUF_BLOCK_ZIP_PAGE:
1796  case BUF_BLOCK_NOT_USED:
1798  case BUF_BLOCK_MEMORY:
1799  case BUF_BLOCK_REMOVE_HASH:
1800  ut_error;
1801  break;
1802  }
1803  }
1804 
1805  buf_flush_list_mutex_exit(buf_pool);
1806  buf_pool_mutex_exit(buf_pool);
1807 }
1808 
1809 /********************************************************************
1810 Determine if a block is a sentinel for a buffer pool watch.
1811 @return TRUE if a sentinel for a buffer pool watch, FALSE if not */
1812 UNIV_INTERN
1813 ibool
1814 buf_pool_watch_is_sentinel(
1815 /*=======================*/
1816  buf_pool_t* buf_pool,
1817  const buf_page_t* bpage)
1818 {
1819  ut_ad(buf_page_in_file(bpage));
1820 
1821  if (bpage < &buf_pool->watch[0]
1822  || bpage >= &buf_pool->watch[BUF_POOL_WATCH_SIZE]) {
1823 
1825  || bpage->zip.data != NULL);
1826 
1827  return(FALSE);
1828  }
1829 
1831  ut_ad(!bpage->in_zip_hash);
1832  ut_ad(bpage->in_page_hash);
1833  ut_ad(bpage->zip.data == NULL);
1834  ut_ad(bpage->buf_fix_count > 0);
1835  return(TRUE);
1836 }
1837 
1838 /****************************************************************/
1842 UNIV_INTERN
1843 buf_page_t*
1844 buf_pool_watch_set(
1845 /*===============*/
1846  ulint space,
1847  ulint offset,
1848  ulint fold)
1849 {
1850  buf_page_t* bpage;
1851  ulint i;
1852  buf_pool_t* buf_pool = buf_pool_get(space, offset);
1853 
1854  ut_ad(buf_pool_mutex_own(buf_pool));
1855 
1856  bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
1857 
1858  if (UNIV_LIKELY_NULL(bpage)) {
1859  if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
1860  /* The page was loaded meanwhile. */
1861  return(bpage);
1862  }
1863  /* Add to an existing watch. */
1864  bpage->buf_fix_count++;
1865  return(NULL);
1866  }
1867 
1868  for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
1869  bpage = &buf_pool->watch[i];
1870 
1871  ut_ad(bpage->access_time == 0);
1872  ut_ad(bpage->newest_modification == 0);
1873  ut_ad(bpage->oldest_modification == 0);
1874  ut_ad(bpage->zip.data == NULL);
1875  ut_ad(!bpage->in_zip_hash);
1876 
1877  switch (bpage->state) {
1878  case BUF_BLOCK_POOL_WATCH:
1879  ut_ad(!bpage->in_page_hash);
1880  ut_ad(bpage->buf_fix_count == 0);
1881 
1882  /* bpage is pointing to buf_pool->watch[],
1883  which is protected by buf_pool->mutex.
1884  Normally, buf_page_t objects are protected by
1885  buf_block_t::mutex or buf_pool->zip_mutex or both. */
1886 
1887  bpage->state = BUF_BLOCK_ZIP_PAGE;
1888  bpage->space = space;
1889  bpage->offset = offset;
1890  bpage->buf_fix_count = 1;
1891 
1892  ut_d(bpage->in_page_hash = TRUE);
1893  HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
1894  fold, bpage);
1895  return(NULL);
1896  case BUF_BLOCK_ZIP_PAGE:
1897  ut_ad(bpage->in_page_hash);
1898  ut_ad(bpage->buf_fix_count > 0);
1899  break;
1900  default:
1901  ut_error;
1902  }
1903  }
1904 
1905  /* Allocation failed. Either the maximum number of purge
1906  threads should never exceed BUF_POOL_WATCH_SIZE, or this code
1907  should be modified to return a special non-NULL value and the
1908  caller should purge the record directly. */
1909  ut_error;
1910 
1911  /* Fix compiler warning */
1912  return(NULL);
1913 }
1914 
1915 /********************************************************************/
1917 static
1918 void
1919 buf_pool_page_hash_rebuild(void)
1920 /*============================*/
1921 {
1922  ulint i;
1923 
1924  for (i = 0; i < srv_buf_pool_instances; i++) {
1925  buf_pool_page_hash_rebuild_instance(buf_pool_from_array(i));
1926  }
1927 }
1928 
1929 /********************************************************************/
1931 static
1932 void
1933 buf_pool_increase_instance(
1934 /*=======================*/
1935  buf_pool_t* buf_pool,
1936  ulint change_size)
1937 {
1938  buf_chunk_t* chunks;
1939  buf_chunk_t* chunk;
1940 
1941  buf_pool_mutex_enter(buf_pool);
1942  chunks = static_cast<buf_chunk_t *>(mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks));
1943 
1944  memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks * sizeof *chunks);
1945 
1946  chunk = &chunks[buf_pool->n_chunks];
1947 
1948  if (!buf_chunk_init(buf_pool, chunk, change_size)) {
1949  mem_free(chunks);
1950  } else {
1951  buf_pool->old_pool_size = buf_pool->curr_pool_size;
1952  buf_pool->curr_size += chunk->size;
1953  buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1954  mem_free(buf_pool->chunks);
1955  buf_pool->chunks = chunks;
1956  buf_pool->n_chunks++;
1957  }
1958 
1959  buf_pool_mutex_exit(buf_pool);
1960 }
1961 
1962 /********************************************************************/
1964 static
1965 void
1966 buf_pool_increase(
1967 /*==============*/
1968  ulint change_size)
1969 {
1970  ulint i;
1971 
1972  for (i = 0; i < srv_buf_pool_instances; i++) {
1973  buf_pool_increase_instance(
1975  change_size / srv_buf_pool_instances);
1976  }
1977 
1978  buf_pool_set_sizes();
1979 }
1980 
1981 /********************************************************************/
1983 UNIV_INTERN
1984 void
1985 buf_pool_resize(void)
1986 /*=================*/
1987 {
1988  ulint change_size;
1989  ulint min_change_size = 1048576 * srv_buf_pool_instances;
1990 
1992 
1994 
1996 
1997  return;
1998 
1999  } else if (srv_buf_pool_curr_size + min_change_size
2000  > srv_buf_pool_size) {
2001 
2002  change_size = (srv_buf_pool_curr_size - srv_buf_pool_size)
2003  / UNIV_PAGE_SIZE;
2004 
2006 
2007  /* Disable adaptive hash indexes and empty the index
2008  in order to free up memory in the buffer pool chunks. */
2009  buf_pool_shrink(change_size);
2010 
2011  } else if (srv_buf_pool_curr_size + min_change_size
2012  < srv_buf_pool_size) {
2013 
2014  /* Enlarge the buffer pool by at least one megabyte */
2015 
2016  change_size = srv_buf_pool_size - srv_buf_pool_curr_size;
2017 
2019 
2020  buf_pool_increase(change_size);
2021  } else {
2023 
2025 
2026  return;
2027  }
2028 
2029  buf_pool_page_hash_rebuild();
2030 }
2031 
2032 /****************************************************************/
2037 static
2038 void
2039 buf_pool_watch_remove(
2040 /*==================*/
2041  buf_pool_t* buf_pool,
2042  ulint fold,
2044  buf_page_t* watch)
2045 {
2046  ut_ad(buf_pool_mutex_own(buf_pool));
2047 
2048  HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
2049  ut_d(watch->in_page_hash = FALSE);
2050  watch->buf_fix_count = 0;
2051  watch->state = BUF_BLOCK_POOL_WATCH;
2052 }
2053 
2054 /****************************************************************/
2057 UNIV_INTERN
2058 void
2059 buf_pool_watch_unset(
2060 /*=================*/
2061  ulint space,
2062  ulint offset)
2063 {
2064  buf_page_t* bpage;
2065  buf_pool_t* buf_pool = buf_pool_get(space, offset);
2066  ulint fold = buf_page_address_fold(space, offset);
2067 
2068  buf_pool_mutex_enter(buf_pool);
2069  bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
2070  /* The page must exist because buf_pool_watch_set()
2071  increments buf_fix_count. */
2072  ut_a(bpage);
2073 
2074  if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
2075  mutex_t* mutex = buf_page_get_mutex(bpage);
2076 
2077  mutex_enter(mutex);
2078  ut_a(bpage->buf_fix_count > 0);
2079  bpage->buf_fix_count--;
2080  mutex_exit(mutex);
2081  } else {
2082  ut_a(bpage->buf_fix_count > 0);
2083 
2084  if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
2085  buf_pool_watch_remove(buf_pool, fold, bpage);
2086  }
2087  }
2088 
2089  buf_pool_mutex_exit(buf_pool);
2090 }
2091 
2092 /****************************************************************/
2097 UNIV_INTERN
2098 ibool
2099 buf_pool_watch_occurred(
2100 /*====================*/
2101  ulint space,
2102  ulint offset)
2103 {
2104  ibool ret;
2105  buf_page_t* bpage;
2106  buf_pool_t* buf_pool = buf_pool_get(space, offset);
2107  ulint fold = buf_page_address_fold(space, offset);
2108 
2109  buf_pool_mutex_enter(buf_pool);
2110 
2111  bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
2112  /* The page must exist because buf_pool_watch_set()
2113  increments buf_fix_count. */
2114  ut_a(bpage);
2115  ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
2116  buf_pool_mutex_exit(buf_pool);
2117 
2118  return(ret);
2119 }
2120 
2121 /********************************************************************/
2125 UNIV_INTERN
2126 void
2127 buf_page_make_young(
2128 /*================*/
2129  buf_page_t* bpage)
2130 {
2131  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2132 
2133  buf_pool_mutex_enter(buf_pool);
2134 
2135  ut_a(buf_page_in_file(bpage));
2136 
2137  buf_LRU_make_block_young(bpage);
2138 
2139  buf_pool_mutex_exit(buf_pool);
2140 }
2141 
2142 /********************************************************************/
2147 static
2148 void
2149 buf_page_set_accessed_make_young(
2150 /*=============================*/
2151  buf_page_t* bpage,
2153  unsigned access_time)
2156 {
2157  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2158 
2159  ut_ad(!buf_pool_mutex_own(buf_pool));
2160  ut_a(buf_page_in_file(bpage));
2161 
2162  if (buf_page_peek_if_too_old(bpage)) {
2163  buf_pool_mutex_enter(buf_pool);
2164  buf_LRU_make_block_young(bpage);
2165  buf_pool_mutex_exit(buf_pool);
2166  } else if (!access_time) {
2167  ulint time_ms = ut_time_ms();
2168  buf_pool_mutex_enter(buf_pool);
2169  buf_page_set_accessed(bpage, time_ms);
2170  buf_pool_mutex_exit(buf_pool);
2171  }
2172 }
2173 
2174 /********************************************************************/
2177 UNIV_INTERN
2178 void
2179 buf_reset_check_index_page_at_flush(
2180 /*================================*/
2181  ulint space,
2182  ulint offset)
2183 {
2184  buf_block_t* block;
2185  buf_pool_t* buf_pool = buf_pool_get(space, offset);
2186 
2187  buf_pool_mutex_enter(buf_pool);
2188 
2189  block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
2190 
2191  if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
2192  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
2193  block->check_index_page_at_flush = FALSE;
2194  }
2195 
2196  buf_pool_mutex_exit(buf_pool);
2197 }
2198 
2199 /********************************************************************/
2204 UNIV_INTERN
2205 ibool
2206 buf_page_peek_if_search_hashed(
2207 /*===========================*/
2208  ulint space,
2209  ulint offset)
2210 {
2211  buf_block_t* block;
2212  ibool is_hashed;
2213  buf_pool_t* buf_pool = buf_pool_get(space, offset);
2214 
2215  buf_pool_mutex_enter(buf_pool);
2216 
2217  block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
2218 
2219  if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2220  is_hashed = FALSE;
2221  } else {
2222  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
2223  is_hashed = block->is_hashed;
2224  }
2225 
2226  buf_pool_mutex_exit(buf_pool);
2227 
2228  return(is_hashed);
2229 }
2230 
2231 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
2232 /********************************************************************/
2238 UNIV_INTERN
2239 buf_page_t*
2240 buf_page_set_file_page_was_freed(
2241 /*=============================*/
2242  ulint space,
2243  ulint offset)
2244 {
2245  buf_page_t* bpage;
2246  buf_pool_t* buf_pool = buf_pool_get(space, offset);
2247 
2248  buf_pool_mutex_enter(buf_pool);
2249 
2250  bpage = buf_page_hash_get(buf_pool, space, offset);
2251 
2252  if (bpage) {
2253  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2254  /* bpage->file_page_was_freed can already hold
2255  when this code is invoked from dict_drop_index_tree() */
2256  bpage->file_page_was_freed = TRUE;
2257  }
2258 
2259  buf_pool_mutex_exit(buf_pool);
2260 
2261  return(bpage);
2262 }
2263 
2264 /********************************************************************/
2270 UNIV_INTERN
2271 buf_page_t*
2272 buf_page_reset_file_page_was_freed(
2273 /*===============================*/
2274  ulint space,
2275  ulint offset)
2276 {
2277  buf_page_t* bpage;
2278  buf_pool_t* buf_pool = buf_pool_get(space, offset);
2279 
2280  buf_pool_mutex_enter(buf_pool);
2281 
2282  bpage = buf_page_hash_get(buf_pool, space, offset);
2283 
2284  if (bpage) {
2285  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2286  bpage->file_page_was_freed = FALSE;
2287  }
2288 
2289  buf_pool_mutex_exit(buf_pool);
2290 
2291  return(bpage);
2292 }
2293 #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
2294 
2295 /********************************************************************/
2304 UNIV_INTERN
2305 buf_page_t*
2306 buf_page_get_zip(
2307 /*=============*/
2308  ulint space,
2309  ulint zip_size,
2310  ulint offset)
2311 {
2312  buf_page_t* bpage;
2313  mutex_t* block_mutex;
2314  ibool must_read;
2315  unsigned access_time;
2316  buf_pool_t* buf_pool = buf_pool_get(space, offset);
2317 
2318  buf_pool->stat.n_page_gets++;
2319 
2320  for (;;) {
2321  buf_pool_mutex_enter(buf_pool);
2322 lookup:
2323  bpage = buf_page_hash_get(buf_pool, space, offset);
2324  if (bpage) {
2325  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2326  break;
2327  }
2328 
2329  /* Page not in buf_pool: needs to be read from file */
2330 
2331  buf_pool_mutex_exit(buf_pool);
2332 
2333  buf_read_page(space, zip_size, offset);
2334 
2335 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2336  ut_a(++buf_dbg_counter % 37 || buf_validate());
2337 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2338  }
2339 
2340  if (UNIV_UNLIKELY(!bpage->zip.data)) {
2341  /* There is no compressed page. */
2342 err_exit:
2343  buf_pool_mutex_exit(buf_pool);
2344  return(NULL);
2345  }
2346 
2347  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2348 
2349  switch (buf_page_get_state(bpage)) {
2350  case BUF_BLOCK_NOT_USED:
2352  case BUF_BLOCK_MEMORY:
2353  case BUF_BLOCK_REMOVE_HASH:
2354  case BUF_BLOCK_ZIP_FREE:
2355  break;
2356  case BUF_BLOCK_ZIP_PAGE:
2357  case BUF_BLOCK_ZIP_DIRTY:
2358  block_mutex = &buf_pool->zip_mutex;
2359  mutex_enter(block_mutex);
2360  bpage->buf_fix_count++;
2361  goto got_block;
2362  case BUF_BLOCK_FILE_PAGE:
2363  block_mutex = &((buf_block_t*) bpage)->mutex;
2364  mutex_enter(block_mutex);
2365 
2366  /* Discard the uncompressed page frame if possible. */
2367  if (buf_LRU_free_block(bpage, FALSE) == BUF_LRU_FREED) {
2368 
2369  mutex_exit(block_mutex);
2370  goto lookup;
2371  }
2372 
2373  buf_block_buf_fix_inc((buf_block_t*) bpage,
2374  __FILE__, __LINE__);
2375  goto got_block;
2376  }
2377 
2378  ut_error;
2379  goto err_exit;
2380 
2381 got_block:
2382  must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
2383  access_time = buf_page_is_accessed(bpage);
2384 
2385  buf_pool_mutex_exit(buf_pool);
2386 
2387  mutex_exit(block_mutex);
2388 
2389  buf_page_set_accessed_make_young(bpage, access_time);
2390 
2391 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
2392  ut_a(!bpage->file_page_was_freed);
2393 #endif
2394 
2395 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2396  ut_a(++buf_dbg_counter % 5771 || buf_validate());
2397  ut_a(bpage->buf_fix_count > 0);
2398  ut_a(buf_page_in_file(bpage));
2399 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2400 
2401  if (must_read) {
2402  /* Let us wait until the read operation
2403  completes */
2404 
2405  for (;;) {
2406  enum buf_io_fix io_fix;
2407 
2408  mutex_enter(block_mutex);
2409  io_fix = buf_page_get_io_fix(bpage);
2410  mutex_exit(block_mutex);
2411 
2412  if (io_fix == BUF_IO_READ) {
2413 
2414  os_thread_sleep(WAIT_FOR_READ);
2415  } else {
2416  break;
2417  }
2418  }
2419  }
2420 
2421 #ifdef UNIV_IBUF_COUNT_DEBUG
2422  ut_a(ibuf_count_get(buf_page_get_space(bpage),
2423  buf_page_get_page_no(bpage)) == 0);
2424 #endif
2425  return(bpage);
2426 }
2427 
2428 /********************************************************************/
2430 UNIV_INLINE
2431 void
2432 buf_block_init_low(
2433 /*===============*/
2434  buf_block_t* block)
2435 {
2436  block->check_index_page_at_flush = FALSE;
2437  block->index = NULL;
2438 
2439  block->n_hash_helps = 0;
2440  block->is_hashed = FALSE;
2441  block->n_fields = 1;
2442  block->n_bytes = 0;
2443  block->left_side = TRUE;
2444 }
2445 #endif /* !UNIV_HOTBACKUP */
2446 
2447 /********************************************************************/
2450 UNIV_INTERN
2451 ibool
2452 buf_zip_decompress(
2453 /*===============*/
2454  buf_block_t* block,
2455  ibool check)
2456 {
2457  const byte* frame = block->page.zip.data;
2458  ulint stamp_checksum = mach_read_from_4(
2459  frame + FIL_PAGE_SPACE_OR_CHKSUM);
2460 
2461  ut_ad(buf_block_get_zip_size(block));
2462  ut_a(buf_block_get_space(block) != 0);
2463 
2464  if (UNIV_LIKELY(check && stamp_checksum != BUF_NO_CHECKSUM_MAGIC)) {
2465  ulint calc_checksum = page_zip_calc_checksum(
2466  frame, page_zip_get_size(&block->page.zip));
2467 
2468  if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
2469  ut_print_timestamp(stderr);
2470  fprintf(stderr,
2471  " InnoDB: compressed page checksum mismatch"
2472  " (space %u page %u): %lu != %lu\n",
2473  block->page.space, block->page.offset,
2474  stamp_checksum, calc_checksum);
2475  return(FALSE);
2476  }
2477  }
2478 
2479  switch (fil_page_get_type(frame)) {
2480  case FIL_PAGE_INDEX:
2481  if (page_zip_decompress(&block->page.zip,
2482  block->frame, TRUE)) {
2483  return(TRUE);
2484  }
2485 
2486  fprintf(stderr,
2487  "InnoDB: unable to decompress space %lu page %lu\n",
2488  (ulong) block->page.space,
2489  (ulong) block->page.offset);
2490  return(FALSE);
2491 
2493  case FIL_PAGE_INODE:
2494  case FIL_PAGE_IBUF_BITMAP:
2495  case FIL_PAGE_TYPE_FSP_HDR:
2496  case FIL_PAGE_TYPE_XDES:
2497  case FIL_PAGE_TYPE_ZBLOB:
2498  case FIL_PAGE_TYPE_ZBLOB2:
2499  /* Copy to uncompressed storage. */
2500  memcpy(block->frame, frame,
2501  buf_block_get_zip_size(block));
2502  return(TRUE);
2503  }
2504 
2505  ut_print_timestamp(stderr);
2506  fprintf(stderr,
2507  " InnoDB: unknown compressed page"
2508  " type %lu\n",
2509  fil_page_get_type(frame));
2510  return(FALSE);
2511 }
2512 
2513 #ifndef UNIV_HOTBACKUP
2514 /*******************************************************************/
2518 static
2519 buf_block_t*
2520 buf_block_align_instance(
2521 /*=====================*/
2522  buf_pool_t* buf_pool,
2524  const byte* ptr)
2525 {
2526  buf_chunk_t* chunk;
2527  ulint i;
2528 
2529  /* TODO: protect buf_pool->chunks with a mutex (it will
2530  currently remain constant after buf_pool_init()) */
2531  for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
2532  ulint offs;
2533 
2534  if (UNIV_UNLIKELY(ptr < chunk->blocks->frame)) {
2535 
2536  continue;
2537  }
2538  /* else */
2539 
2540  offs = ptr - chunk->blocks->frame;
2541 
2542  offs >>= UNIV_PAGE_SIZE_SHIFT;
2543 
2544  if (UNIV_LIKELY(offs < chunk->size)) {
2545  buf_block_t* block = &chunk->blocks[offs];
2546 
2547  /* The function buf_chunk_init() invokes
2548  buf_block_init() so that block[n].frame ==
2549  block->frame + n * UNIV_PAGE_SIZE. Check it. */
2550  ut_ad(block->frame == page_align(ptr));
2551 #ifdef UNIV_DEBUG
2552  /* A thread that updates these fields must
2553  hold buf_pool->mutex and block->mutex. Acquire
2554  only the latter. */
2555  mutex_enter(&block->mutex);
2556 
2557  switch (buf_block_get_state(block)) {
2558  case BUF_BLOCK_ZIP_FREE:
2559  case BUF_BLOCK_ZIP_PAGE:
2560  case BUF_BLOCK_ZIP_DIRTY:
2561  /* These types should only be used in
2562  the compressed buffer pool, whose
2563  memory is allocated from
2564  buf_pool->chunks, in UNIV_PAGE_SIZE
2565  blocks flagged as BUF_BLOCK_MEMORY. */
2566  ut_error;
2567  break;
2568  case BUF_BLOCK_NOT_USED:
2570  case BUF_BLOCK_MEMORY:
2571  /* Some data structures contain
2572  "guess" pointers to file pages. The
2573  file pages may have been freed and
2574  reused. Do not complain. */
2575  break;
2576  case BUF_BLOCK_REMOVE_HASH:
2577  /* buf_LRU_block_remove_hashed_page()
2578  will overwrite the FIL_PAGE_OFFSET and
2579  FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
2580  0xff and set the state to
2581  BUF_BLOCK_REMOVE_HASH. */
2583  == 0xffffffff);
2585  == 0xffffffff);
2586  break;
2587  case BUF_BLOCK_FILE_PAGE:
2588  ut_ad(block->page.space
2589  == page_get_space_id(page_align(ptr)));
2590  ut_ad(block->page.offset
2591  == page_get_page_no(page_align(ptr)));
2592  break;
2593  }
2594 
2595  mutex_exit(&block->mutex);
2596 #endif /* UNIV_DEBUG */
2597 
2598  return(block);
2599  }
2600  }
2601 
2602  return(NULL);
2603 }
2604 
2605 /*******************************************************************/
2608 UNIV_INTERN
2609 buf_block_t*
2610 buf_block_align(
2611 /*============*/
2612  const byte* ptr)
2613 {
2614  ulint i;
2615 
2616  for (i = 0; i < srv_buf_pool_instances; i++) {
2617  buf_block_t* block;
2618 
2619  block = buf_block_align_instance(
2620  buf_pool_from_array(i), ptr);
2621  if (block) {
2622  return(block);
2623  }
2624  }
2625 
2626  /* The block should always be found. */
2627  ut_error;
2628  return(NULL);
2629 }
2630 
2631 /********************************************************************/
2636 static
2637 ibool
2638 buf_pointer_is_block_field_instance(
2639 /*================================*/
2640  buf_pool_t* buf_pool,
2641  const void* ptr)
2642 {
2643  const buf_chunk_t* chunk = buf_pool->chunks;
2644  const buf_chunk_t* const echunk = chunk + buf_pool->n_chunks;
2645 
2646  /* TODO: protect buf_pool->chunks with a mutex (it will
2647  currently remain constant after buf_pool_init()) */
2648  while (chunk < echunk) {
2649  if (ptr >= (void *)chunk->blocks
2650  && ptr < (void *)(chunk->blocks + chunk->size)) {
2651 
2652  return(TRUE);
2653  }
2654 
2655  chunk++;
2656  }
2657 
2658  return(FALSE);
2659 }
2660 
2661 /********************************************************************/
2665 UNIV_INTERN
2666 ibool
2667 buf_pointer_is_block_field(
2668 /*=======================*/
2669  const void* ptr)
2670 {
2671  ulint i;
2672 
2673  for (i = 0; i < srv_buf_pool_instances; i++) {
2674  ibool found;
2675 
2676  found = buf_pointer_is_block_field_instance(
2677  buf_pool_from_array(i), ptr);
2678  if (found) {
2679  return(TRUE);
2680  }
2681  }
2682 
2683  return(FALSE);
2684 }
2685 
2686 /********************************************************************/
2689 static
2690 ibool
2691 buf_block_is_uncompressed(
2692 /*======================*/
2693  buf_pool_t* buf_pool,
2694  const buf_block_t* block)
2696 {
2697  ut_ad(buf_pool_mutex_own(buf_pool));
2698 
2699  if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
2700  /* The pointer should be aligned. */
2701  return(FALSE);
2702  }
2703 
2704  return(buf_pointer_is_block_field_instance(buf_pool, (void *)block));
2705 }
2706 
2707 /********************************************************************/
2710 UNIV_INTERN
2711 buf_block_t*
2712 buf_page_get_gen(
2713 /*=============*/
2714  ulint space,
2715  ulint zip_size,
2717  ulint offset,
2718  ulint rw_latch,
2719  buf_block_t* guess,
2720  ulint mode,
2723  const char* file,
2724  ulint line,
2725  mtr_t* mtr)
2726 {
2727  buf_block_t* block;
2728  ulint fold;
2729  unsigned access_time;
2730  ulint fix_type;
2731  ibool must_read;
2732  ulint retries = 0;
2733  buf_pool_t* buf_pool = buf_pool_get(space, offset);
2734 
2735  ut_ad(mtr);
2736  ut_ad(mtr->state == MTR_ACTIVE);
2737  ut_ad((rw_latch == RW_S_LATCH)
2738  || (rw_latch == RW_X_LATCH)
2739  || (rw_latch == RW_NO_LATCH));
2740 #ifdef UNIV_DEBUG
2741  switch (mode) {
2742  case BUF_GET_NO_LATCH:
2743  ut_ad(rw_latch == RW_NO_LATCH);
2744  break;
2745  case BUF_GET:
2746  case BUF_GET_IF_IN_POOL:
2747  case BUF_PEEK_IF_IN_POOL:
2749  break;
2750  default:
2751  ut_error;
2752  }
2753 #endif /* UNIV_DEBUG */
2754  ut_ad(zip_size == fil_space_get_zip_size(space));
2755  ut_ad(ut_is_2pow(zip_size));
2756 #ifndef UNIV_LOG_DEBUG
2757  ut_ad(!ibuf_inside(mtr)
2758  || ibuf_page_low(space, zip_size, offset,
2759  FALSE, file, line, NULL));
2760 #endif
2761  buf_pool->stat.n_page_gets++;
2762  fold = buf_page_address_fold(space, offset);
2763 loop:
2764  block = guess;
2765  buf_pool_mutex_enter(buf_pool);
2766 
2767  if (block) {
2768  /* If the guess is a compressed page descriptor that
2769  has been allocated by buf_buddy_alloc(), it may have
2770  been invalidated by buf_buddy_relocate(). In that
2771  case, block could point to something that happens to
2772  contain the expected bits in block->page. Similarly,
2773  the guess may be pointing to a buffer pool chunk that
2774  has been released when resizing the buffer pool. */
2775 
2776  if (!buf_block_is_uncompressed(buf_pool, block)
2777  || offset != block->page.offset
2778  || space != block->page.space
2780 
2781  block = guess = NULL;
2782  } else {
2783  ut_ad(!block->page.in_zip_hash);
2784  ut_ad(block->page.in_page_hash);
2785  }
2786  }
2787 
2788  if (block == NULL) {
2790  buf_pool, space, offset, fold);
2791  }
2792 
2793 loop2:
2794  if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
2795  block = NULL;
2796  }
2797 
2798  if (block == NULL) {
2799  /* Page not in buf_pool: needs to be read from file */
2800 
2801  if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
2802  block = (buf_block_t*) buf_pool_watch_set(
2803  space, offset, fold);
2804 
2805  if (UNIV_LIKELY_NULL(block)) {
2806 
2807  goto got_block;
2808  }
2809  }
2810 
2811  buf_pool_mutex_exit(buf_pool);
2812 
2813  if (mode == BUF_GET_IF_IN_POOL
2814  || mode == BUF_PEEK_IF_IN_POOL
2815  || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
2816 
2817  return(NULL);
2818  }
2819 
2820  if (buf_read_page(space, zip_size, offset)) {
2821  retries = 0;
2822  } else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
2823  ++retries;
2824  } else {
2825  fprintf(stderr, "InnoDB: Error: Unable"
2826  " to read tablespace %lu page no"
2827  " %lu into the buffer pool after"
2828  " %lu attempts\n"
2829  "InnoDB: The most probable cause"
2830  " of this error may be that the"
2831  " table has been corrupted.\n"
2832  "InnoDB: You can try to fix this"
2833  " problem by using"
2834  " innodb_force_recovery.\n"
2835  "InnoDB: Please see reference manual"
2836  " for more details.\n"
2837  "InnoDB: Aborting...\n",
2838  space, offset,
2839  BUF_PAGE_READ_MAX_RETRIES);
2840 
2841  ut_error;
2842  }
2843 
2844 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2845  ut_a(++buf_dbg_counter % 37 || buf_validate());
2846 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2847  goto loop;
2848  }
2849 
2850 got_block:
2851  ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
2852 
2853  must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
2854 
2855  if (must_read && (mode == BUF_GET_IF_IN_POOL
2856  || mode == BUF_PEEK_IF_IN_POOL)) {
2857 
2858  /* The page is being read to buffer pool,
2859  but we cannot wait around for the read to
2860  complete. */
2861  buf_pool_mutex_exit(buf_pool);
2862 
2863  return(NULL);
2864  }
2865 
2866  switch (buf_block_get_state(block)) {
2867  buf_page_t* bpage;
2868  ibool success;
2869 
2870  case BUF_BLOCK_FILE_PAGE:
2871  break;
2872 
2873  case BUF_BLOCK_ZIP_PAGE:
2874  case BUF_BLOCK_ZIP_DIRTY:
2875  bpage = &block->page;
2876  /* Protect bpage->buf_fix_count. */
2877  mutex_enter(&buf_pool->zip_mutex);
2878 
2879  if (bpage->buf_fix_count
2880  || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
2881  /* This condition often occurs when the buffer
2882  is not buffer-fixed, but I/O-fixed by
2883  buf_page_init_for_read(). */
2884  mutex_exit(&buf_pool->zip_mutex);
2885 wait_until_unfixed:
2886  /* The block is buffer-fixed or I/O-fixed.
2887  Try again later. */
2888  buf_pool_mutex_exit(buf_pool);
2889  os_thread_sleep(WAIT_FOR_READ);
2890 
2891  goto loop;
2892  }
2893 
2894  /* Allocate an uncompressed page. */
2895  buf_pool_mutex_exit(buf_pool);
2896  mutex_exit(&buf_pool->zip_mutex);
2897 
2898  block = buf_LRU_get_free_block(buf_pool);
2899  ut_a(block);
2900 
2901  buf_pool_mutex_enter(buf_pool);
2902  mutex_enter(&block->mutex);
2903 
2904  {
2905  buf_page_t* hash_bpage;
2906 
2907  hash_bpage = buf_page_hash_get_low(
2908  buf_pool, space, offset, fold);
2909 
2910  if (UNIV_UNLIKELY(bpage != hash_bpage)) {
2911  /* The buf_pool->page_hash was modified
2912  while buf_pool->mutex was released.
2913  Free the block that was allocated. */
2914 
2915  buf_LRU_block_free_non_file_page(block);
2916  mutex_exit(&block->mutex);
2917 
2918  block = (buf_block_t*) hash_bpage;
2919  goto loop2;
2920  }
2921  }
2922 
2923  if (UNIV_UNLIKELY
2924  (bpage->buf_fix_count
2925  || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
2926 
2927  /* The block was buffer-fixed or I/O-fixed
2928  while buf_pool->mutex was not held by this thread.
2929  Free the block that was allocated and try again.
2930  This should be extremely unlikely. */
2931 
2932  buf_LRU_block_free_non_file_page(block);
2933  mutex_exit(&block->mutex);
2934 
2935  goto wait_until_unfixed;
2936  }
2937 
2938  /* Move the compressed page from bpage to block,
2939  and uncompress it. */
2940 
2941  mutex_enter(&buf_pool->zip_mutex);
2942 
2943  buf_relocate(bpage, &block->page);
2944  buf_block_init_low(block);
2945  block->lock_hash_val = lock_rec_hash(space, offset);
2946 
2947  UNIV_MEM_DESC(&block->page.zip.data,
2948  page_zip_get_size(&block->page.zip), block);
2949 
2950  if (buf_page_get_state(&block->page)
2951  == BUF_BLOCK_ZIP_PAGE) {
2952  UT_LIST_REMOVE(list, buf_pool->zip_clean,
2953  &block->page);
2954  ut_ad(!block->page.in_flush_list);
2955  } else {
2956  /* Relocate buf_pool->flush_list. */
2957  buf_flush_relocate_on_flush_list(bpage,
2958  &block->page);
2959  }
2960 
2961  /* Buffer-fix, I/O-fix, and X-latch the block
2962  for the duration of the decompression.
2963  Also add the block to the unzip_LRU list. */
2964  block->page.state = BUF_BLOCK_FILE_PAGE;
2965 
2966  /* Insert at the front of unzip_LRU list */
2967  buf_unzip_LRU_add_block(block, FALSE);
2968 
2969  block->page.buf_fix_count = 1;
2971  rw_lock_x_lock_func(&block->lock, 0, file, line);
2972 
2973  UNIV_MEM_INVALID(bpage, sizeof *bpage);
2974 
2975  mutex_exit(&block->mutex);
2976  mutex_exit(&buf_pool->zip_mutex);
2977  buf_pool->n_pend_unzip++;
2978 
2979  bpage->state = BUF_BLOCK_ZIP_FREE;
2980  buf_buddy_free(buf_pool, bpage, sizeof *bpage);
2981 
2982  buf_pool_mutex_exit(buf_pool);
2983 
2984  /* Decompress the page and apply buffered operations
2985  while not holding buf_pool->mutex or block->mutex. */
2986  success = buf_zip_decompress(block, srv_use_checksums);
2987  ut_a(success);
2988 
2989  if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
2990  ibuf_merge_or_delete_for_page(block, space, offset,
2991  zip_size, TRUE);
2992  }
2993 
2994  /* Unfix and unlatch the block. */
2995  buf_pool_mutex_enter(buf_pool);
2996  mutex_enter(&block->mutex);
2997  block->page.buf_fix_count--;
2999  mutex_exit(&block->mutex);
3000  buf_pool->n_pend_unzip--;
3001  rw_lock_x_unlock(&block->lock);
3002 
3003  break;
3004 
3005  case BUF_BLOCK_ZIP_FREE:
3006  case BUF_BLOCK_NOT_USED:
3008  case BUF_BLOCK_MEMORY:
3009  case BUF_BLOCK_REMOVE_HASH:
3010  ut_error;
3011  break;
3012  }
3013 
3015 
3016  mutex_enter(&block->mutex);
3017 #if UNIV_WORD_SIZE == 4
3018  /* On 32-bit systems, there is no padding in buf_page_t. On
3019  other systems, Valgrind could complain about uninitialized pad
3020  bytes. */
3021  UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
3022 #endif
3023 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
3024  if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH)
3025  && ibuf_debug) {
3026  /* Try to evict the block from the buffer pool, to use the
3027  insert buffer (change buffer) as much as possible. */
3028 
3029  if (buf_LRU_free_block(&block->page, TRUE) == BUF_LRU_FREED) {
3030  mutex_exit(&block->mutex);
3031  if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
3032  /* Set the watch, as it would have
3033  been set if the page were not in the
3034  buffer pool in the first place. */
3035  block = (buf_block_t*) buf_pool_watch_set(
3036  space, offset, fold);
3037 
3038  if (UNIV_LIKELY_NULL(block)) {
3039 
3040  /* The page entered the buffer
3041  pool for some reason. Try to
3042  evict it again. */
3043  goto got_block;
3044  }
3045  }
3046  buf_pool_mutex_exit(buf_pool);
3047  fprintf(stderr,
3048  "innodb_change_buffering_debug evict %u %u\n",
3049  (unsigned) space, (unsigned) offset);
3050  return(NULL);
3051  } else if (buf_flush_page_try(buf_pool, block)) {
3052  fprintf(stderr,
3053  "innodb_change_buffering_debug flush %u %u\n",
3054  (unsigned) space, (unsigned) offset);
3055  guess = block;
3056  goto loop;
3057  }
3058 
3059  /* Failed to evict the page; change it directly */
3060  }
3061 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
3062 
3063  buf_block_buf_fix_inc(block, file, line);
3064 
3065  mutex_exit(&block->mutex);
3066 
3067  /* Check if this is the first access to the page */
3068 
3069  access_time = buf_page_is_accessed(&block->page);
3070 
3071  buf_pool_mutex_exit(buf_pool);
3072 
3073  if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL)) {
3074  buf_page_set_accessed_make_young(&block->page, access_time);
3075  }
3076 
3077 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
3078  ut_a(!block->page.file_page_was_freed);
3079 #endif
3080 
3081 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3082  ut_a(++buf_dbg_counter % 5771 || buf_validate());
3083  ut_a(block->page.buf_fix_count > 0);
3085 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3086 
3087  switch (rw_latch) {
3088  case RW_NO_LATCH:
3089  if (must_read) {
3090  /* Let us wait until the read operation
3091  completes */
3092 
3093  for (;;) {
3094  enum buf_io_fix io_fix;
3095 
3096  mutex_enter(&block->mutex);
3097  io_fix = buf_block_get_io_fix(block);
3098  mutex_exit(&block->mutex);
3099 
3100  if (io_fix == BUF_IO_READ) {
3101 
3102  os_thread_sleep(WAIT_FOR_READ);
3103  } else {
3104  break;
3105  }
3106  }
3107  }
3108 
3109  fix_type = MTR_MEMO_BUF_FIX;
3110  break;
3111 
3112  case RW_S_LATCH:
3113  rw_lock_s_lock_func(&(block->lock), 0, file, line);
3114 
3115  fix_type = MTR_MEMO_PAGE_S_FIX;
3116  break;
3117 
3118  default:
3119  ut_ad(rw_latch == RW_X_LATCH);
3120  rw_lock_x_lock_func(&(block->lock), 0, file, line);
3121 
3122  fix_type = MTR_MEMO_PAGE_X_FIX;
3123  break;
3124  }
3125 
3126  mtr_memo_push(mtr, block, fix_type);
3127 
3128  if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL) && !access_time) {
3129  /* In the case of a first access, try to apply linear
3130  read-ahead */
3131 
3132  buf_read_ahead_linear(space, zip_size, offset,
3133  ibuf_inside(mtr));
3134  }
3135 
3136 #ifdef UNIV_IBUF_COUNT_DEBUG
3137  ut_a(ibuf_count_get(buf_block_get_space(block),
3138  buf_block_get_page_no(block)) == 0);
3139 #endif
3140  return(block);
3141 }
3142 
3143 /********************************************************************/
3147 UNIV_INTERN
3148 ibool
3149 buf_page_optimistic_get(
3150 /*====================*/
3151  ulint rw_latch,
3152  buf_block_t* block,
3153  ib_uint64_t modify_clock,
3155  const char* file,
3156  ulint line,
3157  mtr_t* mtr)
3158 {
3159  buf_pool_t* buf_pool;
3160  unsigned access_time;
3161  ibool success;
3162  ulint fix_type;
3163 
3164  ut_ad(block);
3165  ut_ad(mtr);
3166  ut_ad(mtr->state == MTR_ACTIVE);
3167  ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
3168 
3169  mutex_enter(&block->mutex);
3170 
3171  if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
3172 
3173  mutex_exit(&block->mutex);
3174 
3175  return(FALSE);
3176  }
3177 
3178  buf_block_buf_fix_inc(block, file, line);
3179 
3180  mutex_exit(&block->mutex);
3181 
3182  /* Check if this is the first access to the page.
3183  We do a dirty read on purpose, to avoid mutex contention.
3184  This field is only used for heuristic purposes; it does not
3185  affect correctness. */
3186 
3187  access_time = buf_page_is_accessed(&block->page);
3188  buf_page_set_accessed_make_young(&block->page, access_time);
3189 
3190  ut_ad(!ibuf_inside(mtr)
3191  || ibuf_page(buf_block_get_space(block),
3192  buf_block_get_zip_size(block),
3193  buf_block_get_page_no(block), NULL));
3194 
3195  if (rw_latch == RW_S_LATCH) {
3196  success = rw_lock_s_lock_nowait(&(block->lock),
3197  file, line);
3198  fix_type = MTR_MEMO_PAGE_S_FIX;
3199  } else {
3200  success = rw_lock_x_lock_func_nowait(&(block->lock),
3201  file, line);
3202  fix_type = MTR_MEMO_PAGE_X_FIX;
3203  }
3204 
3205  if (UNIV_UNLIKELY(!success)) {
3206  mutex_enter(&block->mutex);
3207  buf_block_buf_fix_dec(block);
3208  mutex_exit(&block->mutex);
3209 
3210  return(FALSE);
3211  }
3212 
3213  if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
3214  buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
3215 
3216  if (rw_latch == RW_S_LATCH) {
3217  rw_lock_s_unlock(&(block->lock));
3218  } else {
3219  rw_lock_x_unlock(&(block->lock));
3220  }
3221 
3222  mutex_enter(&block->mutex);
3223  buf_block_buf_fix_dec(block);
3224  mutex_exit(&block->mutex);
3225 
3226  return(FALSE);
3227  }
3228 
3229  mtr_memo_push(mtr, block, fix_type);
3230 
3231 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3232  ut_a(++buf_dbg_counter % 5771 || buf_validate());
3233  ut_a(block->page.buf_fix_count > 0);
3235 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3236 
3237 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
3238  ut_a(block->page.file_page_was_freed == FALSE);
3239 #endif
3240  if (UNIV_UNLIKELY(!access_time)) {
3241  /* In the case of a first access, try to apply linear
3242  read-ahead */
3243 
3244  buf_read_ahead_linear(buf_block_get_space(block),
3245  buf_block_get_zip_size(block),
3246  buf_block_get_page_no(block),
3247  ibuf_inside(mtr));
3248  }
3249 
3250 #ifdef UNIV_IBUF_COUNT_DEBUG
3251  ut_a(ibuf_count_get(buf_block_get_space(block),
3252  buf_block_get_page_no(block)) == 0);
3253 #endif
3254  buf_pool = buf_pool_from_block(block);
3255  buf_pool->stat.n_page_gets++;
3256 
3257  return(TRUE);
3258 }
3259 
3260 /********************************************************************/
3265 UNIV_INTERN
3266 ibool
3267 buf_page_get_known_nowait(
3268 /*======================*/
3269  ulint rw_latch,
3270  buf_block_t* block,
3271  ulint mode,
3272  const char* file,
3273  ulint line,
3274  mtr_t* mtr)
3275 {
3276  buf_pool_t* buf_pool;
3277  ibool success;
3278  ulint fix_type;
3279 
3280  ut_ad(mtr);
3281  ut_ad(mtr->state == MTR_ACTIVE);
3282  ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
3283 
3284  mutex_enter(&block->mutex);
3285 
3287  /* Another thread is just freeing the block from the LRU list
3288  of the buffer pool: do not try to access this page; this
3289  attempt to access the page can only come through the hash
3290  index because when the buffer block state is ..._REMOVE_HASH,
3291  we have already removed it from the page address hash table
3292  of the buffer pool. */
3293 
3294  mutex_exit(&block->mutex);
3295 
3296  return(FALSE);
3297  }
3298 
3300 
3301  buf_block_buf_fix_inc(block, file, line);
3302 
3303  mutex_exit(&block->mutex);
3304 
3305  buf_pool = buf_pool_from_block(block);
3306 
3307  if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
3308  buf_pool_mutex_enter(buf_pool);
3309  buf_LRU_make_block_young(&block->page);
3310  buf_pool_mutex_exit(buf_pool);
3311  } else if (!buf_page_is_accessed(&block->page)) {
3312  /* Above, we do a dirty read on purpose, to avoid
3313  mutex contention. The field buf_page_t::access_time
3314  is only used for heuristic purposes. Writes to the
3315  field must be protected by mutex, however. */
3316  ulint time_ms = ut_time_ms();
3317 
3318  buf_pool_mutex_enter(buf_pool);
3319  buf_page_set_accessed(&block->page, time_ms);
3320  buf_pool_mutex_exit(buf_pool);
3321  }
3322 
3323  ut_ad(!ibuf_inside(mtr) || mode == BUF_KEEP_OLD);
3324 
3325  if (rw_latch == RW_S_LATCH) {
3326  success = rw_lock_s_lock_nowait(&(block->lock),
3327  file, line);
3328  fix_type = MTR_MEMO_PAGE_S_FIX;
3329  } else {
3330  success = rw_lock_x_lock_func_nowait(&(block->lock),
3331  file, line);
3332  fix_type = MTR_MEMO_PAGE_X_FIX;
3333  }
3334 
3335  if (!success) {
3336  mutex_enter(&block->mutex);
3337  buf_block_buf_fix_dec(block);
3338  mutex_exit(&block->mutex);
3339 
3340  return(FALSE);
3341  }
3342 
3343  mtr_memo_push(mtr, block, fix_type);
3344 
3345 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3346  ut_a(++buf_dbg_counter % 5771 || buf_validate());
3347  ut_a(block->page.buf_fix_count > 0);
3349 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3350 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
3351  ut_a(block->page.file_page_was_freed == FALSE);
3352 #endif
3353 
3354 #ifdef UNIV_IBUF_COUNT_DEBUG
3355  ut_a((mode == BUF_KEEP_OLD)
3356  || (ibuf_count_get(buf_block_get_space(block),
3357  buf_block_get_page_no(block)) == 0));
3358 #endif
3359  buf_pool->stat.n_page_gets++;
3360 
3361  return(TRUE);
3362 }
3363 
3364 /*******************************************************************/
3369 UNIV_INTERN
3370 const buf_block_t*
3371 buf_page_try_get_func(
3372 /*==================*/
3373  ulint space_id,
3374  ulint page_no,
3375  const char* file,
3376  ulint line,
3377  mtr_t* mtr)
3378 {
3379  buf_block_t* block;
3380  ibool success;
3381  ulint fix_type;
3382  buf_pool_t* buf_pool = buf_pool_get(space_id, page_no);
3383 
3384  ut_ad(mtr);
3385  ut_ad(mtr->state == MTR_ACTIVE);
3386 
3387  buf_pool_mutex_enter(buf_pool);
3388  block = buf_block_hash_get(buf_pool, space_id, page_no);
3389 
3390  if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
3391  buf_pool_mutex_exit(buf_pool);
3392  return(NULL);
3393  }
3394 
3395  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
3396 
3397  mutex_enter(&block->mutex);
3398  buf_pool_mutex_exit(buf_pool);
3399 
3400 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3402  ut_a(buf_block_get_space(block) == space_id);
3403  ut_a(buf_block_get_page_no(block) == page_no);
3404 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3405 
3406  buf_block_buf_fix_inc(block, file, line);
3407  mutex_exit(&block->mutex);
3408 
3409  fix_type = MTR_MEMO_PAGE_S_FIX;
3410  success = rw_lock_s_lock_nowait(&block->lock, file, line);
3411 
3412  if (!success) {
3413  /* Let us try to get an X-latch. If the current thread
3414  is holding an X-latch on the page, we cannot get an
3415  S-latch. */
3416 
3417  fix_type = MTR_MEMO_PAGE_X_FIX;
3418  success = rw_lock_x_lock_func_nowait(&block->lock,
3419  file, line);
3420  }
3421 
3422  if (!success) {
3423  mutex_enter(&block->mutex);
3424  buf_block_buf_fix_dec(block);
3425  mutex_exit(&block->mutex);
3426 
3427  return(NULL);
3428  }
3429 
3430  mtr_memo_push(mtr, block, fix_type);
3431 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3432  ut_a(++buf_dbg_counter % 5771 || buf_validate());
3433  ut_a(block->page.buf_fix_count > 0);
3435 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3436 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
3437  ut_a(block->page.file_page_was_freed == FALSE);
3438 #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
3439  buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
3440 
3441  buf_pool->stat.n_page_gets++;
3442 
3443 #ifdef UNIV_IBUF_COUNT_DEBUG
3444  ut_a(ibuf_count_get(buf_block_get_space(block),
3445  buf_block_get_page_no(block)) == 0);
3446 #endif
3447 
3448  return(block);
3449 }
3450 
3451 /********************************************************************/
3453 UNIV_INLINE
3454 void
3455 buf_page_init_low(
3456 /*==============*/
3457  buf_page_t* bpage)
3458 {
3459  bpage->flush_type = BUF_FLUSH_LRU;
3460  bpage->io_fix = BUF_IO_NONE;
3461  bpage->buf_fix_count = 0;
3462  bpage->freed_page_clock = 0;
3463  bpage->access_time = 0;
3464  bpage->newest_modification = 0;
3465  bpage->oldest_modification = 0;
3466  HASH_INVALIDATE(bpage, hash);
3467 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
3468  bpage->file_page_was_freed = FALSE;
3469 #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
3470 }
3471 
3472 /********************************************************************/
3474 static
3475 void
3476 buf_page_init(
3477 /*==========*/
3478  ulint space,
3479  ulint offset,
3481  ulint fold,
3482  buf_block_t* block)
3483 {
3484  buf_page_t* hash_page;
3485  buf_pool_t* buf_pool = buf_pool_get(space, offset);
3486 
3487  ut_ad(buf_pool_mutex_own(buf_pool));
3488  ut_ad(mutex_own(&(block->mutex)));
3490 
3491  /* Set the state of the block */
3492  buf_block_set_file_page(block, space, offset);
3493 
3494 #ifdef UNIV_DEBUG_VALGRIND
3495  if (!space) {
3496  /* Silence valid Valgrind warnings about uninitialized
3497  data being written to data files. There are some unused
3498  bytes on some pages that InnoDB does not initialize. */
3499  UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
3500  }
3501 #endif /* UNIV_DEBUG_VALGRIND */
3502 
3503  buf_block_init_low(block);
3504 
3505  block->lock_hash_val = lock_rec_hash(space, offset);
3506 
3507  buf_page_init_low(&block->page);
3508 
3509  /* Insert into the hash table of file pages */
3510 
3511  hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
3512 
3513  if (UNIV_LIKELY(!hash_page)) {
3514  } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
3515  /* Preserve the reference count. */
3516  ulint buf_fix_count = hash_page->buf_fix_count;
3517 
3518  ut_a(buf_fix_count > 0);
3519  block->page.buf_fix_count += buf_fix_count;
3520  buf_pool_watch_remove(buf_pool, fold, hash_page);
3521  } else {
3522  fprintf(stderr,
3523  "InnoDB: Error: page %lu %lu already found"
3524  " in the hash table: %p, %p\n",
3525  (ulong) space,
3526  (ulong) offset,
3527  (const void*) hash_page, (const void*) block);
3528 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3529  mutex_exit(&block->mutex);
3530  buf_pool_mutex_exit(buf_pool);
3531  buf_print();
3532  buf_LRU_print();
3533  buf_validate();
3534  buf_LRU_validate();
3535 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3536  ut_error;
3537  }
3538 
3539  ut_ad(!block->page.in_zip_hash);
3540  ut_ad(!block->page.in_page_hash);
3541  ut_d(block->page.in_page_hash = TRUE);
3542  HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
3543  fold, &block->page);
3544 }
3545 
3546 /********************************************************************/
3556 UNIV_INTERN
3557 buf_page_t*
3558 buf_page_init_for_read(
3559 /*===================*/
3560  ulint* err,
3561  ulint mode,
3562  ulint space,
3563  ulint zip_size,
3564  ibool unzip,
3565  ib_int64_t tablespace_version,
3569  ulint offset)
3570 {
3571  buf_block_t* block;
3572  buf_page_t* bpage = NULL;
3573  buf_page_t* watch_page;
3574  mtr_t mtr;
3575  ulint fold;
3576  ibool lru = FALSE;
3577  void* data;
3578  buf_pool_t* buf_pool = buf_pool_get(space, offset);
3579 
3580  ut_ad(buf_pool);
3581 
3582  *err = DB_SUCCESS;
3583 
3584  if (mode == BUF_READ_IBUF_PAGES_ONLY) {
3585  /* It is a read-ahead within an ibuf routine */
3586 
3587  ut_ad(!ibuf_bitmap_page(zip_size, offset));
3588 
3589  ibuf_mtr_start(&mtr);
3590 
3592  && !ibuf_page(space, zip_size, offset, &mtr)) {
3593 
3594  ibuf_mtr_commit(&mtr);
3595 
3596  return(NULL);
3597  }
3598  } else {
3599  ut_ad(mode == BUF_READ_ANY_PAGE);
3600  }
3601 
3602  if (zip_size && UNIV_LIKELY(!unzip)
3603  && UNIV_LIKELY(!recv_recovery_is_on())) {
3604  block = NULL;
3605  } else {
3606  block = buf_LRU_get_free_block(buf_pool);
3607  ut_ad(block);
3608  ut_ad(buf_pool_from_block(block) == buf_pool);
3609  }
3610 
3611  fold = buf_page_address_fold(space, offset);
3612 
3613  buf_pool_mutex_enter(buf_pool);
3614 
3615  watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
3616  if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
3617  /* The page is already in the buffer pool. */
3618  watch_page = NULL;
3619 err_exit:
3620  if (block) {
3621  mutex_enter(&block->mutex);
3622  buf_LRU_block_free_non_file_page(block);
3623  mutex_exit(&block->mutex);
3624  }
3625 
3626  bpage = NULL;
3627  goto func_exit;
3628  }
3629 
3630  if (fil_tablespace_deleted_or_being_deleted_in_mem(
3631  space, tablespace_version)) {
3632  /* The page belongs to a space which has been
3633  deleted or is being deleted. */
3634  *err = DB_TABLESPACE_DELETED;
3635 
3636  goto err_exit;
3637  }
3638 
3639  if (block) {
3640  bpage = &block->page;
3641  mutex_enter(&block->mutex);
3642 
3643  ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
3644 
3645  buf_page_init(space, offset, fold, block);
3646 
3647  /* The block must be put to the LRU list, to the old blocks */
3648  buf_LRU_add_block(bpage, TRUE/* to old blocks */);
3649 
3650  /* We set a pass-type x-lock on the frame because then
3651  the same thread which called for the read operation
3652  (and is running now at this point of code) can wait
3653  for the read to complete by waiting for the x-lock on
3654  the frame; if the x-lock were recursive, the same
3655  thread would illegally get the x-lock before the page
3656  read is completed. The x-lock is cleared by the
3657  io-handler thread. */
3658 
3659  rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
3661 
3662  if (UNIV_UNLIKELY(zip_size)) {
3663  page_zip_set_size(&block->page.zip, zip_size);
3664 
3665  /* buf_pool->mutex may be released and
3666  reacquired by buf_buddy_alloc(). Thus, we
3667  must release block->mutex in order not to
3668  break the latching order in the reacquisition
3669  of buf_pool->mutex. We also must defer this
3670  operation until after the block descriptor has
3671  been added to buf_pool->LRU and
3672  buf_pool->page_hash. */
3673  mutex_exit(&block->mutex);
3674  data = buf_buddy_alloc(buf_pool, zip_size, &lru);
3675  mutex_enter(&block->mutex);
3676  block->page.zip.data = static_cast<unsigned char *>(data);
3677 
3678  /* To maintain the invariant
3679  block->in_unzip_LRU_list
3680  == buf_page_belongs_to_unzip_LRU(&block->page)
3681  we have to add this block to unzip_LRU
3682  after block->page.zip.data is set. */
3684  buf_unzip_LRU_add_block(block, TRUE);
3685  }
3686 
3687  mutex_exit(&block->mutex);
3688  } else {
3689  /* Defer buf_buddy_alloc() until after the block has
3690  been found not to exist. The buf_buddy_alloc() and
3691  buf_buddy_free() calls may be expensive because of
3692  buf_buddy_relocate(). */
3693 
3694  /* The compressed page must be allocated before the
3695  control block (bpage), in order to avoid the
3696  invocation of buf_buddy_relocate_block() on
3697  uninitialized data. */
3698  data = buf_buddy_alloc(buf_pool, zip_size, &lru);
3699  bpage = static_cast<buf_page_struct *>(buf_buddy_alloc(buf_pool, sizeof *bpage, &lru));
3700 
3701  /* Initialize the buf_pool pointer. */
3702  bpage->buf_pool_index = buf_pool_index(buf_pool);
3703 
3704  /* If buf_buddy_alloc() allocated storage from the LRU list,
3705  it released and reacquired buf_pool->mutex. Thus, we must
3706  check the page_hash again, as it may have been modified. */
3707  if (UNIV_UNLIKELY(lru)) {
3708 
3709  watch_page = buf_page_hash_get_low(
3710  buf_pool, space, offset, fold);
3711 
3712  if (watch_page
3713  && !buf_pool_watch_is_sentinel(buf_pool,
3714  watch_page)) {
3715 
3716  /* The block was added by some other thread. */
3717  watch_page = NULL;
3718  bpage->state = BUF_BLOCK_ZIP_FREE;
3719  buf_buddy_free(buf_pool, bpage, sizeof *bpage);
3720  buf_buddy_free(buf_pool, data, zip_size);
3721 
3722  bpage = NULL;
3723  goto func_exit;
3724  }
3725  }
3726 
3727  page_zip_des_init(&bpage->zip);
3728  page_zip_set_size(&bpage->zip, zip_size);
3729  bpage->zip.data = static_cast<unsigned char *>(data);
3730 
3731  mutex_enter(&buf_pool->zip_mutex);
3732  UNIV_MEM_DESC(bpage->zip.data,
3733  page_zip_get_size(&bpage->zip), bpage);
3734 
3735  buf_page_init_low(bpage);
3736 
3737  bpage->state = BUF_BLOCK_ZIP_PAGE;
3738  bpage->space = space;
3739  bpage->offset = offset;
3740 
3741 
3742 #ifdef UNIV_DEBUG
3743  bpage->in_page_hash = FALSE;
3744  bpage->in_zip_hash = FALSE;
3745  bpage->in_flush_list = FALSE;
3746  bpage->in_free_list = FALSE;
3747  bpage->in_LRU_list = FALSE;
3748 #endif /* UNIV_DEBUG */
3749 
3750  ut_d(bpage->in_page_hash = TRUE);
3751 
3752  if (UNIV_LIKELY_NULL(watch_page)) {
3753  /* Preserve the reference count. */
3754  ulint buf_fix_count = watch_page->buf_fix_count;
3755  ut_a(buf_fix_count > 0);
3756  bpage->buf_fix_count += buf_fix_count;
3757  ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page));
3758  buf_pool_watch_remove(buf_pool, fold, watch_page);
3759  }
3760 
3761  HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
3762  bpage);
3763 
3764  /* The block must be put to the LRU list, to the old blocks */
3765  buf_LRU_add_block(bpage, TRUE/* to old blocks */);
3766  buf_LRU_insert_zip_clean(bpage);
3767 
3769 
3770  mutex_exit(&buf_pool->zip_mutex);
3771  }
3772 
3773  buf_pool->n_pend_reads++;
3774 func_exit:
3775  buf_pool_mutex_exit(buf_pool);
3776 
3777  if (mode == BUF_READ_IBUF_PAGES_ONLY) {
3778 
3779  ibuf_mtr_commit(&mtr);
3780  }
3781 
3782  ut_ad(!bpage || buf_page_in_file(bpage));
3783  return(bpage);
3784 }
3785 
3786 /********************************************************************/
3792 UNIV_INTERN
3793 buf_block_t*
3794 buf_page_create(
3795 /*============*/
3796  ulint space,
3797  ulint offset,
3799  ulint zip_size,
3800  mtr_t* mtr)
3801 {
3802  buf_frame_t* frame;
3803  buf_block_t* block;
3804  ulint fold;
3805  buf_block_t* free_block = NULL;
3806  ulint time_ms = ut_time_ms();
3807  buf_pool_t* buf_pool = buf_pool_get(space, offset);
3808 
3809  ut_ad(mtr);
3810  ut_ad(mtr->state == MTR_ACTIVE);
3811  ut_ad(space || !zip_size);
3812 
3813  free_block = buf_LRU_get_free_block(buf_pool);
3814 
3815  fold = buf_page_address_fold(space, offset);
3816 
3817  buf_pool_mutex_enter(buf_pool);
3818 
3820  buf_pool, space, offset, fold);
3821 
3822  if (block
3823  && buf_page_in_file(&block->page)
3824  && !buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
3825 #ifdef UNIV_IBUF_COUNT_DEBUG
3826  ut_a(ibuf_count_get(space, offset) == 0);
3827 #endif
3828 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
3829  block->page.file_page_was_freed = FALSE;
3830 #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
3831 
3832  /* Page can be found in buf_pool */
3833  buf_pool_mutex_exit(buf_pool);
3834 
3835  buf_block_free(free_block);
3836 
3837  return(buf_page_get_with_no_latch(space, zip_size,
3838  offset, mtr));
3839  }
3840 
3841  /* If we get here, the page was not in buf_pool: init it there */
3842 
3843 #ifdef UNIV_DEBUG
3844  if (buf_debug_prints) {
3845  fprintf(stderr, "Creating space %lu page %lu to buffer\n",
3846  (ulong) space, (ulong) offset);
3847  }
3848 #endif /* UNIV_DEBUG */
3849 
3850  block = free_block;
3851 
3852  mutex_enter(&block->mutex);
3853 
3854  buf_page_init(space, offset, fold, block);
3855 
3856  /* The block must be put to the LRU list */
3857  buf_LRU_add_block(&block->page, FALSE);
3858 
3859  buf_block_buf_fix_inc(block, __FILE__, __LINE__);
3860  buf_pool->stat.n_pages_created++;
3861 
3862  if (zip_size) {
3863  void* data;
3864  ibool lru;
3865 
3866  /* Prevent race conditions during buf_buddy_alloc(),
3867  which may release and reacquire buf_pool->mutex,
3868  by IO-fixing and X-latching the block. */
3869 
3871  rw_lock_x_lock(&block->lock);
3872 
3873  page_zip_set_size(&block->page.zip, zip_size);
3874  mutex_exit(&block->mutex);
3875  /* buf_pool->mutex may be released and reacquired by
3876  buf_buddy_alloc(). Thus, we must release block->mutex
3877  in order not to break the latching order in
3878  the reacquisition of buf_pool->mutex. We also must
3879  defer this operation until after the block descriptor
3880  has been added to buf_pool->LRU and buf_pool->page_hash. */
3881  data = buf_buddy_alloc(buf_pool, zip_size, &lru);
3882  mutex_enter(&block->mutex);
3883  block->page.zip.data = static_cast<unsigned char *>(data);
3884 
3885  /* To maintain the invariant
3886  block->in_unzip_LRU_list
3887  == buf_page_belongs_to_unzip_LRU(&block->page)
3888  we have to add this block to unzip_LRU after
3889  block->page.zip.data is set. */
3891  buf_unzip_LRU_add_block(block, FALSE);
3892 
3894  rw_lock_x_unlock(&block->lock);
3895  }
3896 
3897  buf_page_set_accessed(&block->page, time_ms);
3898 
3899  buf_pool_mutex_exit(buf_pool);
3900 
3901  mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
3902 
3903  mutex_exit(&block->mutex);
3904 
3905  /* Delete possible entries for the page from the insert buffer:
3906  such can exist if the page belonged to an index which was dropped */
3907 
3908  ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
3909 
3910  /* Flush pages from the end of the LRU list if necessary */
3911  buf_flush_free_margin(buf_pool);
3912 
3913  frame = block->frame;
3914 
3915  memset(frame + FIL_PAGE_PREV, 0xff, 4);
3916  memset(frame + FIL_PAGE_NEXT, 0xff, 4);
3918 
3919  /* Reset to zero the file flush lsn field in the page; if the first
3920  page of an ibdata file is 'created' in this function into the buffer
3921  pool then we lose the original contents of the file flush lsn stamp.
3922  Then InnoDB could in a crash recovery print a big, false, corruption
3923  warning if the stamp contains an lsn bigger than the ib_logfile lsn. */
3924 
3925  memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
3926 
3927 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3928  ut_a(++buf_dbg_counter % 357 || buf_validate());
3929 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3930 #ifdef UNIV_IBUF_COUNT_DEBUG
3931  ut_a(ibuf_count_get(buf_block_get_space(block),
3932  buf_block_get_page_no(block)) == 0);
3933 #endif
3934  return(block);
3935 }
3936 
3937 /********************************************************************/
3940 UNIV_INTERN
3941 void
3942 buf_page_io_complete(
3943 /*=================*/
3944  buf_page_t* bpage)
3945 {
3946  enum buf_io_fix io_type;
3947  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3948  const ibool uncompressed = (buf_page_get_state(bpage)
3949  == BUF_BLOCK_FILE_PAGE);
3950 
3951  ut_a(buf_page_in_file(bpage));
3952 
3953  /* We do not need protect io_fix here by mutex to read
3954  it because this is the only function where we can change the value
3955  from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
3956  ensures that this is the only thread that handles the i/o for this
3957  block. */
3958 
3959  io_type = buf_page_get_io_fix(bpage);
3960  ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
3961 
3962  if (io_type == BUF_IO_READ) {
3963  ulint read_page_no;
3964  ulint read_space_id;
3965  byte* frame;
3966 
3967  if (buf_page_get_zip_size(bpage)) {
3968  frame = bpage->zip.data;
3969  buf_pool->n_pend_unzip++;
3970  if (uncompressed
3971  && !buf_zip_decompress((buf_block_t*) bpage,
3972  FALSE)) {
3973 
3974  buf_pool->n_pend_unzip--;
3975  goto corrupt;
3976  }
3977  buf_pool->n_pend_unzip--;
3978  } else {
3979  ut_a(uncompressed);
3980  frame = ((buf_block_t*) bpage)->frame;
3981  }
3982 
3983  /* If this page is not uninitialized and not in the
3984  doublewrite buffer, then the page number and space id
3985  should be the same as in block. */
3986  read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
3987  read_space_id = mach_read_from_4(
3989 
3990  if (bpage->space == TRX_SYS_SPACE
3991  && trx_doublewrite_page_inside(bpage->offset)) {
3992 
3993  ut_print_timestamp(stderr);
3994  fprintf(stderr,
3995  " InnoDB: Error: reading page %lu\n"
3996  "InnoDB: which is in the"
3997  " doublewrite buffer!\n",
3998  (ulong) bpage->offset);
3999  } else if (!read_space_id && !read_page_no) {
4000  /* This is likely an uninitialized page. */
4001  } else if ((bpage->space
4002  && bpage->space != read_space_id)
4003  || bpage->offset != read_page_no) {
4004  /* We did not compare space_id to read_space_id
4005  if bpage->space == 0, because the field on the
4006  page may contain garbage in MySQL < 4.1.1,
4007  which only supported bpage->space == 0. */
4008 
4009  ut_print_timestamp(stderr);
4010  fprintf(stderr,
4011  " InnoDB: Error: space id and page n:o"
4012  " stored in the page\n"
4013  "InnoDB: read in are %lu:%lu,"
4014  " should be %lu:%lu!\n",
4015  (ulong) read_space_id, (ulong) read_page_no,
4016  (ulong) bpage->space,
4017  (ulong) bpage->offset);
4018  }
4019 
4020  /* From version 3.23.38 up we store the page checksum
4021  to the 4 first bytes of the page end lsn field */
4022 
4023  if (buf_page_is_corrupted(frame,
4024  buf_page_get_zip_size(bpage))) {
4025 corrupt:
4026  fprintf(stderr,
4027  "InnoDB: Database page corruption on disk"
4028  " or a failed\n"
4029  "InnoDB: file read of page %lu.\n"
4030  "InnoDB: You may have to recover"
4031  " from a backup.\n",
4032  (ulong) bpage->offset);
4033  buf_page_print(frame, buf_page_get_zip_size(bpage));
4034  fprintf(stderr,
4035  "InnoDB: Database page corruption on disk"
4036  " or a failed\n"
4037  "InnoDB: file read of page %lu.\n"
4038  "InnoDB: You may have to recover"
4039  " from a backup.\n",
4040  (ulong) bpage->offset);
4041  fputs("InnoDB: It is also possible that"
4042  " your operating\n"
4043  "InnoDB: system has corrupted its"
4044  " own file cache\n"
4045  "InnoDB: and rebooting your computer"
4046  " removes the\n"
4047  "InnoDB: error.\n"
4048  "InnoDB: If the corrupt page is an index page\n"
4049  "InnoDB: you can also try to"
4050  " fix the corruption\n"
4051  "InnoDB: by dumping, dropping,"
4052  " and reimporting\n"
4053  "InnoDB: the corrupt table."
4054  " You can use CHECK\n"
4055  "InnoDB: TABLE to scan your"
4056  " table for corruption.\n"
4057  "InnoDB: See also "
4058  REFMAN "forcing-innodb-recovery.html\n"
4059  "InnoDB: about forcing recovery.\n", stderr);
4060 
4061  if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
4062  fputs("InnoDB: Ending processing because of"
4063  " a corrupt database page.\n",
4064  stderr);
4065  exit(1);
4066  }
4067  }
4068 
4069  if (recv_recovery_is_on()) {
4070  /* Pages must be uncompressed for crash recovery. */
4071  ut_a(uncompressed);
4072  recv_recover_page(TRUE, (buf_block_t*) bpage);
4073  }
4074 
4075  if (uncompressed && !recv_no_ibuf_operations && !srv_fake_write) {
4076  ibuf_merge_or_delete_for_page(
4077  (buf_block_t*) bpage, bpage->space,
4078  bpage->offset, buf_page_get_zip_size(bpage),
4079  TRUE);
4080  }
4081  }
4082 
4083  buf_pool_mutex_enter(buf_pool);
4084  mutex_enter(buf_page_get_mutex(bpage));
4085 
4086 #ifdef UNIV_IBUF_COUNT_DEBUG
4087  if (io_type == BUF_IO_WRITE || uncompressed) {
4088  /* For BUF_IO_READ of compressed-only blocks, the
4089  buffered operations will be merged by buf_page_get_gen()
4090  after the block has been uncompressed. */
4091  ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
4092  }
4093 #endif
4094  /* Because this thread which does the unlocking is not the same that
4095  did the locking, we use a pass value != 0 in unlock, which simply
4096  removes the newest lock debug record, without checking the thread
4097  id. */
4098 
4100 
4101  switch (io_type) {
4102  case BUF_IO_READ:
4103  /* NOTE that the call to ibuf may have moved the ownership of
4104  the x-latch to this OS thread: do not let this confuse you in
4105  debugging! */
4106 
4107  ut_ad(buf_pool->n_pend_reads > 0);
4108  buf_pool->n_pend_reads--;
4109  buf_pool->stat.n_pages_read++;
4110 
4111  if (uncompressed) {
4112  rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
4113  BUF_IO_READ);
4114  }
4115 
4116  break;
4117 
4118  case BUF_IO_WRITE:
4119  /* Write means a flush operation: call the completion
4120  routine in the flush system */
4121 
4122  buf_flush_write_complete(bpage);
4123 
4124  if (uncompressed) {
4125  rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
4126  BUF_IO_WRITE);
4127  }
4128 
4129  buf_pool->stat.n_pages_written++;
4130 
4131  break;
4132 
4133  default:
4134  ut_error;
4135  }
4136 
4137 #ifdef UNIV_DEBUG
4138  if (buf_debug_prints) {
4139  fprintf(stderr, "Has %s page space %lu page no %lu\n",
4140  io_type == BUF_IO_READ ? "read" : "written",
4141  (ulong) buf_page_get_space(bpage),
4142  (ulong) buf_page_get_page_no(bpage));
4143  }
4144 #endif /* UNIV_DEBUG */
4145 
4146  mutex_exit(buf_page_get_mutex(bpage));
4147  buf_pool_mutex_exit(buf_pool);
4148 }
4149 
4150 /*********************************************************************/
4153 static
4154 ibool
4155 buf_all_freed_instance(
4156 /*===================*/
4157  buf_pool_t* buf_pool)
4158 {
4159  ulint i;
4160  buf_chunk_t* chunk;
4161 
4162  ut_ad(buf_pool);
4163 
4164  buf_pool_mutex_enter(buf_pool);
4165 
4166  chunk = buf_pool->chunks;
4167 
4168  for (i = buf_pool->n_chunks; i--; chunk++) {
4169 
4170  const buf_block_t* block = buf_chunk_not_freed(chunk);
4171 
4172  if (UNIV_LIKELY_NULL(block)) {
4173  fprintf(stderr,
4174  "Page %lu %lu still fixed or dirty\n",
4175  (ulong) block->page.space,
4176  (ulong) block->page.offset);
4177  ut_error;
4178  }
4179  }
4180 
4181  buf_pool_mutex_exit(buf_pool);
4182 
4183  return(TRUE);
4184 }
4185 
4186 /*********************************************************************/
4188 static
4189 void
4190 buf_pool_invalidate_instance(
4191 /*=========================*/
4192  buf_pool_t* buf_pool)
4193 {
4194  ibool freed;
4195  int i;
4196 
4197  buf_pool_mutex_enter(buf_pool);
4198 
4199  for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
4200 
4201  /* As this function is called during startup and
4202  during redo application phase during recovery, InnoDB
4203  is single threaded (apart from IO helper threads) at
4204  this stage. No new write batch can be in intialization
4205  stage at this point. */
4206  ut_ad(buf_pool->init_flush[i] == FALSE);
4207 
4208  /* However, it is possible that a write batch that has
4209  been posted earlier is still not complete. For buffer
4210  pool invalidation to proceed we must ensure there is NO
4211  write activity happening. */
4212  if (buf_pool->n_flush[i] > 0) {
4213  buf_pool_mutex_exit(buf_pool);
4214  buf_flush_wait_batch_end(buf_pool, static_cast<buf_flush>(i));
4215  buf_pool_mutex_enter(buf_pool);
4216  }
4217  }
4218 
4219  buf_pool_mutex_exit(buf_pool);
4220 
4221  ut_ad(buf_all_freed_instance(buf_pool));
4222 
4223  freed = TRUE;
4224 
4225  while (freed) {
4226  freed = buf_LRU_search_and_free_block(buf_pool, 100);
4227  }
4228 
4229  buf_pool_mutex_enter(buf_pool);
4230 
4231  ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
4232  ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
4233 
4234  buf_pool->freed_page_clock = 0;
4235  buf_pool->LRU_old = NULL;
4236  buf_pool->LRU_old_len = 0;
4237  buf_pool->LRU_flush_ended = 0;
4238 
4239  memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
4240  buf_refresh_io_stats(buf_pool);
4241 
4242  buf_pool_mutex_exit(buf_pool);
4243 }
4244 
4245 /*********************************************************************/
4249 UNIV_INTERN
4250 void
4251 buf_pool_invalidate(void)
4252 /*=====================*/
4253 {
4254  ulint i;
4255 
4256  for (i = 0; i < srv_buf_pool_instances; i++) {
4257  buf_pool_invalidate_instance(buf_pool_from_array(i));
4258  }
4259 }
4260 
4261 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
4262 /*********************************************************************/
4265 static
4266 ibool
4267 buf_pool_validate_instance(
4268 /*=======================*/
4269  buf_pool_t* buf_pool)
4270 {
4271  buf_page_t* b;
4272  buf_chunk_t* chunk;
4273  ulint i;
4274  ulint n_single_flush = 0;
4275  ulint n_lru_flush = 0;
4276  ulint n_list_flush = 0;
4277  ulint n_lru = 0;
4278  ulint n_flush = 0;
4279  ulint n_free = 0;
4280  ulint n_zip = 0;
4281 
4282  ut_ad(buf_pool);
4283 
4284  buf_pool_mutex_enter(buf_pool);
4285 
4286  chunk = buf_pool->chunks;
4287 
4288  /* Check the uncompressed blocks. */
4289 
4290  for (i = buf_pool->n_chunks; i--; chunk++) {
4291 
4292  ulint j;
4293  buf_block_t* block = chunk->blocks;
4294 
4295  for (j = chunk->size; j--; block++) {
4296 
4297  mutex_enter(&block->mutex);
4298 
4299  switch (buf_block_get_state(block)) {
4300  case BUF_BLOCK_ZIP_FREE:
4301  case BUF_BLOCK_ZIP_PAGE:
4302  case BUF_BLOCK_ZIP_DIRTY:
4303  /* These should only occur on
4304  zip_clean, zip_free[], or flush_list. */
4305  ut_error;
4306  break;
4307 
4308  case BUF_BLOCK_FILE_PAGE:
4309  ut_a(buf_page_hash_get(buf_pool,
4311  block),
4313  block))
4314  == &block->page);
4315 
4316 #ifdef UNIV_IBUF_COUNT_DEBUG
4317  ut_a(buf_page_get_io_fix(&block->page)
4318  == BUF_IO_READ
4319  || !ibuf_count_get(buf_block_get_space(
4320  block),
4322  block)));
4323 #endif
4324  switch (buf_page_get_io_fix(&block->page)) {
4325  case BUF_IO_NONE:
4326  break;
4327 
4328  case BUF_IO_WRITE:
4329  switch (buf_page_get_flush_type(
4330  &block->page)) {
4331  case BUF_FLUSH_LRU:
4332  n_lru_flush++;
4334  &block->lock,
4335  RW_LOCK_SHARED));
4336  break;
4337  case BUF_FLUSH_LIST:
4338  n_list_flush++;
4339  break;
4340  case BUF_FLUSH_SINGLE_PAGE:
4341  n_single_flush++;
4342  break;
4343  default:
4344  ut_error;
4345  }
4346 
4347  break;
4348 
4349  case BUF_IO_READ:
4350 
4351  ut_a(rw_lock_is_locked(&block->lock,
4352  RW_LOCK_EX));
4353  break;
4354  }
4355 
4356  n_lru++;
4357  break;
4358 
4359  case BUF_BLOCK_NOT_USED:
4360  n_free++;
4361  break;
4362 
4364  case BUF_BLOCK_MEMORY:
4365  case BUF_BLOCK_REMOVE_HASH:
4366  /* do nothing */
4367  break;
4368  }
4369 
4370  mutex_exit(&block->mutex);
4371  }
4372  }
4373 
4374  mutex_enter(&buf_pool->zip_mutex);
4375 
4376  /* Check clean compressed-only blocks. */
4377 
4378  for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
4379  b = UT_LIST_GET_NEXT(list, b)) {
4381  switch (buf_page_get_io_fix(b)) {
4382  case BUF_IO_NONE:
4383  /* All clean blocks should be I/O-unfixed. */
4384  break;
4385  case BUF_IO_READ:
4386  /* In buf_LRU_free_block(), we temporarily set
4387  b->io_fix = BUF_IO_READ for a newly allocated
4388  control block in order to prevent
4389  buf_page_get_gen() from decompressing the block. */
4390  break;
4391  default:
4392  ut_error;
4393  break;
4394  }
4395 
4396  /* It is OK to read oldest_modification here because
4397  we have acquired buf_pool->zip_mutex above which acts
4398  as the 'block->mutex' for these bpages. */
4400  ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);
4401 
4402  n_lru++;
4403  n_zip++;
4404  }
4405 
4406  /* Check dirty blocks. */
4407 
4408  buf_flush_list_mutex_enter(buf_pool);
4409  for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
4410  b = UT_LIST_GET_NEXT(list, b)) {
4411  ut_ad(b->in_flush_list);
4413  n_flush++;
4414 
4415  switch (buf_page_get_state(b)) {
4416  case BUF_BLOCK_ZIP_DIRTY:
4417  n_lru++;
4418  n_zip++;
4419  switch (buf_page_get_io_fix(b)) {
4420  case BUF_IO_NONE:
4421  case BUF_IO_READ:
4422  break;
4423  case BUF_IO_WRITE:
4424  switch (buf_page_get_flush_type(b)) {
4425  case BUF_FLUSH_LRU:
4426  n_lru_flush++;
4427  break;
4428  case BUF_FLUSH_LIST:
4429  n_list_flush++;
4430  break;
4431  case BUF_FLUSH_SINGLE_PAGE:
4432  n_single_flush++;
4433  break;
4434  default:
4435  ut_error;
4436  }
4437  break;
4438  }
4439  break;
4440  case BUF_BLOCK_FILE_PAGE:
4441  /* uncompressed page */
4442  break;
4443  case BUF_BLOCK_ZIP_FREE:
4444  case BUF_BLOCK_ZIP_PAGE:
4445  case BUF_BLOCK_NOT_USED:
4447  case BUF_BLOCK_MEMORY:
4448  case BUF_BLOCK_REMOVE_HASH:
4449  ut_error;
4450  break;
4451  }
4452  ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);
4453  }
4454 
4455  ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
4456 
4457  buf_flush_list_mutex_exit(buf_pool);
4458 
4459  mutex_exit(&buf_pool->zip_mutex);
4460 
4461  if (n_lru + n_free > buf_pool->curr_size + n_zip) {
4462  fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
4463  (ulong) n_lru, (ulong) n_free,
4464  (ulong) buf_pool->curr_size, (ulong) n_zip);
4465  ut_error;
4466  }
4467 
4468  ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
4469  if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
4470  fprintf(stderr, "Free list len %lu, free blocks %lu\n",
4471  (ulong) UT_LIST_GET_LEN(buf_pool->free),
4472  (ulong) n_free);
4473  ut_error;
4474  }
4475 
4476  ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
4477  ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
4478  ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
4479 
4480  buf_pool_mutex_exit(buf_pool);
4481 
4482  ut_a(buf_LRU_validate());
4483  ut_a(buf_flush_validate(buf_pool));
4484 
4485  return(TRUE);
4486 }
4487 
4488 /*********************************************************************/
4491 UNIV_INTERN
4492 ibool
4493 buf_validate(void)
4494 /*==============*/
4495 {
4496  ulint i;
4497 
4498  for (i = 0; i < srv_buf_pool_instances; i++) {
4499  buf_pool_t* buf_pool;
4500 
4501  buf_pool = buf_pool_from_array(i);
4502 
4503  buf_pool_validate_instance(buf_pool);
4504  }
4505  return(TRUE);
4506 }
4507 
4508 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
4509 
4510 #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
4511 /*********************************************************************/
4513 static
4514 void
4515 buf_print_instance(
4516 /*===============*/
4517  buf_pool_t* buf_pool)
4518 {
4519  index_id_t* index_ids;
4520  ulint* counts;
4521  ulint size;
4522  ulint i;
4523  ulint j;
4524  index_id_t id;
4525  ulint n_found;
4526  buf_chunk_t* chunk;
4527  dict_index_t* index;
4528 
4529  ut_ad(buf_pool);
4530 
4531  size = buf_pool->curr_size;
4532 
4533  index_ids = mem_alloc(size * sizeof *index_ids);
4534  counts = mem_alloc(sizeof(ulint) * size);
4535 
4536  buf_pool_mutex_enter(buf_pool);
4537  buf_flush_list_mutex_enter(buf_pool);
4538 
4539  fprintf(stderr,
4540  "buf_pool size %lu\n"
4541  "database pages %lu\n"
4542  "free pages %lu\n"
4543  "modified database pages %lu\n"
4544  "n pending decompressions %lu\n"
4545  "n pending reads %lu\n"
4546  "n pending flush LRU %lu list %lu single page %lu\n"
4547  "pages made young %lu, not young %lu\n"
4548  "pages read %lu, created %lu, written %lu\n",
4549  (ulong) size,
4550  (ulong) UT_LIST_GET_LEN(buf_pool->LRU),
4551  (ulong) UT_LIST_GET_LEN(buf_pool->free),
4552  (ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
4553  (ulong) buf_pool->n_pend_unzip,
4554  (ulong) buf_pool->n_pend_reads,
4555  (ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
4556  (ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
4557  (ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
4558  (ulong) buf_pool->stat.n_pages_made_young,
4559  (ulong) buf_pool->stat.n_pages_not_made_young,
4560  (ulong) buf_pool->stat.n_pages_read,
4561  (ulong) buf_pool->stat.n_pages_created,
4562  (ulong) buf_pool->stat.n_pages_written);
4563 
4564  buf_flush_list_mutex_exit(buf_pool);
4565 
4566  /* Count the number of blocks belonging to each index in the buffer */
4567 
4568  n_found = 0;
4569 
4570  chunk = buf_pool->chunks;
4571 
4572  for (i = buf_pool->n_chunks; i--; chunk++) {
4573  buf_block_t* block = chunk->blocks;
4574  ulint n_blocks = chunk->size;
4575 
4576  for (; n_blocks--; block++) {
4577  const buf_frame_t* frame = block->frame;
4578 
4579  if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
4580 
4581  id = btr_page_get_index_id(frame);
4582 
4583  /* Look for the id in the index_ids array */
4584  j = 0;
4585 
4586  while (j < n_found) {
4587 
4588  if (index_ids[j] == id) {
4589  counts[j]++;
4590 
4591  break;
4592  }
4593  j++;
4594  }
4595 
4596  if (j == n_found) {
4597  n_found++;
4598  index_ids[j] = id;
4599  counts[j] = 1;
4600  }
4601  }
4602  }
4603  }
4604 
4605  buf_pool_mutex_exit(buf_pool);
4606 
4607  for (i = 0; i < n_found; i++) {
4608  index = dict_index_get_if_in_cache(index_ids[i]);
4609 
4610  fprintf(stderr,
4611  "Block count for index %llu in buffer is about %lu",
4612  (ullint) index_ids[i],
4613  (ulong) counts[i]);
4614 
4615  if (index) {
4616  putc(' ', stderr);
4617  dict_index_name_print(stderr, NULL, index);
4618  }
4619 
4620  putc('\n', stderr);
4621  }
4622 
4623  mem_free(index_ids);
4624  mem_free(counts);
4625 
4626  ut_a(buf_pool_validate_instance(buf_pool));
4627 }
4628 
4629 /*********************************************************************/
4631 UNIV_INTERN
4632 void
4633 buf_print(void)
4634 /*===========*/
4635 {
4636  ulint i;
4637 
4638  for (i = 0; i < srv_buf_pool_instances; i++) {
4639  buf_pool_t* buf_pool;
4640 
4641  buf_pool = buf_pool_from_array(i);
4642  buf_print_instance(buf_pool);
4643  }
4644 }
4645 #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
4646 
4647 #ifdef UNIV_DEBUG
4648 /*********************************************************************/
4651 UNIV_INTERN
4652 ulint
4653 buf_get_latched_pages_number_instance(
4654 /*==================================*/
4655  buf_pool_t* buf_pool)
4656 {
4657  buf_page_t* b;
4658  ulint i;
4659  buf_chunk_t* chunk;
4660  ulint fixed_pages_number = 0;
4661 
4662  buf_pool_mutex_enter(buf_pool);
4663 
4664  chunk = buf_pool->chunks;
4665 
4666  for (i = buf_pool->n_chunks; i--; chunk++) {
4667  buf_block_t* block;
4668  ulint j;
4669 
4670  block = chunk->blocks;
4671 
4672  for (j = chunk->size; j--; block++) {
4673  if (buf_block_get_state(block)
4674  != BUF_BLOCK_FILE_PAGE) {
4675 
4676  continue;
4677  }
4678 
4679  mutex_enter(&block->mutex);
4680 
4681  if (block->page.buf_fix_count != 0
4682  || buf_page_get_io_fix(&block->page)
4683  != BUF_IO_NONE) {
4684  fixed_pages_number++;
4685  }
4686 
4687  mutex_exit(&block->mutex);
4688  }
4689  }
4690 
4691  mutex_enter(&buf_pool->zip_mutex);
4692 
4693  /* Traverse the lists of clean and dirty compressed-only blocks. */
4694 
4695  for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
4696  b = UT_LIST_GET_NEXT(list, b)) {
4699 
4700  if (b->buf_fix_count != 0
4701  || buf_page_get_io_fix(b) != BUF_IO_NONE) {
4702  fixed_pages_number++;
4703  }
4704  }
4705 
4706  buf_flush_list_mutex_enter(buf_pool);
4707  for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
4708  b = UT_LIST_GET_NEXT(list, b)) {
4709  ut_ad(b->in_flush_list);
4710 
4711  switch (buf_page_get_state(b)) {
4712  case BUF_BLOCK_ZIP_DIRTY:
4713  if (b->buf_fix_count != 0
4714  || buf_page_get_io_fix(b) != BUF_IO_NONE) {
4715  fixed_pages_number++;
4716  }
4717  break;
4718  case BUF_BLOCK_FILE_PAGE:
4719  /* uncompressed page */
4720  break;
4721  case BUF_BLOCK_ZIP_FREE:
4722  case BUF_BLOCK_ZIP_PAGE:
4723  case BUF_BLOCK_NOT_USED:
4725  case BUF_BLOCK_MEMORY:
4726  case BUF_BLOCK_REMOVE_HASH:
4727  ut_error;
4728  break;
4729  }
4730  }
4731 
4732  buf_flush_list_mutex_exit(buf_pool);
4733  mutex_exit(&buf_pool->zip_mutex);
4734  buf_pool_mutex_exit(buf_pool);
4735 
4736  return(fixed_pages_number);
4737 }
4738 
4739 /*********************************************************************/
4742 UNIV_INTERN
4743 ulint
4744 buf_get_latched_pages_number(void)
4745 /*==============================*/
4746 {
4747  ulint i;
4748  ulint total_latched_pages = 0;
4749 
4750  for (i = 0; i < srv_buf_pool_instances; i++) {
4751  buf_pool_t* buf_pool;
4752 
4753  buf_pool = buf_pool_from_array(i);
4754 
4755  total_latched_pages += buf_get_latched_pages_number_instance(
4756  buf_pool);
4757  }
4758 
4759  return(total_latched_pages);
4760 }
4761 
4762 #endif /* UNIV_DEBUG */
4763 
4764 /*********************************************************************/
4767 UNIV_INTERN
4768 ulint
4769 buf_get_n_pending_ios(void)
4770 /*=======================*/
4771 {
4772  ulint i;
4773  ulint pend_ios = 0;
4774 
4775  for (i = 0; i < srv_buf_pool_instances; i++) {
4776  buf_pool_t* buf_pool;
4777 
4778  buf_pool = buf_pool_from_array(i);
4779 
4780  pend_ios +=
4781  buf_pool->n_pend_reads
4782  + buf_pool->n_flush[BUF_FLUSH_LRU]
4783  + buf_pool->n_flush[BUF_FLUSH_LIST]
4784  + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
4785  }
4786 
4787  return(pend_ios);
4788 }
4789 
4790 /*********************************************************************/
4794 UNIV_INTERN
4795 ulint
4796 buf_get_modified_ratio_pct(void)
4797 /*============================*/
4798 {
4799  ulint ratio;
4800  ulint lru_len = 0;
4801  ulint free_len = 0;
4802  ulint flush_list_len = 0;
4803 
4804  buf_get_total_list_len(&lru_len, &free_len, &flush_list_len);
4805 
4806  ratio = (100 * flush_list_len) / (1 + lru_len + free_len);
4807 
4808  /* 1 + is there to avoid division by zero */
4809 
4810  return(ratio);
4811 }
4812 
4813 /*******************************************************************/
4815 static
4816 void
4817 buf_stats_aggregate_pool_info(
4818 /*==========================*/
4819  buf_pool_info_t* total_info,
4822  const buf_pool_info_t* pool_info)
4824 {
4825  ut_a(total_info && pool_info);
4826 
4827  /* Nothing to copy if total_info is the same as pool_info */
4828  if (total_info == pool_info) {
4829  return;
4830  }
4831 
4832  total_info->pool_size += pool_info->pool_size;
4833  total_info->lru_len += pool_info->lru_len;
4834  total_info->old_lru_len += pool_info->old_lru_len;
4835  total_info->free_list_len += pool_info->free_list_len;
4836  total_info->flush_list_len += pool_info->flush_list_len;
4837  total_info->n_pend_unzip += pool_info->n_pend_unzip;
4838  total_info->n_pend_reads += pool_info->n_pend_reads;
4839  total_info->n_pending_flush_lru += pool_info->n_pending_flush_lru;
4840  total_info->n_pending_flush_list += pool_info->n_pending_flush_list;
4841  total_info->n_pending_flush_single_page +=
4842  pool_info->n_pending_flush_single_page;
4843  total_info->n_pages_made_young += pool_info->n_pages_made_young;
4844  total_info->n_pages_not_made_young += pool_info->n_pages_not_made_young;
4845  total_info->n_pages_read += pool_info->n_pages_read;
4846  total_info->n_pages_created += pool_info->n_pages_created;
4847  total_info->n_pages_written += pool_info->n_pages_written;
4848  total_info->n_page_gets += pool_info->n_page_gets;
4849  total_info->n_ra_pages_read += pool_info->n_ra_pages_read;
4850  total_info->n_ra_pages_evicted += pool_info->n_ra_pages_evicted;
4851  total_info->page_made_young_rate += pool_info->page_made_young_rate;
4852  total_info->page_not_made_young_rate +=
4853  pool_info->page_not_made_young_rate;
4854  total_info->pages_read_rate += pool_info->pages_read_rate;
4855  total_info->pages_created_rate += pool_info->pages_created_rate;
4856  total_info->pages_written_rate += pool_info->pages_written_rate;
4857  total_info->n_page_get_delta += pool_info->n_page_get_delta;
4858  total_info->page_read_delta += pool_info->page_read_delta;
4859  total_info->young_making_delta += pool_info->young_making_delta;
4860  total_info->not_young_making_delta += pool_info->not_young_making_delta;
4861  total_info->pages_readahead_rate += pool_info->pages_readahead_rate;
4862  total_info->pages_evicted_rate += pool_info->pages_evicted_rate;
4863  total_info->unzip_lru_len += pool_info->unzip_lru_len;
4864  total_info->io_sum += pool_info->io_sum;
4865  total_info->io_cur += pool_info->io_cur;
4866  total_info->unzip_sum += pool_info->unzip_sum;
4867  total_info->unzip_cur += pool_info->unzip_cur;
4868 }
4869 /*******************************************************************/
4873 static
4874 void
4875 buf_stats_get_pool_info(
4876 /*====================*/
4877  buf_pool_t* buf_pool,
4878  ulint pool_id,
4879  buf_pool_info_t* all_pool_info)
4881 {
4882  buf_pool_info_t* pool_info;
4883  time_t current_time;
4884  double time_elapsed;
4885 
4886  /* Find appropriate pool_info to store stats for this buffer pool */
4887  pool_info = &all_pool_info[pool_id];
4888 
4889  buf_pool_mutex_enter(buf_pool);
4890  buf_flush_list_mutex_enter(buf_pool);
4891 
4892  pool_info->pool_unique_id = pool_id;
4893 
4894  pool_info->pool_size = buf_pool->curr_size;
4895 
4896  pool_info->lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
4897 
4898  pool_info->old_lru_len = buf_pool->LRU_old_len;
4899 
4900  pool_info->free_list_len = UT_LIST_GET_LEN(buf_pool->free);
4901 
4902  pool_info->flush_list_len = UT_LIST_GET_LEN(buf_pool->flush_list);
4903 
4904  pool_info->n_pend_unzip = UT_LIST_GET_LEN(buf_pool->unzip_LRU);
4905 
4906  pool_info->n_pend_reads = buf_pool->n_pend_reads;
4907 
4908  pool_info->n_pending_flush_lru =
4909  (buf_pool->n_flush[BUF_FLUSH_LRU]
4910  + buf_pool->init_flush[BUF_FLUSH_LRU]);
4911 
4912  pool_info->n_pending_flush_list =
4913  (buf_pool->n_flush[BUF_FLUSH_LIST]
4914  + buf_pool->init_flush[BUF_FLUSH_LIST]);
4915 
4916  pool_info->n_pending_flush_single_page =
4917  buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
4918 
4919  buf_flush_list_mutex_exit(buf_pool);
4920 
4921  current_time = time(NULL);
4922  time_elapsed = 0.001 + difftime(current_time,
4923  buf_pool->last_printout_time);
4924 
4925  pool_info->n_pages_made_young = buf_pool->stat.n_pages_made_young;
4926 
4927  pool_info->n_pages_not_made_young =
4928  buf_pool->stat.n_pages_not_made_young;
4929 
4930  pool_info->n_pages_read = buf_pool->stat.n_pages_read;
4931 
4932  pool_info->n_pages_created = buf_pool->stat.n_pages_created;
4933 
4934  pool_info->n_pages_written = buf_pool->stat.n_pages_written;
4935 
4936  pool_info->n_page_gets = buf_pool->stat.n_page_gets;
4937 
4938  pool_info->n_ra_pages_read = buf_pool->stat.n_ra_pages_read;
4939 
4940  pool_info->n_ra_pages_evicted = buf_pool->stat.n_ra_pages_evicted;
4941 
4942  pool_info->page_made_young_rate =
4943  (buf_pool->stat.n_pages_made_young
4944  - buf_pool->old_stat.n_pages_made_young) / time_elapsed;
4945 
4946  pool_info->page_not_made_young_rate =
4947  (buf_pool->stat.n_pages_not_made_young
4948  - buf_pool->old_stat.n_pages_not_made_young) / time_elapsed;
4949 
4950  pool_info->pages_read_rate =
4951  (buf_pool->stat.n_pages_read
4952  - buf_pool->old_stat.n_pages_read) / time_elapsed;
4953 
4954  pool_info->pages_created_rate =
4955  (buf_pool->stat.n_pages_created
4956  - buf_pool->old_stat.n_pages_created) / time_elapsed;
4957 
4958  pool_info->pages_written_rate =
4959  (buf_pool->stat.n_pages_written
4960  - buf_pool->old_stat.n_pages_written) / time_elapsed;
4961 
4962  pool_info->n_page_get_delta = buf_pool->stat.n_page_gets
4963  - buf_pool->old_stat.n_page_gets;
4964 
4965  if (pool_info->n_page_get_delta) {
4966  pool_info->page_read_delta = buf_pool->stat.n_pages_read
4967  - buf_pool->old_stat.n_pages_read;
4968 
4969  pool_info->young_making_delta =
4970  buf_pool->stat.n_pages_made_young
4971  - buf_pool->old_stat.n_pages_made_young;
4972 
4973  pool_info->not_young_making_delta =
4974  buf_pool->stat.n_pages_not_made_young
4975  - buf_pool->old_stat.n_pages_not_made_young;
4976  }
4977 
4978  pool_info->pages_readahead_rate =
4979  (buf_pool->stat.n_ra_pages_read
4980  - buf_pool->old_stat.n_ra_pages_read) / time_elapsed;
4981 
4982  pool_info->pages_evicted_rate =
4983  (buf_pool->stat.n_ra_pages_evicted
4984  - buf_pool->old_stat.n_ra_pages_evicted) / time_elapsed;
4985 
4986  pool_info->unzip_lru_len = UT_LIST_GET_LEN(buf_pool->unzip_LRU);
4987 
4988  pool_info->io_sum = buf_LRU_stat_sum.io;
4989 
4990  pool_info->io_cur = buf_LRU_stat_cur.io;
4991 
4992  pool_info->unzip_sum = buf_LRU_stat_sum.unzip;
4993 
4994  pool_info->unzip_cur = buf_LRU_stat_cur.unzip;
4995 
4996  buf_refresh_io_stats(buf_pool);
4997  buf_pool_mutex_exit(buf_pool);
4998 }
4999 
5000 /*********************************************************************/
5002 static
5003 void
5004 buf_print_io_instance(
5005 /*==================*/
5006  buf_pool_info_t*pool_info,
5007  FILE* file)
5008 {
5009  ut_ad(pool_info);
5010 
5011  fprintf(file,
5012  "Buffer pool size %lu\n"
5013  "Free buffers %lu\n"
5014  "Database pages %lu\n"
5015  "Old database pages %lu\n"
5016  "Modified db pages %lu\n"
5017  "Pending reads %lu\n"
5018  "Pending writes: LRU %lu, flush list %lu, single page %lu\n",
5019  pool_info->pool_size,
5020  pool_info->free_list_len,
5021  pool_info->lru_len,
5022  pool_info->old_lru_len,
5023  pool_info->flush_list_len,
5024  pool_info->n_pend_reads,
5025  pool_info->n_pending_flush_lru,
5026  pool_info->n_pending_flush_list,
5027  pool_info->n_pending_flush_single_page);
5028 
5029  fprintf(file,
5030  "Pages made young %lu, not young %lu\n"
5031  "%.2f youngs/s, %.2f non-youngs/s\n"
5032  "Pages read %lu, created %lu, written %lu\n"
5033  "%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
5034  pool_info->n_pages_made_young,
5035  pool_info->n_pages_not_made_young,
5036  pool_info->page_made_young_rate,
5037  pool_info->page_not_made_young_rate,
5038  pool_info->n_pages_read,
5039  pool_info->n_pages_created,
5040  pool_info->n_pages_written,
5041  pool_info->pages_read_rate,
5042  pool_info->pages_created_rate,
5043  pool_info->pages_written_rate);
5044 
5045  if (pool_info->n_page_get_delta) {
5046  fprintf(file,
5047  "Buffer pool hit rate %lu / 1000,"
5048  " young-making rate %lu / 1000 not %lu / 1000\n",
5049  (ulong) (1000 - (1000 * pool_info->page_read_delta
5050  / pool_info->n_page_get_delta)),
5051  (ulong) (1000 * pool_info->young_making_delta
5052  / pool_info->n_page_get_delta),
5053  (ulong) (1000 * pool_info->not_young_making_delta
5054  / pool_info->n_page_get_delta));
5055  } else {
5056  fputs("No buffer pool page gets since the last printout\n",
5057  file);
5058  }
5059 
5060  /* Statistics about read ahead algorithm */
5061  fprintf(file, "Pages read ahead %.2f/s,"
5062  " evicted without access %.2f/s\n",
5063  pool_info->pages_readahead_rate,
5064  pool_info->pages_evicted_rate);
5065 
5066  /* Print some values to help us with visualizing what is
5067  happening with LRU eviction. */
5068  fprintf(file,
5069  "LRU len: %lu, unzip_LRU len: %lu\n"
5070  "I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
5071  pool_info->lru_len, pool_info->unzip_lru_len,
5072  pool_info->io_sum, pool_info->io_cur,
5073  pool_info->unzip_sum, pool_info->unzip_cur);
5074 }
5075 
5076 /*********************************************************************/
5078 UNIV_INTERN
5079 void
5080 buf_print_io(
5081 /*=========*/
5082  FILE* file)
5083 {
5084  ulint i;
5085  buf_pool_info_t* pool_info;
5086  buf_pool_info_t* pool_info_total;
5087 
5088  /* If srv_buf_pool_instances is greater than 1, allocate
5089  one extra buf_pool_info_t, the last one stores
5090  aggregated/total values from all pools */
5091  if (srv_buf_pool_instances > 1) {
5092  pool_info = (buf_pool_info_t*) mem_zalloc((
5093  srv_buf_pool_instances + 1) * sizeof *pool_info);
5094 
5095  pool_info_total = &pool_info[srv_buf_pool_instances];
5096  } else {
5097  ut_a(srv_buf_pool_instances == 1);
5098  pool_info_total = pool_info = (buf_pool_info_t*) mem_zalloc(
5099  sizeof *pool_info);
5100  }
5101 
5102  for (i = 0; i < srv_buf_pool_instances; i++) {
5103  buf_pool_t* buf_pool;
5104 
5105  buf_pool = buf_pool_from_array(i);
5106 
5107  /* Fetch individual buffer pool info and calculate
5108  aggregated stats along the way */
5109  buf_stats_get_pool_info(buf_pool, i, pool_info);
5110 
5111  /* If we have more than one buffer pool, store
5112  the aggregated stats */
5113  if (srv_buf_pool_instances > 1) {
5114  buf_stats_aggregate_pool_info(pool_info_total,
5115  &pool_info[i]);
5116  }
5117  }
5118 
5119  /* Print the aggreate buffer pool info */
5120  buf_print_io_instance(pool_info_total, file);
5121 
5122  /* If there are more than one buffer pool, print each individual pool
5123  info */
5124  if (srv_buf_pool_instances > 1) {
5125  fputs("----------------------\n"
5126  "INDIVIDUAL BUFFER POOL INFO\n"
5127  "----------------------\n", file);
5128 
5129  for (i = 0; i < srv_buf_pool_instances; i++) {
5130  fprintf(file, "---BUFFER POOL %lu\n", i);
5131  buf_print_io_instance(&pool_info[i], file);
5132  }
5133  }
5134 
5135  mem_free(pool_info);
5136 }
5137 
5138 /**********************************************************************/
5140 UNIV_INTERN
5141 void
5142 buf_refresh_io_stats(
5143 /*=================*/
5144  buf_pool_t* buf_pool)
5145 {
5146  buf_pool->last_printout_time = ut_time();
5147  buf_pool->old_stat = buf_pool->stat;
5148 }
5149 
5150 /**********************************************************************/
5152 UNIV_INTERN
5153 void
5154 buf_refresh_io_stats_all(void)
5155 /*==========================*/
5156 {
5157  ulint i;
5158 
5159  for (i = 0; i < srv_buf_pool_instances; i++) {
5160  buf_pool_t* buf_pool;
5161 
5162  buf_pool = buf_pool_from_array(i);
5163 
5164  buf_refresh_io_stats(buf_pool);
5165  }
5166 }
5167 
5168 /**********************************************************************/
5171 UNIV_INTERN
5172 ibool
5173 buf_all_freed(void)
5174 /*===============*/
5175 {
5176  ulint i;
5177 
5178  for (i = 0; i < srv_buf_pool_instances; i++) {
5179  buf_pool_t* buf_pool;
5180 
5181  buf_pool = buf_pool_from_array(i);
5182 
5183  if (!buf_all_freed_instance(buf_pool)) {
5184  return(FALSE);
5185  }
5186  }
5187 
5188  return(TRUE);
5189 }
5190 
5191 /*********************************************************************/
5195 UNIV_INTERN
5196 ibool
5197 buf_pool_check_no_pending_io(void)
5198 /*==============================*/
5199 {
5200  ulint i;
5201  ibool ret = TRUE;
5202 
5204 
5205  for (i = 0; i < srv_buf_pool_instances && ret; i++) {
5206  const buf_pool_t* buf_pool;
5207 
5208  buf_pool = buf_pool_from_array(i);
5209 
5210  if (buf_pool->n_pend_reads
5211  + buf_pool->n_flush[BUF_FLUSH_LRU]
5212  + buf_pool->n_flush[BUF_FLUSH_LIST]
5213  + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) {
5214 
5215  ret = FALSE;
5216  }
5217  }
5218 
5220 
5221  return(ret);
5222 }
5223 
5224 #if 0
5225 Code currently not used
5226 /*********************************************************************/
5229 UNIV_INTERN
5230 ulint
5232 /*=======================*/
5233 {
5234  ulint len;
5235 
5236  buf_pool_mutex_enter(buf_pool);
5237 
5238  len = UT_LIST_GET_LEN(buf_pool->free);
5239 
5240  buf_pool_mutex_exit(buf_pool);
5241 
5242  return(len);
5243 }
5244 #endif
5245 
5246 #else /* !UNIV_HOTBACKUP */
5247 /********************************************************************/
5249 UNIV_INTERN
5250 void
5251 buf_page_init_for_backup_restore(
5252 /*=============================*/
5253  ulint space,
5254  ulint offset,
5256  ulint zip_size,
5258  buf_block_t* block)
5259 {
5260  block->page.state = BUF_BLOCK_FILE_PAGE;
5261  block->page.space = space;
5262  block->page.offset = offset;
5263 
5264  page_zip_des_init(&block->page.zip);
5265 
5266  /* We assume that block->page.data has been allocated
5267  with zip_size == UNIV_PAGE_SIZE. */
5268  ut_ad(zip_size <= UNIV_PAGE_SIZE);
5269  ut_ad(ut_is_2pow(zip_size));
5270  page_zip_set_size(&block->page.zip, zip_size);
5271  if (zip_size) {
5272  block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
5273  }
5274 }
5275 #endif /* !UNIV_HOTBACKUP */