Intel® OpenMP* Runtime Library
kmp_alloc.c
1 /*
2  * kmp_alloc.c -- private/shared dynamic memory allocation and management
3  * $Revision: 43450 $
4  * $Date: 2014-09-09 10:07:22 -0500 (Tue, 09 Sep 2014) $
5  */
6 
7 /* <copyright>
8  Copyright (c) 1997-2014 Intel Corporation. All Rights Reserved.
9 
10  Redistribution and use in source and binary forms, with or without
11  modification, are permitted provided that the following conditions
12  are met:
13 
14  * Redistributions of source code must retain the above copyright
15  notice, this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright
17  notice, this list of conditions and the following disclaimer in the
18  documentation and/or other materials provided with the distribution.
19  * Neither the name of Intel Corporation nor the names of its
20  contributors may be used to endorse or promote products derived
21  from this software without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 </copyright> */
36 
37 #include "kmp.h"
38 #include "kmp_wrapper_malloc.h"
39 #include "kmp_io.h"
40 
41 // Disable bget when it is not used
42 #if KMP_USE_BGET
43 
44 /* Thread private buffer management code */
45 
46 typedef int (*bget_compact_t)(size_t, int);
47 typedef void *(*bget_acquire_t)(size_t);
48 typedef void (*bget_release_t)(void *);
49 
50 /* NOTE: bufsize must be a signed datatype */
51 
52 #if KMP_OS_WINDOWS
53 # if KMP_ARCH_X86 || KMP_ARCH_ARM
54  typedef kmp_int32 bufsize;
55 # else
56  typedef kmp_int64 bufsize;
57 # endif
58 #else
59  typedef ssize_t bufsize;
60 #endif
61 
62 /* The three modes of operation are fifo search, lifo search, and best-fit */
63 
64 typedef enum bget_mode {
65  bget_mode_fifo = 0,
66  bget_mode_lifo = 1,
67  bget_mode_best = 2
68 } bget_mode_t;
69 
70 
71 static void bpool( kmp_info_t *th, void *buffer, bufsize len);
72 static void *bget( kmp_info_t *th, bufsize size);
73 static void *bgetz( kmp_info_t *th, bufsize size);
74 static void *bgetr( kmp_info_t *th, void *buffer, bufsize newsize);
75 static void brel( kmp_info_t *th, void *buf);
76 static void bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr );
77 
78 #ifdef KMP_DEBUG
79 static void bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel);
80 static void bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel);
81 static void bufdump( kmp_info_t *th, void *buf);
82 static void bpoold( kmp_info_t *th, void *pool, int dumpalloc, int dumpfree);
83 static int bpoolv( kmp_info_t *th, void *pool);
84 #endif
85 
86 /* BGET CONFIGURATION */
87  /* Buffer allocation size quantum:
88  all buffers allocated are a
89  multiple of this size. This
90  MUST be a power of two. */
91 
92  /* On IA-32 architecture with Linux* OS,
93  malloc() does not
94  ensure 16-byte alignment */
95 
96 #if KMP_ARCH_X86 || !KMP_HAVE_QUAD
97 
98 #define SizeQuant 8
99 #define AlignType double
100 
101 #else
102 
103 #define SizeQuant 16
104 #define AlignType _Quad
105 
106 #endif
107 
108 #define BufStats 1 /* Define this symbol to enable the
109  bstats() function which calculates
110  the total free space in the buffer
111  pool, the largest available
112  buffer, and the total space
113  currently allocated. */
114 
115 #ifdef KMP_DEBUG
116 
117 #define BufDump 1 /* Define this symbol to enable the
118  bpoold() function which dumps the
119  buffers in a buffer pool. */
120 
121 #define BufValid 1 /* Define this symbol to enable the
122  bpoolv() function for validating
123  a buffer pool. */
124 
125 #define DumpData 1 /* Define this symbol to enable the
126  bufdump() function which allows
127  dumping the contents of an allocated
128  or free buffer. */
129 #ifdef NOT_USED_NOW
130 
131 #define FreeWipe 1 /* Wipe free buffers to a guaranteed
132  pattern of garbage to trip up
133  miscreants who attempt to use
134  pointers into released buffers. */
135 
136 #define BestFit 1 /* Use a best fit algorithm when
137  searching for space for an
138  allocation request. This uses
139  memory more efficiently, but
140  allocation will be much slower. */
141 #endif /* NOT_USED_NOW */
142 #endif /* KMP_DEBUG */
143 
144 
145 static bufsize bget_bin_size[ ] = {
146  0,
147 // 1 << 6, /* .5 Cache line */
148  1 << 7, /* 1 Cache line, new */
149  1 << 8, /* 2 Cache lines */
150  1 << 9, /* 4 Cache lines, new */
151  1 << 10, /* 8 Cache lines */
152  1 << 11, /* 16 Cache lines, new */
153  1 << 12,
154  1 << 13, /* new */
155  1 << 14,
156  1 << 15, /* new */
157  1 << 16,
158  1 << 17,
159  1 << 18,
160  1 << 19,
161  1 << 20, /* 1MB */
162  1 << 21, /* 2MB */
163  1 << 22, /* 4MB */
164  1 << 23, /* 8MB */
165  1 << 24, /* 16MB */
166  1 << 25, /* 32MB */
167 };
168 
169 #define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
170 
171 struct bfhead;
172 
173 /* Declare the interface, including the requested buffer size type,
174  bufsize. */
175 
176 /* Queue links */
177 
178 typedef struct qlinks {
179  struct bfhead *flink; /* Forward link */
180  struct bfhead *blink; /* Backward link */
181 } qlinks_t;
182 
183 /* Header in allocated and free buffers */
184 
185 typedef struct bhead2 {
186  kmp_info_t *bthr; /* The thread which owns the buffer pool */
187  bufsize prevfree; /* Relative link back to previous
188  free buffer in memory or 0 if
189  previous buffer is allocated. */
190  bufsize bsize; /* Buffer size: positive if free,
191  negative if allocated. */
192 } bhead2_t;
193 
194 /* Make sure the bhead structure is a multiple of SizeQuant in size. */
195 
196 typedef union bhead {
197  KMP_ALIGN( SizeQuant )
198  AlignType b_align;
199  char b_pad[ sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant)) ];
200  bhead2_t bb;
201 } bhead_t;
202 #define BH(p) ((bhead_t *) (p))
203 
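/* Editorial aside (not part of the original source): a minimal compile-time
   sketch of the invariant stated above, using the classic negative-array-size
   idiom; the typedef name is ours. Note that when sizeof(bhead2_t) is already
   a multiple of SizeQuant, the b_pad sizing adds one full extra quantum; the
   union stays a SizeQuant multiple either way, which is all the allocator
   relies on. */
typedef char bhead_is_quantum_multiple[ (sizeof(bhead_t) % SizeQuant == 0) ? 1 : -1 ];
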
204 /* Header in directly allocated buffers (by acqfcn) */
205 
206 typedef struct bdhead
207 {
208  bufsize tsize; /* Total size, including overhead */
209  bhead_t bh; /* Common header */
210 } bdhead_t;
211 #define BDH(p) ((bdhead_t *) (p))
212 
213 /* Header in free buffers */
214 
215 typedef struct bfhead {
216  bhead_t bh; /* Common allocated/free header */
217  qlinks_t ql; /* Links on free list */
218 } bfhead_t;
219 #define BFH(p) ((bfhead_t *) (p))
220 
221 typedef struct thr_data {
222  bfhead_t freelist[ MAX_BGET_BINS ];
223 #if BufStats
224  size_t totalloc; /* Total space currently allocated */
225  long numget, numrel; /* Number of bget() and brel() calls */
226  long numpblk; /* Number of pool blocks */
227  long numpget, numprel; /* Number of block gets and rels */
228  long numdget, numdrel; /* Number of direct gets and rels */
229 #endif /* BufStats */
230 
231  /* Automatic expansion block management functions */
232  bget_compact_t compfcn;
233  bget_acquire_t acqfcn;
234  bget_release_t relfcn;
235 
236  bget_mode_t mode; /* what allocation mode to use? */
237 
238  bufsize exp_incr; /* Expansion block size */
239  bufsize pool_len; /* 0: no bpool calls have been made
240  -1: not all pool blocks are
241  the same size
242  >0: (common) block size for all
243  bpool calls made so far
244  */
245  bfhead_t * last_pool; /* Last pool owned by this thread (delayed deallocation) */
246 } thr_data_t;
247 
248 /* Minimum allocation quantum: */
249 
250 #define QLSize (sizeof(qlinks_t))
251 #define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
252 #define MaxSize (bufsize)( ~ ( ( (bufsize)( 1 ) << ( sizeof( bufsize ) * CHAR_BIT - 1 ) ) | ( SizeQuant - 1 ) ) )
253  // Maximum for the requested size.
254 
255 /* End sentinel: value placed in bsize field of dummy block delimiting
256  end of pool block. The most negative number which will fit in a
257  bufsize, defined in a way that the compiler will accept. */
258 
259 #define ESent ((bufsize) (-(((((bufsize)1)<<((int)sizeof(bufsize)*8-2))-1)*2)-2))
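
/* Editorial aside (not part of the original source): worked values for a
   32-bit bufsize with SizeQuant 16, to make the bit-twiddling concrete.
   MaxSize clears the sign bit and the low quantum bits, leaving the largest
   positive SizeQuant multiple:

       MaxSize = ~( 0x80000000 | 0xF ) = 0x7FFFFFF0

   ESent folds to INT32_MIN without overflowing at any intermediate step,
   which a bare "-2147483648" literal could not guarantee in C89:

       -(((1 << 30) - 1) * 2) - 2 = -(0x7FFFFFFE) - 2 = -0x80000000   */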
260 
261 /* ------------------------------------------------------------------------ */
262 
263 /* Thread Data management routines */
264 
265 static int
266 bget_get_bin( bufsize size )
267 {
268  // binary chop bins
269  int lo = 0, hi = MAX_BGET_BINS - 1;
270 
271  KMP_DEBUG_ASSERT( size > 0 );
272 
273  while ( (hi - lo) > 1 ) {
274  int mid = (lo + hi) >> 1;
275  if (size < bget_bin_size[ mid ])
276  hi = mid - 1;
277  else
278  lo = mid;
279  }
280 
281  KMP_DEBUG_ASSERT( (lo >= 0) && (lo < MAX_BGET_BINS) );
282 
283  return lo;
284 }
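
/* Editorial aside (not part of the original source): illustrative traces of
   the chop above. The result is a bin whose threshold does not exceed the
   size; because "hi = mid - 1" can occasionally step past the exact floor,
   bget() compensates by scanning upward from the returned bin.

       bget_get_bin( 127 ) == 0   -- below the first 128-byte bin
       bget_get_bin( 128 ) == 1   -- exactly one cache line
       bget_get_bin( 200 ) == 1   -- still served from the 128-byte bin   */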
285 
286 static void
287 set_thr_data( kmp_info_t *th )
288 {
289  int i;
290  thr_data_t *data;
291 
292  data =
293  (thr_data_t *)(
294  ( ! th->th.th_local.bget_data ) ? __kmp_allocate( sizeof( *data ) ) : th->th.th_local.bget_data
295  );
296 
297  memset( data, '\0', sizeof( *data ) );
298 
299  for (i = 0; i < MAX_BGET_BINS; ++i) {
300  data->freelist[ i ].ql.flink = & data->freelist[ i ];
301  data->freelist[ i ].ql.blink = & data->freelist[ i ];
302  }
303 
304  th->th.th_local.bget_data = data;
305  th->th.th_local.bget_list = 0;
306 #if ! USE_CMP_XCHG_FOR_BGET
307 #ifdef USE_QUEUING_LOCK_FOR_BGET
308  __kmp_init_lock( & th->th.th_local.bget_lock );
309 #else
310  __kmp_init_bootstrap_lock( & th->th.th_local.bget_lock );
311 #endif /* USE_LOCK_FOR_BGET */
312 #endif /* ! USE_CMP_XCHG_FOR_BGET */
313 }
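
/* Editorial aside (not part of the original source): after set_thr_data(),
   each freelist[i] head is its own sentinel, so an empty bin satisfies

       data->freelist[ i ].ql.flink == & data->freelist[ i ];
       data->freelist[ i ].ql.blink == & data->freelist[ i ];

   which is why __kmp_bget_insert_into_freelist() and
   __kmp_bget_remove_from_freelist() below need no NULL checks. */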
314 
315 static thr_data_t *
316 get_thr_data( kmp_info_t *th )
317 {
318  thr_data_t *data;
319 
320  data = (thr_data_t *) th->th.th_local.bget_data;
321 
322  KMP_DEBUG_ASSERT( data != 0 );
323 
324  return data;
325 }
326 
327 
328 #ifdef KMP_DEBUG
329 
330 static void
331 __kmp_bget_validate_queue( kmp_info_t *th )
332 {
333  /* NOTE: assume that the global_lock is held */
334 
335  void *p = (void *) th->th.th_local.bget_list;
336 
337  while (p != 0) {
338  bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
339 
340  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
341  p = (void *) b->ql.flink;
342  }
343 }
344 
345 #endif
346 
347 /* Walk the free list and release the enqueued buffers */
348 
349 static void
350 __kmp_bget_dequeue( kmp_info_t *th )
351 {
352  void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
353 
354  if (p != 0) {
355  #if USE_CMP_XCHG_FOR_BGET
356  {
357  volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
358  while ( ! KMP_COMPARE_AND_STORE_PTR(
359  & th->th.th_local.bget_list, old_value, NULL ) )
360  {
361  KMP_CPU_PAUSE();
362  old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
363  }
364  p = (void *) old_value;
365  }
366  #else /* ! USE_CMP_XCHG_FOR_BGET */
367  #ifdef USE_QUEUING_LOCK_FOR_BGET
368  __kmp_acquire_lock( & th->th.th_local.bget_lock,
369  __kmp_gtid_from_thread(th) );
370  #else
371  __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
372  #endif /* USE_QUEUING_LOCK_FOR_BGET */
373 
374  p = (void *) th->th.th_local.bget_list;
375  th->th.th_local.bget_list = 0;
376 
377  #ifdef USE_QUEUING_LOCK_FOR_BGET
378  __kmp_release_lock( & th->th.th_local.bget_lock,
379  __kmp_gtid_from_thread(th) );
380  #else
381  __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
382  #endif
383  #endif /* USE_CMP_XCHG_FOR_BGET */
384 
385  /* Check again to make sure the list is not empty */
386 
387  while (p != 0) {
388  void *buf = p;
389  bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
390 
391  KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
392  KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
393  (kmp_uintptr_t)th ); // clear possible mark
394  KMP_DEBUG_ASSERT( b->ql.blink == 0 );
395 
396  p = (void *) b->ql.flink;
397 
398  brel( th, buf );
399  }
400  }
401 }
402 
403 /* Chain together the free buffers by using the thread owner field */
404 
405 static void
406 __kmp_bget_enqueue( kmp_info_t *th, void *buf
407 #ifdef USE_QUEUING_LOCK_FOR_BGET
408  , kmp_int32 rel_gtid
409 #endif
410  )
411 {
412  bfhead_t *b = BFH(((char *) buf) - sizeof(bhead_t));
413 
414  KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
415  KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
416  (kmp_uintptr_t)th ); // clear possible mark
417 
418  b->ql.blink = 0;
419 
420  KC_TRACE( 10, ( "__kmp_bget_enqueue: moving buffer to T#%d list\n",
421  __kmp_gtid_from_thread( th ) ) );
422 
423 #if USE_CMP_XCHG_FOR_BGET
424  {
425  volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
426  /* the next pointer must be set before setting bget_list to buf to avoid
427  exposing a broken list to other threads, even for an instant. */
428  b->ql.flink = BFH( old_value );
429 
430  while ( ! KMP_COMPARE_AND_STORE_PTR(
431  & th->th.th_local.bget_list, old_value, buf ) )
432  {
433  KMP_CPU_PAUSE();
434  old_value = TCR_PTR(th->th.th_local.bget_list);
435  /* the next pointer must be set before setting bget_list to buf to avoid
436  exposing a broken list to other threads, even for an instant. */
437  b->ql.flink = BFH( old_value );
438  }
439  }
440 #else /* ! USE_CMP_XCHG_FOR_BGET */
441 # ifdef USE_QUEUING_LOCK_FOR_BGET
442  __kmp_acquire_lock( & th->th.th_local.bget_lock, rel_gtid );
443 # else
444  __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
445  # endif
446 
447  b->ql.flink = BFH( th->th.th_local.bget_list );
448  th->th.th_local.bget_list = (void *) buf;
449 
450 # ifdef USE_QUEUING_LOCK_FOR_BGET
451  __kmp_release_lock( & th->th.th_local.bget_lock, rel_gtid );
452 # else
453  __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
454 # endif
455 #endif /* USE_CMP_XCHG_FOR_BGET */
456 }
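
/* Editorial aside (not part of the original source): the CMP_XCHG path above
   is the classic lock-free stack push -- store the node's flink to the
   observed head first, then CAS the list head from that observed value to
   the new node, re-reading and retrying on failure. The flink store must
   precede the CAS on every iteration so a concurrent dequeuer never sees a
   node with an unset link. */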
457 
458 /* insert buffer back onto a new freelist */
459 
460 static void
461 __kmp_bget_insert_into_freelist( thr_data_t *thr, bfhead_t *b )
462 {
463  int bin;
464 
465  KMP_DEBUG_ASSERT( ((size_t)b ) % SizeQuant == 0 );
466  KMP_DEBUG_ASSERT( b->bh.bb.bsize % SizeQuant == 0 );
467 
468  bin = bget_get_bin( b->bh.bb.bsize );
469 
470  KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.blink->ql.flink == &thr->freelist[ bin ]);
471  KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.flink->ql.blink == &thr->freelist[ bin ]);
472 
473  b->ql.flink = &thr->freelist[ bin ];
474  b->ql.blink = thr->freelist[ bin ].ql.blink;
475 
476  thr->freelist[ bin ].ql.blink = b;
477  b->ql.blink->ql.flink = b;
478 }
479 
480 /* unlink the buffer from the old freelist */
481 
482 static void
483 __kmp_bget_remove_from_freelist( bfhead_t *b )
484 {
485  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
486  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
487 
488  b->ql.blink->ql.flink = b->ql.flink;
489  b->ql.flink->ql.blink = b->ql.blink;
490 }
491 
492 /* ------------------------------------------------------------------------ */
493 
494 /* GET STATS -- check info on free list */
495 
496 static void
497 bcheck( kmp_info_t *th, bufsize *max_free, bufsize *total_free )
498 {
499  thr_data_t *thr = get_thr_data( th );
500  int bin;
501 
502  *total_free = *max_free = 0;
503 
504  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
505  bfhead_t *b, *best;
506 
507  best = &thr->freelist[ bin ];
508  b = best->ql.flink;
509 
510  while (b != &thr->freelist[ bin ]) {
511  *total_free += (b->bh.bb.bsize - sizeof( bhead_t ));
512  if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize))
513  best = b;
514 
515  /* Link to next buffer */
516  b = b->ql.flink;
517  }
518 
519  if (*max_free < best->bh.bb.bsize)
520  *max_free = best->bh.bb.bsize;
521  }
522 
523  if (*max_free > (bufsize)sizeof( bhead_t ))
524  *max_free -= sizeof( bhead_t );
525 }
526 
527 /* ------------------------------------------------------------------------ */
528 
529 /* BGET -- Allocate a buffer. */
530 
531 static void *
532 bget( kmp_info_t *th, bufsize requested_size )
533 {
534  thr_data_t *thr = get_thr_data( th );
535  bufsize size = requested_size;
536  bfhead_t *b;
537  void *buf;
538  int compactseq = 0;
539  int use_blink = 0;
540 /* For BestFit */
541  bfhead_t *best;
542 
543  if ( size < 0 || size + sizeof( bhead_t ) > MaxSize ) {
544  return NULL;
545  }; // if
546 
547  __kmp_bget_dequeue( th ); /* Release any queued buffers */
548 
549  if (size < (bufsize)SizeQ) { /* Need at least room for the */
550  size = SizeQ; /* queue links. */
551  }
552  #if defined( SizeQuant ) && ( SizeQuant > 1 )
553  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
554  #endif
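 /* Editorial aside (not part of the original source): the mask above rounds
    the request up to the next SizeQuant multiple, e.g. with SizeQuant 16:
    (25 + 15) & ~15 == 32. */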
555 
556  size += sizeof(bhead_t); /* Add overhead in allocated buffer
557  to size required. */
558  KMP_DEBUG_ASSERT( size >= 0 );
559  KMP_DEBUG_ASSERT( size % SizeQuant == 0 );
560 
561  use_blink = ( thr->mode == bget_mode_lifo );
562 
563  /* If a compact function was provided in the call to bectl(), wrap
564  a loop around the allocation process to allow compaction to
565  intervene in case we don't find a suitable buffer in the chain. */
566 
567  for (;;) {
568  int bin;
569 
570  for (bin = bget_get_bin( size ); bin < MAX_BGET_BINS; ++bin) {
571  /* Link to next buffer */
572  b = ( use_blink ? thr->freelist[ bin ].ql.blink : thr->freelist[ bin ].ql.flink );
573 
574  if (thr->mode == bget_mode_best) {
575  best = &thr->freelist[ bin ];
576 
577  /* Scan the free list searching for the first buffer big enough
578  to hold the requested size buffer. */
579 
580  while (b != &thr->freelist[ bin ]) {
581  if (b->bh.bb.bsize >= (bufsize) size) {
582  if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize)) {
583  best = b;
584  }
585  }
586 
587  /* Link to next buffer */
588  b = ( use_blink ? b->ql.blink : b->ql.flink );
589  }
590  b = best;
591  }
592 
593  while (b != &thr->freelist[ bin ]) {
594  if ((bufsize) b->bh.bb.bsize >= (bufsize) size) {
595 
596  /* Buffer is big enough to satisfy the request. Allocate it
597  to the caller. We must decide whether the buffer is large
598  enough to split into the part given to the caller and a
599  free buffer that remains on the free list, or whether the
600  entire buffer should be removed from the free list and
601  given to the caller in its entirety. We only split the
602  buffer if enough room remains for a header plus the minimum
603  quantum of allocation. */
604 
605  if ((b->bh.bb.bsize - (bufsize) size) > (bufsize)(SizeQ + (sizeof(bhead_t)))) {
606  bhead_t *ba, *bn;
607 
608  ba = BH(((char *) b) + (b->bh.bb.bsize - (bufsize) size));
609  bn = BH(((char *) ba) + size);
610 
611  KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
612 
613  /* Subtract size from length of free block. */
614  b->bh.bb.bsize -= (bufsize) size;
615 
616  /* Link allocated buffer to the previous free buffer. */
617  ba->bb.prevfree = b->bh.bb.bsize;
618 
619  /* Plug negative size into user buffer. */
620  ba->bb.bsize = -size;
621 
622  /* Mark this buffer as owned by this thread. */
623  TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it)
624  /* Mark buffer after this one not preceded by free block. */
625  bn->bb.prevfree = 0;
626 
627  /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
628  __kmp_bget_remove_from_freelist( b );
629  __kmp_bget_insert_into_freelist( thr, b );
630 #if BufStats
631  thr->totalloc += (size_t) size;
632  thr->numget++; /* Increment number of bget() calls */
633 #endif
634  buf = (void *) ((((char *) ba) + sizeof(bhead_t)));
635  KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
636  return buf;
637  } else {
638  bhead_t *ba;
639 
640  ba = BH(((char *) b) + b->bh.bb.bsize);
641 
642  KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
643 
644  /* The buffer isn't big enough to split. Give the whole
645  shebang to the caller and remove it from the free list. */
646 
647  __kmp_bget_remove_from_freelist( b );
648 #if BufStats
649  thr->totalloc += (size_t) b->bh.bb.bsize;
650  thr->numget++; /* Increment number of bget() calls */
651 #endif
652  /* Negate size to mark buffer allocated. */
653  b->bh.bb.bsize = -(b->bh.bb.bsize);
654 
655  /* Mark this buffer as owned by this thread. */
656  TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it)
657  /* Zero the back pointer in the next buffer in memory
658  to indicate that this buffer is allocated. */
659  ba->bb.prevfree = 0;
660 
661  /* Give user buffer starting at queue links. */
662  buf = (void *) &(b->ql);
663  KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
664  return buf;
665  }
666  }
667 
668  /* Link to next buffer */
669  b = ( use_blink ? b->ql.blink : b->ql.flink );
670  }
671  }
672 
673  /* We failed to find a buffer. If there's a compact function
674  defined, notify it of the size requested. If it returns
675  TRUE, try the allocation again. */
676 
677  if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
678  break;
679  }
680  }
681 
682  /* No buffer available with requested size free. */
683 
684  /* Don't give up yet -- look in the reserve supply. */
685 
686  if (thr->acqfcn != 0) {
687  if (size > (bufsize) (thr->exp_incr - sizeof(bhead_t))) {
688 
689  /* Request is too large to fit in a single expansion
690  block. Try to satisfy it by a direct buffer acquisition. */
691 
692  bdhead_t *bdh;
693 
694  size += sizeof(bdhead_t) - sizeof(bhead_t);
695 
696  KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", (int) size ) );
697 
698  /* richryan */
699  bdh = BDH((*thr->acqfcn)((bufsize) size));
700  if (bdh != NULL) {
701 
702  /* Mark the buffer special by setting the size field
703  of its header to zero. */
704  bdh->bh.bb.bsize = 0;
705 
706  /* Mark this buffer as owned by this thread. */
707  TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
708  // because direct buffer never goes to free list
709  bdh->bh.bb.prevfree = 0;
710  bdh->tsize = size;
711 #if BufStats
712  thr->totalloc += (size_t) size;
713  thr->numget++; /* Increment number of bget() calls */
714  thr->numdget++; /* Direct bget() call count */
715 #endif
716  buf = (void *) (bdh + 1);
717  KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
718  return buf;
719  }
720 
721  } else {
722 
723  /* Try to obtain a new expansion block */
724 
725  void *newpool;
726 
727  KE_TRACE( 10, ("%%%%%% MALLOCB( %d )\n", (int) thr->exp_incr ) );
728 
729  /* richryan */
730  newpool = (*thr->acqfcn)((bufsize) thr->exp_incr);
731  KMP_DEBUG_ASSERT( ((size_t)newpool) % SizeQuant == 0 );
732  if (newpool != NULL) {
733  bpool( th, newpool, thr->exp_incr);
734  buf = bget( th, requested_size); /* This can't, I say, can't get into a loop. */
735  return buf;
736  }
737  }
738  }
739 
740  /* Still no buffer available */
741 
742  return NULL;
743 }
744 
745 /* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
746  the entire contents of the buffer to zero, not just the
747  region requested by the caller. */
748 
749 static void *
750 bgetz( kmp_info_t *th, bufsize size )
751 {
752  char *buf = (char *) bget( th, size);
753 
754  if (buf != NULL) {
755  bhead_t *b;
756  bufsize rsize;
757 
758  b = BH(buf - sizeof(bhead_t));
759  rsize = -(b->bb.bsize);
760  if (rsize == 0) {
761  bdhead_t *bd;
762 
763  bd = BDH(buf - sizeof(bdhead_t));
764  rsize = bd->tsize - (bufsize) sizeof(bdhead_t);
765  } else {
766  rsize -= sizeof(bhead_t);
767  }
768 
769  KMP_DEBUG_ASSERT(rsize >= size);
770 
771  (void) memset(buf, 0, (bufsize) rsize);
772  }
773  return ((void *) buf);
774 }
775 
776 /* BGETR -- Reallocate a buffer. This is a minimal implementation,
777  simply in terms of brel() and bget(). It could be
778  enhanced to allow the buffer to grow into adjacent free
779  blocks and to avoid moving data unnecessarily. */
780 
781 static void *
782 bgetr( kmp_info_t *th, void *buf, bufsize size)
783 {
784  void *nbuf;
785  bufsize osize; /* Old size of buffer */
786  bhead_t *b;
787 
788  nbuf = bget( th, size );
789  if ( nbuf == NULL ) { /* Acquire new buffer */
790  return NULL;
791  }
792  if ( buf == NULL ) {
793  return nbuf;
794  }
795  b = BH(((char *) buf) - sizeof(bhead_t));
796  osize = -b->bb.bsize;
797  if (osize == 0) {
798  /* Buffer acquired directly through acqfcn. */
799  bdhead_t *bd;
800 
801  bd = BDH(((char *) buf) - sizeof(bdhead_t));
802  osize = bd->tsize - (bufsize) sizeof(bdhead_t);
803  } else {
804  osize -= sizeof(bhead_t);
805  };
806 
807  KMP_DEBUG_ASSERT(osize > 0);
808 
809  (void) memcpy((char *) nbuf, (char *) buf, /* Copy the data */
810  (size_t) ((size < osize) ? size : osize));
811  brel( th, buf );
812 
813  return nbuf;
814 }
815 
816 /* BREL -- Release a buffer. */
817 
818 static void
819 brel( kmp_info_t *th, void *buf )
820 {
821  thr_data_t *thr = get_thr_data( th );
822  bfhead_t *b, *bn;
823  kmp_info_t *bth;
824 
825  KMP_DEBUG_ASSERT(buf != NULL);
826  KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
827 
828  b = BFH(((char *) buf) - sizeof(bhead_t));
829 
830  if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
831  bdhead_t *bdh;
832 
833  bdh = BDH(((char *) buf) - sizeof(bdhead_t));
834  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
835 #if BufStats
836  thr->totalloc -= (size_t) bdh->tsize;
837  thr->numdrel++; /* Number of direct releases */
838  thr->numrel++; /* Increment number of brel() calls */
839 #endif /* BufStats */
840 #ifdef FreeWipe
841  (void) memset((char *) buf, 0x55,
842  (size_t) (bdh->tsize - sizeof(bdhead_t)));
843 #endif /* FreeWipe */
844 
845  KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) bdh ) );
846 
847  KMP_DEBUG_ASSERT( thr->relfcn != 0 );
848  (*thr->relfcn)((void *) bdh); /* Release it directly. */
849  return;
850  }
851 
852  bth = (kmp_info_t *)( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ); // clear possible mark before comparison
853  if ( bth != th ) {
854  /* Add this buffer to be released by the owning thread later */
855  __kmp_bget_enqueue( bth, buf
856 #ifdef USE_QUEUING_LOCK_FOR_BGET
857  , __kmp_gtid_from_thread( th )
858 #endif
859  );
860  return;
861  }
862 
863  /* Buffer size must be negative, indicating that the buffer is
864  allocated. */
865 
866  if (b->bh.bb.bsize >= 0) {
867  bn = NULL;
868  }
869  KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
870 
871  /* Back pointer in next buffer must be zero, indicating the
872  same thing: */
873 
874  KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.bsize)->bb.prevfree == 0);
875 
876 #if BufStats
877  thr->numrel++; /* Increment number of brel() calls */
878  thr->totalloc += (size_t) b->bh.bb.bsize;
879 #endif
880 
881  /* If the back link is nonzero, the previous buffer is free. */
882 
883  if (b->bh.bb.prevfree != 0) {
884  /* The previous buffer is free. Consolidate this buffer with it
885  by adding the length of this buffer to the previous free
886  buffer. Note that we subtract the size in the buffer being
887  released, since it's negative to indicate that the buffer is
888  allocated. */
889 
890  register bufsize size = b->bh.bb.bsize;
891 
892  /* Make the previous buffer the one we're working on. */
893  KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.prevfree)->bb.bsize == b->bh.bb.prevfree);
894  b = BFH(((char *) b) - b->bh.bb.prevfree);
895  b->bh.bb.bsize -= size;
896 
897  /* unlink the buffer from the old freelist */
898  __kmp_bget_remove_from_freelist( b );
899  }
900  else {
901  /* The previous buffer is allocated. Mark this buffer's
902  size as positive (i.e. free) and fall through to place
903  the buffer on the free list as an isolated free block. */
904 
905  b->bh.bb.bsize = -b->bh.bb.bsize;
906  }
907 
908  /* insert buffer back onto a new freelist */
909  __kmp_bget_insert_into_freelist( thr, b );
910 
911 
912  /* Now we look at the next buffer in memory, located by advancing from
913  the start of this buffer by its size, to see if that buffer is
914  free. If it is, we combine this buffer with the next one in
915  memory, dechaining the second buffer from the free list. */
916 
917  bn = BFH(((char *) b) + b->bh.bb.bsize);
918  if (bn->bh.bb.bsize > 0) {
919 
920  /* The buffer is free. Remove it from the free list and add
921  its size to that of our buffer. */
922 
923  KMP_DEBUG_ASSERT(BH((char *) bn + bn->bh.bb.bsize)->bb.prevfree == bn->bh.bb.bsize);
924 
925  __kmp_bget_remove_from_freelist( bn );
926 
927  b->bh.bb.bsize += bn->bh.bb.bsize;
928 
929  /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
930 
931  __kmp_bget_remove_from_freelist( b );
932  __kmp_bget_insert_into_freelist( thr, b );
933 
934  /* Finally, advance to the buffer that follows the newly
935  consolidated free block. We must set its backpointer to the
936  head of the consolidated free block. We know the next block
937  must be an allocated block because the process of recombination
938  guarantees that two free blocks will never be contiguous in
939  memory. */
940 
941  bn = BFH(((char *) b) + b->bh.bb.bsize);
942  }
943 #ifdef FreeWipe
944  (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
945  (size_t) (b->bh.bb.bsize - sizeof(bfhead_t)));
946 #endif
947  KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
948 
949  /* The next buffer is allocated. Set the backpointer in it to point
950  to this buffer, the previous free buffer in memory. */
951 
952  bn->bh.bb.prevfree = b->bh.bb.bsize;
953 
954  /* If a block-release function is defined, and this free buffer
955  constitutes the entire block, release it. Note that pool_len
956  is defined in such a way that the test will fail unless all
957  pool blocks are the same size. */
958 
959  if (thr->relfcn != 0 &&
960  b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
961  {
962 #if BufStats
963  if (thr->numpblk != 1) { /* Do not release the last buffer until finalization time */
964 #endif
965 
966  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
967  KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
968  KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
969 
970  /* Unlink the buffer from the free list */
971  __kmp_bget_remove_from_freelist( b );
972 
973  KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
974 
975  (*thr->relfcn)(b);
976 #if BufStats
977  thr->numprel++; /* Nr of expansion block releases */
978  thr->numpblk--; /* Total number of blocks */
979  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
980 
981  /* avoid leaving stale last_pool pointer around if it is being deallocated */
982  if (thr->last_pool == b) thr->last_pool = 0;
983  }
984  else {
985  thr->last_pool = b;
986  }
987 #endif /* BufStats */
988  }
989 }
990 
991 /* BECTL -- Establish automatic pool expansion control */
992 
993 static void
994 bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr)
995 {
996  thr_data_t *thr = get_thr_data( th );
997 
998  thr->compfcn = compact;
999  thr->acqfcn = acquire;
1000  thr->relfcn = release;
1001  thr->exp_incr = pool_incr;
1002 }
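
/* Editorial aside: for a concrete use of bectl(), see __kmp_initialize_bget()
   below, which installs plain malloc/free as the acquire/release pair, no
   compactor, and __kmp_malloc_pool_incr as the expansion increment. */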
1003 
1004 /* BPOOL -- Add a region of memory to the buffer pool. */
1005 
1006 static void
1007 bpool( kmp_info_t *th, void *buf, bufsize len)
1008 {
1009 /* int bin = 0; */
1010  thr_data_t *thr = get_thr_data( th );
1011  bfhead_t *b = BFH(buf);
1012  bhead_t *bn;
1013 
1014  __kmp_bget_dequeue( th ); /* Release any queued buffers */
1015 
1016 #ifdef SizeQuant
1017  len &= ~(SizeQuant - 1);
1018 #endif
1019  if (thr->pool_len == 0) {
1020  thr->pool_len = len;
1021  } else if (len != thr->pool_len) {
1022  thr->pool_len = -1;
1023  }
1024 #if BufStats
1025  thr->numpget++; /* Number of block acquisitions */
1026  thr->numpblk++; /* Number of blocks total */
1027  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1028 #endif /* BufStats */
1029 
1030  /* Since the block is initially occupied by a single free buffer,
1031  it had better not be (much) larger than the largest buffer
1032  whose size we can store in bhead.bb.bsize. */
1033 
1034  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize) ESent + 1));
1035 
1036  /* Clear the backpointer at the start of the block to indicate that
1037  there is no free block prior to this one. That blocks
1038  recombination when the first block in memory is released. */
1039 
1040  b->bh.bb.prevfree = 0;
1041 
1042  /* Create a dummy allocated buffer at the end of the pool. This dummy
1043  buffer is seen when a buffer at the end of the pool is released and
1044  blocks recombination of the last buffer with the dummy buffer at
1045  the end. The length in the dummy buffer is set to the largest
1046  negative number to denote the end of the pool for diagnostic
1047  routines (this specific value is not counted on by the actual
1048  allocation and release functions). */
1049 
1050  len -= sizeof(bhead_t);
1051  b->bh.bb.bsize = (bufsize) len;
1052  /* Set the owner of this buffer */
1053  TCW_PTR( b->bh.bb.bthr, (kmp_info_t*)((kmp_uintptr_t)th | 1) ); // mark the buffer as allocated address
1054 
1055  /* Chain the new block to the free list. */
1056  __kmp_bget_insert_into_freelist( thr, b );
1057 
1058 #ifdef FreeWipe
1059  (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
1060  (size_t) (len - sizeof(bfhead_t)));
1061 #endif
1062  bn = BH(((char *) b) + len);
1063  bn->bb.prevfree = (bufsize) len;
1064  /* Definition of ESent assumes two's complement! */
1065  KMP_DEBUG_ASSERT( (~0) == -1 && (bn != 0) );
1066 
1067  bn->bb.bsize = ESent;
1068 }
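
/* Editorial aside (not part of the original source): the resulting pool
   layout, with len denoting the adjusted length after the sizeof(bhead_t)
   subtraction above (widths not to scale):

   +---------------+--------------------------------+----------------+
   | bfhead_t      | remaining free space           | dummy bhead_t  |
   | prevfree = 0  | (0x55-wiped under FreeWipe)    | prevfree = len |
   | bsize   = len |                                | bsize  = ESent |
   +---------------+--------------------------------+----------------+   */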
1069 
1070 /* ------------------------------------------------------------------------ */
1071 
1072 /* BFREED -- Dump the free lists for this thread. */
1073 
1074 static void
1075 bfreed( kmp_info_t *th )
1076 {
1077  int bin = 0, count = 0;
1078  int gtid = __kmp_gtid_from_thread( th );
1079  thr_data_t *thr = get_thr_data( th );
1080 
1081 #if BufStats
1082  __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC " get=%" KMP_INT64_SPEC " rel=%" \
1083  KMP_INT64_SPEC " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC " prel=%" KMP_INT64_SPEC \
1084  " dget=%" KMP_INT64_SPEC " drel=%" KMP_INT64_SPEC "\n",
1085  gtid, (kmp_uint64) thr->totalloc,
1086  (kmp_int64) thr->numget, (kmp_int64) thr->numrel,
1087  (kmp_int64) thr->numpblk,
1088  (kmp_int64) thr->numpget, (kmp_int64) thr->numprel,
1089  (kmp_int64) thr->numdget, (kmp_int64) thr->numdrel );
1090 #endif
1091 
1092  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1093  bfhead_t *b;
1094 
1095  for (b = thr->freelist[ bin ].ql.flink; b != &thr->freelist[ bin ]; b = b->ql.flink) {
1096  bufsize bs = b->bh.bb.bsize;
1097 
1098  KMP_DEBUG_ASSERT( b->ql.blink->ql.flink == b );
1099  KMP_DEBUG_ASSERT( b->ql.flink->ql.blink == b );
1100  KMP_DEBUG_ASSERT( bs > 0 );
1101 
1102  count += 1;
1103 
1104  __kmp_printf_no_lock("__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, (long) bs );
1105 #ifdef FreeWipe
1106  {
1107  char *lerr = ((char *) b) + sizeof(bfhead_t);
1108  if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || (memcmp(lerr, lerr + 1, (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1109  __kmp_printf_no_lock( "__kmp_printpool: T#%d (Contents of above free block have been overstored.)\n", gtid );
1110  }
1111  }
1112 #endif
1113  }
1114  }
1115 
1116  if (count == 0)
1117  __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid );
1118 }
1119 
1120 /* ------------------------------------------------------------------------ */
1121 
1122 #ifdef KMP_DEBUG
1123 
1124 #if BufStats
1125 
1126 /* BSTATS -- Return buffer allocation free space statistics. */
1127 
1128 static void
1129 bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel)
1130 {
1131  int bin = 0;
1132  thr_data_t *thr = get_thr_data( th );
1133 
1134  *nget = thr->numget;
1135  *nrel = thr->numrel;
1136  *curalloc = (bufsize) thr->totalloc;
1137  *totfree = 0;
1138  *maxfree = -1;
1139 
1140  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1141  bfhead_t *b = thr->freelist[ bin ].ql.flink;
1142 
1143  while (b != &thr->freelist[ bin ]) {
1144  KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
1145  *totfree += b->bh.bb.bsize;
1146  if (b->bh.bb.bsize > *maxfree) {
1147  *maxfree = b->bh.bb.bsize;
1148  }
1149  b = b->ql.flink; /* Link to next buffer */
1150  }
1151  }
1152 }
1153 
1154 /* BSTATSE -- Return extended statistics */
1155 
1156 static void
1157 bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel)
1158 {
1159  thr_data_t *thr = get_thr_data( th );
1160 
1161  *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
1162  *npool = thr->numpblk;
1163  *npget = thr->numpget;
1164  *nprel = thr->numprel;
1165  *ndget = thr->numdget;
1166  *ndrel = thr->numdrel;
1167 }
1168 
1169 #endif /* BufStats */
1170 
1171 /* BUFDUMP -- Dump the data in a buffer. This is called with the user
1172  data pointer, and backs up to the buffer header. It will
1173  dump either a free block or an allocated one. */
1174 
1175 static void
1176 bufdump( kmp_info_t *th, void *buf )
1177 {
1178  bfhead_t *b;
1179  unsigned char *bdump;
1180  bufsize bdlen;
1181 
1182  b = BFH(((char *) buf) - sizeof(bhead_t));
1183  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
1184  if (b->bh.bb.bsize < 0) {
1185  bdump = (unsigned char *) buf;
1186  bdlen = (-b->bh.bb.bsize) - (bufsize) sizeof(bhead_t);
1187  } else {
1188  bdump = (unsigned char *) (((char *) b) + sizeof(bfhead_t));
1189  bdlen = b->bh.bb.bsize - (bufsize) sizeof(bfhead_t);
1190  }
1191 
1192  while (bdlen > 0) {
1193  int i, dupes = 0;
1194  bufsize l = bdlen;
1195  char bhex[50], bascii[20];
1196 
1197  if (l > 16) {
1198  l = 16;
1199  }
1200 
1201  for (i = 0; i < l; i++) {
1202  (void) sprintf(bhex + i * 3, "%02X ", bdump[i]);
1203  if (bdump[i] > 0x20 && bdump[i] < 0x7F)
1204  bascii[ i ] = bdump[ i ];
1205  else
1206  bascii[ i ] = ' ';
1207  }
1208  bascii[i] = 0;
1209  (void) __kmp_printf_no_lock("%-48s %s\n", bhex, bascii);
1210  bdump += l;
1211  bdlen -= l;
1212  while ((bdlen > 16) && (memcmp((char *) (bdump - 16),
1213  (char *) bdump, 16) == 0)) {
1214  dupes++;
1215  bdump += 16;
1216  bdlen -= 16;
1217  }
1218  if (dupes > 1) {
1219  (void) __kmp_printf_no_lock(
1220  " (%d lines [%d bytes] identical to above line skipped)\n",
1221  dupes, dupes * 16);
1222  } else if (dupes == 1) {
1223  bdump -= 16;
1224  bdlen += 16;
1225  }
1226  }
1227 }
1228 
1229 /* BPOOLD -- Dump a buffer pool. The buffer headers are always listed.
1230  If DUMPALLOC is nonzero, the contents of allocated buffers
1231  are dumped. If DUMPFREE is nonzero, free blocks are
1232  dumped as well. If FreeWipe checking is enabled, free
1233  blocks which have been clobbered will always be dumped. */
1234 
1235 static void
1236 bpoold( kmp_info_t *th, void *buf, int dumpalloc, int dumpfree)
1237 {
1238  bfhead_t *b = BFH( (char*)buf - sizeof(bhead_t));
1239 
1240  while (b->bh.bb.bsize != ESent) {
1241  bufsize bs = b->bh.bb.bsize;
1242 
1243  if (bs < 0) {
1244  bs = -bs;
1245  (void) __kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n", (long) bs);
1246  if (dumpalloc) {
1247  bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1248  }
1249  } else {
1250  const char *lerr = "";
1251 
1252  KMP_DEBUG_ASSERT(bs > 0);
1253  if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1254  lerr = " (Bad free list links)";
1255  }
1256  (void) __kmp_printf_no_lock("Free block: size %6ld bytes.%s\n",
1257  (long) bs, lerr);
1258 #ifdef FreeWipe
1259  lerr = ((char *) b) + sizeof(bfhead_t);
1260  if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
1261  (memcmp(lerr, lerr + 1,
1262  (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1263  (void) __kmp_printf_no_lock(
1264  "(Contents of above free block have been overstored.)\n");
1265  bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1266  } else
1267 #endif
1268  if (dumpfree) {
1269  bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1270  }
1271  }
1272  b = BFH(((char *) b) + bs);
1273  }
1274 }
1275 
1276 /* BPOOLV -- Validate a buffer pool. */
1277 
1278 static int
1279 bpoolv( kmp_info_t *th, void *buf )
1280 {
1281  bfhead_t *b = BFH(buf);
1282 
1283  while (b->bh.bb.bsize != ESent) {
1284  bufsize bs = b->bh.bb.bsize;
1285 
1286  if (bs < 0) {
1287  bs = -bs;
1288  } else {
1289 #ifdef FreeWipe
1290  char *lerr = "";
1291 #endif
1292 
1293  KMP_DEBUG_ASSERT(bs > 0);
1294  if (bs <= 0) {
1295  return 0;
1296  }
1297  if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1298  (void) __kmp_printf_no_lock("Free block: size %6ld bytes. (Bad free list links)\n",
1299  (long) bs);
1300  KMP_DEBUG_ASSERT(0);
1301  return 0;
1302  }
1303 #ifdef FreeWipe
1304  lerr = ((char *) b) + sizeof(bfhead_t);
1305  if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
1306  (memcmp(lerr, lerr + 1,
1307  (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1308  (void) __kmp_printf_no_lock(
1309  "(Contents of above free block have been overstored.)\n");
1310  bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1311  KMP_DEBUG_ASSERT(0);
1312  return 0;
1313  }
1314 #endif /* FreeWipe */
1315  }
1316  b = BFH(((char *) b) + bs);
1317  }
1318  return 1;
1319 }
1320 
1321 #endif /* KMP_DEBUG */
1322 
1323 /* ------------------------------------------------------------------------ */
1324 
1325 void
1326 __kmp_initialize_bget( kmp_info_t *th )
1327 {
1328  KMP_DEBUG_ASSERT( SizeQuant >= sizeof( void * ) && (th != 0) );
1329 
1330  set_thr_data( th );
1331 
1332  bectl( th, (bget_compact_t) 0, (bget_acquire_t) malloc, (bget_release_t) free,
1333  (bufsize) __kmp_malloc_pool_incr );
1334 }
1335 
1336 void
1337 __kmp_finalize_bget( kmp_info_t *th )
1338 {
1339  thr_data_t *thr;
1340  bfhead_t *b;
1341 
1342  KMP_DEBUG_ASSERT( th != 0 );
1343 
1344 #if BufStats
1345  thr = (thr_data_t *) th->th.th_local.bget_data;
1346  KMP_DEBUG_ASSERT( thr != NULL );
1347  b = thr->last_pool;
1348 
1349  /* If a block-release function is defined, and this free buffer
1350  constitutes the entire block, release it. Note that pool_len
1351  is defined in such a way that the test will fail unless all
1352  pool blocks are the same size. */
1353 
1354  /* Deallocate the last pool if one exists because we no longer do it in brel() */
1355  if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1356  b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
1357  {
1358  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1359  KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
1360  KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
1361 
1362  /* Unlink the buffer from the free list */
1363  __kmp_bget_remove_from_freelist( b );
1364 
1365  KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
1366 
1367  (*thr->relfcn)(b);
1368  thr->numprel++; /* Nr of expansion block releases */
1369  thr->numpblk--; /* Total number of blocks */
1370  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1371  }
1372 #endif /* BufStats */
1373 
1374  /* Deallocate bget_data */
1375  if ( th->th.th_local.bget_data != NULL ) {
1376  __kmp_free( th->th.th_local.bget_data );
1377  th->th.th_local.bget_data = NULL;
1378  }; // if
1379 }
1380 
1381 void
1382 kmpc_set_poolsize( size_t size )
1383 {
1384  bectl( __kmp_get_thread(), (bget_compact_t) 0, (bget_acquire_t) malloc,
1385  (bget_release_t) free, (bufsize) size );
1386 }
1387 
1388 size_t
1389 kmpc_get_poolsize( void )
1390 {
1391  thr_data_t *p;
1392 
1393  p = get_thr_data( __kmp_get_thread() );
1394 
1395  return p->exp_incr;
1396 }
1397 
1398 void
1399 kmpc_set_poolmode( int mode )
1400 {
1401  thr_data_t *p;
1402 
1403  if (mode == bget_mode_fifo || mode == bget_mode_lifo || mode == bget_mode_best) {
1404  p = get_thr_data( __kmp_get_thread() );
1405  p->mode = (bget_mode_t) mode;
1406  }
1407 }
1408 
1409 int
1410 kmpc_get_poolmode( void )
1411 {
1412  thr_data_t *p;
1413 
1414  p = get_thr_data( __kmp_get_thread() );
1415 
1416  return p->mode;
1417 }
1418 
1419 void
1420 kmpc_get_poolstat( size_t *maxmem, size_t *allmem )
1421 {
1422  kmp_info_t *th = __kmp_get_thread();
1423  bufsize a, b;
1424 
1425  __kmp_bget_dequeue( th ); /* Release any queued buffers */
1426 
1427  bcheck( th, &a, &b );
1428 
1429  *maxmem = a;
1430  *allmem = b;
1431 }
1432 
1433 void
1434 kmpc_poolprint( void )
1435 {
1436  kmp_info_t *th = __kmp_get_thread();
1437 
1438  __kmp_bget_dequeue( th ); /* Release any queued buffers */
1439 
1440  bfreed( th );
1441 }
1442 
1443 #endif // #if KMP_USE_BGET
1444 
1445 /* ------------------------------------------------------------------------ */
1446 
1447 void *
1448 kmpc_malloc( size_t size )
1449 {
1450  void * ptr;
1451  ptr = bget( __kmp_entry_thread(), (bufsize) size );
1452 
1453  return ptr;
1454 }
1455 
1456 void *
1457 kmpc_calloc( size_t nelem, size_t elsize )
1458 {
1459  void * ptr;
1460  ptr = bgetz( __kmp_entry_thread(), (bufsize) (nelem * elsize) );
1461 
1462  return ptr;
1463 }
1464 
1465 void *
1466 kmpc_realloc( void * ptr, size_t size )
1467 {
1468  void * result = NULL;
1469 
1470  if ( ptr == NULL ) {
1471  // If pointer is NULL, realloc behaves like malloc.
1472  result = bget( __kmp_entry_thread(), (bufsize) size );
1473  } else if ( size == 0 ) {
1474  // If size is 0, realloc behaves like free.
1475  // The thread must have been registered by a previous call to kmpc_malloc() or kmpc_calloc(),
1476  // so it should be safe to call __kmp_get_thread() rather than __kmp_entry_thread().
1477  brel( __kmp_get_thread(), ptr );
1478  } else {
1479  result = bgetr( __kmp_entry_thread(), ptr, (bufsize) size );
1480  }; // if
1481 
1482  return result;
1483 }
1484 
1485 /* NOTE: the library must have already been initialized by a previous allocate */
1486 
1487 void
1488 kmpc_free( void * ptr )
1489 {
1490  if ( ! __kmp_init_serial ) {
1491  return;
1492  }; // if
1493  if ( ptr != NULL ) {
1494  kmp_info_t *th = __kmp_get_thread();
1495  __kmp_bget_dequeue( th ); /* Release any queued buffers */
1496  brel( th, ptr );
1497  };
1498 }
1499 
1500 
1501 /* ------------------------------------------------------------------------ */
1502 
1503 void *
1504 ___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL )
1505 {
1506  void * ptr;
1507  KE_TRACE( 30, (
1508  "-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n",
1509  th,
1510  (int) size
1511  KMP_SRC_LOC_PARM
1512  ) );
1513  ptr = bget( th, (bufsize) size );
1514  KE_TRACE( 30, ( "<- __kmp_thread_malloc() returns %p\n", ptr ) );
1515  return ptr;
1516 }
1517 
1518 void *
1519 ___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL )
1520 {
1521  void * ptr;
1522  KE_TRACE( 30, (
1523  "-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n",
1524  th,
1525  (int) nelem,
1526  (int) elsize
1527  KMP_SRC_LOC_PARM
1528  ) );
1529  ptr = bgetz( th, (bufsize) (nelem * elsize) );
1530  KE_TRACE( 30, ( "<- __kmp_thread_calloc() returns %p\n", ptr ) );
1531  return ptr;
1532 }
1533 
1534 void *
1535 ___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL )
1536 {
1537  KE_TRACE( 30, (
1538  "-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n",
1539  th,
1540  ptr,
1541  (int) size
1542  KMP_SRC_LOC_PARM
1543  ) );
1544  ptr = bgetr( th, ptr, (bufsize) size );
1545  KE_TRACE( 30, ( "<- __kmp_thread_realloc() returns %p\n", ptr ) );
1546  return ptr;
1547 }
1548 
1549 void
1550 ___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL )
1551 {
1552  KE_TRACE( 30, (
1553  "-> __kmp_thread_free( %p, %p ) called from %s:%d\n",
1554  th,
1555  ptr
1556  KMP_SRC_LOC_PARM
1557  ) );
1558  if ( ptr != NULL ) {
1559  __kmp_bget_dequeue( th ); /* Release any queued buffers */
1560  brel( th, ptr );
1561  }
1562  KE_TRACE( 30, ( "<- __kmp_thread_free()\n" ) );
1563 }
1564 
1565 /* ------------------------------------------------------------------------ */
1566 /* ------------------------------------------------------------------------ */
1567 /*
1568  If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. This causes memory leaks, but it
1569  may be useful for debugging memory corruption, use of freed pointers, etc.
1570 */
1571 /* #define LEAK_MEMORY */
1572 
1573 struct kmp_mem_descr { // Memory block descriptor.
1574  void * ptr_allocated; // Pointer returned by malloc(), subject for free().
1575  size_t size_allocated; // Size of allocated memory block.
1576  void * ptr_aligned; // Pointer to aligned memory, to be used by client code.
1577  size_t size_aligned; // Size of aligned memory block.
1578 };
1579 typedef struct kmp_mem_descr kmp_mem_descr_t;
1580 
1581 /*
1582  Allocate memory on requested boundary, fill allocated memory with 0x00.
1583  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1584  Must use __kmp_free when freeing memory allocated by this routine!
1585  */
1586 static
1587 void *
1588 ___kmp_allocate_align( size_t size, size_t alignment KMP_SRC_LOC_DECL )
1589 {
1590  /*
1591  __kmp_allocate() allocates (by a call to malloc()) a bigger memory block than requested, to
1592  return a properly aligned pointer. The original pointer returned by malloc() and the size of
1593  the allocated block are saved in a descriptor just before the aligned pointer. __kmp_free()
1594  uses this information: it must pass the original pointer, not the aligned one, to free().
1595 
1596  +---------+------------+-----------------------------------+---------+
1597  | padding | descriptor |           aligned block           | padding |
1598  +---------+------------+-----------------------------------+---------+
1599  ^                      ^
1600  |                      |
1601  |                      +- Aligned pointer returned to caller
1602  +- Pointer returned by malloc()
1603 
1604  The aligned block is filled with zeros; the paddings are filled with 0xEF.
1605  */
1606 
1607  kmp_mem_descr_t descr;
1608  kmp_uintptr_t addr_allocated; // Address returned by malloc().
1609  kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
1610  kmp_uintptr_t addr_descr; // Address of memory block descriptor.
1611 
1612  KE_TRACE( 25, (
1613  "-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
1614  (int) size,
1615  (int) alignment
1616  KMP_SRC_LOC_PARM
1617  ) );
1618 
1619  KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too big.
1620  KMP_DEBUG_ASSERT( sizeof( void * ) <= sizeof( kmp_uintptr_t ) );
1621  // Make sure kmp_uintptr_t is enough to store addresses.
1622 
1623  descr.size_aligned = size;
1624  descr.size_allocated = descr.size_aligned + sizeof( kmp_mem_descr_t ) + alignment;
1625 
1626  #if KMP_DEBUG
1627  descr.ptr_allocated = _malloc_src_loc( descr.size_allocated, _file_, _line_ );
1628  #else
1629  descr.ptr_allocated = malloc_src_loc( descr.size_allocated KMP_SRC_LOC_PARM );
1630  #endif
1631  KE_TRACE( 10, (
1632  " malloc( %d ) returned %p\n",
1633  (int) descr.size_allocated,
1634  descr.ptr_allocated
1635  ) );
1636  if ( descr.ptr_allocated == NULL ) {
1637  KMP_FATAL( OutOfHeapMemory );
1638  };
1639 
1640  addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
1641  addr_aligned =
1642  ( addr_allocated + sizeof( kmp_mem_descr_t ) + alignment )
1643  & ~ ( alignment - 1 );
1644  addr_descr = addr_aligned - sizeof( kmp_mem_descr_t );
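 /* Editorial aside -- a worked example with assumed numbers: if malloc()
    returned 0x1008, alignment is 64 and sizeof( kmp_mem_descr_t ) is 32,
    then addr_aligned = ( 0x1008 + 32 + 64 ) & ~63 = 0x1040 and
    addr_descr = 0x1020, so the descriptor sits wholly inside the
    allocation, immediately below the aligned block. */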
1645 
1646  descr.ptr_aligned = (void *) addr_aligned;
1647 
1648  KE_TRACE( 26, (
1649  " ___kmp_allocate_align: "
1650  "ptr_allocated=%p, size_allocated=%d, "
1651  "ptr_aligned=%p, size_aligned=%d\n",
1652  descr.ptr_allocated,
1653  (int) descr.size_allocated,
1654  descr.ptr_aligned,
1655  (int) descr.size_aligned
1656  ) );
1657 
1658  KMP_DEBUG_ASSERT( addr_allocated <= addr_descr );
1659  KMP_DEBUG_ASSERT( addr_descr + sizeof( kmp_mem_descr_t ) == addr_aligned );
1660  KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
1661  KMP_DEBUG_ASSERT( addr_aligned % alignment == 0 );
1662 
1663  #ifdef KMP_DEBUG
1664  memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
1665  // Fill allocated memory block with 0xEF.
1666  #endif
1667  memset( descr.ptr_aligned, 0x00, descr.size_aligned );
1668  // Fill the aligned memory block (which is intended for use by the caller) with 0x00. Do not
1669  // put this filling under KMP_DEBUG condition! Many callers expect zeroed memory. (Padding
1670  // bytes remain filled with 0xEF in debugging library.)
1671  * ( (kmp_mem_descr_t *) addr_descr ) = descr;
1672 
1673  KMP_MB();
1674 
1675  KE_TRACE( 25, ( "<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned ) );
1676  return descr.ptr_aligned;
1677 
1678 } // func ___kmp_allocate_align
1679 
1680 
1681 /*
1682  Allocate memory on cache line boundary, fill allocated memory with 0x00.
1683  Do not call this func directly! Use __kmp_allocate macro instead.
1684  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1685  Must use __kmp_free when freeing memory allocated by this routine!
1686  */
1687 void *
1688 ___kmp_allocate( size_t size KMP_SRC_LOC_DECL )
1689 {
1690 
1691  void * ptr;
1692  KE_TRACE( 25, ( "-> __kmp_allocate( %d ) called from %s:%d\n", (int) size KMP_SRC_LOC_PARM ) );
1693  ptr = ___kmp_allocate_align( size, __kmp_align_alloc KMP_SRC_LOC_PARM );
1694  KE_TRACE( 25, ( "<- __kmp_allocate() returns %p\n", ptr ) );
1695  return ptr;
1696 
1697 } // func ___kmp_allocate
1698 
1699 #if (BUILD_MEMORY==FIRST_TOUCH)
1700 void *
1701 __kmp_ft_page_allocate(size_t size)
1702 {
1703  void *adr, *aadr;
1704 #if KMP_OS_LINUX
1705  /* TODO: Use this function to get page size everywhere */
1706  int page_size = getpagesize();
1707 #else
1708  /* TODO: Find windows function to get page size and use it everywhere */
1709  int page_size = PAGE_SIZE;
1710 #endif /* KMP_OS_LINUX */
1711 
1712  adr = (void *) __kmp_thread_malloc( __kmp_get_thread(),
1713  size + page_size + KMP_PTR_SKIP);
1714  if ( adr == 0 )
1715  KMP_FATAL( OutOfHeapMemory );
1716 
1717  /* check to see if adr is on a page boundary. */
1718  if ( ( (kmp_uintptr_t) adr & (page_size - 1)) == 0)
1719  /* nothing to do if adr is already on a page boundary. */
1720  aadr = adr;
1721  else
1722  /* else set aadr to the first page boundary in the allocated memory. */
1723  aadr = (void *) ( ( (kmp_uintptr_t) adr + page_size) & ~(page_size - 1) );
1724 
1725  /* the first touch by the owner thread. */
1726  *((void**)aadr) = adr;
1727 
1728  /* skip the memory space used for storing adr above. */
1729  return (void*)((char*)aadr + KMP_PTR_SKIP);
1730 }
1731 #endif
1732 
1733 /*
1734  Allocate memory on page boundary, fill allocated memory with 0x00.
1735  Do not call this func directly! Use __kmp_page_allocate macro instead.
1736  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1737  Must use __kmp_free when freeing memory allocated by this routine!
1738  */
1739 void *
1740 ___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL )
1741 {
1742  int page_size = 8 * 1024;
1743  void * ptr;
1744 
1745  KE_TRACE( 25, (
1746  "-> __kmp_page_allocate( %d ) called from %s:%d\n",
1747  (int) size
1748  KMP_SRC_LOC_PARM
1749  ) );
1750  ptr = ___kmp_allocate_align( size, page_size KMP_SRC_LOC_PARM );
1751  KE_TRACE( 25, ( "<- __kmp_page_allocate( %d ) returns %p\n", (int) size, ptr ) );
1752  return ptr;
1753 } // ___kmp_page_allocate
1754 
1755 /*
1756  Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
1757  In debug mode, fill the memory block with 0xEF before call to free().
1758 */
1759 void
1760 ___kmp_free( void * ptr KMP_SRC_LOC_DECL )
1761 {
1762 
1763  kmp_mem_descr_t descr;
1764  kmp_uintptr_t addr_allocated; // Address returned by malloc().
1765  kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
1766 
1767  KE_TRACE( 25, ( "-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM ) );
1768  KMP_ASSERT( ptr != NULL );
1769 
1770  descr = * ( kmp_mem_descr_t *) ( (kmp_uintptr_t) ptr - sizeof( kmp_mem_descr_t ) );
1771 
1772  KE_TRACE( 26, ( " __kmp_free: "
1773  "ptr_allocated=%p, size_allocated=%d, "
1774  "ptr_aligned=%p, size_aligned=%d\n",
1775  descr.ptr_allocated, (int) descr.size_allocated,
1776  descr.ptr_aligned, (int) descr.size_aligned ));
1777 
1778  addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
1779  addr_aligned = (kmp_uintptr_t) descr.ptr_aligned;
1780 
1781  KMP_DEBUG_ASSERT( addr_aligned % CACHE_LINE == 0 );
1782  KMP_DEBUG_ASSERT( descr.ptr_aligned == ptr );
1783  KMP_DEBUG_ASSERT( addr_allocated + sizeof( kmp_mem_descr_t ) <= addr_aligned );
1784  KMP_DEBUG_ASSERT( descr.size_aligned < descr.size_allocated );
1785  KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
1786 
1787  #ifdef KMP_DEBUG
1788  memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
1789  // Fill memory block with 0xEF; this helps catch use of freed memory.
1790  #endif
1791 
1792  #ifndef LEAK_MEMORY
1793  KE_TRACE( 10, ( " free( %p )\n", descr.ptr_allocated ) );
1794  # ifdef KMP_DEBUG
1795  _free_src_loc( descr.ptr_allocated, __FILE__, __LINE__ );
1796  # else
1797  free_src_loc( descr.ptr_allocated KMP_SRC_LOC_PARM );
1798  # endif
1799  #endif
1800 
1801  KMP_MB();
1802 
1803  KE_TRACE( 25, ( "<- __kmp_free() returns\n" ) );
1804 
1805 } // func ___kmp_free
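/*
    A sketch of the block layout shared by __kmp_allocate and
    __kmp_page_allocate, reconstructed from the asserts above:

        ptr_allocated                ptr_aligned (caller's ptr)
        |                            |
        v                            v
        +---------+------------------+---------------------+-----+
        | padding | kmp_mem_descr_t  | size_aligned bytes  | ... |
        +---------+------------------+---------------------+-----+

    The descriptor always sits immediately below the aligned pointer,
    so ___kmp_free() recovers ptr_allocated with plain pointer
    arithmetic and needs no lookup structure.
*/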
1806 
1807 /* ------------------------------------------------------------------------ */
1808 /* ------------------------------------------------------------------------ */
1809 
1810 #if USE_FAST_MEMORY == 3
1811 // Allocate fast memory by first scanning the thread's free lists
1812 // If a chunk of the right size exists, grab it off the free list.
1813 // Otherwise allocate a fresh block from the thread's bget pool.
1814 
1815 // AC: How to choose the limit? Just get 16 for now...
1816 #define KMP_FREE_LIST_LIMIT 16
1817 
1818 // Always use 128 bytes for determining buckets for caching memory blocks
1819 #define DCACHE_LINE 128
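/*
    A guess at the rationale (not stated in the source): 128 bytes is
    twice the usual 64-byte cache line, which also matches processors
    whose adjacent-line prefetcher pairs lines into 128-byte sectors,
    so distinct blocks from these buckets should not false-share.
*/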
1820 
1821 void *
1822 ___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL )
1823 {
1824  void * ptr;
1825  int num_lines;
1826  int idx;
1827  int index;
1828  void * alloc_ptr;
1829  size_t alloc_size;
1830  kmp_mem_descr_t * descr;
1831 
1832  KE_TRACE( 25, ( "-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
1833  __kmp_gtid_from_thread(this_thr), (int) size KMP_SRC_LOC_PARM ) );
1834 
1835  num_lines = ( size + DCACHE_LINE - 1 ) / DCACHE_LINE;
1836  idx = num_lines - 1;
1837  KMP_DEBUG_ASSERT( idx >= 0 );
1838  if ( idx < 2 ) {
1839  index = 0; // idx is [ 0, 1 ], use first free list
1840  num_lines = 2; // 1 or 2 cache lines, or less than one line
1841  } else if ( ( idx >>= 2 ) == 0 ) {
1842  index = 1; // idx is [ 2, 3 ], use second free list
1843  num_lines = 4; // 3, 4 cache lines
1844  } else if ( ( idx >>= 2 ) == 0 ) {
1845  index = 2; // idx is [ 4, 15 ], use third free list
1846  num_lines = 16; // 5, 6, ..., 16 cache lines
1847  } else if ( ( idx >>= 2 ) == 0 ) {
1848  index = 3; // idx is [ 16, 63 ], use fourth free list
1849  num_lines = 64; // 17, 18, ..., 64 cache lines
1850  } else {
1851  goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
1852  }
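/*
    A worked example of the bucket selection above, for size = 1000:
    num_lines = (1000 + 127) / 128 = 8, so idx = 7; 7 < 2 fails,
    7 >> 2 == 1 is nonzero, 1 >> 2 == 0 hits the third case, giving
    index = 2 and rounding the request up to 16 lines (2048 bytes).
    Every cached block is therefore exactly 2, 4, 16, or 64 lines.
*/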
1853 
1854  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
1855  if ( ptr != NULL ) {
1856  // pop the head of no-sync free list
1857  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1858  KMP_DEBUG_ASSERT( this_thr ==
1859  ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
1860  goto end;
1861  }
1862  ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
1863  if ( ptr != NULL ) {
1864  // no-sync free list is empty, use sync free list (filled in by other threads only)
1865  // pop the head of the sync free list, push NULL instead
1866  while ( ! KMP_COMPARE_AND_STORE_PTR(
1867  &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, NULL ) )
1868  {
1869  KMP_CPU_PAUSE();
1870  ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
1871  }
1872  // push the rest of the chain onto the no-sync free list (may be NULL if that was the only block)
1873  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1874  KMP_DEBUG_ASSERT( this_thr ==
1875  ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
1876  goto end;
1877  }
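/*
    Note on the loop above: the CAS detaches the entire sync chain at
    once (head -> NULL) instead of popping one node, so the classic
    ABA hazard of lock-free pops cannot bite; once the chain is owned
    by this thread it can be relinked without synchronization.
*/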
1878 
1879  alloc_call:
1880  // no suitable block found on the free lists, so allocate one
1881  size = num_lines * DCACHE_LINE;
1882 
1883  alloc_size = size + sizeof( kmp_mem_descr_t ) + DCACHE_LINE;
1884  KE_TRACE( 25, ( "__kmp_fast_allocate: T#%d Calling bget with alloc_size %d\n",
1885  __kmp_gtid_from_thread( this_thr ), (int) alloc_size ) );
1886  alloc_ptr = bget( this_thr, (bufsize) alloc_size );
1887 
1888  // align ptr to DCACHE_LINE
1889  ptr = (void *)(( ((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + DCACHE_LINE ) & ~( DCACHE_LINE - 1 ));
1890  descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
1891 
1892  descr->ptr_allocated = alloc_ptr; // remember allocated pointer
1893  // we don't need size_allocated
1894  descr->ptr_aligned = (void *)this_thr; // remember allocating thread
1895  // (it is already saved in the bget buffer,
1896  // but we may want to use another allocator in the future)
1897  descr->size_aligned = size;
1898 
1899  end:
1900  KE_TRACE( 25, ( "<- __kmp_fast_allocate( T#%d ) returns %p\n",
1901  __kmp_gtid_from_thread( this_thr ), ptr ) );
1902  return ptr;
1903 } // func __kmp_fast_allocate
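/*
    A numeric check of the alignment step in alloc_call, assuming a
    hypothetical sizeof(kmp_mem_descr_t) of 32 bytes: for
    alloc_ptr = 4104, 4104 + 32 + 128 = 4264 and 4264 & ~127 = 4224,
    a DCACHE_LINE boundary leaving 120 bytes below ptr. Rounding down
    from alloc_ptr + sizeof(descr) + DCACHE_LINE discards at most
    DCACHE_LINE - 1 bytes, so the descriptor always fits.
*/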
1904 
1905 // Free fast memory and place it on the thread's free list if it is of
1906 // the correct size.
1907 void
1908 ___kmp_fast_free( kmp_info_t *this_thr, void * ptr KMP_SRC_LOC_DECL )
1909 {
1910  kmp_mem_descr_t * descr;
1911  kmp_info_t * alloc_thr;
1912  size_t size;
1913  size_t idx;
1914  int index;
1915 
1916  KE_TRACE( 25, ( "-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
1917  __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM ) );
1918  KMP_ASSERT( ptr != NULL );
1919 
1920  descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
1921 
1922  KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
1923  (int) descr->size_aligned ) );
1924 
1925  size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
1926 
1927  idx = DCACHE_LINE * 2; // 2 cache lines is the minimal block size
1928  if ( idx == size ) {
1929  index = 0; // 2 cache lines
1930  } else if ( ( idx <<= 1 ) == size ) {
1931  index = 1; // 4 cache lines
1932  } else if ( ( idx <<= 2 ) == size ) {
1933  index = 2; // 16 cache lines
1934  } else if ( ( idx <<= 2 ) == size ) {
1935  index = 3; // 64 cache lines
1936  } else {
1937  KMP_DEBUG_ASSERT( size > DCACHE_LINE * 64 );
1938  goto free_call; // 65 or more cache lines ( > 8KB )
1939  }
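/*
    The chain above inverts the bucket mapping in __kmp_fast_allocate:
    idx walks 256 -> 512 -> 2048 -> 8192 bytes, i.e. exactly the four
    legal size_aligned values (2, 4, 16, 64 lines); any other size must
    have come from the plain bget path and is released the same way.
*/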
1940 
1941  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
1942  if ( alloc_thr == this_thr ) {
1943  // push block to self no-sync free list, linking previous head (LIFO)
1944  *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
1945  this_thr->th.th_free_lists[index].th_free_list_self = ptr;
1946  } else {
1947  void * head = this_thr->th.th_free_lists[index].th_free_list_other;
1948  if ( head == NULL ) {
1949  // Create new free list
1950  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1951  *((void **)ptr) = NULL; // mark the tail of the list
1952  descr->size_allocated = (size_t)1; // head of the list keeps its length
1953  } else {
1954  // need to check the existing "other" list's owner thread and queue size
1955  kmp_mem_descr_t * dsc = (kmp_mem_descr_t *)( (char*)head - sizeof(kmp_mem_descr_t) );
1956  kmp_info_t * q_th = (kmp_info_t *)(dsc->ptr_aligned); // allocating thread, same for all queue nodes
1957  size_t q_sz = dsc->size_allocated + 1; // new size in case we add current task
1958  if ( q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT ) {
1959  // we can add current task to "other" list, no sync needed
1960  *((void **)ptr) = head;
1961  descr->size_allocated = q_sz;
1962  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1963  } else {
1964  // either the queued blocks' owner thread differs or the size limit was exceeded:
1965  // return the old queue to its allocating thread (q_th) synchronously,
1966  // and start a new list for alloc_thr's blocks
1967  void * old_ptr;
1968  void * tail = head;
1969  void * next = *((void **)head);
1970  while ( next != NULL ) {
1971  KMP_DEBUG_ASSERT(
1972  // queue size should decrease by 1 each step through the list
1973  ((kmp_mem_descr_t*)((char*)next - sizeof(kmp_mem_descr_t)))->size_allocated + 1 ==
1974  ((kmp_mem_descr_t*)((char*)tail - sizeof(kmp_mem_descr_t)))->size_allocated );
1975  tail = next; // remember tail node
1976  next = *((void **)next);
1977  }
1978  KMP_DEBUG_ASSERT( q_th != NULL );
1979  // push block to owner's sync free list
1980  old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
1981  /* tail's next pointer must be set before the CAS publishes head, to avoid
1982  exposing a broken list to other threads, even for an instant. */
1983  *((void **)tail) = old_ptr;
1984 
1985  while ( ! KMP_COMPARE_AND_STORE_PTR(
1986  &q_th->th.th_free_lists[index].th_free_list_sync,
1987  old_ptr,
1988  head ) )
1989  {
1990  KMP_CPU_PAUSE();
1991  old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
1992  *((void **)tail) = old_ptr;
1993  }
1994 
1995  // start a new list of not-self blocks
1996  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1997  *((void **)ptr) = NULL;
1998  descr->size_allocated = (size_t)1; // head of queue keeps its length
1999  }
2000  }
2001  }
2002  goto end;
2003 
2004  free_call:
2005  KE_TRACE(25, ( "__kmp_fast_free: T#%d Calling brel for size %d\n",
2006  __kmp_gtid_from_thread( this_thr), (int) size ) );
2007  __kmp_bget_dequeue( this_thr ); /* Release any queued buffers */
2008  brel( this_thr, descr->ptr_allocated );
2009 
2010  end:
2011  KE_TRACE( 25, ( "<- __kmp_fast_free() returns\n" ) );
2012 
2013 } // func __kmp_fast_free
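/*
    A summary of the three paths through __kmp_fast_free above:
    (1) a thread freeing its own block pushes it onto
        th_free_list_self with no synchronization;
    (2) a foreign block is batched on the local th_free_list_other
        list while the owner matches and the count stays within
        KMP_FREE_LIST_LIMIT;
    (3) otherwise the whole batch is handed to the owner's
        th_free_list_sync in one CAS loop, amortizing cross-thread
        traffic over up to 16 frees.
*/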
2014 
2015 
2016 // Initialize the thread free lists related to fast memory
2017 // Only do this when a thread is initially created.
2018 void
2019 __kmp_initialize_fast_memory( kmp_info_t *this_thr )
2020 {
2021  KE_TRACE(10, ( "__kmp_initialize_fast_memory: Called from th %p\n", this_thr ) );
2022 
2023  memset ( this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof( kmp_free_list_t ) );
2024 }
2025 
2026 // Free the memory in the thread free lists related to fast memory
2027 // Only do this when a thread is being reaped (destroyed).
2028 void
2029 __kmp_free_fast_memory( kmp_info_t *th )
2030 {
2031  // Assuming BGET is the underlying allocator, walk through its structures...
2032  int bin;
2033  thr_data_t * thr = get_thr_data( th );
2034  void ** lst = NULL;
2035 
2036  KE_TRACE(5, ( "__kmp_free_fast_memory: Called T#%d\n",
2037  __kmp_gtid_from_thread( th ) ) );
2038 
2039  __kmp_bget_dequeue( th ); // Release any queued buffers
2040 
2041  // Dig through free lists and extract all allocated blocks
2042  for ( bin = 0; bin < MAX_BGET_BINS; ++bin ) {
2043  bfhead_t * b = thr->freelist[ bin ].ql.flink;
2044  while ( b != &thr->freelist[ bin ] ) {
2045  if ( (kmp_uintptr_t)b->bh.bb.bthr & 1 ) { // low bit in bthr tags a buffer whose address came from the system allocator
2046  *((void**)b) = lst; // link the list (overwrites bthr, but flink stays intact)
2047  lst = (void**)b; // push b onto lst
2048  }
2049  b = b->ql.flink; // get next buffer
2050  }
2051  }
2052  while ( lst != NULL ) {
2053  void * next = *lst;
2054  KE_TRACE(10, ( "__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
2055  lst, next, th, __kmp_gtid_from_thread( th ) ) );
2056  (*thr->relfcn)(lst);
2057  #if BufStats
2058  // count blocks to prevent problems in __kmp_finalize_bget()
2059  thr->numprel++; /* Nr of expansion block releases */
2060  thr->numpblk--; /* Total number of blocks */
2061  #endif
2062  lst = (void**)next;
2063  }
2064 
2065  KE_TRACE(5, ( "__kmp_free_fast_memory: Freed T#%d\n",
2066  __kmp_gtid_from_thread( th ) ) );
2067 }
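/*
    Why the sweep above uses two loops: the first pass only overwrites
    bb.bthr (the tagged word) while leaving ql.flink intact, so the
    freelists stay walkable during the scan; the second pass can then
    release each collected expansion block via relfcn without
    corrupting the lists it is traversing.
*/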
2068 
2069 #endif // USE_FAST_MEMORY