/*
 * kmp_alloc.c -- private/shared dynamic memory allocation and management
 */

/* <copyright>
    Copyright (c) 1997-2015 Intel Corporation.  All Rights Reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

      * Redistributions of source code must retain the above copyright
        notice, this list of conditions and the following disclaimer.
      * Redistributions in binary form must reproduce the above copyright
        notice, this list of conditions and the following disclaimer in the
        documentation and/or other materials provided with the distribution.
      * Neither the name of Intel Corporation nor the names of its
        contributors may be used to endorse or promote products derived
        from this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

</copyright> */

#include "kmp.h"
#include "kmp_wrapper_malloc.h"
#include "kmp_io.h"

// Disable bget when it is not used
#if KMP_USE_BGET

/* Thread private buffer management code */

typedef int   (*bget_compact_t)(size_t, int);
typedef void *(*bget_acquire_t)(size_t);
typedef void  (*bget_release_t)(void *);

/* NOTE: bufsize must be a signed datatype */

#if KMP_OS_WINDOWS
# if KMP_ARCH_X86 || KMP_ARCH_ARM
    typedef kmp_int32 bufsize;
# else
    typedef kmp_int64 bufsize;
# endif
#else
    typedef ssize_t bufsize;
#endif

/* The three modes of operation are fifo search, lifo search, and best-fit */

typedef enum bget_mode {
    bget_mode_fifo = 0,
    bget_mode_lifo = 1,
    bget_mode_best = 2
} bget_mode_t;


static void    bpool( kmp_info_t *th, void *buffer, bufsize len);
static void   *bget( kmp_info_t *th, bufsize size);
static void   *bgetz( kmp_info_t *th, bufsize size);
static void   *bgetr( kmp_info_t *th, void *buffer, bufsize newsize);
static void    brel( kmp_info_t *th, void *buf);
static void    bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr );

#ifdef KMP_DEBUG
static void    bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel);
static void    bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel);
static void    bufdump( kmp_info_t *th, void *buf);
static void    bpoold( kmp_info_t *th, void *pool, int dumpalloc, int dumpfree);
static int     bpoolv( kmp_info_t *th, void *pool);
#endif
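
/* Illustrative call sequence (a sketch, not part of the library): the runtime
   wires bget to the C heap via bectl() and then serves thread-local requests
   from per-thread pools:

       bectl( th, (bget_compact_t) 0, (bget_acquire_t) malloc,
              (bget_release_t) free, (bufsize) __kmp_malloc_pool_incr );
       void *p = bget( th, 256 );   // carved from an expansion block
       brel( th, p );               // back onto this thread's free lists

   This mirrors what __kmp_initialize_bget() below actually does. */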

/* BGET CONFIGURATION */
                                  /* Buffer allocation size quantum:
                                     all buffers allocated are a
                                     multiple of this size.  This
                                     MUST be a power of two. */

                                  /* On IA-32 architecture with Linux* OS,
                                     malloc() does not
                                     ensure 16 byte alignment */

#if KMP_ARCH_X86 || !KMP_HAVE_QUAD

#define SizeQuant   8
#define AlignType   double

#else

#define SizeQuant   16
#define AlignType   _Quad

#endif

#define BufStats    1             /* Define this symbol to enable the
                                     bstats() function which calculates
                                     the total free space in the buffer
                                     pool, the largest available
                                     buffer, and the total space
                                     currently allocated. */

#ifdef KMP_DEBUG

#define BufDump     1             /* Define this symbol to enable the
                                     bpoold() function which dumps the
                                     buffers in a buffer pool. */

#define BufValid    1             /* Define this symbol to enable the
                                     bpoolv() function for validating
                                     a buffer pool. */

#define DumpData    1             /* Define this symbol to enable the
                                     bufdump() function which allows
                                     dumping the contents of an allocated
                                     or free buffer. */
#ifdef NOT_USED_NOW

#define FreeWipe    1             /* Wipe free buffers to a guaranteed
                                     pattern of garbage to trip up
                                     miscreants who attempt to use
                                     pointers into released buffers. */

#define BestFit     1             /* Use a best fit algorithm when
                                     searching for space for an
                                     allocation request.  This uses
                                     memory more efficiently, but
                                     allocation will be much slower. */
#endif /* NOT_USED_NOW */
#endif /* KMP_DEBUG */


static bufsize bget_bin_size[ ] = {
    0,
//    1 << 6,    /* .5 Cache line */
    1 << 7,      /* 1 Cache line, new */
    1 << 8,      /* 2 Cache lines */
    1 << 9,      /* 4 Cache lines, new */
    1 << 10,     /* 8 Cache lines */
    1 << 11,     /* 16 Cache lines, new */
    1 << 12,
    1 << 13,     /* new */
    1 << 14,
    1 << 15,     /* new */
    1 << 16,
    1 << 17,
    1 << 18,
    1 << 19,
    1 << 20,     /*  1MB */
    1 << 21,     /*  2MB */
    1 << 22,     /*  4MB */
    1 << 23,     /*  8MB */
    1 << 24,     /* 16MB */
    1 << 25,     /* 32MB */
};

#define MAX_BGET_BINS   (int)(sizeof(bget_bin_size) / sizeof(bufsize))

struct bfhead;

/*  Declare the interface, including the requested buffer size type,
    bufsize.  */

/* Queue links */

typedef struct qlinks {
    struct bfhead *flink;       /* Forward link */
    struct bfhead *blink;       /* Backward link */
} qlinks_t;

/* Header in allocated and free buffers */

typedef struct bhead2 {
    kmp_info_t *bthr;           /* The thread which owns the buffer pool */
    bufsize     prevfree;       /* Relative link back to previous
                                   free buffer in memory or 0 if
                                   previous buffer is allocated.  */
    bufsize     bsize;          /* Buffer size: positive if free,
                                   negative if allocated.  */
} bhead2_t;

/* Make sure the bhead structure is a multiple of SizeQuant in size. */

typedef union bhead {
    KMP_ALIGN( SizeQuant )
    AlignType           b_align;
    char                b_pad[ sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant)) ];
    bhead2_t            bb;
} bhead_t;
#define BH(p)   ((bhead_t *) (p))

/*  Header in directly allocated buffers (by acqfcn) */

typedef struct bdhead
{
    bufsize tsize;              /* Total size, including overhead */
    bhead_t bh;                 /* Common header */
} bdhead_t;
#define BDH(p)  ((bdhead_t *) (p))

/* Header in free buffers */

typedef struct bfhead {
    bhead_t  bh;                /* Common allocated/free header */
    qlinks_t ql;                /* Links on free list */
} bfhead_t;
#define BFH(p)  ((bfhead_t *) (p))

typedef struct thr_data {
    bfhead_t freelist[ MAX_BGET_BINS ];
#if BufStats
    size_t totalloc;            /* Total space currently allocated */
    long numget, numrel;        /* Number of bget() and brel() calls */
    long numpblk;               /* Number of pool blocks */
    long numpget, numprel;      /* Number of block gets and rels */
    long numdget, numdrel;      /* Number of direct gets and rels */
#endif /* BufStats */

    /* Automatic expansion block management functions */
    bget_compact_t compfcn;
    bget_acquire_t acqfcn;
    bget_release_t relfcn;

    bget_mode_t mode;           /* what allocation mode to use? */

    bufsize exp_incr;           /* Expansion block size */
    bufsize pool_len;           /* 0: no bpool calls have been made
                                   -1: not all pool blocks are
                                       the same size
                                   >0: (common) block size for all
                                       bpool calls made so far
                                */
    bfhead_t * last_pool;       /* Last pool owned by this thread (delayed deallocation) */
} thr_data_t;

/*  Minimum allocation quantum: */

#define QLSize  (sizeof(qlinks_t))
#define SizeQ   ((SizeQuant > QLSize) ? SizeQuant : QLSize)
#define MaxSize (bufsize)( ~ ( ( (bufsize)( 1 ) << ( sizeof( bufsize ) * CHAR_BIT - 1 ) ) | ( SizeQuant - 1 ) ) )
    // Maximum for the requested size.
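
/* Worked example (illustrative): with a 32-bit bufsize and SizeQuant == 8,
   MaxSize == ~(0x80000000 | 0x7) == 0x7FFFFFF8 -- the largest positive
   bufsize that is still a multiple of SizeQuant. */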

/* End sentinel: value placed in bsize field of dummy block delimiting
   end of pool block.  The most negative number which will fit in a
   bufsize, defined in a way that the compiler will accept. */

#define ESent   ((bufsize) (-(((((bufsize)1)<<((int)sizeof(bufsize)*8-2))-1)*2)-2))

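/* Sketch of the arithmetic: for a 32-bit bufsize the expression evaluates as
   -(((1 << 30) - 1) * 2) - 2 == -2147483648 (INT32_MIN), built up in steps
   so that no intermediate literal overflows what the compiler accepts. */
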
/* ------------------------------------------------------------------------ */

/* Thread Data management routines */

static int
bget_get_bin( bufsize size )
{
    // binary chop bins
    int lo = 0, hi = MAX_BGET_BINS - 1;

    KMP_DEBUG_ASSERT( size > 0 );

    while ( (hi - lo) > 1 ) {
        int mid = (lo + hi) >> 1;
        if (size < bget_bin_size[ mid ])
            hi = mid - 1;
        else
            lo = mid;
    }

    KMP_DEBUG_ASSERT( (lo >= 0) && (lo < MAX_BGET_BINS) );

    return lo;
}
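
/* Example (a sketch): bget_get_bin(200) returns bin 1, since
   bget_bin_size[1] == 128 <= 200 < 256.  Because the chop steps hi to
   mid - 1, the result can land one bin below the exact fit for some sizes;
   that is harmless here, since callers scan from the returned bin upward. */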

static void
set_thr_data( kmp_info_t *th )
{
    int i;
    thr_data_t *data;

    data =
        (thr_data_t *)(
            ( ! th->th.th_local.bget_data ) ? __kmp_allocate( sizeof( *data ) ) : th->th.th_local.bget_data
        );

    memset( data, '\0', sizeof( *data ) );

    for (i = 0; i < MAX_BGET_BINS; ++i) {
        data->freelist[ i ].ql.flink = & data->freelist[ i ];
        data->freelist[ i ].ql.blink = & data->freelist[ i ];
    }

    th->th.th_local.bget_data = data;
    th->th.th_local.bget_list = 0;
#if ! USE_CMP_XCHG_FOR_BGET
#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_init_lock( & th->th.th_local.bget_lock );
#else
    __kmp_init_bootstrap_lock( & th->th.th_local.bget_lock );
#endif /* USE_QUEUING_LOCK_FOR_BGET */
#endif /* ! USE_CMP_XCHG_FOR_BGET */
}

static thr_data_t *
get_thr_data( kmp_info_t *th )
{
    thr_data_t *data;

    data = (thr_data_t *) th->th.th_local.bget_data;

    KMP_DEBUG_ASSERT( data != 0 );

    return data;
}


#ifdef KMP_DEBUG

static void
__kmp_bget_validate_queue( kmp_info_t *th )
{
    /* NOTE: assume that the global_lock is held */

    void *p = (void *) th->th.th_local.bget_list;

    while (p != 0) {
        bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));

        KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
        p = (void *) b->ql.flink;
    }
}

#endif

/* Walk the free list and release the enqueued buffers */

static void
__kmp_bget_dequeue( kmp_info_t *th )
{
    void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);

    if (p != 0) {
        #if USE_CMP_XCHG_FOR_BGET
        {
            volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
            while ( ! KMP_COMPARE_AND_STORE_PTR(
                        & th->th.th_local.bget_list, old_value, NULL ) )
            {
                KMP_CPU_PAUSE();
                old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
            }
            p = (void *) old_value;
        }
        #else /* ! USE_CMP_XCHG_FOR_BGET */
        #ifdef USE_QUEUING_LOCK_FOR_BGET
            __kmp_acquire_lock( & th->th.th_local.bget_lock,
                                __kmp_gtid_from_thread(th) );
        #else
            __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
        #endif /* USE_QUEUING_LOCK_FOR_BGET */

            p = (void *) th->th.th_local.bget_list;
            th->th.th_local.bget_list = 0;

        #ifdef USE_QUEUING_LOCK_FOR_BGET
            __kmp_release_lock( & th->th.th_local.bget_lock,
                                __kmp_gtid_from_thread(th) );
        #else
            __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
        #endif
        #endif /* USE_CMP_XCHG_FOR_BGET */

        /* Check again to make sure the list is not empty */

        while (p != 0) {
            void *buf = p;
            bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));

            KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
            KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
                                (kmp_uintptr_t)th ); // clear possible mark
            KMP_DEBUG_ASSERT( b->ql.blink == 0 );

            p = (void *) b->ql.flink;

            brel( th, buf );
        }
    }
}

/* Chain together the free buffers by using the thread owner field */

static void
__kmp_bget_enqueue( kmp_info_t *th, void *buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                    , kmp_int32 rel_gtid
#endif
                  )
{
    bfhead_t *b = BFH(((char *) buf) - sizeof(bhead_t));

    KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
    KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
                        (kmp_uintptr_t)th ); // clear possible mark

    b->ql.blink = 0;

    KC_TRACE( 10, ( "__kmp_bget_enqueue: moving buffer to T#%d list\n",
                    __kmp_gtid_from_thread( th ) ) );

#if USE_CMP_XCHG_FOR_BGET
    {
        volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
        /* the next pointer must be set before setting bget_list to buf to avoid
           exposing a broken list to other threads, even for an instant. */
        b->ql.flink = BFH( old_value );

        while ( ! KMP_COMPARE_AND_STORE_PTR(
                    & th->th.th_local.bget_list, old_value, buf ) )
        {
            KMP_CPU_PAUSE();
            old_value = TCR_PTR(th->th.th_local.bget_list);
            /* the next pointer must be set before setting bget_list to buf to avoid
               exposing a broken list to other threads, even for an instant. */
            b->ql.flink = BFH( old_value );
        }
    }
#else /* ! USE_CMP_XCHG_FOR_BGET */
# ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_acquire_lock( & th->th.th_local.bget_lock, rel_gtid );
# else
    __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
# endif

    b->ql.flink = BFH( th->th.th_local.bget_list );
    th->th.th_local.bget_list = (void *) buf;

# ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_release_lock( & th->th.th_local.bget_lock, rel_gtid );
# else
    __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
# endif
#endif /* USE_CMP_XCHG_FOR_BGET */
}
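
/* Design note: together, __kmp_bget_enqueue() and __kmp_bget_dequeue() form a
   Treiber-style lock-free LIFO when USE_CMP_XCHG_FOR_BGET is set: foreign
   threads CAS-push freed buffers onto the owner's bget_list, and the owner
   CAS-swaps the whole list out with NULL before releasing each entry via
   brel().  Only the owner ever removes entries, and it takes the entire list
   at once, which is why a plain compare-and-swap suffices here. */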

/* insert buffer back onto a new freelist */

static void
__kmp_bget_insert_into_freelist( thr_data_t *thr, bfhead_t *b )
{
    int bin;

    KMP_DEBUG_ASSERT( ((size_t)b ) % SizeQuant == 0 );
    KMP_DEBUG_ASSERT( b->bh.bb.bsize % SizeQuant == 0 );

    bin = bget_get_bin( b->bh.bb.bsize );

    KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.blink->ql.flink == &thr->freelist[ bin ]);
    KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.flink->ql.blink == &thr->freelist[ bin ]);

    b->ql.flink = &thr->freelist[ bin ];
    b->ql.blink = thr->freelist[ bin ].ql.blink;

    thr->freelist[ bin ].ql.blink = b;
    b->ql.blink->ql.flink = b;
}

/* unlink the buffer from the old freelist */

static void
__kmp_bget_remove_from_freelist( bfhead_t *b )
{
    KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
    KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);

    b->ql.blink->ql.flink = b->ql.flink;
    b->ql.flink->ql.blink = b->ql.blink;
}

/* ------------------------------------------------------------------------ */

/* GET STATS -- check info on free list */

static void
bcheck( kmp_info_t *th, bufsize *max_free, bufsize *total_free )
{
    thr_data_t *thr = get_thr_data( th );
    int bin;

    *total_free = *max_free = 0;

    for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
        bfhead_t *b, *best;

        best = &thr->freelist[ bin ];
        b = best->ql.flink;

        while (b != &thr->freelist[ bin ]) {
            *total_free += (b->bh.bb.bsize - sizeof( bhead_t ));
            if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize))
                best = b;

            /* Link to next buffer */
            b = b->ql.flink;
        }

        if (*max_free < best->bh.bb.bsize)
            *max_free = best->bh.bb.bsize;
    }

    if (*max_free > (bufsize)sizeof( bhead_t ))
        *max_free -= sizeof( bhead_t );
}

/* ------------------------------------------------------------------------ */

/*  BGET  --  Allocate a buffer.  */

static void *
bget( kmp_info_t *th, bufsize requested_size )
{
    thr_data_t *thr = get_thr_data( th );
    bufsize size = requested_size;
    bfhead_t *b;
    void *buf;
    int compactseq = 0;
    int use_blink = 0;
/* For BestFit */
    bfhead_t *best;

    if ( size < 0 || size + sizeof( bhead_t ) > MaxSize ) {
        return NULL;
    }; // if

    __kmp_bget_dequeue( th );          /* Release any queued buffers */

    if (size < (bufsize)SizeQ) {       /* Need at least room for the */
        size = SizeQ;                  /*    queue links.  */
    }
    #if defined( SizeQuant ) && ( SizeQuant > 1 )
        size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
    #endif

    size += sizeof(bhead_t);           /* Add overhead in allocated buffer
                                          to size required. */
    KMP_DEBUG_ASSERT( size >= 0 );
    KMP_DEBUG_ASSERT( size % SizeQuant == 0 );

    use_blink = ( thr->mode == bget_mode_lifo );

    /* If a compact function was provided in the call to bectl(), wrap
       a loop around the allocation process to allow compaction to
       intervene in case we don't find a suitable buffer in the chain. */

    for (;;) {
        int bin;

        for (bin = bget_get_bin( size ); bin < MAX_BGET_BINS; ++bin) {
            /* Link to next buffer */
            b = ( use_blink ? thr->freelist[ bin ].ql.blink : thr->freelist[ bin ].ql.flink );

            if (thr->mode == bget_mode_best) {
                best = &thr->freelist[ bin ];

                /* Scan the free list searching for the first buffer big enough
                   to hold the requested size buffer. */

                while (b != &thr->freelist[ bin ]) {
                    if (b->bh.bb.bsize >= (bufsize) size) {
                        if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize)) {
                            best = b;
                        }
                    }

                    /* Link to next buffer */
                    b = ( use_blink ? b->ql.blink : b->ql.flink );
                }
                b = best;
            }

            while (b != &thr->freelist[ bin ]) {
                if ((bufsize) b->bh.bb.bsize >= (bufsize) size) {

                    /* Buffer is big enough to satisfy the request.  Allocate it
                       to the caller.  We must decide whether the buffer is large
                       enough to split into the part given to the caller and a
                       free buffer that remains on the free list, or whether the
                       entire buffer should be removed from the free list and
                       given to the caller in its entirety.  We only split the
                       buffer if enough room remains for a header plus the minimum
                       quantum of allocation. */

                    if ((b->bh.bb.bsize - (bufsize) size) > (bufsize)(SizeQ + (sizeof(bhead_t)))) {
                        bhead_t *ba, *bn;

                        ba = BH(((char *) b) + (b->bh.bb.bsize - (bufsize) size));
                        bn = BH(((char *) ba) + size);

                        KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);

                        /* Subtract size from length of free block. */
                        b->bh.bb.bsize -= (bufsize) size;

                        /* Link allocated buffer to the previous free buffer. */
                        ba->bb.prevfree = b->bh.bb.bsize;

                        /* Plug negative size into user buffer. */
                        ba->bb.bsize = -size;

                        /* Mark this buffer as owned by this thread. */
                        TCW_PTR(ba->bb.bthr, th);   // not an allocated address (do not mark it)
                        /* Mark buffer after this one not preceded by free block. */
                        bn->bb.prevfree = 0;

                        /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
                        __kmp_bget_remove_from_freelist( b );
                        __kmp_bget_insert_into_freelist( thr, b );
#if BufStats
                        thr->totalloc += (size_t) size;
                        thr->numget++;          /* Increment number of bget() calls */
#endif
                        buf = (void *) ((((char *) ba) + sizeof(bhead_t)));
                        KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
                        return buf;
                    } else {
                        bhead_t *ba;

                        ba = BH(((char *) b) + b->bh.bb.bsize);

                        KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);

                        /* The buffer isn't big enough to split.  Give the whole
                           shebang to the caller and remove it from the free list. */

                        __kmp_bget_remove_from_freelist( b );
#if BufStats
                        thr->totalloc += (size_t) b->bh.bb.bsize;
                        thr->numget++;          /* Increment number of bget() calls */
#endif
                        /* Negate size to mark buffer allocated. */
                        b->bh.bb.bsize = -(b->bh.bb.bsize);

                        /* Mark this buffer as owned by this thread. */
                        TCW_PTR(ba->bb.bthr, th);   // not an allocated address (do not mark it)
                        /* Zero the back pointer in the next buffer in memory
                           to indicate that this buffer is allocated. */
                        ba->bb.prevfree = 0;

                        /* Give user buffer starting at queue links. */
                        buf = (void *) &(b->ql);
                        KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
                        return buf;
                    }
                }

                /* Link to next buffer */
                b = ( use_blink ? b->ql.blink : b->ql.flink );
            }
        }

        /* We failed to find a buffer.  If there's a compact function
           defined, notify it of the size requested.  If it returns
           TRUE, try the allocation again. */

        if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
            break;
        }
    }

    /* No buffer available with requested size free. */

    /* Don't give up yet -- look in the reserve supply. */

    if (thr->acqfcn != 0) {
        if (size > (bufsize) (thr->exp_incr - sizeof(bhead_t))) {

            /* Request is too large to fit in a single expansion
               block.  Try to satisfy it by a direct buffer acquisition. */

            bdhead_t *bdh;

            size += sizeof(bdhead_t) - sizeof(bhead_t);

            KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", (int) size ) );

            /* richryan */
            bdh = BDH((*thr->acqfcn)((bufsize) size));
            if (bdh != NULL) {

                /* Mark the buffer special by setting the size field
                   of its header to zero.  */
                bdh->bh.bb.bsize = 0;

                /* Mark this buffer as owned by this thread. */
                TCW_PTR(bdh->bh.bb.bthr, th);   // don't mark buffer as allocated,
                                                // because direct buffer never goes to free list
                bdh->bh.bb.prevfree = 0;
                bdh->tsize = size;
#if BufStats
                thr->totalloc += (size_t) size;
                thr->numget++;          /* Increment number of bget() calls */
                thr->numdget++;         /* Direct bget() call count */
#endif
                buf = (void *) (bdh + 1);
                KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
                return buf;
            }

        } else {

            /* Try to obtain a new expansion block */

            void *newpool;

            KE_TRACE( 10, ("%%%%%% MALLOCB( %d )\n", (int) thr->exp_incr ) );

            /* richryan */
            newpool = (*thr->acqfcn)((bufsize) thr->exp_incr);
            KMP_DEBUG_ASSERT( ((size_t)newpool) % SizeQuant == 0 );
            if (newpool != NULL) {
                bpool( th, newpool, thr->exp_incr);
                buf = bget( th, requested_size);  /* This can't, I say, can't get into a loop. */
                return buf;
            }
        }
    }

    /* Still no buffer available */

    return NULL;
}
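
/* Design note: when splitting, bget() carves the allocation from the high
   end of the free block (ba points bsize - size bytes past b), so the
   remaining low fragment keeps its position in memory and only its
   freelist bin needs to be refreshed. */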

/*  BGETZ  --  Allocate a buffer and clear its contents to zero.  We clear
               the entire contents of the buffer to zero, not just the
               region requested by the caller. */

static void *
bgetz( kmp_info_t *th, bufsize size )
{
    char *buf = (char *) bget( th, size);

    if (buf != NULL) {
        bhead_t *b;
        bufsize rsize;

        b = BH(buf - sizeof(bhead_t));
        rsize = -(b->bb.bsize);
        if (rsize == 0) {
            bdhead_t *bd;

            bd = BDH(buf - sizeof(bdhead_t));
            rsize = bd->tsize - (bufsize) sizeof(bdhead_t);
        } else {
            rsize -= sizeof(bhead_t);
        }

        KMP_DEBUG_ASSERT(rsize >= size);

        (void) memset(buf, 0, (bufsize) rsize);
    }
    return ((void *) buf);
}

/*  BGETR  --  Reallocate a buffer.  This is a minimal implementation,
               simply in terms of brel() and bget().  It could be
               enhanced to allow the buffer to grow into adjacent free
               blocks and to avoid moving data unnecessarily.  */

static void *
bgetr( kmp_info_t *th, void *buf, bufsize size)
{
    void *nbuf;
    bufsize osize;    /* Old size of buffer */
    bhead_t *b;

    nbuf = bget( th, size );
    if ( nbuf == NULL ) { /* Acquire new buffer */
        return NULL;
    }
    if ( buf == NULL ) {
        return nbuf;
    }
    b = BH(((char *) buf) - sizeof(bhead_t));
    osize = -b->bb.bsize;
    if (osize == 0) {
        /* Buffer acquired directly through acqfcn. */
        bdhead_t *bd;

        bd = BDH(((char *) buf) - sizeof(bdhead_t));
        osize = bd->tsize - (bufsize) sizeof(bdhead_t);
    } else {
        osize -= sizeof(bhead_t);
    };

    KMP_DEBUG_ASSERT(osize > 0);

    (void) KMP_MEMCPY((char *) nbuf, (char *) buf, /* Copy the data */
                      (size_t) ((size < osize) ? size : osize));
    brel( th, buf );

    return nbuf;
}

/*  BREL  --  Release a buffer.  */

static void
brel( kmp_info_t *th, void *buf )
{
    thr_data_t *thr = get_thr_data( th );
    bfhead_t *b, *bn;
    kmp_info_t *bth;

    KMP_DEBUG_ASSERT(buf != NULL);
    KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );

    b = BFH(((char *) buf) - sizeof(bhead_t));

    if (b->bh.bb.bsize == 0) {         /* Directly-acquired buffer? */
        bdhead_t *bdh;

        bdh = BDH(((char *) buf) - sizeof(bdhead_t));
        KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
#if BufStats
        thr->totalloc -= (size_t) bdh->tsize;
        thr->numdrel++;               /* Number of direct releases */
        thr->numrel++;                /* Increment number of brel() calls */
#endif /* BufStats */
#ifdef FreeWipe
        (void) memset((char *) buf, 0x55,
                      (size_t) (bdh->tsize - sizeof(bdhead_t)));
#endif /* FreeWipe */

        KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) bdh ) );

        KMP_DEBUG_ASSERT( thr->relfcn != 0 );
        (*thr->relfcn)((void *) bdh);      /* Release it directly. */
        return;
    }

    bth = (kmp_info_t *)( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ); // clear possible mark before comparison
    if ( bth != th ) {
        /* Add this buffer to be released by the owning thread later */
        __kmp_bget_enqueue( bth, buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                            , __kmp_gtid_from_thread( th )
#endif
        );
        return;
    }

    /* Buffer size must be negative, indicating that the buffer is
       allocated. */

    if (b->bh.bb.bsize >= 0) {
        bn = NULL;
    }
    KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);

    /* Back pointer in next buffer must be zero, indicating the
       same thing: */

    KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.bsize)->bb.prevfree == 0);

#if BufStats
    thr->numrel++;                     /* Increment number of brel() calls */
    thr->totalloc += (size_t) b->bh.bb.bsize;
#endif

    /* If the back link is nonzero, the previous buffer is free.  */

    if (b->bh.bb.prevfree != 0) {
        /* The previous buffer is free.  Consolidate this buffer with it
           by adding the length of this buffer to the previous free
           buffer.  Note that we subtract the size in the buffer being
           released, since it's negative to indicate that the buffer is
           allocated. */

        register bufsize size = b->bh.bb.bsize;

        /* Make the previous buffer the one we're working on. */
        KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.prevfree)->bb.bsize == b->bh.bb.prevfree);
        b = BFH(((char *) b) - b->bh.bb.prevfree);
        b->bh.bb.bsize -= size;

        /* unlink the buffer from the old freelist */
        __kmp_bget_remove_from_freelist( b );
    }
    else {
        /* The previous buffer is allocated.  Mark this buffer
           size as positive (i.e. free) and fall through to place
           the buffer on the free list as an isolated free block. */

        b->bh.bb.bsize = -b->bh.bb.bsize;
    }

    /* insert buffer back onto a new freelist */
    __kmp_bget_insert_into_freelist( thr, b );


    /* Now we look at the next buffer in memory, located by advancing from
       the start of this buffer by its size, to see if that buffer is
       free.  If it is, we combine this buffer with the next one in
       memory, dechaining the second buffer from the free list. */

    bn = BFH(((char *) b) + b->bh.bb.bsize);
    if (bn->bh.bb.bsize > 0) {

        /* The buffer is free.  Remove it from the free list and add
           its size to that of our buffer. */

        KMP_DEBUG_ASSERT(BH((char *) bn + bn->bh.bb.bsize)->bb.prevfree == bn->bh.bb.bsize);

        __kmp_bget_remove_from_freelist( bn );

        b->bh.bb.bsize += bn->bh.bb.bsize;

        /* unlink the buffer from the old freelist, and reinsert it into the new freelist */

        __kmp_bget_remove_from_freelist( b );
        __kmp_bget_insert_into_freelist( thr, b );

        /* Finally, advance to the buffer that follows the newly
           consolidated free block.  We must set its backpointer to the
           head of the consolidated free block.  We know the next block
           must be an allocated block because the process of recombination
           guarantees that two free blocks will never be contiguous in
           memory.  */

        bn = BFH(((char *) b) + b->bh.bb.bsize);
    }
#ifdef FreeWipe
    (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
                  (size_t) (b->bh.bb.bsize - sizeof(bfhead_t)));
#endif
    KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);

    /* The next buffer is allocated.  Set the backpointer in it to point
       to this buffer, the previous free buffer in memory. */

    bn->bh.bb.prevfree = b->bh.bb.bsize;

    /* If a block-release function is defined, and this free buffer
       constitutes the entire block, release it.  Note that pool_len
       is defined in such a way that the test will fail unless all
       pool blocks are the same size.  */

    if (thr->relfcn != 0 &&
        b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
    {
#if BufStats
        if (thr->numpblk != 1) { /* Do not release the last buffer until finalization time */
#endif

            KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
            KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
            KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);

            /* Unlink the buffer from the free list */
            __kmp_bget_remove_from_freelist( b );

            KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );

            (*thr->relfcn)(b);
#if BufStats
            thr->numprel++;       /* Nr of expansion block releases */
            thr->numpblk--;       /* Total number of blocks */
            KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);

            /* avoid leaving stale last_pool pointer around if it is being dealloced */
            if (thr->last_pool == b) thr->last_pool = 0;
        }
        else {
            thr->last_pool = b;
        }
#endif /* BufStats */
    }
}

/*  BECTL  --  Establish automatic pool expansion control  */

static void
bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr)
{
    thr_data_t *thr = get_thr_data( th );

    thr->compfcn = compact;
    thr->acqfcn = acquire;
    thr->relfcn = release;
    thr->exp_incr = pool_incr;
}

/*  BPOOL  --  Add a region of memory to the buffer pool.  */

static void
bpool( kmp_info_t *th, void *buf, bufsize len)
{
/*    int bin = 0; */
    thr_data_t *thr = get_thr_data( th );
    bfhead_t *b = BFH(buf);
    bhead_t *bn;

    __kmp_bget_dequeue( th );          /* Release any queued buffers */

#ifdef SizeQuant
    len &= ~(SizeQuant - 1);
#endif
    if (thr->pool_len == 0) {
        thr->pool_len = len;
    } else if (len != thr->pool_len) {
        thr->pool_len = -1;
    }
#if BufStats
    thr->numpget++;                    /* Number of block acquisitions */
    thr->numpblk++;                    /* Number of blocks total */
    KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
#endif /* BufStats */

    /* Since the block is initially occupied by a single free buffer,
       it had better not be (much) larger than the largest buffer
       whose size we can store in bhead.bb.bsize. */

    KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize) ESent + 1));

    /* Clear the backpointer at the start of the block to indicate that
       there is no free block prior to this one.  That blocks
       recombination when the first block in memory is released. */

    b->bh.bb.prevfree = 0;

    /* Create a dummy allocated buffer at the end of the pool.  This dummy
       buffer is seen when a buffer at the end of the pool is released and
       blocks recombination of the last buffer with the dummy buffer at
       the end.  The length in the dummy buffer is set to the largest
       negative number to denote the end of the pool for diagnostic
       routines (this specific value is not counted on by the actual
       allocation and release functions). */

    len -= sizeof(bhead_t);
    b->bh.bb.bsize = (bufsize) len;
    /* Set the owner of this buffer */
    TCW_PTR( b->bh.bb.bthr, (kmp_info_t*)((kmp_uintptr_t)th | 1) ); // mark the buffer as allocated address

    /* Chain the new block to the free list. */
    __kmp_bget_insert_into_freelist( thr, b );

#ifdef FreeWipe
    (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
                  (size_t) (len - sizeof(bfhead_t)));
#endif
    bn = BH(((char *) b) + len);
    bn->bb.prevfree = (bufsize) len;
    /* Definition of ESent assumes two's complement! */
    KMP_DEBUG_ASSERT( (~0) == -1 && (bn != 0) );

    bn->bb.bsize = ESent;
}
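
/* Layout sketch after bpool(): the block becomes one free buffer of size
   len (len having been reduced by sizeof(bhead_t)) with prevfree == 0,
   followed at b + len by a dummy header whose bsize is ESent and whose
   prevfree is len.  Releases at the end of the pool therefore stop at the
   sentinel instead of walking off the block. */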

/* ------------------------------------------------------------------------ */

/*  BFREED  --  Dump the free lists for this thread. */

static void
bfreed( kmp_info_t *th )
{
    int bin = 0, count = 0;
    int gtid = __kmp_gtid_from_thread( th );
    thr_data_t *thr = get_thr_data( th );

#if BufStats
    __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC " get=%" KMP_INT64_SPEC " rel=%" \
           KMP_INT64_SPEC " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC " prel=%" KMP_INT64_SPEC \
           " dget=%" KMP_INT64_SPEC " drel=%" KMP_INT64_SPEC "\n",
           gtid, (kmp_uint64) thr->totalloc,
           (kmp_int64) thr->numget,  (kmp_int64) thr->numrel,
           (kmp_int64) thr->numpblk,
           (kmp_int64) thr->numpget, (kmp_int64) thr->numprel,
           (kmp_int64) thr->numdget, (kmp_int64) thr->numdrel );
#endif

    for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
        bfhead_t *b;

        for (b = thr->freelist[ bin ].ql.flink; b != &thr->freelist[ bin ]; b = b->ql.flink) {
            bufsize bs = b->bh.bb.bsize;

            KMP_DEBUG_ASSERT( b->ql.blink->ql.flink == b );
            KMP_DEBUG_ASSERT( b->ql.flink->ql.blink == b );
            KMP_DEBUG_ASSERT( bs > 0 );

            count += 1;

            __kmp_printf_no_lock("__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, (long) bs );
#ifdef FreeWipe
            {
                char *lerr = ((char *) b) + sizeof(bfhead_t);
                if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || (memcmp(lerr, lerr + 1, (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
                    __kmp_printf_no_lock( "__kmp_printpool: T#%d     (Contents of above free block have been overstored.)\n", gtid );
                }
            }
#endif
        }
    }

    if (count == 0)
        __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid );
}

/* ------------------------------------------------------------------------ */

#ifdef KMP_DEBUG

#if BufStats

/*  BSTATS  --  Return buffer allocation free space statistics.  */

static void
bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel)
{
    int bin = 0;
    thr_data_t *thr = get_thr_data( th );

    *nget = thr->numget;
    *nrel = thr->numrel;
    *curalloc = (bufsize) thr->totalloc;
    *totfree = 0;
    *maxfree = -1;

    for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
        bfhead_t *b = thr->freelist[ bin ].ql.flink;

        while (b != &thr->freelist[ bin ]) {
            KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
            *totfree += b->bh.bb.bsize;
            if (b->bh.bb.bsize > *maxfree) {
                *maxfree = b->bh.bb.bsize;
            }
            b = b->ql.flink;              /* Link to next buffer */
        }
    }
}

/*  BSTATSE  --  Return extended statistics  */

static void
bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel)
{
    thr_data_t *thr = get_thr_data( th );

    *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
    *npool = thr->numpblk;
    *npget = thr->numpget;
    *nprel = thr->numprel;
    *ndget = thr->numdget;
    *ndrel = thr->numdrel;
}

#endif /* BufStats */

/*  BUFDUMP  --  Dump the data in a buffer.  This is called with the user
                 data pointer, and backs up to the buffer header.  It will
                 dump either a free block or an allocated one. */

static void
bufdump( kmp_info_t *th, void *buf )
{
    bfhead_t *b;
    unsigned char *bdump;
    bufsize bdlen;

    b = BFH(((char *) buf) - sizeof(bhead_t));
    KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
    if (b->bh.bb.bsize < 0) {
        bdump = (unsigned char *) buf;
        bdlen = (-b->bh.bb.bsize) - (bufsize) sizeof(bhead_t);
    } else {
        bdump = (unsigned char *) (((char *) b) + sizeof(bfhead_t));
        bdlen = b->bh.bb.bsize - (bufsize) sizeof(bfhead_t);
    }

    while (bdlen > 0) {
        int i, dupes = 0;
        bufsize l = bdlen;
        char bhex[50], bascii[20];

        if (l > 16) {
            l = 16;
        }

        for (i = 0; i < l; i++) {
            (void) KMP_SNPRINTF(bhex + i * 3, sizeof(bhex) - i * 3, "%02X ", bdump[i]);
            if (bdump[i] > 0x20 && bdump[i] < 0x7F)
                bascii[ i ] = bdump[ i ];
            else
                bascii[ i ] = ' ';
        }
        bascii[i] = 0;
        (void) __kmp_printf_no_lock("%-48s %s\n", bhex, bascii);
        bdump += l;
        bdlen -= l;
        while ((bdlen > 16) && (memcmp((char *) (bdump - 16),
                                       (char *) bdump, 16) == 0)) {
            dupes++;
            bdump += 16;
            bdlen -= 16;
        }
        if (dupes > 1) {
            (void) __kmp_printf_no_lock(
                "     (%d lines [%d bytes] identical to above line skipped)\n",
                dupes, dupes * 16);
        } else if (dupes == 1) {
            bdump -= 16;
            bdlen += 16;
        }
    }
}

/*  BPOOLD  --  Dump a buffer pool.  The buffer headers are always listed.
                If DUMPALLOC is nonzero, the contents of allocated buffers
                are dumped.  If DUMPFREE is nonzero, free blocks are
                dumped as well.  If FreeWipe checking is enabled, free
                blocks which have been clobbered will always be dumped. */

static void
bpoold( kmp_info_t *th, void *buf, int dumpalloc, int dumpfree)
{
    bfhead_t *b = BFH( (char*)buf - sizeof(bhead_t));

    while (b->bh.bb.bsize != ESent) {
        bufsize bs = b->bh.bb.bsize;

        if (bs < 0) {
            bs = -bs;
            (void) __kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n", (long) bs);
            if (dumpalloc) {
                bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
            }
        } else {
            const char *lerr = "";

            KMP_DEBUG_ASSERT(bs > 0);
            if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
                lerr = "  (Bad free list links)";
            }
            (void) __kmp_printf_no_lock("Free block:       size %6ld bytes.%s\n",
                (long) bs, lerr);
#ifdef FreeWipe
            lerr = ((char *) b) + sizeof(bfhead_t);
            if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
                (memcmp(lerr, lerr + 1,
                  (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
                (void) __kmp_printf_no_lock(
                    "(Contents of above free block have been overstored.)\n");
                bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
            } else
#endif
            if (dumpfree) {
                bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
            }
        }
        b = BFH(((char *) b) + bs);
    }
}

/*  BPOOLV  --  Validate a buffer pool. */

static int
bpoolv( kmp_info_t *th, void *buf )
{
    bfhead_t *b = BFH(buf);

    while (b->bh.bb.bsize != ESent) {
        bufsize bs = b->bh.bb.bsize;

        if (bs < 0) {
            bs = -bs;
        } else {
#ifdef FreeWipe
            char *lerr = "";
#endif

            KMP_DEBUG_ASSERT(bs > 0);
            if (bs <= 0) {
                return 0;
            }
            if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
                (void) __kmp_printf_no_lock("Free block:       size %6ld bytes.  (Bad free list links)\n",
                     (long) bs);
                KMP_DEBUG_ASSERT(0);
                return 0;
            }
#ifdef FreeWipe
            lerr = ((char *) b) + sizeof(bfhead_t);
            if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
                (memcmp(lerr, lerr + 1,
                  (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
                (void) __kmp_printf_no_lock(
                    "(Contents of above free block have been overstored.)\n");
                bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
                KMP_DEBUG_ASSERT(0);
                return 0;
            }
#endif /* FreeWipe */
        }
        b = BFH(((char *) b) + bs);
    }
    return 1;
}

#endif /* KMP_DEBUG */

/* ------------------------------------------------------------------------ */

void
__kmp_initialize_bget( kmp_info_t *th )
{
    KMP_DEBUG_ASSERT( SizeQuant >= sizeof( void * ) && (th != 0) );

    set_thr_data( th );

    bectl( th, (bget_compact_t) 0, (bget_acquire_t) malloc, (bget_release_t) free,
           (bufsize) __kmp_malloc_pool_incr );
}

void
__kmp_finalize_bget( kmp_info_t *th )
{
    thr_data_t *thr;
    bfhead_t *b;

    KMP_DEBUG_ASSERT( th != 0 );

#if BufStats
    thr = (thr_data_t *) th->th.th_local.bget_data;
    KMP_DEBUG_ASSERT( thr != NULL );
    b = thr->last_pool;

    /* If a block-release function is defined, and this free buffer
       constitutes the entire block, release it.  Note that pool_len
       is defined in such a way that the test will fail unless all
       pool blocks are the same size.  */

    /* Deallocate the last pool if one exists because we no longer do it in brel() */
    if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
        b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
    {
        KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
        KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
        KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);

        /* Unlink the buffer from the free list */
        __kmp_bget_remove_from_freelist( b );

        KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );

        (*thr->relfcn)(b);
        thr->numprel++;       /* Nr of expansion block releases */
        thr->numpblk--;       /* Total number of blocks */
        KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
    }
#endif /* BufStats */

    /* Deallocate bget_data */
    if ( th->th.th_local.bget_data != NULL ) {
        __kmp_free( th->th.th_local.bget_data );
        th->th.th_local.bget_data = NULL;
    }; // if
}

void
kmpc_set_poolsize( size_t size )
{
    bectl( __kmp_get_thread(), (bget_compact_t) 0, (bget_acquire_t) malloc,
           (bget_release_t) free, (bufsize) size );
}

size_t
kmpc_get_poolsize( void )
{
    thr_data_t *p;

    p = get_thr_data( __kmp_get_thread() );

    return p->exp_incr;
}

void
kmpc_set_poolmode( int mode )
{
    thr_data_t *p;

    if (mode == bget_mode_fifo || mode == bget_mode_lifo || mode == bget_mode_best) {
        p = get_thr_data( __kmp_get_thread() );
        p->mode = (bget_mode_t) mode;
    }
}

int
kmpc_get_poolmode( void )
{
    thr_data_t *p;

    p = get_thr_data( __kmp_get_thread() );

    return p->mode;
}

void
kmpc_get_poolstat( size_t *maxmem, size_t *allmem )
{
    kmp_info_t *th = __kmp_get_thread();
    bufsize a, b;

    __kmp_bget_dequeue( th );          /* Release any queued buffers */

    bcheck( th, &a, &b );

    *maxmem = a;
    *allmem = b;
}

void
kmpc_poolprint( void )
{
    kmp_info_t *th = __kmp_get_thread();

    __kmp_bget_dequeue( th );          /* Release any queued buffers */

    bfreed( th );
}

#endif // #if KMP_USE_BGET

/* ------------------------------------------------------------------------ */

void *
kmpc_malloc( size_t size )
{
    void * ptr;
    ptr = bget( __kmp_entry_thread(), (bufsize) size );

    return ptr;
}

void *
kmpc_calloc( size_t nelem, size_t elsize )
{
    void * ptr;
    ptr = bgetz( __kmp_entry_thread(), (bufsize) (nelem * elsize) );

    return ptr;
}

void *
kmpc_realloc( void * ptr, size_t size )
{
    void * result = NULL;

    if ( ptr == NULL ) {
        // If pointer is NULL, realloc behaves like malloc.
        result = bget( __kmp_entry_thread(), (bufsize) size );
    } else if ( size == 0 ) {
        // If size is 0, realloc behaves like free.
        // The thread must have been registered by a previous call to kmpc_malloc()
        // or kmpc_calloc(), so it is safe to call __kmp_get_thread() here rather
        // than __kmp_entry_thread().
        brel( __kmp_get_thread(), ptr );
    } else {
        result = bgetr( __kmp_entry_thread(), ptr, (bufsize) size );
    }; // if

    return result;
}

/* NOTE: the library must have already been initialized by a previous allocate */

void
kmpc_free( void * ptr )
{
    if ( ! __kmp_init_serial ) {
        return;
    }; // if
    if ( ptr != NULL ) {
        kmp_info_t *th = __kmp_get_thread();
        __kmp_bget_dequeue( th );          /* Release any queued buffers */
        brel( th, ptr );
    };
}
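
/* Usage sketch (hypothetical client code, not part of this file): these
   entry points mirror the C heap API but draw from the calling thread's
   bget pool:

       int *v = (int *) kmpc_malloc( 100 * sizeof(int) );
       v = (int *) kmpc_realloc( v, 200 * sizeof(int) );
       kmpc_free( v );

   Cross-thread frees are legal; brel() forwards such buffers to the owning
   thread's queue via __kmp_bget_enqueue(). */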


/* ------------------------------------------------------------------------ */

void *
___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL )
{
    void * ptr;
    KE_TRACE( 30, (
        "-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n",
        th,
        (int) size
        KMP_SRC_LOC_PARM
    ) );
    ptr = bget( th, (bufsize) size );
    KE_TRACE( 30, ( "<- __kmp_thread_malloc() returns %p\n", ptr ) );
    return ptr;
}

void *
___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL )
{
    void * ptr;
    KE_TRACE( 30, (
        "-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n",
        th,
        (int) nelem,
        (int) elsize
        KMP_SRC_LOC_PARM
    ) );
    ptr = bgetz( th, (bufsize) (nelem * elsize) );
    KE_TRACE( 30, ( "<- __kmp_thread_calloc() returns %p\n", ptr ) );
    return ptr;
}

void *
___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL )
{
    KE_TRACE( 30, (
        "-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n",
        th,
        ptr,
        (int) size
        KMP_SRC_LOC_PARM
    ) );
    ptr = bgetr( th, ptr, (bufsize) size );
    KE_TRACE( 30, ( "<- __kmp_thread_realloc() returns %p\n", ptr ) );
    return ptr;
}

void
___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL )
{
    KE_TRACE( 30, (
        "-> __kmp_thread_free( %p, %p ) called from %s:%d\n",
        th,
        ptr
        KMP_SRC_LOC_PARM
    ) );
    if ( ptr != NULL ) {
        __kmp_bget_dequeue( th );          /* Release any queued buffers */
        brel( th, ptr );
    }
    KE_TRACE( 30, ( "<- __kmp_thread_free()\n" ) );
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
/*
    If LEAK_MEMORY is defined, __kmp_free() will *not* free memory.  It causes
    memory leaks, but it may be useful for debugging memory corruption and
    use of freed pointers, etc.
*/
/* #define LEAK_MEMORY */

struct kmp_mem_descr {      // Memory block descriptor.
    void *  ptr_allocated;  // Pointer returned by malloc(), subject for free().
    size_t  size_allocated; // Size of allocated memory block.
    void *  ptr_aligned;    // Pointer to aligned memory, to be used by client code.
    size_t  size_aligned;   // Size of aligned memory block.
};
typedef struct kmp_mem_descr  kmp_mem_descr_t;

/*
    Allocate memory on requested boundary, fill allocated memory with 0x00.
    NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
    Must use __kmp_free when freeing memory allocated by this routine!
 */
static
void *
___kmp_allocate_align( size_t size, size_t alignment KMP_SRC_LOC_DECL )
{
    /*
        __kmp_allocate() allocates (by call to malloc()) bigger memory block than requested to
        return properly aligned pointer.  Original pointer returned by malloc() and size of
        allocated block is saved in descriptor just before the aligned pointer.  This information
        is used by __kmp_free() -- it has to pass to free() original pointer, not aligned one.

            +---------+------------+-----------------------------------+---------+
            | padding | descriptor |           aligned block           | padding |
            +---------+------------+-----------------------------------+---------+
            ^                      ^
            |                      |
            |                      +- Aligned pointer returned to caller
            +- Pointer returned by malloc()

        Aligned block is filled with zeros, paddings are filled with 0xEF.
    */

    kmp_mem_descr_t  descr;
    kmp_uintptr_t    addr_allocated;        // Address returned by malloc().
    kmp_uintptr_t    addr_aligned;          // Aligned address to return to caller.
    kmp_uintptr_t    addr_descr;            // Address of memory block descriptor.

    KE_TRACE( 25, (
        "-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
        (int) size,
        (int) alignment
        KMP_SRC_LOC_PARM
    ) );

    KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too large.
    KMP_DEBUG_ASSERT( sizeof( void * ) <= sizeof( kmp_uintptr_t ) );
        // Make sure kmp_uintptr_t is enough to store addresses.

    descr.size_aligned = size;
    descr.size_allocated = descr.size_aligned + sizeof( kmp_mem_descr_t ) + alignment;

    #if KMP_DEBUG
        descr.ptr_allocated = _malloc_src_loc( descr.size_allocated, _file_, _line_ );
    #else
        descr.ptr_allocated = malloc_src_loc( descr.size_allocated KMP_SRC_LOC_PARM );
    #endif
    KE_TRACE( 10, (
        "   malloc( %d ) returned %p\n",
        (int) descr.size_allocated,
        descr.ptr_allocated
    ) );
    if ( descr.ptr_allocated == NULL ) {
        KMP_FATAL( OutOfHeapMemory );
    };

    addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
    addr_aligned =
        ( addr_allocated + sizeof( kmp_mem_descr_t ) + alignment )
        & ~ ( alignment - 1 );
    addr_descr = addr_aligned - sizeof( kmp_mem_descr_t );

    descr.ptr_aligned = (void *) addr_aligned;

    KE_TRACE( 26, (
        "   ___kmp_allocate_align: "
        "ptr_allocated=%p, size_allocated=%d, "
        "ptr_aligned=%p, size_aligned=%d\n",
        descr.ptr_allocated,
        (int) descr.size_allocated,
        descr.ptr_aligned,
        (int) descr.size_aligned
    ) );

    KMP_DEBUG_ASSERT( addr_allocated <= addr_descr );
    KMP_DEBUG_ASSERT( addr_descr + sizeof( kmp_mem_descr_t ) == addr_aligned );
    KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
    KMP_DEBUG_ASSERT( addr_aligned % alignment == 0 );

    #ifdef KMP_DEBUG
        memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
            // Fill allocated memory block with 0xEF.
    #endif
    memset( descr.ptr_aligned, 0x00, descr.size_aligned );
        // Fill the aligned memory block (which is intended for using by caller) with 0x00. Do not
        // put this filling under KMP_DEBUG condition! Many callers expect zeroed memory. (Padding
        // bytes remain filled with 0xEF in debugging library.)
    * ( (kmp_mem_descr_t *) addr_descr ) = descr;

    KMP_MB();

    KE_TRACE( 25, ( "<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned ) );
    return descr.ptr_aligned;

} // func ___kmp_allocate_align
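
/* Worked example (illustrative numbers only): with size == 100 and
   alignment == 64 on a 64-bit build (sizeof(kmp_mem_descr_t) == 32),
   suppose malloc() returned 0x1008.  Then
       addr_aligned = (0x1008 + 32 + 64) & ~63 == 0x1040
       addr_descr   = 0x1040 - 32           == 0x1020
   so the caller receives 0x1040, and __kmp_free(0x1040) reads the
   descriptor at 0x1020 to recover 0x1008 for the real free(). */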


/*
    Allocate memory on cache line boundary, fill allocated memory with 0x00.
    Do not call this func directly! Use __kmp_allocate macro instead.
    NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
    Must use __kmp_free when freeing memory allocated by this routine!
 */
void *
___kmp_allocate( size_t size KMP_SRC_LOC_DECL )
{

    void * ptr;
    KE_TRACE( 25, ( "-> __kmp_allocate( %d ) called from %s:%d\n", (int) size KMP_SRC_LOC_PARM ) );
    ptr = ___kmp_allocate_align( size, __kmp_align_alloc KMP_SRC_LOC_PARM );
    KE_TRACE( 25, ( "<- __kmp_allocate() returns %p\n", ptr ) );
    return ptr;

} // func ___kmp_allocate

#if (BUILD_MEMORY==FIRST_TOUCH)
void *
__kmp_ft_page_allocate(size_t size)
{
    void *adr, *aadr;
#if KMP_OS_LINUX
    /* TODO: Use this function to get page size everywhere */
    int page_size = getpagesize();
#else
    /* TODO: Find windows function to get page size and use it everywhere */
    int page_size = PAGE_SIZE;
#endif /* KMP_OS_LINUX */

    adr = (void *) __kmp_thread_malloc( __kmp_get_thread(),
                                        size + page_size + KMP_PTR_SKIP);
    if ( adr == 0 )
        KMP_FATAL( OutOfHeapMemory );

    /* check to see if adr is on a page boundary. */
    if ( ( (kmp_uintptr_t) adr & (page_size - 1)) == 0)
        /* nothing to do if adr is already on a page boundary. */
        aadr = adr;
    else
        /* else set aadr to the first page boundary in the allocated memory. */
        aadr = (void *) ( ( (kmp_uintptr_t) adr + page_size) & ~(page_size - 1) );

    /* the first touch by the owner thread. */
    *((void**)aadr) = adr;

    /* skip the memory space used for storing adr above. */
    return (void*)((char*)aadr + KMP_PTR_SKIP);
}
#endif

/*
    Allocate memory on page boundary, fill allocated memory with 0x00.
    Do not call this func directly! Use __kmp_page_allocate macro instead.
    NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
    Must use __kmp_free when freeing memory allocated by this routine!
 */
void *
___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL )
{
    int    page_size = 8 * 1024;
    void * ptr;

    KE_TRACE( 25, (
        "-> __kmp_page_allocate( %d ) called from %s:%d\n",
        (int) size
        KMP_SRC_LOC_PARM
    ) );
    ptr = ___kmp_allocate_align( size, page_size KMP_SRC_LOC_PARM );
    KE_TRACE( 25, ( "<- __kmp_page_allocate( %d ) returns %p\n", (int) size, ptr ) );
    return ptr;
} // ___kmp_page_allocate
1752 
1753 /*
1754  Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
1755  In debug mode, fill the memory block with 0xEF before call to free().
1756 */
1757 void
1758 ___kmp_free( void * ptr KMP_SRC_LOC_DECL )
1759 {
1760 
1761  kmp_mem_descr_t descr;
1762  kmp_uintptr_t addr_allocated; // Address returned by malloc().
1763  kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
1764 
1765  KE_TRACE( 25, ( "-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM ) );
1766  KMP_ASSERT( ptr != NULL );
1767 
1768  descr = * ( kmp_mem_descr_t *) ( (kmp_uintptr_t) ptr - sizeof( kmp_mem_descr_t ) );
1769 
1770  KE_TRACE( 26, ( " __kmp_free: "
1771  "ptr_allocated=%p, size_allocated=%d, "
1772  "ptr_aligned=%p, size_aligned=%d\n",
1773  descr.ptr_allocated, (int) descr.size_allocated,
1774  descr.ptr_aligned, (int) descr.size_aligned ));
1775 
1776  addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
1777  addr_aligned = (kmp_uintptr_t) descr.ptr_aligned;
1778 
1779  KMP_DEBUG_ASSERT( addr_aligned % CACHE_LINE == 0 );
1780  KMP_DEBUG_ASSERT( descr.ptr_aligned == ptr );
1781  KMP_DEBUG_ASSERT( addr_allocated + sizeof( kmp_mem_descr_t ) <= addr_aligned );
1782  KMP_DEBUG_ASSERT( descr.size_aligned < descr.size_allocated );
1783  KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
1784 
1785  #ifdef KMP_DEBUG
1786  memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
1787  // Fill memory block with 0xEF, it helps catch using freed memory.
1788  #endif
1789 
1790  #ifndef LEAK_MEMORY
1791  KE_TRACE( 10, ( " free( %p )\n", descr.ptr_allocated ) );
1792  # ifdef KMP_DEBUG
1793  _free_src_loc( descr.ptr_allocated, _file_, _line_ );
1794  # else
1795  free_src_loc( descr.ptr_allocated KMP_SRC_LOC_PARM );
1796  # endif
1797  #endif
1798 
1799  KMP_MB();
1800 
1801  KE_TRACE( 25, ( "<- __kmp_free() returns\n" ) );
1802 
1803 } // func ___kmp_free
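/*
    For reference, the layout that ___kmp_free() decodes (as produced by
    ___kmp_allocate_align() and checked by the asserts above):

        descr.ptr_allocated                     ptr == descr.ptr_aligned
        |                                       |
        v                                       v
        +----- padding -----+- kmp_mem_descr_t -+--- size_aligned bytes ---+--+
        <------------------------- size_allocated -------------------------->

    The descriptor always sits immediately before the aligned pointer the
    caller sees, which is why a single pointer subtraction recovers it.
*/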
1804 
1805 /* ------------------------------------------------------------------------ */
1806 /* ------------------------------------------------------------------------ */
1807 
1808 #if USE_FAST_MEMORY == 3
1809 // Allocate fast memory by first scanning the thread's free lists.
1810 // If a chunk of the right size exists, grab it off the free list.
1811 // Otherwise allocate normally from the thread's underlying bget pool.
1812 
1813 // AC: How to choose the limit? Just use 16 for now...
1814 #define KMP_FREE_LIST_LIMIT 16
1815 
1816 // Always use 128 bytes for determining buckets for caching memory blocks
1817 #define DCACHE_LINE 128
1818 
1819 void *
1820 ___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL )
1821 {
1822  void * ptr;
1823  int num_lines;
1824  int idx;
1825  int index;
1826  void * alloc_ptr;
1827  size_t alloc_size;
1828  kmp_mem_descr_t * descr;
1829 
1830  KE_TRACE( 25, ( "-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
1831  __kmp_gtid_from_thread(this_thr), (int) size KMP_SRC_LOC_PARM ) );
1832 
1833  num_lines = ( size + DCACHE_LINE - 1 ) / DCACHE_LINE;
1834  idx = num_lines - 1;
1835  KMP_DEBUG_ASSERT( idx >= 0 );
1836  if ( idx < 2 ) {
1837  index = 0; // idx is [ 0, 1 ], use first free list
1838  num_lines = 2; // 1, 2 cache lines or less than cache line
1839  } else if ( ( idx >>= 2 ) == 0 ) {
1840  index = 1; // idx is [ 2, 3 ], use second free list
1841  num_lines = 4; // 3, 4 cache lines
1842  } else if ( ( idx >>= 2 ) == 0 ) {
1843  index = 2; // idx is [ 4, 15 ], use third free list
1844  num_lines = 16; // 5, 6, ..., 16 cache lines
1845  } else if ( ( idx >>= 2 ) == 0 ) {
1846  index = 3; // idx is [ 16, 63 ], use fourth free list
1847  num_lines = 64; // 17, 18, ..., 64 cache lines
1848  } else {
1849  goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
1850  }
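    /*
        A worked example of the bucket selection above: for size = 1000,
        num_lines = ( 1000 + 127 ) / 128 = 8, so idx = 7; 7 >> 2 = 1 (not 0),
        then 1 >> 2 = 0, so index = 2 and the request is rounded up to
        num_lines = 16 cache lines (2 KB).
    */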
1851 
1852  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
1853  if ( ptr != NULL ) {
1854  // pop the head of no-sync free list
1855  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1856  KMP_DEBUG_ASSERT( this_thr ==
1857  ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
1858  goto end;
1859  }
1860  ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
1861  if ( ptr != NULL ) {
1862  // no-sync free list is empty, use sync free list (filled in by other threads only)
1863  // pop the head of the sync free list, push NULL instead
1864  while ( ! KMP_COMPARE_AND_STORE_PTR(
1865  &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, NULL ) )
1866  {
1867  KMP_CPU_PAUSE();
1868  ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
1869  }
1870  // push the rest of the chain onto the no-sync free list (can be NULL if there was only one block)
1871  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1872  KMP_DEBUG_ASSERT( this_thr ==
1873  ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
1874  goto end;
1875  }
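    /*
        Note on the CAS loop above: other threads only push at the head of
        th_free_list_sync, and only the owning thread ever removes from it,
        always by swapping the whole chain out for NULL. Detaching the entire
        chain in one compare-and-store is therefore safe; the head block is
        consumed and the remainder refills the no-sync list.
    */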
1876 
1877  alloc_call:
1878  // no suitable block found in the free lists, so allocate one
1879  size = num_lines * DCACHE_LINE;
1880 
1881  alloc_size = size + sizeof( kmp_mem_descr_t ) + DCACHE_LINE;
1882  KE_TRACE( 25, ( "__kmp_fast_allocate: T#%d Calling bget with alloc_size %d\n",
1883  __kmp_gtid_from_thread( this_thr ), (int) alloc_size ) );
1884  alloc_ptr = bget( this_thr, (bufsize) alloc_size );
1885 
1886  // align ptr to DCACHE_LINE
1887  ptr = (void *)(( ((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + DCACHE_LINE ) & ~( DCACHE_LINE - 1 ));
1888  descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
1889 
1890  descr->ptr_allocated = alloc_ptr; // remember the allocated pointer
1891  // we don't need size_allocated
1892  descr->ptr_aligned = (void *)this_thr; // re-use ptr_aligned to remember the allocating thread
1893  // (it is already saved in the bget buffer,
1894  // but we may want to use another allocator in the future)
1895  descr->size_aligned = size;
1896 
1897  end:
1898  KE_TRACE( 25, ( "<- __kmp_fast_allocate( T#%d ) returns %p\n",
1899  __kmp_gtid_from_thread( this_thr ), ptr ) );
1900  return ptr;
1901 } // func __kmp_fast_allocate
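/*
    For reference, the shift chains in ___kmp_fast_allocate() above (and the
    matching chain in ___kmp_fast_free() below) implement this size-class
    mapping; the helper is an illustrative sketch only, not compiled into the
    runtime:

        static int __bucket_index( size_t size ) {
            size_t lines = ( size + DCACHE_LINE - 1 ) / DCACHE_LINE;
            if ( lines <= 2 ) return 0; // rounded up to 2 lines ( 256 bytes )
            if ( lines <= 4 ) return 1; // rounded up to 4 lines ( 512 bytes )
            if ( lines <= 16 ) return 2; // rounded up to 16 lines ( 2 KB )
            if ( lines <= 64 ) return 3; // rounded up to 64 lines ( 8 KB )
            return -1; // 65+ cache lines: bypass the free lists, use bget()
        }
*/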
1902 
1903 // Free fast memory and place it on the thread's free list if it is of
1904 // the correct size.
1905 void
1906 ___kmp_fast_free( kmp_info_t *this_thr, void * ptr KMP_SRC_LOC_DECL )
1907 {
1908  kmp_mem_descr_t * descr;
1909  kmp_info_t * alloc_thr;
1910  size_t size;
1911  size_t idx;
1912  int index;
1913 
1914  KE_TRACE( 25, ( "-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
1915  __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM ) );
1916  KMP_ASSERT( ptr != NULL );
1917 
1918  descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
1919 
1920  KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
1921  (int) descr->size_aligned ) );
1922 
1923  size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
1924 
1925  idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
1926  if ( idx == size ) {
1927  index = 0; // 2 cache lines
1928  } else if ( ( idx <<= 1 ) == size ) {
1929  index = 1; // 4 cache lines
1930  } else if ( ( idx <<= 2 ) == size ) {
1931  index = 2; // 16 cache lines
1932  } else if ( ( idx <<= 2 ) == size ) {
1933  index = 3; // 64 cache lines
1934  } else {
1935  KMP_DEBUG_ASSERT( size > DCACHE_LINE * 64 );
1936  goto free_call; // 65 or more cache lines ( > 8KB )
1937  }
1938 
1939  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
1940  if ( alloc_thr == this_thr ) {
1941  // push block to self no-sync free list, linking previous head (LIFO)
1942  *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
1943  this_thr->th.th_free_lists[index].th_free_list_self = ptr;
1944  } else {
1945  void * head = this_thr->th.th_free_lists[index].th_free_list_other;
1946  if ( head == NULL ) {
1947  // Create new free list
1948  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1949  *((void **)ptr) = NULL; // mark the tail of the list
1950  descr->size_allocated = (size_t)1; // head of the list keeps its length
1951  } else {
1952  // need to check the existing "other" list's owning thread and the queue size
1953  kmp_mem_descr_t * dsc = (kmp_mem_descr_t *)( (char*)head - sizeof(kmp_mem_descr_t) );
1954  kmp_info_t * q_th = (kmp_info_t *)(dsc->ptr_aligned); // allocating thread, same for all queue nodes
1955  size_t q_sz = dsc->size_allocated + 1; // new size if we add the current block
1956  if ( q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT ) {
1957  // we can add the current block to the "other" list, no sync needed
1958  *((void **)ptr) = head;
1959  descr->size_allocated = q_sz;
1960  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1961  } else {
1962  // either the queued blocks' owner differs from this block's owner, or the
1963  // size limit was exceeded: return the old queue to its allocating thread
1964  // (q_th) synchronously, and start a new list for alloc_thr's blocks
1965  void * old_ptr;
1966  void * tail = head;
1967  void * next = *((void **)head);
1968  while ( next != NULL ) {
1969  KMP_DEBUG_ASSERT(
1970  // queue size should decrease by 1 each step through the list
1971  ((kmp_mem_descr_t*)((char*)next - sizeof(kmp_mem_descr_t)))->size_allocated + 1 ==
1972  ((kmp_mem_descr_t*)((char*)tail - sizeof(kmp_mem_descr_t)))->size_allocated );
1973  tail = next; // remember tail node
1974  next = *((void **)next);
1975  }
1976  KMP_DEBUG_ASSERT( q_th != NULL );
1977  // push block to owner's sync free list
1978  old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
1979  /* the next pointer must be set before setting free_list to ptr to avoid
1980  exposing a broken list to other threads, even for an instant. */
1981  *((void **)tail) = old_ptr;
1982 
1983  while ( ! KMP_COMPARE_AND_STORE_PTR(
1984  &q_th->th.th_free_lists[index].th_free_list_sync,
1985  old_ptr,
1986  head ) )
1987  {
1988  KMP_CPU_PAUSE();
1989  old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
1990  *((void **)tail) = old_ptr;
1991  }
1992 
1993  // start a new "other" list for non-self blocks
1994  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1995  *((void **)ptr) = NULL;
1996  descr->size_allocated = (size_t)1; // head of queue keeps its length
1997  }
1998  }
1999  }
2000  goto end;
2001 
2002  free_call:
2003  KE_TRACE(25, ( "__kmp_fast_free: T#%d Calling brel for size %d\n",
2004  __kmp_gtid_from_thread( this_thr ), (int) size ) );
2005  __kmp_bget_dequeue( this_thr ); /* Release any queued buffers */
2006  brel( this_thr, descr->ptr_allocated );
2007 
2008  end:
2009  KE_TRACE( 25, ( "<- __kmp_fast_free() returns\n" ) );
2010 
2011 } // func __kmp_fast_free
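/*
    To summarize the three paths in ___kmp_fast_free(): a block freed by its
    owning thread is pushed onto that thread's no-sync list (no atomics); a
    block owned by another thread is batched on this thread's "other" list;
    and when the batch would exceed KMP_FREE_LIST_LIMIT entries, or the owner
    changes, the whole chain is returned to the owner's sync list with a
    compare-and-store push.
*/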
2012 
2013 
2014 // Initialize the thread free lists related to fast memory
2015 // Only do this when a thread is initially created.
2016 void
2017 __kmp_initialize_fast_memory( kmp_info_t *this_thr )
2018 {
2019  KE_TRACE(10, ( "__kmp_initialize_fast_memory: Called from th %p\n", this_thr ) );
2020 
2021  memset ( this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof( kmp_free_list_t ) );
2022 }
2023 
2024 // Free the memory in the thread free lists related to fast memory
2025 // Only do this when a thread is being reaped (destroyed).
2026 void
2027 __kmp_free_fast_memory( kmp_info_t *th )
2028 {
2029  // Assuming BGET is the underlying allocator, walk through its structures...
2030  int bin;
2031  thr_data_t * thr = get_thr_data( th );
2032  void ** lst = NULL;
2033 
2034  KE_TRACE(5, ( "__kmp_free_fast_memory: Called T#%d\n",
2035  __kmp_gtid_from_thread( th ) ) );
2036 
2037  __kmp_bget_dequeue( th ); // Release any queued buffers
2038 
2039  // Dig through free lists and extract all allocated blocks
2040  for ( bin = 0; bin < MAX_BGET_BINS; ++bin ) {
2041  bfhead_t * b = thr->freelist[ bin ].ql.flink;
2042  while ( b != &thr->freelist[ bin ] ) {
2043  if ( (kmp_uintptr_t)b->bh.bb.bthr & 1 ) { // low bit of bthr set: the buffer is an allocated address (whole block)
2044  *((void**)b) = lst; // link into the list (overwrites bthr, but flink stays intact)
2045  lst = (void**)b; // push b onto lst
2046  }
2047  b = b->ql.flink; // get next buffer
2048  }
2049  }
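    /*
        Blocks are collected into lst first (reusing the bthr word as the
        link) so the bin walk above can finish through the still-intact
        flink pointers; only then, below, is each block handed to the
        release function.
    */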
2050  while ( lst != NULL ) {
2051  void * next = *lst;
2052  KE_TRACE(10, ( "__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
2053  lst, next, th, __kmp_gtid_from_thread( th ) ) );
2054  (*thr->relfcn)(lst);
2055  #if BufStats
2056  // count blocks to prevent problems in __kmp_finalize_bget()
2057  thr->numprel++; /* Nr of expansion block releases */
2058  thr->numpblk--; /* Total number of blocks */
2059  #endif
2060  lst = (void**)next;
2061  }
2062 
2063  KE_TRACE(5, ( "__kmp_free_fast_memory: Freed T#%d\n",
2064  __kmp_gtid_from_thread( th ) ) );
2065 }
2066 
2067 #endif // USE_FAST_MEMORY