Intel® OpenMP* Runtime Library
kmp_lock.cpp
1 /*
2  * kmp_lock.cpp -- lock-related functions
3  * $Revision: 43389 $
4  * $Date: 2014-08-11 10:54:01 -0500 (Mon, 11 Aug 2014) $
5  */
6 
7 /* <copyright>
8  Copyright (c) 1997-2014 Intel Corporation. All Rights Reserved.
9 
10  Redistribution and use in source and binary forms, with or without
11  modification, are permitted provided that the following conditions
12  are met:
13 
14  * Redistributions of source code must retain the above copyright
15  notice, this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright
17  notice, this list of conditions and the following disclaimer in the
18  documentation and/or other materials provided with the distribution.
19  * Neither the name of Intel Corporation nor the names of its
20  contributors may be used to endorse or promote products derived
21  from this software without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 </copyright> */
36 
37 #include <stddef.h>
38 
39 #include "kmp.h"
40 #include "kmp_itt.h"
41 #include "kmp_i18n.h"
42 #include "kmp_lock.h"
43 #include "kmp_io.h"
44 
45 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
46 # include <unistd.h>
47 # include <sys/syscall.h>
48 // We should really include <futex.h>, but that causes compatibility problems on different
49 // Linux* OS distributions that either require that you include (or break when you try to include)
50 // <pci/types.h>.
51 // Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
52 // we just define the constants here and don't include <futex.h>
53 # ifndef FUTEX_WAIT
54 # define FUTEX_WAIT 0
55 # endif
56 # ifndef FUTEX_WAKE
57 # define FUTEX_WAKE 1
58 # endif
59 #endif
60 
61 /* Implement spin locks for internal library use. */
62 /* The algorithm implemented is Lamport's bakery lock [1974]. */
63 
64 void
65 __kmp_validate_locks( void )
66 {
67  int i;
68  kmp_uint32 x, y;
69 
70  /* Check to make sure unsigned arithmetic wraps properly */
71  x = ~((kmp_uint32) 0) - 2;
72  y = x - 2;
73 
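 /* Worked instance (assuming 32-bit wrap): after four increments x has
    wrapped to 0x00000000 while y is 0xFFFFFFFE, and 0x00000000 - 0xFFFFFFFE
    still yields 2 modulo 2^32, so the assertion below holds across the wrap. */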
74  for (i = 0; i < 8; ++i, ++x, ++y) {
75  kmp_uint32 z = (x - y);
76  KMP_ASSERT( z == 2 );
77  }
78 
79  KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
80 }
81 
82 
83 /* ------------------------------------------------------------------------ */
84 /* test and set locks */
85 
86 //
87 // For the non-nested locks, we can only assume that the first 4 bytes were
88 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
89 // compiler only allocates a 4 byte pointer on IA-32 architecture. On
90 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
91 //
92 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
93 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
94 //
95 
96 static kmp_int32
97 __kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
98 {
99  return TCR_4( lck->lk.poll ) - 1;
100 }
101 
102 static inline bool
103 __kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
104 {
105  return lck->lk.depth_locked != -1;
106 }
107 
108 __forceinline static void
109 __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
110 {
111  KMP_MB();
112 
113 #ifdef USE_LOCK_PROFILE
114  kmp_uint32 curr = TCR_4( lck->lk.poll );
115  if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
116  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
117  /* else __kmp_printf( "." );*/
118 #endif /* USE_LOCK_PROFILE */
119 
120  if ( ( lck->lk.poll == 0 )
121  && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
122  KMP_FSYNC_ACQUIRED(lck);
123  return;
124  }
125 
126  kmp_uint32 spins;
127  KMP_FSYNC_PREPARE( lck );
128  KMP_INIT_YIELD( spins );
129  if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
130  __kmp_xproc ) ) {
131  KMP_YIELD( TRUE );
132  }
133  else {
134  KMP_YIELD_SPIN( spins );
135  }
136 
137  while ( ( lck->lk.poll != 0 ) ||
138  ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) ) {
139  //
140  // FIXME - use exponential backoff here
141  //
142  if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
143  __kmp_xproc ) ) {
144  KMP_YIELD( TRUE );
145  }
146  else {
147  KMP_YIELD_SPIN( spins );
148  }
149  }
150  KMP_FSYNC_ACQUIRED( lck );
151 }
152 
153 void
154 __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
155 {
156  __kmp_acquire_tas_lock_timed_template( lck, gtid );
157 }
158 
159 static void
160 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
161 {
162  char const * const func = "omp_set_lock";
163  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
164  && __kmp_is_tas_lock_nestable( lck ) ) {
165  KMP_FATAL( LockNestableUsedAsSimple, func );
166  }
167  if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
168  KMP_FATAL( LockIsAlreadyOwned, func );
169  }
170  __kmp_acquire_tas_lock( lck, gtid );
171 }
172 
173 int
174 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
175 {
176  if ( ( lck->lk.poll == 0 )
177  && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
178  KMP_FSYNC_ACQUIRED( lck );
179  return TRUE;
180  }
181  return FALSE;
182 }
183 
184 static int
185 __kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
186 {
187  char const * const func = "omp_test_lock";
188  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
189  && __kmp_is_tas_lock_nestable( lck ) ) {
190  KMP_FATAL( LockNestableUsedAsSimple, func );
191  }
192  return __kmp_test_tas_lock( lck, gtid );
193 }
194 
195 void
196 __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
197 {
198  KMP_MB(); /* Flush all pending memory write invalidates. */
199 
200  KMP_FSYNC_RELEASING(lck);
201  KMP_ST_REL32( &(lck->lk.poll), 0 );
202 
203  KMP_MB(); /* Flush all pending memory write invalidates. */
204 
205  KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
206  __kmp_xproc ) );
207 }
208 
209 static void
210 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
211 {
212  char const * const func = "omp_unset_lock";
213  KMP_MB(); /* in case another processor initialized lock */
214  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
215  && __kmp_is_tas_lock_nestable( lck ) ) {
216  KMP_FATAL( LockNestableUsedAsSimple, func );
217  }
218  if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
219  KMP_FATAL( LockUnsettingFree, func );
220  }
221  if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
222  && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
223  KMP_FATAL( LockUnsettingSetByAnother, func );
224  }
225  __kmp_release_tas_lock( lck, gtid );
226 }
227 
228 void
229 __kmp_init_tas_lock( kmp_tas_lock_t * lck )
230 {
231  TCW_4( lck->lk.poll, 0 );
232 }
233 
234 static void
235 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
236 {
237  __kmp_init_tas_lock( lck );
238 }
239 
240 void
241 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
242 {
243  lck->lk.poll = 0;
244 }
245 
246 static void
247 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
248 {
249  char const * const func = "omp_destroy_lock";
250  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
251  && __kmp_is_tas_lock_nestable( lck ) ) {
252  KMP_FATAL( LockNestableUsedAsSimple, func );
253  }
254  if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
255  KMP_FATAL( LockStillOwned, func );
256  }
257  __kmp_destroy_tas_lock( lck );
258 }
259 
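//
// Illustrative sketch (not part of the library): the typical lifecycle of a
// simple test-and-set lock driven through the routines above. The gtid value
// is assumed to be the caller's global thread id, obtained elsewhere in the
// runtime.
//
//     kmp_tas_lock_t lock;
//     __kmp_init_tas_lock( &lock );            // lk.poll = 0, lock starts free
//     __kmp_acquire_tas_lock( &lock, gtid );   // spins until the CAS stores gtid + 1 into lk.poll
//     /* ... critical section ... */
//     __kmp_release_tas_lock( &lock, gtid );   // stores 0 back into lk.poll
//     __kmp_destroy_tas_lock( &lock );
//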
260 
261 //
262 // nested test and set locks
263 //
264 
265 void
266 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
267 {
268  KMP_DEBUG_ASSERT( gtid >= 0 );
269 
270  if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
271  lck->lk.depth_locked += 1;
272  }
273  else {
274  __kmp_acquire_tas_lock_timed_template( lck, gtid );
275  lck->lk.depth_locked = 1;
276  }
277 }
278 
279 static void
280 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
281 {
282  char const * const func = "omp_set_nest_lock";
283  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
284  KMP_FATAL( LockSimpleUsedAsNestable, func );
285  }
286  __kmp_acquire_nested_tas_lock( lck, gtid );
287 }
288 
289 int
290 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
291 {
292  int retval;
293 
294  KMP_DEBUG_ASSERT( gtid >= 0 );
295 
296  if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
297  retval = ++lck->lk.depth_locked;
298  }
299  else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
300  retval = 0;
301  }
302  else {
303  KMP_MB();
304  retval = lck->lk.depth_locked = 1;
305  }
306  return retval;
307 }
308 
309 static int
310 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
311 {
312  char const * const func = "omp_test_nest_lock";
313  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
314  KMP_FATAL( LockSimpleUsedAsNestable, func );
315  }
316  return __kmp_test_nested_tas_lock( lck, gtid );
317 }
318 
319 void
320 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
321 {
322  KMP_DEBUG_ASSERT( gtid >= 0 );
323 
324  KMP_MB();
325  if ( --(lck->lk.depth_locked) == 0 ) {
326  __kmp_release_tas_lock( lck, gtid );
327  }
328 }
329 
330 static void
331 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
332 {
333  char const * const func = "omp_unset_nest_lock";
334  KMP_MB(); /* in case another processor initialized lock */
335  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
336  KMP_FATAL( LockSimpleUsedAsNestable, func );
337  }
338  if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
339  KMP_FATAL( LockUnsettingFree, func );
340  }
341  if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
342  KMP_FATAL( LockUnsettingSetByAnother, func );
343  }
344  __kmp_release_nested_tas_lock( lck, gtid );
345 }
346 
347 void
348 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
349 {
350  __kmp_init_tas_lock( lck );
351  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
352 }
353 
354 static void
355 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
356 {
357  __kmp_init_nested_tas_lock( lck );
358 }
359 
360 void
361 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
362 {
363  __kmp_destroy_tas_lock( lck );
364  lck->lk.depth_locked = 0;
365 }
366 
367 static void
368 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
369 {
370  char const * const func = "omp_destroy_nest_lock";
371  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
372  KMP_FATAL( LockSimpleUsedAsNestable, func );
373  }
374  if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
375  KMP_FATAL( LockStillOwned, func );
376  }
377  __kmp_destroy_nested_tas_lock( lck );
378 }
379 
380 
381 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
382 
383 /* ------------------------------------------------------------------------ */
384 /* futex locks */
385 
386 // futex locks are really just test and set locks, with a different method
387 // of handling contention. They take the same amount of space as test and
388 // set locks, and are allocated the same way (i.e. use the area allocated by
389 // the compiler for non-nested locks / allocate nested locks on the heap).
390 
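//
// Minimal sketch of the futex protocol used below (an illustration, not
// additional API). The lock word lk.poll holds ( gtid + 1 ) << 1 for the
// owner, with bit 0 set when there may be sleeping waiters:
//
//     // waiter side, after failing the CAS and setting the wait bit:
//     syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT, poll_val, NULL, NULL, 0 );
//
//     // releaser side, after swapping 0 into the lock word:
//     if ( poll_val & 1 )
//         syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, 1, NULL, NULL, 0 );
//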
391 static kmp_int32
392 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
393 {
394  return ( TCR_4( lck->lk.poll ) >> 1 ) - 1;
395 }
396 
397 static inline bool
398 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
399 {
400  return lck->lk.depth_locked != -1;
401 }
402 
403 __forceinline static void
404 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
405 {
406  kmp_int32 gtid_code = ( gtid + 1 ) << 1;
407 
408  KMP_MB();
409 
410 #ifdef USE_LOCK_PROFILE
411  kmp_uint32 curr = TCR_4( lck->lk.poll );
412  if ( ( curr != 0 ) && ( curr != gtid_code ) )
413  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
414  /* else __kmp_printf( "." );*/
415 #endif /* USE_LOCK_PROFILE */
416 
417  KMP_FSYNC_PREPARE( lck );
418  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
419  lck, lck->lk.poll, gtid ) );
420 
421  kmp_int32 poll_val;
422  while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), 0,
423  gtid_code ) ) != 0 ) {
424  kmp_int32 cond = poll_val & 1;
425  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
426  lck, gtid, poll_val, cond ) );
427 
428  //
429  // NOTE: do not write this branch condition as
430  //
431  //     if ( poll_val & 1 == 0 )
432  //
433  // Because == binds more tightly than &, that parses as poll_val & ( 1 == 0 ),
434  // which is always 0, so the block would be skipped regardless of the LSB of poll_val.
435  //
436  if ( ! cond ) {
437  //
438  // Try to set the lsb in the poll to indicate to the owner
439  // thread that they need to wake this thread up.
440  //
441  if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ),
442  poll_val, poll_val | 1 ) ) {
443  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
444  lck, lck->lk.poll, gtid ) );
445  continue;
446  }
447  poll_val |= 1;
448 
449  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
450  lck, lck->lk.poll, gtid ) );
451  }
452 
453  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
454  lck, gtid, poll_val ) );
455 
456  kmp_int32 rc;
457  if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
458  poll_val, NULL, NULL, 0 ) ) != 0 ) {
459  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
460  lck, gtid, poll_val, rc, errno ) );
461  continue;
462  }
463 
464  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
465  lck, gtid, poll_val ) );
466  //
467  // This thread has now done a successful futex wait call and was
468  // entered on the OS futex queue. We must now perform a futex
469  // wake call when releasing the lock, as we have no idea how many
470  // other threads are in the queue.
471  //
472  gtid_code |= 1;
473  }
474 
475  KMP_FSYNC_ACQUIRED( lck );
476  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
477  lck, lck->lk.poll, gtid ) );
478 }
479 
480 void
481 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
482 {
483  __kmp_acquire_futex_lock_timed_template( lck, gtid );
484 }
485 
486 static void
487 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
488 {
489  char const * const func = "omp_set_lock";
490  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
491  && __kmp_is_futex_lock_nestable( lck ) ) {
492  KMP_FATAL( LockNestableUsedAsSimple, func );
493  }
494  if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
495  KMP_FATAL( LockIsAlreadyOwned, func );
496  }
497  __kmp_acquire_futex_lock( lck, gtid );
498 }
499 
500 int
501 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
502 {
503  if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, ( gtid + 1 ) << 1 ) ) {
504  KMP_FSYNC_ACQUIRED( lck );
505  return TRUE;
506  }
507  return FALSE;
508 }
509 
510 static int
511 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
512 {
513  char const * const func = "omp_test_lock";
514  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
515  && __kmp_is_futex_lock_nestable( lck ) ) {
516  KMP_FATAL( LockNestableUsedAsSimple, func );
517  }
518  return __kmp_test_futex_lock( lck, gtid );
519 }
520 
521 void
522 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
523 {
524  KMP_MB(); /* Flush all pending memory write invalidates. */
525 
526  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
527  lck, lck->lk.poll, gtid ) );
528 
529  KMP_FSYNC_RELEASING(lck);
530 
531  kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), 0 );
532 
533  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
534  lck, gtid, poll_val ) );
535 
536  if ( poll_val & 1 ) {
537  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
538  lck, gtid ) );
539  syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, 1, NULL, NULL, 0 );
540  }
541 
542  KMP_MB(); /* Flush all pending memory write invalidates. */
543 
544  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
545  lck, lck->lk.poll, gtid ) );
546 
547  KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
548  __kmp_xproc ) );
549 }
550 
551 static void
552 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
553 {
554  char const * const func = "omp_unset_lock";
555  KMP_MB(); /* in case another processor initialized lock */
556  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
557  && __kmp_is_futex_lock_nestable( lck ) ) {
558  KMP_FATAL( LockNestableUsedAsSimple, func );
559  }
560  if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
561  KMP_FATAL( LockUnsettingFree, func );
562  }
563  if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
564  && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
565  KMP_FATAL( LockUnsettingSetByAnother, func );
566  }
567  __kmp_release_futex_lock( lck, gtid );
568 }
569 
570 void
571 __kmp_init_futex_lock( kmp_futex_lock_t * lck )
572 {
573  TCW_4( lck->lk.poll, 0 );
574 }
575 
576 static void
577 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
578 {
579  __kmp_init_futex_lock( lck );
580 }
581 
582 void
583 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
584 {
585  lck->lk.poll = 0;
586 }
587 
588 static void
589 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
590 {
591  char const * const func = "omp_destroy_lock";
592  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
593  && __kmp_is_futex_lock_nestable( lck ) ) {
594  KMP_FATAL( LockNestableUsedAsSimple, func );
595  }
596  if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
597  KMP_FATAL( LockStillOwned, func );
598  }
599  __kmp_destroy_futex_lock( lck );
600 }
601 
602 
603 //
604 // nested futex locks
605 //
606 
607 void
608 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
609 {
610  KMP_DEBUG_ASSERT( gtid >= 0 );
611 
612  if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
613  lck->lk.depth_locked += 1;
614  }
615  else {
616  __kmp_acquire_futex_lock_timed_template( lck, gtid );
617  lck->lk.depth_locked = 1;
618  }
619 }
620 
621 static void
622 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
623 {
624  char const * const func = "omp_set_nest_lock";
625  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
626  KMP_FATAL( LockSimpleUsedAsNestable, func );
627  }
628  __kmp_acquire_nested_futex_lock( lck, gtid );
629 }
630 
631 int
632 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
633 {
634  int retval;
635 
636  KMP_DEBUG_ASSERT( gtid >= 0 );
637 
638  if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
639  retval = ++lck->lk.depth_locked;
640  }
641  else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
642  retval = 0;
643  }
644  else {
645  KMP_MB();
646  retval = lck->lk.depth_locked = 1;
647  }
648  return retval;
649 }
650 
651 static int
652 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
653 {
654  char const * const func = "omp_test_nest_lock";
655  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
656  KMP_FATAL( LockSimpleUsedAsNestable, func );
657  }
658  return __kmp_test_nested_futex_lock( lck, gtid );
659 }
660 
661 void
662 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
663 {
664  KMP_DEBUG_ASSERT( gtid >= 0 );
665 
666  KMP_MB();
667  if ( --(lck->lk.depth_locked) == 0 ) {
668  __kmp_release_futex_lock( lck, gtid );
669  }
670 }
671 
672 static void
673 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
674 {
675  char const * const func = "omp_unset_nest_lock";
676  KMP_MB(); /* in case another processor initialized lock */
677  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
678  KMP_FATAL( LockSimpleUsedAsNestable, func );
679  }
680  if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
681  KMP_FATAL( LockUnsettingFree, func );
682  }
683  if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
684  KMP_FATAL( LockUnsettingSetByAnother, func );
685  }
686  __kmp_release_nested_futex_lock( lck, gtid );
687 }
688 
689 void
690 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
691 {
692  __kmp_init_futex_lock( lck );
693  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
694 }
695 
696 static void
697 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
698 {
699  __kmp_init_nested_futex_lock( lck );
700 }
701 
702 void
703 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
704 {
705  __kmp_destroy_futex_lock( lck );
706  lck->lk.depth_locked = 0;
707 }
708 
709 static void
710 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
711 {
712  char const * const func = "omp_destroy_nest_lock";
713  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
714  KMP_FATAL( LockSimpleUsedAsNestable, func );
715  }
716  if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
717  KMP_FATAL( LockStillOwned, func );
718  }
719  __kmp_destroy_nested_futex_lock( lck );
720 }
721 
722 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
723 
724 
725 /* ------------------------------------------------------------------------ */
726 /* ticket (bakery) locks */
727 
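//
// In outline (a distilled sketch of the code below, not additional API):
// each acquirer atomically takes a ticket from next_ticket and spins until
// now_serving reaches it; release advances now_serving, so the lock is
// granted in FIFO order.
//
//     my_ticket = fetch_and_increment( &lck->lk.next_ticket );   // KMP_TEST_THEN_INC32 below
//     while ( lck->lk.now_serving != my_ticket )
//         ;                                                      // KMP_WAIT_YIELD + __kmp_bakery_check below
//     /* ... critical section ... */
//     lck->lk.now_serving += 1;                                  // release
//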
728 static kmp_int32
729 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
730 {
731  return TCR_4( lck->lk.owner_id ) - 1;
732 }
733 
734 static inline bool
735 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
736 {
737  return lck->lk.depth_locked != -1;
738 }
739 
740 static kmp_uint32
741 __kmp_bakery_check(kmp_uint value, kmp_uint checker)
742 {
743  register kmp_uint32 pause;
744 
745  if (value == checker) {
746  return TRUE;
747  }
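  /* Not our turn yet: the empty loop below is a crude delay, roughly
     proportional to how many tickets are still ahead of ours (checker is
     my_ticket, value is the now_serving sample), before KMP_WAIT_YIELD
     re-checks. */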
748  for (pause = checker - value; pause != 0; --pause);
749  return FALSE;
750 }
751 
752 __forceinline static void
753 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
754 {
755  kmp_uint32 my_ticket;
756  KMP_MB();
757 
758  my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );
759 
760 #ifdef USE_LOCK_PROFILE
761  if ( TCR_4( lck->lk.now_serving ) != my_ticket )
762  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
763  /* else __kmp_printf( "." );*/
764 #endif /* USE_LOCK_PROFILE */
765 
766  if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
767  KMP_FSYNC_ACQUIRED(lck);
768  return;
769  }
770  KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
771  KMP_FSYNC_ACQUIRED(lck);
772 }
773 
774 void
775 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
776 {
777  __kmp_acquire_ticket_lock_timed_template( lck, gtid );
778 }
779 
780 static void
781 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
782 {
783  char const * const func = "omp_set_lock";
784  if ( lck->lk.initialized != lck ) {
785  KMP_FATAL( LockIsUninitialized, func );
786  }
787  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
788  KMP_FATAL( LockNestableUsedAsSimple, func );
789  }
790  if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
791  KMP_FATAL( LockIsAlreadyOwned, func );
792  }
793 
794  __kmp_acquire_ticket_lock( lck, gtid );
795 
796  lck->lk.owner_id = gtid + 1;
797 }
798 
799 int
800 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
801 {
802  kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
803  if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
804  kmp_uint32 next_ticket = my_ticket + 1;
805  if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
806  my_ticket, next_ticket ) ) {
807  KMP_FSYNC_ACQUIRED( lck );
808  return TRUE;
809  }
810  }
811  return FALSE;
812 }
813 
814 static int
815 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
816 {
817  char const * const func = "omp_test_lock";
818  if ( lck->lk.initialized != lck ) {
819  KMP_FATAL( LockIsUninitialized, func );
820  }
821  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
822  KMP_FATAL( LockNestableUsedAsSimple, func );
823  }
824 
825  int retval = __kmp_test_ticket_lock( lck, gtid );
826 
827  if ( retval ) {
828  lck->lk.owner_id = gtid + 1;
829  }
830  return retval;
831 }
832 
833 void
834 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
835 {
836  kmp_uint32 distance;
837 
838  KMP_MB(); /* Flush all pending memory write invalidates. */
839 
840  KMP_FSYNC_RELEASING(lck);
841  distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );
842 
843  KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );
844 
845  KMP_MB(); /* Flush all pending memory write invalidates. */
846 
847  KMP_YIELD( distance
848  > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
849 }
850 
851 static void
852 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
853 {
854  char const * const func = "omp_unset_lock";
855  KMP_MB(); /* in case another processor initialized lock */
856  if ( lck->lk.initialized != lck ) {
857  KMP_FATAL( LockIsUninitialized, func );
858  }
859  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
860  KMP_FATAL( LockNestableUsedAsSimple, func );
861  }
862  if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
863  KMP_FATAL( LockUnsettingFree, func );
864  }
865  if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
866  && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
867  KMP_FATAL( LockUnsettingSetByAnother, func );
868  }
869  lck->lk.owner_id = 0;
870  __kmp_release_ticket_lock( lck, gtid );
871 }
872 
873 void
874 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
875 {
876  lck->lk.location = NULL;
877  TCW_4( lck->lk.next_ticket, 0 );
878  TCW_4( lck->lk.now_serving, 0 );
879  lck->lk.owner_id = 0; // no thread owns the lock.
880  lck->lk.depth_locked = -1; // -1 => not a nested lock.
881  lck->lk.initialized = (kmp_ticket_lock *)lck;
882 }
883 
884 static void
885 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
886 {
887  __kmp_init_ticket_lock( lck );
888 }
889 
890 void
891 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
892 {
893  lck->lk.initialized = NULL;
894  lck->lk.location = NULL;
895  lck->lk.next_ticket = 0;
896  lck->lk.now_serving = 0;
897  lck->lk.owner_id = 0;
898  lck->lk.depth_locked = -1;
899 }
900 
901 static void
902 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
903 {
904  char const * const func = "omp_destroy_lock";
905  if ( lck->lk.initialized != lck ) {
906  KMP_FATAL( LockIsUninitialized, func );
907  }
908  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
909  KMP_FATAL( LockNestableUsedAsSimple, func );
910  }
911  if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
912  KMP_FATAL( LockStillOwned, func );
913  }
914  __kmp_destroy_ticket_lock( lck );
915 }
916 
917 
918 //
919 // nested ticket locks
920 //
921 
922 void
923 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
924 {
925  KMP_DEBUG_ASSERT( gtid >= 0 );
926 
927  if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
928  lck->lk.depth_locked += 1;
929  }
930  else {
931  __kmp_acquire_ticket_lock_timed_template( lck, gtid );
932  KMP_MB();
933  lck->lk.depth_locked = 1;
934  KMP_MB();
935  lck->lk.owner_id = gtid + 1;
936  }
937 }
938 
939 static void
940 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
941 {
942  char const * const func = "omp_set_nest_lock";
943  if ( lck->lk.initialized != lck ) {
944  KMP_FATAL( LockIsUninitialized, func );
945  }
946  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
947  KMP_FATAL( LockSimpleUsedAsNestable, func );
948  }
949  __kmp_acquire_nested_ticket_lock( lck, gtid );
950 }
951 
952 int
953 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
954 {
955  int retval;
956 
957  KMP_DEBUG_ASSERT( gtid >= 0 );
958 
959  if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
960  retval = ++lck->lk.depth_locked;
961  }
962  else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
963  retval = 0;
964  }
965  else {
966  KMP_MB();
967  retval = lck->lk.depth_locked = 1;
968  KMP_MB();
969  lck->lk.owner_id = gtid + 1;
970  }
971  return retval;
972 }
973 
974 static int
975 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
976  kmp_int32 gtid )
977 {
978  char const * const func = "omp_test_nest_lock";
979  if ( lck->lk.initialized != lck ) {
980  KMP_FATAL( LockIsUninitialized, func );
981  }
982  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
983  KMP_FATAL( LockSimpleUsedAsNestable, func );
984  }
985  return __kmp_test_nested_ticket_lock( lck, gtid );
986 }
987 
988 void
989 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
990 {
991  KMP_DEBUG_ASSERT( gtid >= 0 );
992 
993  KMP_MB();
994  if ( --(lck->lk.depth_locked) == 0 ) {
995  KMP_MB();
996  lck->lk.owner_id = 0;
997  __kmp_release_ticket_lock( lck, gtid );
998  }
999 }
1000 
1001 static void
1002 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1003 {
1004  char const * const func = "omp_unset_nest_lock";
1005  KMP_MB(); /* in case another processor initialized lock */
1006  if ( lck->lk.initialized != lck ) {
1007  KMP_FATAL( LockIsUninitialized, func );
1008  }
1009  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1010  KMP_FATAL( LockSimpleUsedAsNestable, func );
1011  }
1012  if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
1013  KMP_FATAL( LockUnsettingFree, func );
1014  }
1015  if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
1016  KMP_FATAL( LockUnsettingSetByAnother, func );
1017  }
1018  __kmp_release_nested_ticket_lock( lck, gtid );
1019 }
1020 
1021 void
1022 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
1023 {
1024  __kmp_init_ticket_lock( lck );
1025  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1026 }
1027 
1028 static void
1029 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1030 {
1031  __kmp_init_nested_ticket_lock( lck );
1032 }
1033 
1034 void
1035 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1036 {
1037  __kmp_destroy_ticket_lock( lck );
1038  lck->lk.depth_locked = 0;
1039 }
1040 
1041 static void
1042 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1043 {
1044  char const * const func = "omp_destroy_nest_lock";
1045  if ( lck->lk.initialized != lck ) {
1046  KMP_FATAL( LockIsUninitialized, func );
1047  }
1048  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1049  KMP_FATAL( LockSimpleUsedAsNestable, func );
1050  }
1051  if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1052  KMP_FATAL( LockStillOwned, func );
1053  }
1054  __kmp_destroy_nested_ticket_lock( lck );
1055 }
1056 
1057 
1058 //
1059 // access functions to fields which don't exist for all lock kinds.
1060 //
1061 
1062 static int
1063 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1064 {
1065  return lck == lck->lk.initialized;
1066 }
1067 
1068 static const ident_t *
1069 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1070 {
1071  return lck->lk.location;
1072 }
1073 
1074 static void
1075 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1076 {
1077  lck->lk.location = loc;
1078 }
1079 
1080 static kmp_lock_flags_t
1081 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1082 {
1083  return lck->lk.flags;
1084 }
1085 
1086 static void
1087 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1088 {
1089  lck->lk.flags = flags;
1090 }
1091 
1092 /* ------------------------------------------------------------------------ */
1093 /* queuing locks */
1094 
1095 /*
1096  * First the states
1097  * (head,tail) = 0, 0 means lock is unheld, nobody on queue
1098  * UINT_MAX or -1, 0 means lock is held, nobody on queue
1099  * h, h means lock is held or about to transition, 1 element on queue
1100  * h, t h <> t, means lock is held or about to transition, >1 elements on queue
1101  *
1102  * Now the transitions
1103  * Acquire(0,0) = -1 ,0
1104  * Release(0,0) = Error
1105  * Acquire(-1,0) = h ,h h > 0
1106  * Release(-1,0) = 0 ,0
1107  * Acquire(h,h) = h ,t h > 0, t > 0, h <> t
1108  * Release(h,h) = -1 ,0 h > 0
1109  * Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1110  * Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t
1111  *
1112  * And pictorially
1113  *
1114  *
1115  *          +-----+
1116  *          | 0, 0|------- release -------> Error
1117  *          +-----+
1118  *            |  ^
1119  *     acquire|  |release
1120  *            |  |
1121  *            |  |
1122  *            v  |
1123  *          +-----+
1124  *          |-1, 0|
1125  *          +-----+
1126  *            |  ^
1127  *     acquire|  |release
1128  *            |  |
1129  *            |  |
1130  *            v  |
1131  *          +-----+
1132  *          | h, h|
1133  *          +-----+
1134  *            |  ^
1135  *     acquire|  |release
1136  *            |  |
1137  *            |  |
1138  *            v  |
1139  *          +-----+
1140  *          | h, t|----- acquire, release loopback ---+
1141  *          +-----+                                   |
1142  *             ^                                      |
1143  *             |                                      |
1144  *             +--------------------------------------+
1145  *
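 * A worked trace (illustration only; a, b, c are the gtid+1 values of
 * threads A, B, C):
 *
 *   A acquires an idle lock:     (0,0)  -> (-1,0)   A holds, queue empty
 *   B arrives while A holds it:  (-1,0) -> (b,b)    B is the only waiter
 *   C arrives while B waits:     (b,b)  -> (b,c)    queue is B then C
 *   A releases:                  (b,c)  -> (c,c)    B dequeued, B now holds
 *   B releases:                  (c,c)  -> (-1,0)   C dequeued, C now holds
 *   C releases:                  (-1,0) -> (0,0)    lock idle again
 *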
1146  */
1147 
1148 #ifdef DEBUG_QUEUING_LOCKS
1149 
1150 /* Stuff for circular trace buffer */
1151 #define TRACE_BUF_ELE 1024
1152 static char traces[TRACE_BUF_ELE][128] = { 0 };
1153 static int tc = 0;
1154 #define TRACE_LOCK(X,Y) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s\n", X, Y );
1155 #define TRACE_LOCK_T(X,Y,Z) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s%d\n", X,Y,Z );
1156 #define TRACE_LOCK_HT(X,Y,Z,Q) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s %d,%d\n", X, Y, Z, Q );
1157 
1158 static void
1159 __kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1160  kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1161 {
1162  kmp_int32 t, i;
1163 
1164  __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1165 
1166  i = tc % TRACE_BUF_ELE;
1167  __kmp_printf_no_lock( "%s\n", traces[i] );
1168  i = (i+1) % TRACE_BUF_ELE;
1169  while ( i != (tc % TRACE_BUF_ELE) ) {
1170  __kmp_printf_no_lock( "%s", traces[i] );
1171  i = (i+1) % TRACE_BUF_ELE;
1172  }
1173  __kmp_printf_no_lock( "\n" );
1174 
1175  __kmp_printf_no_lock(
1176  "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1177  gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1178  head_id, tail_id );
1179 
1180  __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1181 
1182  if ( lck->lk.head_id >= 1 ) {
1183  t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1184  while (t > 0) {
1185  __kmp_printf_no_lock( "-> %d ", t );
1186  t = __kmp_threads[t-1]->th.th_next_waiting;
1187  }
1188  }
1189  __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
1190  __kmp_printf_no_lock( "\n\n" );
1191 }
1192 
1193 #endif /* DEBUG_QUEUING_LOCKS */
1194 
1195 static kmp_int32
1196 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1197 {
1198  return TCR_4( lck->lk.owner_id ) - 1;
1199 }
1200 
1201 static inline bool
1202 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1203 {
1204  return lck->lk.depth_locked != -1;
1205 }
1206 
1207 /* Acquire a lock using the queuing lock implementation */
1208 template <bool takeTime>
1209 /* [TLW] The unused template above is left behind because of what BEB believes is a
1210  potential compiler problem with __forceinline. */
1211 __forceinline static void
1212 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1213  kmp_int32 gtid )
1214 {
1215  register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid );
1216  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1217  volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1218  volatile kmp_uint32 *spin_here_p;
1219  kmp_int32 need_mf = 1;
1220 
1221  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1222 
1223  KMP_FSYNC_PREPARE( lck );
1224  KMP_DEBUG_ASSERT( this_thr != NULL );
1225  spin_here_p = & this_thr->th.th_spin_here;
1226 
1227 #ifdef DEBUG_QUEUING_LOCKS
1228  TRACE_LOCK( gtid+1, "acq ent" );
1229  if ( *spin_here_p )
1230  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1231  if ( this_thr->th.th_next_waiting != 0 )
1232  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1233 #endif
1234  KMP_DEBUG_ASSERT( !*spin_here_p );
1235  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1236 
1237 
1238  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1239  that may follow, not just in execution order, but also in visibility order. This way,
1240  when a releasing thread observes the changes to the queue by this thread, it can
1241  rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1242  spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p
1243  to FALSE before this thread sets it to TRUE, this thread will hang.
1244  */
1245  *spin_here_p = TRUE; /* before enqueuing to prevent race */
1246 
1247  while( 1 ) {
1248  kmp_int32 enqueued;
1249  kmp_int32 head;
1250  kmp_int32 tail;
1251 
1252  head = *head_id_p;
1253 
1254  switch ( head ) {
1255 
1256  case -1:
1257  {
1258 #ifdef DEBUG_QUEUING_LOCKS
1259  tail = *tail_id_p;
1260  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1261 #endif
1262  tail = 0; /* to make sure next link asynchronously read is not set accidentally;
1263  this assignment prevents us from entering the if ( t > 0 )
1264  condition in the enqueued case below, which is not necessary for
1265  this state transition */
1266 
1267  need_mf = 0;
1268  /* try (-1,0)->(tid,tid) */
1269  enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1270  KMP_PACK_64( -1, 0 ),
1271  KMP_PACK_64( gtid+1, gtid+1 ) );
1272 #ifdef DEBUG_QUEUING_LOCKS
1273  if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1274 #endif
1275  }
1276  break;
1277 
1278  default:
1279  {
1280  tail = *tail_id_p;
1281  KMP_DEBUG_ASSERT( tail != gtid + 1 );
1282 
1283 #ifdef DEBUG_QUEUING_LOCKS
1284  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1285 #endif
1286 
1287  if ( tail == 0 ) {
1288  enqueued = FALSE;
1289  }
1290  else {
1291  need_mf = 0;
1292  /* try (h,t) or (h,h)->(h,tid) */
1293  enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1294 
1295 #ifdef DEBUG_QUEUING_LOCKS
1296  if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1297 #endif
1298  }
1299  }
1300  break;
1301 
1302  case 0: /* empty queue */
1303  {
1304  kmp_int32 grabbed_lock;
1305 
1306 #ifdef DEBUG_QUEUING_LOCKS
1307  tail = *tail_id_p;
1308  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1309 #endif
1310  /* try (0,0)->(-1,0) */
1311 
1312  /* only legal transition out of head = 0 is head = -1 with no change to tail */
1313  grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1314 
1315  if ( grabbed_lock ) {
1316 
1317  *spin_here_p = FALSE;
1318 
1319  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1320  lck, gtid ));
1321 #ifdef DEBUG_QUEUING_LOCKS
1322  TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1323 #endif
1324  KMP_FSYNC_ACQUIRED( lck );
1325  return; /* lock holder cannot be on queue */
1326  }
1327  enqueued = FALSE;
1328  }
1329  break;
1330  }
1331 
1332  if ( enqueued ) {
1333  if ( tail > 0 ) {
1334  kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1335  KMP_ASSERT( tail_thr != NULL );
1336  tail_thr->th.th_next_waiting = gtid+1;
1337  /* corresponding wait for this write in release code */
1338  }
1339  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1340 
1341 
1342  /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for
1343  * throughput only here.
1344  */
1345  KMP_MB();
1346  KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1347 
1348 #ifdef DEBUG_QUEUING_LOCKS
1349  TRACE_LOCK( gtid+1, "acq spin" );
1350 
1351  if ( this_thr->th.th_next_waiting != 0 )
1352  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1353 #endif
1354  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1355  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1356  lck, gtid ));
1357 
1358 #ifdef DEBUG_QUEUING_LOCKS
1359  TRACE_LOCK( gtid+1, "acq exit 2" );
1360 #endif
1361  /* got lock, we were dequeued by the thread that released lock */
1362  return;
1363  }
1364 
1365  /* Yield if number of threads > number of logical processors */
1366  /* ToDo: Not sure why this should only be in oversubscription case,
1367  maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1368  KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1369  __kmp_xproc ) );
1370 #ifdef DEBUG_QUEUING_LOCKS
1371  TRACE_LOCK( gtid+1, "acq retry" );
1372 #endif
1373 
1374  }
1375  KMP_ASSERT2( 0, "should not get here" );
1376 }
1377 
1378 void
1379 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1380 {
1381  KMP_DEBUG_ASSERT( gtid >= 0 );
1382 
1383  __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1384 }
1385 
1386 static void
1387 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1388  kmp_int32 gtid )
1389 {
1390  char const * const func = "omp_set_lock";
1391  if ( lck->lk.initialized != lck ) {
1392  KMP_FATAL( LockIsUninitialized, func );
1393  }
1394  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1395  KMP_FATAL( LockNestableUsedAsSimple, func );
1396  }
1397  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1398  KMP_FATAL( LockIsAlreadyOwned, func );
1399  }
1400 
1401  __kmp_acquire_queuing_lock( lck, gtid );
1402 
1403  lck->lk.owner_id = gtid + 1;
1404 }
1405 
1406 int
1407 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1408 {
1409  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1410  kmp_int32 head;
1411 #ifdef KMP_DEBUG
1412  kmp_info_t *this_thr;
1413 #endif
1414 
1415  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1416  KMP_DEBUG_ASSERT( gtid >= 0 );
1417 #ifdef KMP_DEBUG
1418  this_thr = __kmp_thread_from_gtid( gtid );
1419  KMP_DEBUG_ASSERT( this_thr != NULL );
1420  KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1421 #endif
1422 
1423  head = *head_id_p;
1424 
1425  if ( head == 0 ) { /* nobody on queue, nobody holding */
1426 
1427  /* try (0,0)->(-1,0) */
1428 
1429  if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1430  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1431  KMP_FSYNC_ACQUIRED(lck);
1432  return TRUE;
1433  }
1434  }
1435 
1436  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1437  return FALSE;
1438 }
1439 
1440 static int
1441 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1442 {
1443  char const * const func = "omp_test_lock";
1444  if ( lck->lk.initialized != lck ) {
1445  KMP_FATAL( LockIsUninitialized, func );
1446  }
1447  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1448  KMP_FATAL( LockNestableUsedAsSimple, func );
1449  }
1450 
1451  int retval = __kmp_test_queuing_lock( lck, gtid );
1452 
1453  if ( retval ) {
1454  lck->lk.owner_id = gtid + 1;
1455  }
1456  return retval;
1457 }
1458 
1459 void
1460 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1461 {
1462  register kmp_info_t *this_thr;
1463  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1464  volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1465 
1466  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1467  KMP_DEBUG_ASSERT( gtid >= 0 );
1468  this_thr = __kmp_thread_from_gtid( gtid );
1469  KMP_DEBUG_ASSERT( this_thr != NULL );
1470 #ifdef DEBUG_QUEUING_LOCKS
1471  TRACE_LOCK( gtid+1, "rel ent" );
1472 
1473  if ( this_thr->th.th_spin_here )
1474  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1475  if ( this_thr->th.th_next_waiting != 0 )
1476  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1477 #endif
1478  KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1479  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1480 
1481  KMP_FSYNC_RELEASING(lck);
1482 
1483  while( 1 ) {
1484  kmp_int32 dequeued;
1485  kmp_int32 head;
1486  kmp_int32 tail;
1487 
1488  head = *head_id_p;
1489 
1490 #ifdef DEBUG_QUEUING_LOCKS
1491  tail = *tail_id_p;
1492  TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1493  if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1494 #endif
1495  KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1496 
1497  if ( head == -1 ) { /* nobody on queue */
1498 
1499  /* try (-1,0)->(0,0) */
1500  if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1501  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1502  lck, gtid ));
1503 #ifdef DEBUG_QUEUING_LOCKS
1504  TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1505 #endif
1506  return;
1507  }
1508  dequeued = FALSE;
1509 
1510  }
1511  else {
1512 
1513  tail = *tail_id_p;
1514  if ( head == tail ) { /* only one thread on the queue */
1515 
1516 #ifdef DEBUG_QUEUING_LOCKS
1517  if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1518 #endif
1519  KMP_DEBUG_ASSERT( head > 0 );
1520 
1521  /* try (h,h)->(-1,0) */
1522  dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1523  KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1524 #ifdef DEBUG_QUEUING_LOCKS
1525  TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1526 #endif
1527 
1528  }
1529  else {
1530  volatile kmp_int32 *waiting_id_p;
1531  kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1532  KMP_DEBUG_ASSERT( head_thr != NULL );
1533  waiting_id_p = & head_thr->th.th_next_waiting;
1534 
1535  /* Does this require synchronous reads? */
1536 #ifdef DEBUG_QUEUING_LOCKS
1537  if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1538 #endif
1539  KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1540 
1541  /* try (h,t)->(h',t) or (t,t) */
1542 
1543  KMP_MB();
1544  /* make sure enqueuing thread has time to update next waiting thread field */
1545  *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
1546 #ifdef DEBUG_QUEUING_LOCKS
1547  TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1548 #endif
1549  dequeued = TRUE;
1550  }
1551  }
1552 
1553  if ( dequeued ) {
1554  kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1555  KMP_DEBUG_ASSERT( head_thr != NULL );
1556 
1557  /* Does this require synchronous reads? */
1558 #ifdef DEBUG_QUEUING_LOCKS
1559  if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1560 #endif
1561  KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1562 
1563  /* For clean code only.
1564  * Thread not released until next statement prevents race with acquire code.
1565  */
1566  head_thr->th.th_next_waiting = 0;
1567 #ifdef DEBUG_QUEUING_LOCKS
1568  TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1569 #endif
1570 
1571  KMP_MB();
1572  /* reset spin value */
1573  head_thr->th.th_spin_here = FALSE;
1574 
1575  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1576  lck, gtid ));
1577 #ifdef DEBUG_QUEUING_LOCKS
1578  TRACE_LOCK( gtid+1, "rel exit 2" );
1579 #endif
1580  return;
1581  }
1582  /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */
1583 
1584 #ifdef DEBUG_QUEUING_LOCKS
1585  TRACE_LOCK( gtid+1, "rel retry" );
1586 #endif
1587 
1588  } /* while */
1589  KMP_ASSERT2( 0, "should not get here" );
1590 }
1591 
1592 static void
1593 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1594  kmp_int32 gtid )
1595 {
1596  char const * const func = "omp_unset_lock";
1597  KMP_MB(); /* in case another processor initialized lock */
1598  if ( lck->lk.initialized != lck ) {
1599  KMP_FATAL( LockIsUninitialized, func );
1600  }
1601  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1602  KMP_FATAL( LockNestableUsedAsSimple, func );
1603  }
1604  if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1605  KMP_FATAL( LockUnsettingFree, func );
1606  }
1607  if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1608  KMP_FATAL( LockUnsettingSetByAnother, func );
1609  }
1610  lck->lk.owner_id = 0;
1611  __kmp_release_queuing_lock( lck, gtid );
1612 }
1613 
1614 void
1615 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1616 {
1617  lck->lk.location = NULL;
1618  lck->lk.head_id = 0;
1619  lck->lk.tail_id = 0;
1620  lck->lk.next_ticket = 0;
1621  lck->lk.now_serving = 0;
1622  lck->lk.owner_id = 0; // no thread owns the lock.
1623  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1624  lck->lk.initialized = lck;
1625 
1626  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1627 }
1628 
1629 static void
1630 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1631 {
1632  __kmp_init_queuing_lock( lck );
1633 }
1634 
1635 void
1636 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1637 {
1638  lck->lk.initialized = NULL;
1639  lck->lk.location = NULL;
1640  lck->lk.head_id = 0;
1641  lck->lk.tail_id = 0;
1642  lck->lk.next_ticket = 0;
1643  lck->lk.now_serving = 0;
1644  lck->lk.owner_id = 0;
1645  lck->lk.depth_locked = -1;
1646 }
1647 
1648 static void
1649 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1650 {
1651  char const * const func = "omp_destroy_lock";
1652  if ( lck->lk.initialized != lck ) {
1653  KMP_FATAL( LockIsUninitialized, func );
1654  }
1655  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1656  KMP_FATAL( LockNestableUsedAsSimple, func );
1657  }
1658  if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1659  KMP_FATAL( LockStillOwned, func );
1660  }
1661  __kmp_destroy_queuing_lock( lck );
1662 }
1663 
1664 
1665 //
1666 // nested queuing locks
1667 //
1668 
1669 void
1670 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1671 {
1672  KMP_DEBUG_ASSERT( gtid >= 0 );
1673 
1674  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1675  lck->lk.depth_locked += 1;
1676  }
1677  else {
1678  __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1679  KMP_MB();
1680  lck->lk.depth_locked = 1;
1681  KMP_MB();
1682  lck->lk.owner_id = gtid + 1;
1683  }
1684 }
1685 
1686 static void
1687 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1688 {
1689  char const * const func = "omp_set_nest_lock";
1690  if ( lck->lk.initialized != lck ) {
1691  KMP_FATAL( LockIsUninitialized, func );
1692  }
1693  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1694  KMP_FATAL( LockSimpleUsedAsNestable, func );
1695  }
1696  __kmp_acquire_nested_queuing_lock( lck, gtid );
1697 }
1698 
1699 int
1700 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1701 {
1702  int retval;
1703 
1704  KMP_DEBUG_ASSERT( gtid >= 0 );
1705 
1706  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1707  retval = ++lck->lk.depth_locked;
1708  }
1709  else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1710  retval = 0;
1711  }
1712  else {
1713  KMP_MB();
1714  retval = lck->lk.depth_locked = 1;
1715  KMP_MB();
1716  lck->lk.owner_id = gtid + 1;
1717  }
1718  return retval;
1719 }
1720 
1721 static int
1722 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1723  kmp_int32 gtid )
1724 {
1725  char const * const func = "omp_test_nest_lock";
1726  if ( lck->lk.initialized != lck ) {
1727  KMP_FATAL( LockIsUninitialized, func );
1728  }
1729  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1730  KMP_FATAL( LockSimpleUsedAsNestable, func );
1731  }
1732  return __kmp_test_nested_queuing_lock( lck, gtid );
1733 }
1734 
1735 void
1736 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1737 {
1738  KMP_DEBUG_ASSERT( gtid >= 0 );
1739 
1740  KMP_MB();
1741  if ( --(lck->lk.depth_locked) == 0 ) {
1742  KMP_MB();
1743  lck->lk.owner_id = 0;
1744  __kmp_release_queuing_lock( lck, gtid );
1745  }
1746 }
1747 
1748 static void
1749 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1750 {
1751  char const * const func = "omp_unset_nest_lock";
1752  KMP_MB(); /* in case another processor initialized lock */
1753  if ( lck->lk.initialized != lck ) {
1754  KMP_FATAL( LockIsUninitialized, func );
1755  }
1756  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1757  KMP_FATAL( LockSimpleUsedAsNestable, func );
1758  }
1759  if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1760  KMP_FATAL( LockUnsettingFree, func );
1761  }
1762  if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1763  KMP_FATAL( LockUnsettingSetByAnother, func );
1764  }
1765  __kmp_release_nested_queuing_lock( lck, gtid );
1766 }
1767 
1768 void
1769 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1770 {
1771  __kmp_init_queuing_lock( lck );
1772  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1773 }
1774 
1775 static void
1776 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1777 {
1778  __kmp_init_nested_queuing_lock( lck );
1779 }
1780 
1781 void
1782 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1783 {
1784  __kmp_destroy_queuing_lock( lck );
1785  lck->lk.depth_locked = 0;
1786 }
1787 
1788 static void
1789 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1790 {
1791  char const * const func = "omp_destroy_nest_lock";
1792  if ( lck->lk.initialized != lck ) {
1793  KMP_FATAL( LockIsUninitialized, func );
1794  }
1795  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1796  KMP_FATAL( LockSimpleUsedAsNestable, func );
1797  }
1798  if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1799  KMP_FATAL( LockStillOwned, func );
1800  }
1801  __kmp_destroy_nested_queuing_lock( lck );
1802 }
1803 
1804 
1805 //
1806 // access functions to fields which don't exist for all lock kinds.
1807 //
1808 
1809 static int
1810 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1811 {
1812  return lck == lck->lk.initialized;
1813 }
1814 
1815 static const ident_t *
1816 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1817 {
1818  return lck->lk.location;
1819 }
1820 
1821 static void
1822 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1823 {
1824  lck->lk.location = loc;
1825 }
1826 
1827 static kmp_lock_flags_t
1828 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1829 {
1830  return lck->lk.flags;
1831 }
1832 
1833 static void
1834 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1835 {
1836  lck->lk.flags = flags;
1837 }
1838 
1839 #if KMP_USE_ADAPTIVE_LOCKS
1840 
1841 /*
1842  RTM Adaptive locks
1843 */
1844 
1845 // TODO: Use the header for the intrinsics below once compiler 13.0 is available.
1846 //#include <immintrin.h>
1847 
1848 // Values from the status register after failed speculation.
1849 #define _XBEGIN_STARTED (~0u)
1850 #define _XABORT_EXPLICIT (1 << 0)
1851 #define _XABORT_RETRY (1 << 1)
1852 #define _XABORT_CONFLICT (1 << 2)
1853 #define _XABORT_CAPACITY (1 << 3)
1854 #define _XABORT_DEBUG (1 << 4)
1855 #define _XABORT_NESTED (1 << 5)
1856 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1857 
1858 // Aborts for which it's worth trying again immediately
1859 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1860 
1861 #define STRINGIZE_INTERNAL(arg) #arg
1862 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1863 
1864 // Access to RTM instructions
1865 
1866 /*
1867  A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1868  This is the same definition as the compiler intrinsic that will be supported at some point.
1869 */
1870 static __inline int _xbegin()
1871 {
1872  int res = -1;
1873 
1874 #if KMP_OS_WINDOWS
1875 #if KMP_ARCH_X86_64
1876  _asm {
1877  _emit 0xC7
1878  _emit 0xF8
1879  _emit 2
1880  _emit 0
1881  _emit 0
1882  _emit 0
1883  jmp L2
1884  mov res, eax
1885  L2:
1886  }
1887 #else /* IA32 */
1888  _asm {
1889  _emit 0xC7
1890  _emit 0xF8
1891  _emit 2
1892  _emit 0
1893  _emit 0
1894  _emit 0
1895  jmp L2
1896  mov res, eax
1897  L2:
1898  }
1899 #endif // KMP_ARCH_X86_64
1900 #else
1901  /* Note that %eax must be marked as killed (clobbered), because
1902  * the XSR is returned in %eax (%rax) on abort. Other register
1903  * values are restored, so they don't need to be killed.
1904  *
1905  * We must also mark 'res' as an input and an output, since otherwise
1906  * 'res=-1' may be dropped as being dead, whereas we do need the
1907  * assignment on the successful (i.e., non-abort) path.
1908  */
1909  __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"
1910  " .long 1f-1b-6\n"
1911  " jmp 2f\n"
1912  "1: movl %%eax,%0\n"
1913  "2:"
1914  :"+r"(res)::"memory","%eax");
1915 #endif // KMP_OS_WINDOWS
1916  return res;
1917 }
1918 
1919 /*
1920  Transaction end
1921 */
1922 static __inline void _xend()
1923 {
1924 #if KMP_OS_WINDOWS
1925  __asm {
1926  _emit 0x0f
1927  _emit 0x01
1928  _emit 0xd5
1929  }
1930 #else
1931  __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
1932 #endif
1933 }
1934 
1935 /*
1936  This is a macro; the argument must be a single-byte constant that
1937  can be evaluated by the inline assembler, since it is emitted as a
1938  byte into the assembly code.
1939 */
1940 #if KMP_OS_WINDOWS
1941 #define _xabort(ARG) \
1942  _asm _emit 0xc6 \
1943  _asm _emit 0xf8 \
1944  _asm _emit ARG
1945 #else
1946 #define _xabort(ARG) \
1947  __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
1948 #endif
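//
// For illustration only (not used by the library): the three primitives
// above are normally combined into a speculate-or-fall-back pattern along
// these lines. The fallback details here are hypothetical pseudocode; the
// adaptive lock code further down uses the queuing lock as its real fallback.
//
//   if ( _xbegin() == _XBEGIN_STARTED ) {
//       // Transactional region: memory accesses are speculative.
//       if ( fallback_lock_is_held() )   // hypothetical check
//           _xabort(0xff);               // explicit abort with code 0xff
//       /* ... critical section ... */
//       _xend();                         // commit the transaction
//   } else {
//       // Abort path: the status returned in EAX says why we aborted.
//       /* ... acquire the fallback lock non-speculatively ... */
//   }
//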
1949 
1950 //
1951 // Statistics are collected for testing purposes
1952 //
1953 #if KMP_DEBUG_ADAPTIVE_LOCKS
1954 
1955 // We accumulate speculative lock statistics when the lock is destroyed.
1956 // We keep locks that haven't been destroyed in the liveLocks list
1957 // so that we can grab their statistics too.
1958 static kmp_adaptive_lock_statistics_t destroyedStats;
1959 
1960 // To hold the list of live locks.
1961 static kmp_adaptive_lock_info_t liveLocks;
1962 
1963 // A lock so we can safely update the list of locks.
1964 static kmp_bootstrap_lock_t chain_lock;
1965 
1966 // Initialize the list of stats.
1967 void
1968 __kmp_init_speculative_stats()
1969 {
1970  kmp_adaptive_lock_info_t *lck = &liveLocks;
1971 
1972  memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
1973  lck->stats.next = lck;
1974  lck->stats.prev = lck;
1975 
1976  KMP_ASSERT( lck->stats.next->stats.prev == lck );
1977  KMP_ASSERT( lck->stats.prev->stats.next == lck );
1978 
1979  __kmp_init_bootstrap_lock( &chain_lock );
1980 
1981 }
1982 
1983 // Insert the lock into the circular list
1984 static void
1985 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
1986 {
1987  __kmp_acquire_bootstrap_lock( &chain_lock );
1988 
1989  lck->stats.next = liveLocks.stats.next;
1990  lck->stats.prev = &liveLocks;
1991 
1992  liveLocks.stats.next = lck;
1993  lck->stats.next->stats.prev = lck;
1994 
1995  KMP_ASSERT( lck->stats.next->stats.prev == lck );
1996  KMP_ASSERT( lck->stats.prev->stats.next == lck );
1997 
1998  __kmp_release_bootstrap_lock( &chain_lock );
1999 }
2000 
2001 static void
2002 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
2003 {
2004  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2005  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2006 
2007  kmp_adaptive_lock_info_t * n = lck->stats.next;
2008  kmp_adaptive_lock_info_t * p = lck->stats.prev;
2009 
2010  n->stats.prev = p;
2011  p->stats.next = n;
2012 }
2013 
2014 static void
2015 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
2016 {
2017  memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2018  __kmp_remember_lock( lck );
2019 }
2020 
2021 static void
2022 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
2023 {
2024  kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2025 
2026  t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2027  t->successfulSpeculations += s->successfulSpeculations;
2028  t->hardFailedSpeculations += s->hardFailedSpeculations;
2029  t->softFailedSpeculations += s->softFailedSpeculations;
2030  t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2031  t->lemmingYields += s->lemmingYields;
2032 }
2033 
2034 static void
2035 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
2036 {
2037  kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2038 
2039  __kmp_acquire_bootstrap_lock( &chain_lock );
2040 
2041  __kmp_add_stats( &destroyedStats, lck );
2042  __kmp_forget_lock( lck );
2043 
2044  __kmp_release_bootstrap_lock( &chain_lock );
2045 }
2046 
2047 static float
2048 percent (kmp_uint32 count, kmp_uint32 total)
2049 {
2050  return (total == 0) ? 0.0: (100.0 * count)/total;
2051 }
2052 
2053 static
2054 FILE * __kmp_open_stats_file()
2055 {
2056  if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2057  return stdout;
2058 
2059  size_t buffLen = strlen( __kmp_speculative_statsfile ) + 20;
2060  char buffer[buffLen];
2061  snprintf (&buffer[0], buffLen, __kmp_speculative_statsfile,
2062  (kmp_int32)getpid());
2063  FILE * result = fopen(&buffer[0], "w");
2064 
2065  // Maybe we should issue a warning here...
2066  return result ? result : stdout;
2067 }
2068 
2069 void
2070 __kmp_print_speculative_stats()
2071 {
2072  if (__kmp_user_lock_kind != lk_adaptive)
2073  return;
2074 
2075  FILE * statsFile = __kmp_open_stats_file();
2076 
2077  kmp_adaptive_lock_statistics_t total = destroyedStats;
2078  kmp_adaptive_lock_info_t *lck;
2079 
2080  for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2081  __kmp_add_stats( &total, lck );
2082  }
2083  kmp_adaptive_lock_statistics_t *t = &total;
2084  kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations;
2085  kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2086  t->softFailedSpeculations;
2087 
2088  fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2089  fprintf ( statsFile, " Lock parameters: \n"
2090  " max_soft_retries : %10d\n"
2091  " max_badness : %10d\n",
2092  __kmp_adaptive_backoff_params.max_soft_retries,
2093  __kmp_adaptive_backoff_params.max_badness);
2094  fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2095  fprintf( statsFile, " Total critical sections : %10d\n", totalSections );
2096  fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n",
2097  t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2098  fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
2099  t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2100  fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields );
2101 
2102  fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations );
2103  fprintf( statsFile, " Successes : %10d (%5.1f%%)\n",
2104  t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2105  fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n",
2106  t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2107  fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n",
2108  t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2109 
2110  if (statsFile != stdout)
2111  fclose( statsFile );
2112 }
2113 
2114 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2115 #else
2116 # define KMP_INC_STAT(lck,stat)
2117 
2118 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
2119 
2120 static inline bool
2121 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2122 {
2123  // It is enough to check that the head_id is zero.
2124  // We don't also need to check the tail.
2125  bool res = lck->lk.head_id == 0;
2126 
2127  // We need a fence here, since we must ensure that no memory operations
2128  // from later in this thread float above that read.
2129 #if KMP_COMPILER_ICC
2130  _mm_mfence();
2131 #else
2132  __sync_synchronize();
2133 #endif
2134 
2135  return res;
2136 }
2137 
2138 // Functions for manipulating the badness
2139 static __inline void
2140 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
2141 {
2142  // Reset the badness to zero so we eagerly try to speculate again
2143  lck->lk.adaptive.badness = 0;
2144  KMP_INC_STAT(lck,successfulSpeculations);
2145 }
2146 
2147 // Create a bit mask with one more set bit.
2148 static __inline void
2149 __kmp_step_badness( kmp_adaptive_lock_t *lck )
2150 {
2151  kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2152  if ( newBadness > lck->lk.adaptive.max_badness) {
2153  return;
2154  } else {
2155  lck->lk.adaptive.badness = newBadness;
2156  }
2157 }
2158 
2159 // Check whether speculation should be attempted.
2160 static __inline int
2161 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2162 {
2163  kmp_uint32 badness = lck->lk.adaptive.badness;
2164  kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
2165  int res = (attempts & badness) == 0;
2166  return res;
2167 }
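//
// Worked example (illustrative only): badness is a mask of low-order one
// bits, so each call to __kmp_step_badness() doubles the back-off. With
// badness == 0 we speculate on every acquire attempt; with badness == 1 on
// every 2nd attempt; with badness == 3 on every 4th; with badness == 7 on
// every 8th, and so on up to max_badness. For instance, with badness == 3
// and acquire_attempts == 8, (8 & 3) == 0 and speculation is attempted;
// with acquire_attempts == 9, (9 & 3) == 1 and it is skipped.
//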
2168 
2169 // Attempt to acquire only the speculative lock.
2170 // Does not back off to the non-speculative lock.
2171 //
2172 static int
2173 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2174 {
2175  int retries = lck->lk.adaptive.max_soft_retries;
2176 
2177  // We don't explicitly count the start of speculation; rather, we record
2178  // the results (success, hard fail, soft fail). The sum of all of those
2179  // is the total number of times we started speculation, since all
2180  // speculations must end in one of those ways.
2181  do
2182  {
2183  kmp_uint32 status = _xbegin();
2184  // Switch this in to disable actual speculation but exercise
2185  // at least some of the rest of the code. Useful for debugging...
2186  // kmp_uint32 status = _XABORT_NESTED;
2187 
2188  if (status == _XBEGIN_STARTED )
2189  { /* We have successfully started speculation.
2190  * Check that no one acquired the lock for real between when we last looked
2191  * and now. This also gets the lock cache line into our read-set,
2192  * which we need so that we'll abort if anyone later claims it for real.
2193  */
2194  if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2195  {
2196  // Lock is now visibly acquired, so someone beat us to it.
2197  // Abort the transaction so we'll restart from _xbegin with the
2198  // failure status.
2199  _xabort(0x01)
2200  KMP_ASSERT2( 0, "should not get here" );
2201  }
2202  return 1; // Lock has been acquired (speculatively)
2203  } else {
2204  // We have aborted, update the statistics
2205  if ( status & SOFT_ABORT_MASK)
2206  {
2207  KMP_INC_STAT(lck,softFailedSpeculations);
2208  // and loop round to retry.
2209  }
2210  else
2211  {
2212  KMP_INC_STAT(lck,hardFailedSpeculations);
2213  // Give up if we had a hard failure.
2214  break;
2215  }
2216  }
2217  } while( retries-- ); // Loop while we have retries, and didn't fail hard.
2218 
2219  // Either we had a hard failure or we didn't succeed softly after
2220  // the full set of attempts, so back off the badness.
2221  __kmp_step_badness( lck );
2222  return 0;
2223 }
2224 
2225 // Attempt to acquire the speculative lock, or back off to the non-speculative one
2226 // if the speculative lock cannot be acquired.
2227 // We can succeed speculatively, non-speculatively, or fail.
2228 static int
2229 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2230 {
2231  // First try to acquire the lock speculatively
2232  if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2233  return 1;
2234 
2235  // Speculative acquisition failed, so try to acquire it non-speculatively.
2236  // Count the non-speculative acquire attempt
2237  lck->lk.adaptive.acquire_attempts++;
2238 
2239  // Use base, non-speculative lock.
2240  if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
2241  {
2242  KMP_INC_STAT(lck,nonSpeculativeAcquires);
2243  return 1; // Lock is acquired (non-speculatively)
2244  }
2245  else
2246  {
2247  return 0; // Failed to acquire the lock, it's already visibly locked.
2248  }
2249 }
2250 
2251 static int
2252 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2253 {
2254  char const * const func = "omp_test_lock";
2255  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2256  KMP_FATAL( LockIsUninitialized, func );
2257  }
2258 
2259  int retval = __kmp_test_adaptive_lock( lck, gtid );
2260 
2261  if ( retval ) {
2262  lck->lk.qlk.owner_id = gtid + 1;
2263  }
2264  return retval;
2265 }
2266 
2267 // Block until we can acquire a speculative, adaptive lock.
2268 // We check whether we should be trying to speculate.
2269 // If we should be, we check the real lock to see if it is free,
2270 // and, if not, pause without attempting to acquire it until it is.
2271 // Then we try the speculative acquire.
2272 // This means that although we suffer from lemmings a little (because
2273 // we can't acquire the lock speculatively until the queue of waiting
2274 // threads has cleared), we don't get into a
2275 // state where we can never acquire the lock speculatively (because we
2276 // force the queue to clear by preventing new arrivals from entering the
2277 // queue).
2278 // This does mean that when we're trying to break lemmings, the lock
2279 // is no longer fair. However, OpenMP makes no guarantee that its
2280 // locks are fair, so this isn't a real problem.
2281 static void
2282 __kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2283 {
2284  if ( __kmp_should_speculate( lck, gtid ) )
2285  {
2286  if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2287  {
2288  if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2289  return;
2290  // We tried speculation and failed, so give up.
2291  }
2292  else
2293  {
2294  // We can't try speculation until the lock is free, so we
2295  // pause here (without suspending on the queuing lock,
2296  // to allow it to drain), then try again.
2297  // All other threads will also see the same result for
2298  // shouldSpeculate, so will be doing the same if they
2299  // try to claim the lock from now on.
2300  while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2301  {
2302  KMP_INC_STAT(lck,lemmingYields);
2303  __kmp_yield (TRUE);
2304  }
2305 
2306  if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2307  return;
2308  }
2309  }
2310 
2311  // Speculative acquisition failed, so acquire it non-speculatively.
2312  // Count the non-speculative acquire attempt
2313  lck->lk.adaptive.acquire_attempts++;
2314 
2315  __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
2316  // We have acquired the base lock, so count that.
2317  KMP_INC_STAT(lck,nonSpeculativeAcquires );
2318 }
2319 
2320 static void
2321 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2322 {
2323  char const * const func = "omp_set_lock";
2324  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2325  KMP_FATAL( LockIsUninitialized, func );
2326  }
2327  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2328  KMP_FATAL( LockIsAlreadyOwned, func );
2329  }
2330 
2331  __kmp_acquire_adaptive_lock( lck, gtid );
2332 
2333  lck->lk.qlk.owner_id = gtid + 1;
2334 }
2335 
2336 static void
2337 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2338 {
2339  if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2340  { // If the lock doesn't look claimed we must be speculating.
2341  // (Or the user's code is buggy and they're releasing without locking;
2342  // if we had XTEST we'd be able to check that case...)
2343  _xend(); // Exit speculation
2344  __kmp_update_badness_after_success( lck );
2345  }
2346  else
2347  { // Since the lock *is* visibly locked we're not speculating,
2348  // so should use the underlying lock's release scheme.
2349  __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
2350  }
2351 }
2352 
2353 static void
2354 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2355 {
2356  char const * const func = "omp_unset_lock";
2357  KMP_MB(); /* in case another processor initialized lock */
2358  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2359  KMP_FATAL( LockIsUninitialized, func );
2360  }
2361  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2362  KMP_FATAL( LockUnsettingFree, func );
2363  }
2364  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2365  KMP_FATAL( LockUnsettingSetByAnother, func );
2366  }
2367  lck->lk.qlk.owner_id = 0;
2368  __kmp_release_adaptive_lock( lck, gtid );
2369 }
2370 
2371 static void
2372 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
2373 {
2374  __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
2375  lck->lk.adaptive.badness = 0;
2376  lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2377  lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2378  lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2379 #if KMP_DEBUG_ADAPTIVE_LOCKS
2380  __kmp_zero_speculative_stats( &lck->lk.adaptive );
2381 #endif
2382  KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2383 }
2384 
2385 static void
2386 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
2387 {
2388  __kmp_init_adaptive_lock( lck );
2389 }
2390 
2391 static void
2392 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
2393 {
2394 #if KMP_DEBUG_ADAPTIVE_LOCKS
2395  __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2396 #endif
2397  __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
2398  // Nothing needed for the speculative part.
2399 }
2400 
2401 static void
2402 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
2403 {
2404  char const * const func = "omp_destroy_lock";
2405  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2406  KMP_FATAL( LockIsUninitialized, func );
2407  }
2408  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2409  KMP_FATAL( LockStillOwned, func );
2410  }
2411  __kmp_destroy_adaptive_lock( lck );
2412 }
2413 
2414 
2415 #endif // KMP_USE_ADAPTIVE_LOCKS
2416 
2417 
2418 /* ------------------------------------------------------------------------ */
2419 /* DRDPA ticket locks */
2420 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2421 
2422 static kmp_int32
2423 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2424 {
2425  return TCR_4( lck->lk.owner_id ) - 1;
2426 }
2427 
2428 static inline bool
2429 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2430 {
2431  return lck->lk.depth_locked != -1;
2432 }
2433 
2434 __forceinline static void
2435 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2436 {
2437  kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2438  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2439  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2440  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2441  TCR_PTR(lck->lk.polls); // volatile load
2442 
2443 #ifdef USE_LOCK_PROFILE
2444  if (TCR_8(polls[ticket & mask].poll) != ticket)
2445  __kmp_printf("LOCK CONTENTION: %p\n", lck);
2446  /* else __kmp_printf( "." );*/
2447 #endif /* USE_LOCK_PROFILE */
2448 
2449  //
2450  // Now spin-wait, but reload the polls pointer and mask, in case the
2451  // polling area has been reconfigured. Unless it is reconfigured, the
2452  // reloads stay in L1 cache and are cheap.
2453  //
2454  // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2455  //
2456  // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2457  // and poll to be re-read every spin iteration.
2458  //
2459  kmp_uint32 spins;
2460 
2461  KMP_FSYNC_PREPARE(lck);
2462  KMP_INIT_YIELD(spins);
2463  while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load
2464  // If we are oversubscribed,
2465  // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
2466  // CPU Pause is in the macros for yield.
2467  //
2468  KMP_YIELD(TCR_4(__kmp_nth)
2469  > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2470  KMP_YIELD_SPIN(spins);
2471 
2472  // Re-read the mask and the poll pointer from the lock structure.
2473  //
2474  // Make certain that "mask" is read before "polls" !!!
2475  //
2476  // If another thread reconfigures the polling area and updates
2477  // its values, and we get the new value of mask and the old polls
2478  // pointer, we could access memory beyond the end of the old polling
2479  // area.
2480  //
2481  mask = TCR_8(lck->lk.mask); // volatile load
2482  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2483  TCR_PTR(lck->lk.polls); // volatile load
2484  }
2485 
2486  //
2487  // Critical section starts here
2488  //
2489  KMP_FSYNC_ACQUIRED(lck);
2490  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2491  ticket, lck));
2492  lck->lk.now_serving = ticket; // non-volatile store
2493 
2494  //
2495  // Deallocate a garbage polling area if we know that we are the last
2496  // thread that could possibly access it.
2497  //
2498  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2499  // ticket.
2500  //
2501  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2502  __kmp_free((void *)lck->lk.old_polls);
2503  lck->lk.old_polls = NULL;
2504  lck->lk.cleanup_ticket = 0;
2505  }
2506 
2507  //
2508  // Check to see if we should reconfigure the polling area.
2509  // If there is still a garbage polling area to be deallocated from a
2510  // previous reconfiguration, let a later thread reconfigure it.
2511  //
2512  if (lck->lk.old_polls == NULL) {
2513  bool reconfigure = false;
2514  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2515  kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2516 
2517  if (TCR_4(__kmp_nth)
2518  > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2519  //
2520  // We are in oversubscription mode. Contract the polling area
2521  // down to a single location, if that hasn't been done already.
2522  //
2523  if (num_polls > 1) {
2524  reconfigure = true;
2525  num_polls = TCR_4(lck->lk.num_polls);
2526  mask = 0;
2527  num_polls = 1;
2528  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2529  __kmp_allocate(num_polls * sizeof(*polls));
2530  polls[0].poll = ticket;
2531  }
2532  }
2533  else {
2534  //
2535  // We are in under/fully subscribed mode. Check the number of
2536  // threads waiting on the lock. The size of the polling area
2537  // should be at least the number of threads waiting.
2538  //
2539  kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2540  if (num_waiting > num_polls) {
2541  kmp_uint32 old_num_polls = num_polls;
2542  reconfigure = true;
2543  do {
2544  mask = (mask << 1) | 1;
2545  num_polls *= 2;
2546  } while (num_polls <= num_waiting);
2547 
2548  //
2549  // Allocate the new polling area, and copy the relevant portion
2550  // of the old polling area to the new area. __kmp_allocate()
2551  // zeroes the memory it allocates, and most of the old area is
2552  // just zero padding, so we only copy the release counters.
2553  //
2554  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2555  __kmp_allocate(num_polls * sizeof(*polls));
2556  kmp_uint32 i;
2557  for (i = 0; i < old_num_polls; i++) {
2558  polls[i].poll = old_polls[i].poll;
2559  }
2560  }
2561  }
2562 
2563  if (reconfigure) {
2564  //
2565  // Now write the updated fields back to the lock structure.
2566  //
2567  // Make certain that "polls" is written before "mask" !!!
2568  //
2569  // If another thread picks up the new value of mask and the old
2570  // polls pointer, it could access memory beyond the end of the
2571  // old polling area.
2572  //
2573  // On x86, we need memory fences.
2574  //
2575  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2576  ticket, lck, num_polls));
2577 
2578  lck->lk.old_polls = old_polls; // non-volatile store
2579  lck->lk.polls = polls; // volatile store
2580 
2581  KMP_MB();
2582 
2583  lck->lk.num_polls = num_polls; // non-volatile store
2584  lck->lk.mask = mask; // volatile store
2585 
2586  KMP_MB();
2587 
2588  //
2589  // Only after the new polling area and mask have been flushed
2590  // to main memory can we update the cleanup ticket field.
2591  //
2592  // volatile load / non-volatile store
2593  //
2594  lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2595  }
2596  }
2597 }
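//
// Illustrative example (not part of the library): with num_polls == 4 the
// mask is 3, so the thread holding ticket 5 spins on polls[5 & 3], i.e.
// polls[1], until the releaser (whose now_serving is 4) stores 5 into that
// same slot. Each ticket therefore maps to its own poll location (until the
// area is contracted under oversubscription), so waiters poll separate
// locations rather than one shared flag.
//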
2598 
2599 void
2600 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2601 {
2602  __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2603 }
2604 
2605 static void
2606 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2607 {
2608  char const * const func = "omp_set_lock";
2609  if ( lck->lk.initialized != lck ) {
2610  KMP_FATAL( LockIsUninitialized, func );
2611  }
2612  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2613  KMP_FATAL( LockNestableUsedAsSimple, func );
2614  }
2615  if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2616  KMP_FATAL( LockIsAlreadyOwned, func );
2617  }
2618 
2619  __kmp_acquire_drdpa_lock( lck, gtid );
2620 
2621  lck->lk.owner_id = gtid + 1;
2622 }
2623 
2624 int
2625 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2626 {
2627  //
2628  // First get a ticket, then read the polls pointer and the mask.
2629  // The polls pointer must be read before the mask!!! (See above)
2630  //
2631  kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load
2632  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2633  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2634  TCR_PTR(lck->lk.polls); // volatile load
2635  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2636  if (TCR_8(polls[ticket & mask].poll) == ticket) {
2637  kmp_uint64 next_ticket = ticket + 1;
2638  if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2639  ticket, next_ticket)) {
2640  KMP_FSYNC_ACQUIRED(lck);
2641  KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2642  ticket, lck));
2643  lck->lk.now_serving = ticket; // non-volatile store
2644 
2645  //
2646  // Since no threads are waiting, there is no possibility that
2647  // we would want to reconfigure the polling area. We might
2648  // have the cleanup ticket value (which says that it is now
2649  // safe to deallocate old_polls), but we'll let a later thread
2650  // which calls __kmp_acquire_lock do that - this routine
2651  // isn't supposed to block, and we would risk blocking if we
2652  // called __kmp_free() to do the deallocation.
2653  //
2654  return TRUE;
2655  }
2656  }
2657  return FALSE;
2658 }
2659 
2660 static int
2661 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2662 {
2663  char const * const func = "omp_test_lock";
2664  if ( lck->lk.initialized != lck ) {
2665  KMP_FATAL( LockIsUninitialized, func );
2666  }
2667  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2668  KMP_FATAL( LockNestableUsedAsSimple, func );
2669  }
2670 
2671  int retval = __kmp_test_drdpa_lock( lck, gtid );
2672 
2673  if ( retval ) {
2674  lck->lk.owner_id = gtid + 1;
2675  }
2676  return retval;
2677 }
2678 
2679 void
2680 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2681 {
2682  //
2683  // Read the ticket value from the lock data struct, then the polls
2684  // pointer and the mask. The polls pointer must be read before the
2685  // mask!!! (See above)
2686  //
2687  kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load
2688  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2689  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2690  TCR_PTR(lck->lk.polls); // volatile load
2691  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2692  KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2693  ticket - 1, lck));
2694  KMP_FSYNC_RELEASING(lck);
2695  KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
2696 }
2697 
2698 static void
2699 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2700 {
2701  char const * const func = "omp_unset_lock";
2702  KMP_MB(); /* in case another processor initialized lock */
2703  if ( lck->lk.initialized != lck ) {
2704  KMP_FATAL( LockIsUninitialized, func );
2705  }
2706  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2707  KMP_FATAL( LockNestableUsedAsSimple, func );
2708  }
2709  if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2710  KMP_FATAL( LockUnsettingFree, func );
2711  }
2712  if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2713  && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2714  KMP_FATAL( LockUnsettingSetByAnother, func );
2715  }
2716  lck->lk.owner_id = 0;
2717  __kmp_release_drdpa_lock( lck, gtid );
2718 }
2719 
2720 void
2721 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2722 {
2723  lck->lk.location = NULL;
2724  lck->lk.mask = 0;
2725  lck->lk.num_polls = 1;
2726  lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2727  __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2728  lck->lk.cleanup_ticket = 0;
2729  lck->lk.old_polls = NULL;
2730  lck->lk.next_ticket = 0;
2731  lck->lk.now_serving = 0;
2732  lck->lk.owner_id = 0; // no thread owns the lock.
2733  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2734  lck->lk.initialized = lck;
2735 
2736  KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2737 }
2738 
2739 static void
2740 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2741 {
2742  __kmp_init_drdpa_lock( lck );
2743 }
2744 
2745 void
2746 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2747 {
2748  lck->lk.initialized = NULL;
2749  lck->lk.location = NULL;
2750  if (lck->lk.polls != NULL) {
2751  __kmp_free((void *)lck->lk.polls);
2752  lck->lk.polls = NULL;
2753  }
2754  if (lck->lk.old_polls != NULL) {
2755  __kmp_free((void *)lck->lk.old_polls);
2756  lck->lk.old_polls = NULL;
2757  }
2758  lck->lk.mask = 0;
2759  lck->lk.num_polls = 0;
2760  lck->lk.cleanup_ticket = 0;
2761  lck->lk.next_ticket = 0;
2762  lck->lk.now_serving = 0;
2763  lck->lk.owner_id = 0;
2764  lck->lk.depth_locked = -1;
2765 }
2766 
2767 static void
2768 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2769 {
2770  char const * const func = "omp_destroy_lock";
2771  if ( lck->lk.initialized != lck ) {
2772  KMP_FATAL( LockIsUninitialized, func );
2773  }
2774  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2775  KMP_FATAL( LockNestableUsedAsSimple, func );
2776  }
2777  if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2778  KMP_FATAL( LockStillOwned, func );
2779  }
2780  __kmp_destroy_drdpa_lock( lck );
2781 }
2782 
2783 
2784 //
2785 // nested drdpa ticket locks
2786 //
2787 
2788 void
2789 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2790 {
2791  KMP_DEBUG_ASSERT( gtid >= 0 );
2792 
2793  if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2794  lck->lk.depth_locked += 1;
2795  }
2796  else {
2797  __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2798  KMP_MB();
2799  lck->lk.depth_locked = 1;
2800  KMP_MB();
2801  lck->lk.owner_id = gtid + 1;
2802  }
2803 }
2804 
2805 static void
2806 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2807 {
2808  char const * const func = "omp_set_nest_lock";
2809  if ( lck->lk.initialized != lck ) {
2810  KMP_FATAL( LockIsUninitialized, func );
2811  }
2812  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2813  KMP_FATAL( LockSimpleUsedAsNestable, func );
2814  }
2815  __kmp_acquire_nested_drdpa_lock( lck, gtid );
2816 }
2817 
2818 int
2819 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2820 {
2821  int retval;
2822 
2823  KMP_DEBUG_ASSERT( gtid >= 0 );
2824 
2825  if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2826  retval = ++lck->lk.depth_locked;
2827  }
2828  else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2829  retval = 0;
2830  }
2831  else {
2832  KMP_MB();
2833  retval = lck->lk.depth_locked = 1;
2834  KMP_MB();
2835  lck->lk.owner_id = gtid + 1;
2836  }
2837  return retval;
2838 }
2839 
2840 static int
2841 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2842 {
2843  char const * const func = "omp_test_nest_lock";
2844  if ( lck->lk.initialized != lck ) {
2845  KMP_FATAL( LockIsUninitialized, func );
2846  }
2847  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2848  KMP_FATAL( LockSimpleUsedAsNestable, func );
2849  }
2850  return __kmp_test_nested_drdpa_lock( lck, gtid );
2851 }
2852 
2853 void
2854 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2855 {
2856  KMP_DEBUG_ASSERT( gtid >= 0 );
2857 
2858  KMP_MB();
2859  if ( --(lck->lk.depth_locked) == 0 ) {
2860  KMP_MB();
2861  lck->lk.owner_id = 0;
2862  __kmp_release_drdpa_lock( lck, gtid );
2863  }
2864 }
2865 
2866 static void
2867 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2868 {
2869  char const * const func = "omp_unset_nest_lock";
2870  KMP_MB(); /* in case another processor initialized lock */
2871  if ( lck->lk.initialized != lck ) {
2872  KMP_FATAL( LockIsUninitialized, func );
2873  }
2874  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2875  KMP_FATAL( LockSimpleUsedAsNestable, func );
2876  }
2877  if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2878  KMP_FATAL( LockUnsettingFree, func );
2879  }
2880  if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2881  KMP_FATAL( LockUnsettingSetByAnother, func );
2882  }
2883  __kmp_release_nested_drdpa_lock( lck, gtid );
2884 }
2885 
2886 void
2887 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2888 {
2889  __kmp_init_drdpa_lock( lck );
2890  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2891 }
2892 
2893 static void
2894 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2895 {
2896  __kmp_init_nested_drdpa_lock( lck );
2897 }
2898 
2899 void
2900 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2901 {
2902  __kmp_destroy_drdpa_lock( lck );
2903  lck->lk.depth_locked = 0;
2904 }
2905 
2906 static void
2907 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2908 {
2909  char const * const func = "omp_destroy_nest_lock";
2910  if ( lck->lk.initialized != lck ) {
2911  KMP_FATAL( LockIsUninitialized, func );
2912  }
2913  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2914  KMP_FATAL( LockSimpleUsedAsNestable, func );
2915  }
2916  if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2917  KMP_FATAL( LockStillOwned, func );
2918  }
2919  __kmp_destroy_nested_drdpa_lock( lck );
2920 }
2921 
2922 
2923 //
2924 // Access functions for fields that don't exist for all lock kinds.
2925 //
2926 
2927 static int
2928 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
2929 {
2930  return lck == lck->lk.initialized;
2931 }
2932 
2933 static const ident_t *
2934 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
2935 {
2936  return lck->lk.location;
2937 }
2938 
2939 static void
2940 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
2941 {
2942  lck->lk.location = loc;
2943 }
2944 
2945 static kmp_lock_flags_t
2946 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
2947 {
2948  return lck->lk.flags;
2949 }
2950 
2951 static void
2952 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
2953 {
2954  lck->lk.flags = flags;
2955 }
2956 
2957 /* ------------------------------------------------------------------------ */
2958 /* user locks
2959  *
2960  * They are implemented as a table of function pointers which are set to the
2961  * lock functions of the appropriate kind, once that has been determined.
2962  */
2963 
2964 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
2965 
2966 size_t __kmp_base_user_lock_size = 0;
2967 size_t __kmp_user_lock_size = 0;
2968 
2969 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
2970 void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
2971 
2972 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
2973 void ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
2974 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
2975 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
2976 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
2977 void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
2978 
2979 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
2980 void ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
2981 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
2982 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
2983 
2984 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
2985 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
2986 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
2987 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
2988 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
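//
// A minimal sketch of how this dispatch is meant to be used, assuming the
// inline wrappers in kmp_lock.h forward through these pointers after
// __kmp_set_user_lock_vptrs() has bound them (the lck/gtid values below are
// placeholders, not real code from this file):
//
//   __kmp_set_user_lock_vptrs( lk_queuing );          // bind the queuing variant
//   ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid );
//   ( *__kmp_release_user_lock_with_checks_ )( lck, gtid );
//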
2989 
2990 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
2991 {
2992  switch ( user_lock_kind ) {
2993  case lk_default:
2994  default:
2995  KMP_ASSERT( 0 );
2996 
2997  case lk_tas: {
2998  __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
2999  __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3000 
3001  __kmp_get_user_lock_owner_ =
3002  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3003  ( &__kmp_get_tas_lock_owner );
3004 
3005  if ( __kmp_env_consistency_check ) {
3006  KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3007  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3008  }
3009  else {
3010  KMP_BIND_USER_LOCK(tas);
3011  KMP_BIND_NESTED_USER_LOCK(tas);
3012  }
3013 
3014  __kmp_destroy_user_lock_ =
3015  ( void ( * )( kmp_user_lock_p ) )
3016  ( &__kmp_destroy_tas_lock );
3017 
3018  __kmp_is_user_lock_initialized_ =
3019  ( int ( * )( kmp_user_lock_p ) ) NULL;
3020 
3021  __kmp_get_user_lock_location_ =
3022  ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3023 
3024  __kmp_set_user_lock_location_ =
3025  ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3026 
3027  __kmp_get_user_lock_flags_ =
3028  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3029 
3030  __kmp_set_user_lock_flags_ =
3031  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3032  }
3033  break;
3034 
3035 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3036 
3037  case lk_futex: {
3038  __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3039  __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3040 
3041  __kmp_get_user_lock_owner_ =
3042  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3043  ( &__kmp_get_futex_lock_owner );
3044 
3045  if ( __kmp_env_consistency_check ) {
3046  KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3047  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3048  }
3049  else {
3050  KMP_BIND_USER_LOCK(futex);
3051  KMP_BIND_NESTED_USER_LOCK(futex);
3052  }
3053 
3054  __kmp_destroy_user_lock_ =
3055  ( void ( * )( kmp_user_lock_p ) )
3056  ( &__kmp_destroy_futex_lock );
3057 
3058  __kmp_is_user_lock_initialized_ =
3059  ( int ( * )( kmp_user_lock_p ) ) NULL;
3060 
3061  __kmp_get_user_lock_location_ =
3062  ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3063 
3064  __kmp_set_user_lock_location_ =
3065  ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3066 
3067  __kmp_get_user_lock_flags_ =
3068  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3069 
3070  __kmp_set_user_lock_flags_ =
3071  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3072  }
3073  break;
3074 
3075 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3076 
3077  case lk_ticket: {
3078  __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3079  __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3080 
3081  __kmp_get_user_lock_owner_ =
3082  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3083  ( &__kmp_get_ticket_lock_owner );
3084 
3085  if ( __kmp_env_consistency_check ) {
3086  KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3087  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3088  }
3089  else {
3090  KMP_BIND_USER_LOCK(ticket);
3091  KMP_BIND_NESTED_USER_LOCK(ticket);
3092  }
3093 
3094  __kmp_destroy_user_lock_ =
3095  ( void ( * )( kmp_user_lock_p ) )
3096  ( &__kmp_destroy_ticket_lock );
3097 
3098  __kmp_is_user_lock_initialized_ =
3099  ( int ( * )( kmp_user_lock_p ) )
3100  ( &__kmp_is_ticket_lock_initialized );
3101 
3102  __kmp_get_user_lock_location_ =
3103  ( const ident_t * ( * )( kmp_user_lock_p ) )
3104  ( &__kmp_get_ticket_lock_location );
3105 
3106  __kmp_set_user_lock_location_ =
3107  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3108  ( &__kmp_set_ticket_lock_location );
3109 
3110  __kmp_get_user_lock_flags_ =
3111  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3112  ( &__kmp_get_ticket_lock_flags );
3113 
3114  __kmp_set_user_lock_flags_ =
3115  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3116  ( &__kmp_set_ticket_lock_flags );
3117  }
3118  break;
3119 
3120  case lk_queuing: {
3121  __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3122  __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3123 
3124  __kmp_get_user_lock_owner_ =
3125  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3126  ( &__kmp_get_queuing_lock_owner );
3127 
3128  if ( __kmp_env_consistency_check ) {
3129  KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3130  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3131  }
3132  else {
3133  KMP_BIND_USER_LOCK(queuing);
3134  KMP_BIND_NESTED_USER_LOCK(queuing);
3135  }
3136 
3137  __kmp_destroy_user_lock_ =
3138  ( void ( * )( kmp_user_lock_p ) )
3139  ( &__kmp_destroy_queuing_lock );
3140 
3141  __kmp_is_user_lock_initialized_ =
3142  ( int ( * )( kmp_user_lock_p ) )
3143  ( &__kmp_is_queuing_lock_initialized );
3144 
3145  __kmp_get_user_lock_location_ =
3146  ( const ident_t * ( * )( kmp_user_lock_p ) )
3147  ( &__kmp_get_queuing_lock_location );
3148 
3149  __kmp_set_user_lock_location_ =
3150  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3151  ( &__kmp_set_queuing_lock_location );
3152 
3153  __kmp_get_user_lock_flags_ =
3154  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3155  ( &__kmp_get_queuing_lock_flags );
3156 
3157  __kmp_set_user_lock_flags_ =
3158  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3159  ( &__kmp_set_queuing_lock_flags );
3160  }
3161  break;
3162 
3163 #if KMP_USE_ADAPTIVE_LOCKS
3164  case lk_adaptive: {
3165  __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3166  __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
3167 
3168  __kmp_get_user_lock_owner_ =
3169  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3170  ( &__kmp_get_queuing_lock_owner );
3171 
3172  if ( __kmp_env_consistency_check ) {
3173  KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3174  }
3175  else {
3176  KMP_BIND_USER_LOCK(adaptive);
3177  }
3178 
3179  __kmp_destroy_user_lock_ =
3180  ( void ( * )( kmp_user_lock_p ) )
3181  ( &__kmp_destroy_adaptive_lock );
3182 
3183  __kmp_is_user_lock_initialized_ =
3184  ( int ( * )( kmp_user_lock_p ) )
3185  ( &__kmp_is_queuing_lock_initialized );
3186 
3187  __kmp_get_user_lock_location_ =
3188  ( const ident_t * ( * )( kmp_user_lock_p ) )
3189  ( &__kmp_get_queuing_lock_location );
3190 
3191  __kmp_set_user_lock_location_ =
3192  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3193  ( &__kmp_set_queuing_lock_location );
3194 
3195  __kmp_get_user_lock_flags_ =
3196  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3197  ( &__kmp_get_queuing_lock_flags );
3198 
3199  __kmp_set_user_lock_flags_ =
3200  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3201  ( &__kmp_set_queuing_lock_flags );
3202 
3203  }
3204  break;
3205 #endif // KMP_USE_ADAPTIVE_LOCKS
3206 
3207  case lk_drdpa: {
3208  __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3209  __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3210 
3211  __kmp_get_user_lock_owner_ =
3212  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3213  ( &__kmp_get_drdpa_lock_owner );
3214 
3215  if ( __kmp_env_consistency_check ) {
3216  KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3217  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3218  }
3219  else {
3220  KMP_BIND_USER_LOCK(drdpa);
3221  KMP_BIND_NESTED_USER_LOCK(drdpa);
3222  }
3223 
3224  __kmp_destroy_user_lock_ =
3225  ( void ( * )( kmp_user_lock_p ) )
3226  ( &__kmp_destroy_drdpa_lock );
3227 
3228  __kmp_is_user_lock_initialized_ =
3229  ( int ( * )( kmp_user_lock_p ) )
3230  ( &__kmp_is_drdpa_lock_initialized );
3231 
3232  __kmp_get_user_lock_location_ =
3233  ( const ident_t * ( * )( kmp_user_lock_p ) )
3234  ( &__kmp_get_drdpa_lock_location );
3235 
3236  __kmp_set_user_lock_location_ =
3237  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3238  ( &__kmp_set_drdpa_lock_location );
3239 
3240  __kmp_get_user_lock_flags_ =
3241  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3242  ( &__kmp_get_drdpa_lock_flags );
3243 
3244  __kmp_set_user_lock_flags_ =
3245  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3246  ( &__kmp_set_drdpa_lock_flags );
3247  }
3248  break;
3249  }
3250 }
3251 
3252 
3253 // ----------------------------------------------------------------------------
3254 // User lock table & lock allocation
3255 
3256 kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3257 kmp_user_lock_p __kmp_lock_pool = NULL;
3258 
3259 // Lock block-allocation support.
3260 kmp_block_of_locks* __kmp_lock_blocks = NULL;
3261 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3262 
3263 static kmp_lock_index_t
3264 __kmp_lock_table_insert( kmp_user_lock_p lck )
3265 {
3266  // Assume that kmp_global_lock is held upon entry/exit.
3267  kmp_lock_index_t index;
3268  if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3269  kmp_lock_index_t size;
3270  kmp_user_lock_p *table;
3271  kmp_lock_index_t i;
3272  // Reallocate lock table.
3273  if ( __kmp_user_lock_table.allocated == 0 ) {
3274  size = 1024;
3275  }
3276  else {
3277  size = __kmp_user_lock_table.allocated * 2;
3278  }
3279  table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3280  memcpy( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3281  table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3282  // We cannot free the previous table now, since it may be in use by other
3283  // threads. So save the pointer to the previous table in the first element of the
3284  // new table. All the tables are organized into a list and can be freed when
3285  // the library is shutting down.
3286  __kmp_user_lock_table.table = table;
3287  __kmp_user_lock_table.allocated = size;
3288  }
3289  KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3290  index = __kmp_user_lock_table.used;
3291  __kmp_user_lock_table.table[ index ] = lck;
3292  ++ __kmp_user_lock_table.used;
3293  return index;
3294 }
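//
// Illustrative picture of the table chaining described above: the table
// grows 1024 -> 2048 -> 4096 ... entries, and each new table keeps the
// previous (smaller) one reachable through element [0], so
// __kmp_cleanup_user_locks() can walk the chain and free every table.
//
//   table[0]           -> previous table -> ... -> first table ([0] == NULL)
//   table[1 .. used-1]  = pointers to allocated user locks
//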
3295 
3296 static kmp_user_lock_p
3297 __kmp_lock_block_allocate()
3298 {
3299  // Assume that kmp_global_lock is held upon entry/exit.
3300  static int last_index = 0;
3301  if ( ( last_index >= __kmp_num_locks_in_block )
3302  || ( __kmp_lock_blocks == NULL ) ) {
3303  // Restart the index.
3304  last_index = 0;
3305  // Need to allocate a new block.
3306  KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3307  size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3308  char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
3309  // Set up the new block.
3310  kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3311  new_block->next_block = __kmp_lock_blocks;
3312  new_block->locks = (void *)buffer;
3313  // Publish the new block.
3314  KMP_MB();
3315  __kmp_lock_blocks = new_block;
3316  }
3317  kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
3318  [ last_index * __kmp_user_lock_size ] ) );
3319  last_index++;
3320  return ret;
3321 }
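//
// Memory layout of one block, for illustration: the lock storage comes
// first and the kmp_block_of_locks header sits at the end of the same
// allocation, which is why __kmp_cleanup_user_locks() frees
// block_ptr->locks but never block_ptr itself.
//
//   [ lock 0 | lock 1 | ... | lock N-1 | kmp_block_of_locks ]
//     ^ buffer == new_block->locks       ^ new_block
//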
3322 
3323 //
3324 // Get memory for a lock. It may be freshly allocated memory or reused memory
3325 // from the lock pool.
3326 //
3327 kmp_user_lock_p
3328 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
3329  kmp_lock_flags_t flags )
3330 {
3331  kmp_user_lock_p lck;
3332  kmp_lock_index_t index;
3333  KMP_DEBUG_ASSERT( user_lock );
3334 
3335  __kmp_acquire_lock( &__kmp_global_lock, gtid );
3336 
3337  if ( __kmp_lock_pool == NULL ) {
3338  // Lock pool is empty. Allocate new memory.
3339  if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
3340  lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
3341  }
3342  else {
3343  lck = __kmp_lock_block_allocate();
3344  }
3345 
3346  // Insert the lock into the table so that it can be freed in __kmp_cleanup,
3347  // and so the debugger has info on all allocated locks.
3348  index = __kmp_lock_table_insert( lck );
3349  }
3350  else {
3351  // Pick up lock from pool.
3352  lck = __kmp_lock_pool;
3353  index = __kmp_lock_pool->pool.index;
3354  __kmp_lock_pool = __kmp_lock_pool->pool.next;
3355  }
3356 
3357  //
3358  // We could potentially differentiate between nested and regular locks
3359  // here, and do the lock table lookup for regular locks only.
3360  //
3361  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3362  * ( (kmp_lock_index_t *) user_lock ) = index;
3363  }
3364  else {
3365  * ( (kmp_user_lock_p *) user_lock ) = lck;
3366  }
3367 
3368  // Mark the lock if it is a critical section lock.
3369  __kmp_set_user_lock_flags( lck, flags );
3370 
3371  __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line upper
3372 
3373  return lck;
3374 }
3375 
3376 // Return the lock's memory to the pool for reuse.
3377 void
3378 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
3379 {
3380  kmp_lock_pool_t * lock_pool;
3381 
3382  KMP_DEBUG_ASSERT( user_lock != NULL );
3383  KMP_DEBUG_ASSERT( lck != NULL );
3384 
3385  __kmp_acquire_lock( & __kmp_global_lock, gtid );
3386 
3387  lck->pool.next = __kmp_lock_pool;
3388  __kmp_lock_pool = lck;
3389  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3390  kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
3391  KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
3392  lck->pool.index = index;
3393  }
3394 
3395  __kmp_release_lock( & __kmp_global_lock, gtid );
3396 }
3397 
3398 kmp_user_lock_p
3399 __kmp_lookup_user_lock( void **user_lock, char const *func )
3400 {
3401  kmp_user_lock_p lck = NULL;
3402 
3403  if ( __kmp_env_consistency_check ) {
3404  if ( user_lock == NULL ) {
3405  KMP_FATAL( LockIsUninitialized, func );
3406  }
3407  }
3408 
3409  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3410  kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
3411  if ( __kmp_env_consistency_check ) {
3412  if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
3413  KMP_FATAL( LockIsUninitialized, func );
3414  }
3415  }
3416  KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
3417  KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3418  lck = __kmp_user_lock_table.table[index];
3419  }
3420  else {
3421  lck = *( (kmp_user_lock_p *)user_lock );
3422  }
3423 
3424  if ( __kmp_env_consistency_check ) {
3425  if ( lck == NULL ) {
3426  KMP_FATAL( LockIsUninitialized, func );
3427  }
3428  }
3429 
3430  return lck;
3431 }
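//
// Sketch of the two encodings handled above, assuming a 4-byte omp_lock_t on
// a platform with 8-byte pointers (the OMP_LOCK_T_SIZE < sizeof(void *)
// case); the variable names are placeholders:
//
//   // Small lock object: user_lock holds an index into the lock table.
//   kmp_lock_index_t idx = *( (kmp_lock_index_t *) user_lock );
//   kmp_user_lock_p  lck = __kmp_user_lock_table.table[ idx ];
//
//   // Pointer-sized (or larger) lock object: user_lock holds the pointer.
//   kmp_user_lock_p  lck2 = *( (kmp_user_lock_p *) user_lock );
//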
3432 
3433 void
3434 __kmp_cleanup_user_locks( void )
3435 {
3436  //
3437  // Reset the lock pool. Do not worry about locks in the pool -- we will free
3438  // them when iterating through the lock table (it includes all the locks,
3439  // dead or alive).
3440  //
3441  __kmp_lock_pool = NULL;
3442 
3443 #define IS_CRITICAL(lck) \
3444  ( ( __kmp_get_user_lock_flags_ != NULL ) && \
3445  ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
3446 
3447  //
3448  // Loop through lock table, free all locks.
3449  //
3450  // Do not free item [0], it is reserved for lock tables list.
3451  //
3452  // FIXME - we are iterating through a list of (pointers to) objects of
3453  // type union kmp_user_lock, but we have no way of knowing whether the
3454  // base type is currently "pool" or whatever the global user lock type
3455  // is.
3456  //
3457  // We are relying on the fact that for all of the user lock types
3458  // (except "tas"), the first field in the lock struct is the "initialized"
3459  // field, which is set to the address of the lock object itself when
3460  // the lock is initialized. When the union is of type "pool", the
3461  // first field is a pointer to the next object in the free list, which
3462  // will not be the same address as the object itself.
3463  //
3464  // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
3465  // will fail for "pool" objects on the free list. This must happen as
3466  // the "location" field of real user locks overlaps the "index" field
3467  // of "pool" objects.
3468  //
3469  // It would be better to run through the free list, and remove all "pool"
3470  // objects from the lock table before executing this loop. However,
3471  // "pool" objects do not always have their index field set (only on
3472  // lin_32e), and I don't want to search the lock table for the address
3473  // of every "pool" object on the free list.
3474  //
3475  while ( __kmp_user_lock_table.used > 1 ) {
3476  const ident *loc;
3477 
3478  //
3479  // reduce __kmp_user_lock_table.used before freeing the lock,
3480  // so that state of locks is consistent
3481  //
3482  kmp_user_lock_p lck = __kmp_user_lock_table.table[
3483  --__kmp_user_lock_table.used ];
3484 
3485  if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
3486  ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
3487  //
3488  // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
3489  // initialized AND it is NOT a critical section (user is not
3490  // responsible for destroying criticals) AND we know source
3491  // location to report.
3492  //
3493  if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
3494  ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
3495  ( loc->psource != NULL ) ) {
3496  kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
3497  KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.func,
3498  str_loc.line, str_loc.col );
3499  __kmp_str_loc_free( &str_loc);
3500  }
3501 
3502 #ifdef KMP_DEBUG
3503  if ( IS_CRITICAL( lck ) ) {
3504  KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
3505  }
3506  else {
3507  KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
3508  }
3509 #endif // KMP_DEBUG
3510 
3511  //
3512  // Cleanup internal lock dynamic resources
3513  // (for drdpa locks particularly).
3514  //
3515  __kmp_destroy_user_lock( lck );
3516  }
3517 
3518  //
3519  // Free the lock if block allocation of locks is not used.
3520  //
3521  if ( __kmp_lock_blocks == NULL ) {
3522  __kmp_free( lck );
3523  }
3524  }
3525 
3526 #undef IS_CRITICAL
3527 
3528  //
3529  // delete lock table(s).
3530  //
3531  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
3532  __kmp_user_lock_table.table = NULL;
3533  __kmp_user_lock_table.allocated = 0;
3534 
3535  while ( table_ptr != NULL ) {
3536  //
3537  // In the first element we saved the pointer to the previous
3538  // (smaller) lock table.
3539  //
3540  kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
3541  __kmp_free( table_ptr );
3542  table_ptr = next;
3543  }
3544 
3545  //
3546  // Free buffers allocated for blocks of locks.
3547  //
3548  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
3549  __kmp_lock_blocks = NULL;
3550 
3551  while ( block_ptr != NULL ) {
3552  kmp_block_of_locks_t *next = block_ptr->next_block;
3553  __kmp_free( block_ptr->locks );
3554  //
3555  // *block_ptr itself was allocated at the end of the locks vector.
3556  //
3557  block_ptr = next;
3558  }
3559 
3560  TCW_4(__kmp_init_user_locks, FALSE);
3561 }
3562 