Intel® OpenMP* Runtime Library
kmp_lock.cpp
1 /*
2  * kmp_lock.cpp -- lock-related functions
3  */
4 
5 /* <copyright>
6  Copyright (c) 1997-2015 Intel Corporation. All Rights Reserved.
7 
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions
10  are met:
11 
12  * Redistributions of source code must retain the above copyright
13  notice, this list of conditions and the following disclaimer.
14  * Redistributions in binary form must reproduce the above copyright
15  notice, this list of conditions and the following disclaimer in the
16  documentation and/or other materials provided with the distribution.
17  * Neither the name of Intel Corporation nor the names of its
18  contributors may be used to endorse or promote products derived
19  from this software without specific prior written permission.
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 </copyright> */
34 
35 #include <stddef.h>
36 
37 #include "kmp.h"
38 #include "kmp_itt.h"
39 #include "kmp_i18n.h"
40 #include "kmp_lock.h"
41 #include "kmp_io.h"
42 
43 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
44 # include <unistd.h>
45 # include <sys/syscall.h>
46 // We should really include <futex.h>, but doing so causes compatibility problems on different
47 // Linux* OS distributions, which either require that you include (or break when you try to include)
48 // <pci/types.h>.
49 // Since all we need is the two macros below (which are part of the kernel ABI and so cannot change),
50 // we just define the constants here and don't include <futex.h>.
51 # ifndef FUTEX_WAIT
52 # define FUTEX_WAIT 0
53 # endif
54 # ifndef FUTEX_WAKE
55 # define FUTEX_WAKE 1
56 # endif
57 #endif
58 
59 /* Implement spin locks for internal library use. */
60 /* The algorithm implemented is Lamport's bakery lock [1974]. */
61 
62 void
63 __kmp_validate_locks( void )
64 {
65  int i;
66  kmp_uint32 x, y;
67 
68  /* Check to make sure unsigned arithmetic wraps properly */
69  x = ~((kmp_uint32) 0) - 2;
70  y = x - 2;
71 
72  for (i = 0; i < 8; ++i, ++x, ++y) {
73  kmp_uint32 z = (x - y);
74  KMP_ASSERT( z == 2 );
75  }
76 
77  KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
78 }
79 
80 
81 /* ------------------------------------------------------------------------ */
82 /* test and set locks */
83 
84 //
85 // For the non-nested locks, we can only assume that the first 4 bytes were
86 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
87 // compiler only allocates a 4 byte pointer on IA-32 architecture. On
88 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
89 //
90 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
91 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
92 //
93 
94 static kmp_int32
95 __kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
96 {
97  return DYNA_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
98 }
99 
100 static inline bool
101 __kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
102 {
103  return lck->lk.depth_locked != -1;
104 }
105 
106 __forceinline static void
107 __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
108 {
109  KMP_MB();
110 
111 #ifdef USE_LOCK_PROFILE
112  kmp_uint32 curr = TCR_4( lck->lk.poll );
113  if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
114  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
115  /* else __kmp_printf( "." );*/
116 #endif /* USE_LOCK_PROFILE */
117 
118  if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
119  && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
120  KMP_FSYNC_ACQUIRED(lck);
121  return;
122  }
123 
124  kmp_uint32 spins;
125  KMP_FSYNC_PREPARE( lck );
126  KMP_INIT_YIELD( spins );
127  if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
128  __kmp_xproc ) ) {
129  KMP_YIELD( TRUE );
130  }
131  else {
132  KMP_YIELD_SPIN( spins );
133  }
134 
135  while ( ( lck->lk.poll != DYNA_LOCK_FREE(tas) ) ||
136  ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) ) {
137  //
138  // FIXME - use exponential backoff here
139  //
140  if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
141  __kmp_xproc ) ) {
142  KMP_YIELD( TRUE );
143  }
144  else {
145  KMP_YIELD_SPIN( spins );
146  }
147  }
148  KMP_FSYNC_ACQUIRED( lck );
149 }
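/*
 * Illustrative sketch (not part of the library build): the acquire path above is a
 * compare-and-swap on lk.poll (0 == free, gtid+1 == owner) followed by a
 * yield/spin loop.  Below is a minimal stand-alone model of the same idea written
 * with C11 atomics instead of the KMP_* macros; the mini_* names are hypothetical
 * and exist only for this illustration.
 */
#if 0   /* illustrative only -- never compiled */
#include <stdatomic.h>
#include <sched.h>

typedef struct { atomic_int poll; } mini_tas_lock_t;   /* 0 == free, owner+1 == held */

static void mini_tas_acquire( mini_tas_lock_t *l, int gtid )
{
    int expected = 0;
    /* fast path: try to swing poll from free (0) to gtid+1 */
    while ( !atomic_compare_exchange_weak_explicit( &l->poll, &expected, gtid + 1,
                                                    memory_order_acquire,
                                                    memory_order_relaxed ) ) {
        expected = 0;      /* CAS overwrote 'expected' with the current value; reset it */
        sched_yield();     /* crude stand-in for the KMP_YIELD / KMP_YIELD_SPIN backoff */
    }
}

static void mini_tas_release( mini_tas_lock_t *l )
{
    atomic_store_explicit( &l->poll, 0, memory_order_release );
}
#endif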
150 
151 void
152 __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
153 {
154  __kmp_acquire_tas_lock_timed_template( lck, gtid );
155 }
156 
157 static void
158 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
159 {
160  char const * const func = "omp_set_lock";
161  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
162  && __kmp_is_tas_lock_nestable( lck ) ) {
163  KMP_FATAL( LockNestableUsedAsSimple, func );
164  }
165  if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
166  KMP_FATAL( LockIsAlreadyOwned, func );
167  }
168  __kmp_acquire_tas_lock( lck, gtid );
169 }
170 
171 int
172 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
173 {
174  if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
175  && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
176  KMP_FSYNC_ACQUIRED( lck );
177  return TRUE;
178  }
179  return FALSE;
180 }
181 
182 static int
183 __kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
184 {
185  char const * const func = "omp_test_lock";
186  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
187  && __kmp_is_tas_lock_nestable( lck ) ) {
188  KMP_FATAL( LockNestableUsedAsSimple, func );
189  }
190  return __kmp_test_tas_lock( lck, gtid );
191 }
192 
193 void
194 __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
195 {
196  KMP_MB(); /* Flush all pending memory write invalidates. */
197 
198  KMP_FSYNC_RELEASING(lck);
199  KMP_ST_REL32( &(lck->lk.poll), DYNA_LOCK_FREE(tas) );
200  KMP_MB(); /* Flush all pending memory write invalidates. */
201 
202  KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
203  __kmp_xproc ) );
204 }
205 
206 static void
207 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
208 {
209  char const * const func = "omp_unset_lock";
210  KMP_MB(); /* in case another processor initialized lock */
211  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
212  && __kmp_is_tas_lock_nestable( lck ) ) {
213  KMP_FATAL( LockNestableUsedAsSimple, func );
214  }
215  if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
216  KMP_FATAL( LockUnsettingFree, func );
217  }
218  if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
219  && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
220  KMP_FATAL( LockUnsettingSetByAnother, func );
221  }
222  __kmp_release_tas_lock( lck, gtid );
223 }
224 
225 void
226 __kmp_init_tas_lock( kmp_tas_lock_t * lck )
227 {
228  TCW_4( lck->lk.poll, DYNA_LOCK_FREE(tas) );
229 }
230 
231 static void
232 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
233 {
234  __kmp_init_tas_lock( lck );
235 }
236 
237 void
238 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
239 {
240  lck->lk.poll = 0;
241 }
242 
243 static void
244 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
245 {
246  char const * const func = "omp_destroy_lock";
247  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
248  && __kmp_is_tas_lock_nestable( lck ) ) {
249  KMP_FATAL( LockNestableUsedAsSimple, func );
250  }
251  if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
252  KMP_FATAL( LockStillOwned, func );
253  }
254  __kmp_destroy_tas_lock( lck );
255 }
256 
257 
258 //
259 // nested test and set locks
260 //
261 
262 void
263 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
264 {
265  KMP_DEBUG_ASSERT( gtid >= 0 );
266 
267  if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
268  lck->lk.depth_locked += 1;
269  }
270  else {
271  __kmp_acquire_tas_lock_timed_template( lck, gtid );
272  lck->lk.depth_locked = 1;
273  }
274 }
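/*
 * Illustrative sketch (not part of the library build): this depth counting is what
 * makes the user-level OpenMP nestable lock API legal to call recursively from the
 * owning thread.  A small usage example with the standard OpenMP API:
 */
#if 0   /* illustrative only -- never compiled */
#include <omp.h>

static omp_nest_lock_t nl;

static void recurse( int depth )
{
    omp_set_nest_lock( &nl );       /* owner re-acquires; the depth count grows */
    if ( depth > 0 )
        recurse( depth - 1 );
    omp_unset_nest_lock( &nl );     /* lock is really released only when depth reaches 0 */
}

static void example( void )
{
    omp_init_nest_lock( &nl );
    recurse( 3 );
    omp_destroy_nest_lock( &nl );
}
#endif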
275 
276 static void
277 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
278 {
279  char const * const func = "omp_set_nest_lock";
280  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
281  KMP_FATAL( LockSimpleUsedAsNestable, func );
282  }
283  __kmp_acquire_nested_tas_lock( lck, gtid );
284 }
285 
286 int
287 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
288 {
289  int retval;
290 
291  KMP_DEBUG_ASSERT( gtid >= 0 );
292 
293  if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
294  retval = ++lck->lk.depth_locked;
295  }
296  else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
297  retval = 0;
298  }
299  else {
300  KMP_MB();
301  retval = lck->lk.depth_locked = 1;
302  }
303  return retval;
304 }
305 
306 static int
307 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
308 {
309  char const * const func = "omp_test_nest_lock";
310  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
311  KMP_FATAL( LockSimpleUsedAsNestable, func );
312  }
313  return __kmp_test_nested_tas_lock( lck, gtid );
314 }
315 
316 void
317 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
318 {
319  KMP_DEBUG_ASSERT( gtid >= 0 );
320 
321  KMP_MB();
322  if ( --(lck->lk.depth_locked) == 0 ) {
323  __kmp_release_tas_lock( lck, gtid );
324  }
325 }
326 
327 static void
328 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
329 {
330  char const * const func = "omp_unset_nest_lock";
331  KMP_MB(); /* in case another processor initialized lock */
332  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
333  KMP_FATAL( LockSimpleUsedAsNestable, func );
334  }
335  if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
336  KMP_FATAL( LockUnsettingFree, func );
337  }
338  if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
339  KMP_FATAL( LockUnsettingSetByAnother, func );
340  }
341  __kmp_release_nested_tas_lock( lck, gtid );
342 }
343 
344 void
345 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
346 {
347  __kmp_init_tas_lock( lck );
348  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
349 }
350 
351 static void
352 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
353 {
354  __kmp_init_nested_tas_lock( lck );
355 }
356 
357 void
358 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
359 {
360  __kmp_destroy_tas_lock( lck );
361  lck->lk.depth_locked = 0;
362 }
363 
364 static void
365 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
366 {
367  char const * const func = "omp_destroy_nest_lock";
368  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
369  KMP_FATAL( LockSimpleUsedAsNestable, func );
370  }
371  if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
372  KMP_FATAL( LockStillOwned, func );
373  }
374  __kmp_destroy_nested_tas_lock( lck );
375 }
376 
377 
378 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
379 
380 /* ------------------------------------------------------------------------ */
381 /* futex locks */
382 
383 // futex locks are really just test and set locks, with a different method
384 // of handling contention. They take the same amount of space as test and
385 // set locks, and are allocated the same way (i.e. use the area allocated by
386 // the compiler for non-nested locks / allocate nested locks on the heap).
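/*
 * Background sketch (not part of the library build): the contended path below parks
 * a waiter in the kernel with futex(FUTEX_WAIT) and the releaser wakes it with
 * futex(FUTEX_WAKE), via the same raw syscall form used in this file.  FUTEX_WAIT
 * and FUTEX_WAKE come from the definitions near the top of this file; the mini_*
 * helper names are hypothetical.
 */
#if 0   /* illustrative only -- never compiled */
#include <unistd.h>
#include <sys/syscall.h>

/* Sleep as long as *addr still equals 'val'; returns immediately otherwise. */
static long mini_futex_wait( volatile kmp_int32 *addr, kmp_int32 val )
{
    return syscall( __NR_futex, addr, FUTEX_WAIT, val, NULL, NULL, 0 );
}

/* Wake up to 'count' threads currently sleeping on addr. */
static long mini_futex_wake( volatile kmp_int32 *addr, kmp_int32 count )
{
    return syscall( __NR_futex, addr, FUTEX_WAKE, count, NULL, NULL, 0 );
}
#endif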
387 
388 static kmp_int32
389 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
390 {
391  return DYNA_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
392 }
393 
394 static inline bool
395 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
396 {
397  return lck->lk.depth_locked != -1;
398 }
399 
400 __forceinline static void
401 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
402 {
403  kmp_int32 gtid_code = ( gtid + 1 ) << 1;
404 
405  KMP_MB();
406 
407 #ifdef USE_LOCK_PROFILE
408  kmp_uint32 curr = TCR_4( lck->lk.poll );
409  if ( ( curr != 0 ) && ( curr != gtid_code ) )
410  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
411  /* else __kmp_printf( "." );*/
412 #endif /* USE_LOCK_PROFILE */
413 
414  KMP_FSYNC_PREPARE( lck );
415  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
416  lck, lck->lk.poll, gtid ) );
417 
418  kmp_int32 poll_val;
419 
420  while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex),
421  DYNA_LOCK_BUSY(gtid_code, futex) ) ) != DYNA_LOCK_FREE(futex) ) {
422 
423  kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1;
424  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
425  lck, gtid, poll_val, cond ) );
426 
427  //
428  // NOTE: if you try to use the following condition for this branch
429  //
430  // if ( poll_val & 1 == 0 )
431  //
432  // Then the 12.0 compiler has a bug where the following block will
433  // always be skipped, regardless of the value of the LSB of poll_val.
434  //
435  if ( ! cond ) {
436  //
437  // Try to set the lsb in the poll to indicate to the owner
438  // thread that they need to wake this thread up.
439  //
440  if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex) ) ) {
441  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
442  lck, lck->lk.poll, gtid ) );
443  continue;
444  }
445  poll_val |= DYNA_LOCK_BUSY(1, futex);
446 
447  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
448  lck, lck->lk.poll, gtid ) );
449  }
450 
451  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
452  lck, gtid, poll_val ) );
453 
454  kmp_int32 rc;
455  if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
456  poll_val, NULL, NULL, 0 ) ) != 0 ) {
457  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
458  lck, gtid, poll_val, rc, errno ) );
459  continue;
460  }
461 
462  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
463  lck, gtid, poll_val ) );
464  //
465  // This thread has now done a successful futex wait call and was
466  // entered on the OS futex queue. We must now perform a futex
467  // wake call when releasing the lock, as we have no idea how many
468  // other threads are in the queue.
469  //
470  gtid_code |= 1;
471  }
472 
473  KMP_FSYNC_ACQUIRED( lck );
474  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
475  lck, lck->lk.poll, gtid ) );
476 }
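/*
 * Protocol summary as a sketch (not part of the library build): lk.poll holds
 * (gtid+1)<<1 for the owner and uses bit 0 as a "waiters present" flag.  A
 * contender first sets bit 0, so the eventual releaser knows it must issue a
 * FUTEX_WAKE, and only then sleeps.  Modeled below with C11 atomics and a plain
 * 0 as the free value; names are hypothetical and the encoding is a
 * simplification of the DYNA_LOCK_* macros used above.
 */
#if 0   /* illustrative only -- never compiled */
#include <stdatomic.h>
#include <unistd.h>
#include <sys/syscall.h>

static void mini_futex_lock_acquire( atomic_int *poll, int gtid )
{
    int gtid_code = ( gtid + 1 ) << 1;
    for ( ; ; ) {
        int seen = 0;
        if ( atomic_compare_exchange_strong( poll, &seen, gtid_code ) )
            return;                                   /* acquired uncontended */
        if ( !( seen & 1 ) ) {                        /* waiter bit not yet set */
            if ( !atomic_compare_exchange_strong( poll, &seen, seen | 1 ) )
                continue;                             /* poll changed underneath us; retry */
            seen |= 1;
        }
        /* sleep until poll no longer holds the value we just observed */
        syscall( __NR_futex, poll, FUTEX_WAIT, seen, NULL, NULL, 0 );
        gtid_code |= 1;   /* after queuing once, keep the waiter bit set while we hold the lock */
    }
}
#endif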
477 
478 void
479 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
480 {
481  __kmp_acquire_futex_lock_timed_template( lck, gtid );
482 }
483 
484 static void
485 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
486 {
487  char const * const func = "omp_set_lock";
488  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
489  && __kmp_is_futex_lock_nestable( lck ) ) {
490  KMP_FATAL( LockNestableUsedAsSimple, func );
491  }
492  if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
493  KMP_FATAL( LockIsAlreadyOwned, func );
494  }
495  __kmp_acquire_futex_lock( lck, gtid );
496 }
497 
498 int
499 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
500 {
501  if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1 ) ) {
502  KMP_FSYNC_ACQUIRED( lck );
503  return TRUE;
504  }
505  return FALSE;
506 }
507 
508 static int
509 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
510 {
511  char const * const func = "omp_test_lock";
512  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
513  && __kmp_is_futex_lock_nestable( lck ) ) {
514  KMP_FATAL( LockNestableUsedAsSimple, func );
515  }
516  return __kmp_test_futex_lock( lck, gtid );
517 }
518 
519 void
520 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
521 {
522  KMP_MB(); /* Flush all pending memory write invalidates. */
523 
524  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
525  lck, lck->lk.poll, gtid ) );
526 
527  KMP_FSYNC_RELEASING(lck);
528 
529  kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex) );
530 
531  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
532  lck, gtid, poll_val ) );
533 
534  if ( DYNA_LOCK_STRIP(poll_val) & 1 ) {
535  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
536  lck, gtid ) );
537  syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0 );
538  }
539 
540  KMP_MB(); /* Flush all pending memory write invalidates. */
541 
542  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
543  lck, lck->lk.poll, gtid ) );
544 
545  KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
546  __kmp_xproc ) );
547 }
548 
549 static void
550 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
551 {
552  char const * const func = "omp_unset_lock";
553  KMP_MB(); /* in case another processor initialized lock */
554  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
555  && __kmp_is_futex_lock_nestable( lck ) ) {
556  KMP_FATAL( LockNestableUsedAsSimple, func );
557  }
558  if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
559  KMP_FATAL( LockUnsettingFree, func );
560  }
561  if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
562  && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
563  KMP_FATAL( LockUnsettingSetByAnother, func );
564  }
565  __kmp_release_futex_lock( lck, gtid );
566 }
567 
568 void
569 __kmp_init_futex_lock( kmp_futex_lock_t * lck )
570 {
571  TCW_4( lck->lk.poll, DYNA_LOCK_FREE(futex) );
572 }
573 
574 static void
575 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
576 {
577  __kmp_init_futex_lock( lck );
578 }
579 
580 void
581 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
582 {
583  lck->lk.poll = 0;
584 }
585 
586 static void
587 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
588 {
589  char const * const func = "omp_destroy_lock";
590  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
591  && __kmp_is_futex_lock_nestable( lck ) ) {
592  KMP_FATAL( LockNestableUsedAsSimple, func );
593  }
594  if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
595  KMP_FATAL( LockStillOwned, func );
596  }
597  __kmp_destroy_futex_lock( lck );
598 }
599 
600 
601 //
602 // nested futex locks
603 //
604 
605 void
606 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
607 {
608  KMP_DEBUG_ASSERT( gtid >= 0 );
609 
610  if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
611  lck->lk.depth_locked += 1;
612  }
613  else {
614  __kmp_acquire_futex_lock_timed_template( lck, gtid );
615  lck->lk.depth_locked = 1;
616  }
617 }
618 
619 static void
620 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
621 {
622  char const * const func = "omp_set_nest_lock";
623  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
624  KMP_FATAL( LockSimpleUsedAsNestable, func );
625  }
626  __kmp_acquire_nested_futex_lock( lck, gtid );
627 }
628 
629 int
630 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
631 {
632  int retval;
633 
634  KMP_DEBUG_ASSERT( gtid >= 0 );
635 
636  if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
637  retval = ++lck->lk.depth_locked;
638  }
639  else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
640  retval = 0;
641  }
642  else {
643  KMP_MB();
644  retval = lck->lk.depth_locked = 1;
645  }
646  return retval;
647 }
648 
649 static int
650 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
651 {
652  char const * const func = "omp_test_nest_lock";
653  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
654  KMP_FATAL( LockSimpleUsedAsNestable, func );
655  }
656  return __kmp_test_nested_futex_lock( lck, gtid );
657 }
658 
659 void
660 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
661 {
662  KMP_DEBUG_ASSERT( gtid >= 0 );
663 
664  KMP_MB();
665  if ( --(lck->lk.depth_locked) == 0 ) {
666  __kmp_release_futex_lock( lck, gtid );
667  }
668 }
669 
670 static void
671 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
672 {
673  char const * const func = "omp_unset_nest_lock";
674  KMP_MB(); /* in case another processor initialized lock */
675  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
676  KMP_FATAL( LockSimpleUsedAsNestable, func );
677  }
678  if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
679  KMP_FATAL( LockUnsettingFree, func );
680  }
681  if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
682  KMP_FATAL( LockUnsettingSetByAnother, func );
683  }
684  __kmp_release_nested_futex_lock( lck, gtid );
685 }
686 
687 void
688 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
689 {
690  __kmp_init_futex_lock( lck );
691  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
692 }
693 
694 static void
695 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
696 {
697  __kmp_init_nested_futex_lock( lck );
698 }
699 
700 void
701 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
702 {
703  __kmp_destroy_futex_lock( lck );
704  lck->lk.depth_locked = 0;
705 }
706 
707 static void
708 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
709 {
710  char const * const func = "omp_destroy_nest_lock";
711  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
712  KMP_FATAL( LockSimpleUsedAsNestable, func );
713  }
714  if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
715  KMP_FATAL( LockStillOwned, func );
716  }
717  __kmp_destroy_nested_futex_lock( lck );
718 }
719 
720 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
721 
722 
723 /* ------------------------------------------------------------------------ */
724 /* ticket (bakery) locks */
725 
726 static kmp_int32
727 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
728 {
729  return TCR_4( lck->lk.owner_id ) - 1;
730 }
731 
732 static inline bool
733 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
734 {
735  return lck->lk.depth_locked != -1;
736 }
737 
738 static kmp_uint32
739 __kmp_bakery_check(kmp_uint value, kmp_uint checker)
740 {
741  register kmp_uint32 pause;
742 
743  if (value == checker) {
744  return TRUE;
745  }
746  for (pause = checker - value; pause != 0; --pause);
747  return FALSE;
748 }
749 
750 __forceinline static void
751 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
752 {
753  kmp_uint32 my_ticket;
754  KMP_MB();
755 
756  my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );
757 
758 #ifdef USE_LOCK_PROFILE
759  if ( TCR_4( lck->lk.now_serving ) != my_ticket )
760  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
761  /* else __kmp_printf( "." );*/
762 #endif /* USE_LOCK_PROFILE */
763 
764  if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
765  KMP_FSYNC_ACQUIRED(lck);
766  return;
767  }
768  KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
769  KMP_FSYNC_ACQUIRED(lck);
770 }
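/*
 * Illustrative sketch (not part of the library build): the ticket discipline above
 * is "take a number, wait until that number is served".  A stand-alone model using
 * C11 atomics; the mini_* names are hypothetical and do not match the
 * kmp_ticket_lock_t layout.
 */
#if 0   /* illustrative only -- never compiled */
#include <stdatomic.h>
#include <sched.h>

typedef struct {
    atomic_uint next_ticket;   /* next number to hand out */
    atomic_uint now_serving;   /* number currently allowed to hold the lock */
} mini_ticket_lock_t;

static void mini_ticket_acquire( mini_ticket_lock_t *l )
{
    unsigned my_ticket = atomic_fetch_add_explicit( &l->next_ticket, 1u,
                                                    memory_order_relaxed );
    while ( atomic_load_explicit( &l->now_serving, memory_order_acquire ) != my_ticket )
        sched_yield();         /* the runtime spins/yields via KMP_WAIT_YIELD instead */
}

static void mini_ticket_release( mini_ticket_lock_t *l )
{
    atomic_fetch_add_explicit( &l->now_serving, 1u, memory_order_release );
}
#endif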
771 
772 void
773 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
774 {
775  __kmp_acquire_ticket_lock_timed_template( lck, gtid );
776 }
777 
778 static void
779 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
780 {
781  char const * const func = "omp_set_lock";
782  if ( lck->lk.initialized != lck ) {
783  KMP_FATAL( LockIsUninitialized, func );
784  }
785  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
786  KMP_FATAL( LockNestableUsedAsSimple, func );
787  }
788  if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
789  KMP_FATAL( LockIsAlreadyOwned, func );
790  }
791 
792  __kmp_acquire_ticket_lock( lck, gtid );
793 
794  lck->lk.owner_id = gtid + 1;
795 }
796 
797 int
798 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
799 {
800  kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
801  if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
802  kmp_uint32 next_ticket = my_ticket + 1;
803  if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
804  my_ticket, next_ticket ) ) {
805  KMP_FSYNC_ACQUIRED( lck );
806  return TRUE;
807  }
808  }
809  return FALSE;
810 }
811 
812 static int
813 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
814 {
815  char const * const func = "omp_test_lock";
816  if ( lck->lk.initialized != lck ) {
817  KMP_FATAL( LockIsUninitialized, func );
818  }
819  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
820  KMP_FATAL( LockNestableUsedAsSimple, func );
821  }
822 
823  int retval = __kmp_test_ticket_lock( lck, gtid );
824 
825  if ( retval ) {
826  lck->lk.owner_id = gtid + 1;
827  }
828  return retval;
829 }
830 
831 void
832 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
833 {
834  kmp_uint32 distance;
835 
836  KMP_MB(); /* Flush all pending memory write invalidates. */
837 
838  KMP_FSYNC_RELEASING(lck);
839  distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );
840 
841  KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );
842 
843  KMP_MB(); /* Flush all pending memory write invalidates. */
844 
845  KMP_YIELD( distance
846  > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
847 }
848 
849 static void
850 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
851 {
852  char const * const func = "omp_unset_lock";
853  KMP_MB(); /* in case another processor initialized lock */
854  if ( lck->lk.initialized != lck ) {
855  KMP_FATAL( LockIsUninitialized, func );
856  }
857  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
858  KMP_FATAL( LockNestableUsedAsSimple, func );
859  }
860  if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
861  KMP_FATAL( LockUnsettingFree, func );
862  }
863  if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
864  && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
865  KMP_FATAL( LockUnsettingSetByAnother, func );
866  }
867  lck->lk.owner_id = 0;
868  __kmp_release_ticket_lock( lck, gtid );
869 }
870 
871 void
872 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
873 {
874  lck->lk.location = NULL;
875  TCW_4( lck->lk.next_ticket, 0 );
876  TCW_4( lck->lk.now_serving, 0 );
877  lck->lk.owner_id = 0; // no thread owns the lock.
878  lck->lk.depth_locked = -1; // -1 => not a nested lock.
879  lck->lk.initialized = (kmp_ticket_lock *)lck;
880 }
881 
882 static void
883 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
884 {
885  __kmp_init_ticket_lock( lck );
886 }
887 
888 void
889 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
890 {
891  lck->lk.initialized = NULL;
892  lck->lk.location = NULL;
893  lck->lk.next_ticket = 0;
894  lck->lk.now_serving = 0;
895  lck->lk.owner_id = 0;
896  lck->lk.depth_locked = -1;
897 }
898 
899 static void
900 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
901 {
902  char const * const func = "omp_destroy_lock";
903  if ( lck->lk.initialized != lck ) {
904  KMP_FATAL( LockIsUninitialized, func );
905  }
906  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
907  KMP_FATAL( LockNestableUsedAsSimple, func );
908  }
909  if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
910  KMP_FATAL( LockStillOwned, func );
911  }
912  __kmp_destroy_ticket_lock( lck );
913 }
914 
915 
916 //
917 // nested ticket locks
918 //
919 
920 void
921 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
922 {
923  KMP_DEBUG_ASSERT( gtid >= 0 );
924 
925  if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
926  lck->lk.depth_locked += 1;
927  }
928  else {
929  __kmp_acquire_ticket_lock_timed_template( lck, gtid );
930  KMP_MB();
931  lck->lk.depth_locked = 1;
932  KMP_MB();
933  lck->lk.owner_id = gtid + 1;
934  }
935 }
936 
937 static void
938 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
939 {
940  char const * const func = "omp_set_nest_lock";
941  if ( lck->lk.initialized != lck ) {
942  KMP_FATAL( LockIsUninitialized, func );
943  }
944  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
945  KMP_FATAL( LockSimpleUsedAsNestable, func );
946  }
947  __kmp_acquire_nested_ticket_lock( lck, gtid );
948 }
949 
950 int
951 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
952 {
953  int retval;
954 
955  KMP_DEBUG_ASSERT( gtid >= 0 );
956 
957  if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
958  retval = ++lck->lk.depth_locked;
959  }
960  else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
961  retval = 0;
962  }
963  else {
964  KMP_MB();
965  retval = lck->lk.depth_locked = 1;
966  KMP_MB();
967  lck->lk.owner_id = gtid + 1;
968  }
969  return retval;
970 }
971 
972 static int
973 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
974  kmp_int32 gtid )
975 {
976  char const * const func = "omp_test_nest_lock";
977  if ( lck->lk.initialized != lck ) {
978  KMP_FATAL( LockIsUninitialized, func );
979  }
980  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
981  KMP_FATAL( LockSimpleUsedAsNestable, func );
982  }
983  return __kmp_test_nested_ticket_lock( lck, gtid );
984 }
985 
986 void
987 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
988 {
989  KMP_DEBUG_ASSERT( gtid >= 0 );
990 
991  KMP_MB();
992  if ( --(lck->lk.depth_locked) == 0 ) {
993  KMP_MB();
994  lck->lk.owner_id = 0;
995  __kmp_release_ticket_lock( lck, gtid );
996  }
997 }
998 
999 static void
1000 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1001 {
1002  char const * const func = "omp_unset_nest_lock";
1003  KMP_MB(); /* in case another processor initialized lock */
1004  if ( lck->lk.initialized != lck ) {
1005  KMP_FATAL( LockIsUninitialized, func );
1006  }
1007  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1008  KMP_FATAL( LockSimpleUsedAsNestable, func );
1009  }
1010  if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
1011  KMP_FATAL( LockUnsettingFree, func );
1012  }
1013  if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
1014  KMP_FATAL( LockUnsettingSetByAnother, func );
1015  }
1016  __kmp_release_nested_ticket_lock( lck, gtid );
1017 }
1018 
1019 void
1020 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
1021 {
1022  __kmp_init_ticket_lock( lck );
1023  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1024 }
1025 
1026 static void
1027 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1028 {
1029  __kmp_init_nested_ticket_lock( lck );
1030 }
1031 
1032 void
1033 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1034 {
1035  __kmp_destroy_ticket_lock( lck );
1036  lck->lk.depth_locked = 0;
1037 }
1038 
1039 static void
1040 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1041 {
1042  char const * const func = "omp_destroy_nest_lock";
1043  if ( lck->lk.initialized != lck ) {
1044  KMP_FATAL( LockIsUninitialized, func );
1045  }
1046  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1047  KMP_FATAL( LockSimpleUsedAsNestable, func );
1048  }
1049  if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1050  KMP_FATAL( LockStillOwned, func );
1051  }
1052  __kmp_destroy_nested_ticket_lock( lck );
1053 }
1054 
1055 
1056 //
1057 // access functions to fields which don't exist for all lock kinds.
1058 //
1059 
1060 static int
1061 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1062 {
1063  return lck == lck->lk.initialized;
1064 }
1065 
1066 static const ident_t *
1067 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1068 {
1069  return lck->lk.location;
1070 }
1071 
1072 static void
1073 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1074 {
1075  lck->lk.location = loc;
1076 }
1077 
1078 static kmp_lock_flags_t
1079 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1080 {
1081  return lck->lk.flags;
1082 }
1083 
1084 static void
1085 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1086 {
1087  lck->lk.flags = flags;
1088 }
1089 
1090 /* ------------------------------------------------------------------------ */
1091 /* queuing locks */
1092 
1093 /*
1094  * First the states
1095  * (head,tail) = 0, 0 means lock is unheld, nobody on queue
1096  * UINT_MAX or -1, 0 means lock is held, nobody on queue
1097  * h, h means lock is held or about to transition, 1 element on queue
1098  * h, t h <> t, means lock is held or about to transition, >1 elements on queue
1099  *
1100  * Now the transitions
1101  * Acquire(0,0) = -1 ,0
1102  * Release(0,0) = Error
1103  * Acquire(-1,0) = h ,h h > 0
1104  * Release(-1,0) = 0 ,0
1105  * Acquire(h,h) = h ,t h > 0, t > 0, h <> t
1106  * Release(h,h) = -1 ,0 h > 0
1107  * Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1108  * Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t
1109  *
1110  * And pictorially
1111  *
1112  *
1113  *          +-----+
1114  *          | 0, 0|------- release -------> Error
1115  *          +-----+
1116  *            |  ^
1117  *     acquire|  |release
1118  *            |  |
1119  *            |  |
1120  *            v  |
1121  *          +-----+
1122  *          |-1, 0|
1123  *          +-----+
1124  *            |  ^
1125  *     acquire|  |release
1126  *            |  |
1127  *            |  |
1128  *            v  |
1129  *          +-----+
1130  *          | h, h|
1131  *          +-----+
1132  *            |  ^
1133  *     acquire|  |release
1134  *            |  |
1135  *            |  |
1136  *            v  |
1137  *          +-----+
1138  *          | h, t|----- acquire, release loopback ---+
1139  *          +-----+                                   |
1140  *               ^                                    |
1141  *               |                                    |
1142  *               +------------------------------------+
1143  *
1144  */
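/*
 * Sketch of the packed-word idea used below (not part of the library build): the
 * transitions that must change head and tail together, e.g. (-1,0)->(tid,tid) on
 * acquire and (h,h)->(-1,0) on release, are performed as a single 64-bit CAS over
 * the adjacent head_id/tail_id fields (KMP_PACK_64 in the real code).  A
 * stand-alone model with hypothetical names and an arbitrary packing order:
 */
#if 0   /* illustrative only -- never compiled */
#include <stdatomic.h>
#include <stdint.h>

static inline uint64_t mini_pack( int32_t head, int32_t tail )
{
    return ( (uint64_t)(uint32_t)head << 32 ) | (uint32_t)tail;
}

/* Attempt the (-1,0) -> (gtid+1,gtid+1) enqueue transition in one shot. */
static int mini_try_enqueue_first( _Atomic uint64_t *head_tail, int32_t gtid )
{
    uint64_t expected = mini_pack( -1, 0 );
    return atomic_compare_exchange_strong( head_tail, &expected,
                                           mini_pack( gtid + 1, gtid + 1 ) );
}
#endif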
1145 
1146 #ifdef DEBUG_QUEUING_LOCKS
1147 
1148 /* Stuff for circular trace buffer */
1149 #define TRACE_BUF_ELE 1024
1150 static char traces[TRACE_BUF_ELE][128] = { 0 };
1151 static int tc = 0;
1152 #define TRACE_LOCK(X,Y) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y );
1153 #define TRACE_LOCK_T(X,Y,Z) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z );
1154 #define TRACE_LOCK_HT(X,Y,Z,Q) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );
1155 
1156 static void
1157 __kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1158  kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1159 {
1160  kmp_int32 t, i;
1161 
1162  __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1163 
1164  i = tc % TRACE_BUF_ELE;
1165  __kmp_printf_no_lock( "%s\n", traces[i] );
1166  i = (i+1) % TRACE_BUF_ELE;
1167  while ( i != (tc % TRACE_BUF_ELE) ) {
1168  __kmp_printf_no_lock( "%s", traces[i] );
1169  i = (i+1) % TRACE_BUF_ELE;
1170  }
1171  __kmp_printf_no_lock( "\n" );
1172 
1173  __kmp_printf_no_lock(
1174  "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1175  gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1176  head_id, tail_id );
1177 
1178  __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1179 
1180  if ( lck->lk.head_id >= 1 ) {
1181  t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1182  while (t > 0) {
1183  __kmp_printf_no_lock( "-> %d ", t );
1184  t = __kmp_threads[t-1]->th.th_next_waiting;
1185  }
1186  }
1187  __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
1188  __kmp_printf_no_lock( "\n\n" );
1189 }
1190 
1191 #endif /* DEBUG_QUEUING_LOCKS */
1192 
1193 static kmp_int32
1194 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1195 {
1196  return TCR_4( lck->lk.owner_id ) - 1;
1197 }
1198 
1199 static inline bool
1200 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1201 {
1202  return lck->lk.depth_locked != -1;
1203 }
1204 
1205 /* Acquire a lock using the queuing lock implementation */
1206 template <bool takeTime>
1207 /* [TLW] The unused template above is left behind because of what BEB believes is a
1208  potential compiler problem with __forceinline. */
1209 __forceinline static void
1210 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1211  kmp_int32 gtid )
1212 {
1213  register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid );
1214  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1215  volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1216  volatile kmp_uint32 *spin_here_p;
1217  kmp_int32 need_mf = 1;
1218 
1219  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1220 
1221  KMP_FSYNC_PREPARE( lck );
1222  KMP_DEBUG_ASSERT( this_thr != NULL );
1223  spin_here_p = & this_thr->th.th_spin_here;
1224 
1225 #ifdef DEBUG_QUEUING_LOCKS
1226  TRACE_LOCK( gtid+1, "acq ent" );
1227  if ( *spin_here_p )
1228  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1229  if ( this_thr->th.th_next_waiting != 0 )
1230  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1231 #endif
1232  KMP_DEBUG_ASSERT( !*spin_here_p );
1233  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1234 
1235 
1236  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1237  that may follow, not just in execution order, but also in visibility order. This way,
1238  when a releasing thread observes the changes to the queue by this thread, it can
1239  rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1240  spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p
1241  to FALSE before this thread sets it to TRUE, this thread will hang.
1242  */
1243  *spin_here_p = TRUE; /* before enqueuing to prevent race */
1244 
1245  while( 1 ) {
1246  kmp_int32 enqueued;
1247  kmp_int32 head;
1248  kmp_int32 tail;
1249 
1250  head = *head_id_p;
1251 
1252  switch ( head ) {
1253 
1254  case -1:
1255  {
1256 #ifdef DEBUG_QUEUING_LOCKS
1257  tail = *tail_id_p;
1258  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1259 #endif
1260  tail = 0; /* to make sure next link asynchronously read is not set accidentally;
1261  this assignment prevents us from entering the if ( t > 0 )
1262  condition in the enqueued case below, which is not necessary for
1263  this state transition */
1264 
1265  need_mf = 0;
1266  /* try (-1,0)->(tid,tid) */
1267  enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1268  KMP_PACK_64( -1, 0 ),
1269  KMP_PACK_64( gtid+1, gtid+1 ) );
1270 #ifdef DEBUG_QUEUING_LOCKS
1271  if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1272 #endif
1273  }
1274  break;
1275 
1276  default:
1277  {
1278  tail = *tail_id_p;
1279  KMP_DEBUG_ASSERT( tail != gtid + 1 );
1280 
1281 #ifdef DEBUG_QUEUING_LOCKS
1282  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1283 #endif
1284 
1285  if ( tail == 0 ) {
1286  enqueued = FALSE;
1287  }
1288  else {
1289  need_mf = 0;
1290  /* try (h,t) or (h,h)->(h,tid) */
1291  enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1292 
1293 #ifdef DEBUG_QUEUING_LOCKS
1294  if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1295 #endif
1296  }
1297  }
1298  break;
1299 
1300  case 0: /* empty queue */
1301  {
1302  kmp_int32 grabbed_lock;
1303 
1304 #ifdef DEBUG_QUEUING_LOCKS
1305  tail = *tail_id_p;
1306  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1307 #endif
1308  /* try (0,0)->(-1,0) */
1309 
1310  /* only legal transition out of head = 0 is head = -1 with no change to tail */
1311  grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1312 
1313  if ( grabbed_lock ) {
1314 
1315  *spin_here_p = FALSE;
1316 
1317  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1318  lck, gtid ));
1319 #ifdef DEBUG_QUEUING_LOCKS
1320  TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1321 #endif
1322  KMP_FSYNC_ACQUIRED( lck );
1323  return; /* lock holder cannot be on queue */
1324  }
1325  enqueued = FALSE;
1326  }
1327  break;
1328  }
1329 
1330  if ( enqueued ) {
1331  if ( tail > 0 ) {
1332  kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1333  KMP_ASSERT( tail_thr != NULL );
1334  tail_thr->th.th_next_waiting = gtid+1;
1335  /* corresponding wait for this write in release code */
1336  }
1337  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1338 
1339 
1340  /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for
1341  * throughput only here.
1342  */
1343  KMP_MB();
1344  KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1345 
1346 #ifdef DEBUG_QUEUING_LOCKS
1347  TRACE_LOCK( gtid+1, "acq spin" );
1348 
1349  if ( this_thr->th.th_next_waiting != 0 )
1350  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1351 #endif
1352  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1353  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1354  lck, gtid ));
1355 
1356 #ifdef DEBUG_QUEUING_LOCKS
1357  TRACE_LOCK( gtid+1, "acq exit 2" );
1358 #endif
1359  /* got lock, we were dequeued by the thread that released lock */
1360  return;
1361  }
1362 
1363  /* Yield if number of threads > number of logical processors */
1364  /* ToDo: Not sure why this should only be in oversubscription case,
1365  maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1366  KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1367  __kmp_xproc ) );
1368 #ifdef DEBUG_QUEUING_LOCKS
1369  TRACE_LOCK( gtid+1, "acq retry" );
1370 #endif
1371 
1372  }
1373  KMP_ASSERT2( 0, "should not get here" );
1374 }
1375 
1376 void
1377 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1378 {
1379  KMP_DEBUG_ASSERT( gtid >= 0 );
1380 
1381  __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1382 }
1383 
1384 static void
1385 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1386  kmp_int32 gtid )
1387 {
1388  char const * const func = "omp_set_lock";
1389  if ( lck->lk.initialized != lck ) {
1390  KMP_FATAL( LockIsUninitialized, func );
1391  }
1392  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1393  KMP_FATAL( LockNestableUsedAsSimple, func );
1394  }
1395  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1396  KMP_FATAL( LockIsAlreadyOwned, func );
1397  }
1398 
1399  __kmp_acquire_queuing_lock( lck, gtid );
1400 
1401  lck->lk.owner_id = gtid + 1;
1402 }
1403 
1404 int
1405 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1406 {
1407  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1408  kmp_int32 head;
1409 #ifdef KMP_DEBUG
1410  kmp_info_t *this_thr;
1411 #endif
1412 
1413  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1414  KMP_DEBUG_ASSERT( gtid >= 0 );
1415 #ifdef KMP_DEBUG
1416  this_thr = __kmp_thread_from_gtid( gtid );
1417  KMP_DEBUG_ASSERT( this_thr != NULL );
1418  KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1419 #endif
1420 
1421  head = *head_id_p;
1422 
1423  if ( head == 0 ) { /* nobody on queue, nobody holding */
1424 
1425  /* try (0,0)->(-1,0) */
1426 
1427  if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1428  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1429  KMP_FSYNC_ACQUIRED(lck);
1430  return TRUE;
1431  }
1432  }
1433 
1434  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1435  return FALSE;
1436 }
1437 
1438 static int
1439 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1440 {
1441  char const * const func = "omp_test_lock";
1442  if ( lck->lk.initialized != lck ) {
1443  KMP_FATAL( LockIsUninitialized, func );
1444  }
1445  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1446  KMP_FATAL( LockNestableUsedAsSimple, func );
1447  }
1448 
1449  int retval = __kmp_test_queuing_lock( lck, gtid );
1450 
1451  if ( retval ) {
1452  lck->lk.owner_id = gtid + 1;
1453  }
1454  return retval;
1455 }
1456 
1457 void
1458 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1459 {
1460  register kmp_info_t *this_thr;
1461  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1462  volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1463 
1464  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1465  KMP_DEBUG_ASSERT( gtid >= 0 );
1466  this_thr = __kmp_thread_from_gtid( gtid );
1467  KMP_DEBUG_ASSERT( this_thr != NULL );
1468 #ifdef DEBUG_QUEUING_LOCKS
1469  TRACE_LOCK( gtid+1, "rel ent" );
1470 
1471  if ( this_thr->th.th_spin_here )
1472  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1473  if ( this_thr->th.th_next_waiting != 0 )
1474  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1475 #endif
1476  KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1477  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1478 
1479  KMP_FSYNC_RELEASING(lck);
1480 
1481  while( 1 ) {
1482  kmp_int32 dequeued;
1483  kmp_int32 head;
1484  kmp_int32 tail;
1485 
1486  head = *head_id_p;
1487 
1488 #ifdef DEBUG_QUEUING_LOCKS
1489  tail = *tail_id_p;
1490  TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1491  if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1492 #endif
1493  KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1494 
1495  if ( head == -1 ) { /* nobody on queue */
1496 
1497  /* try (-1,0)->(0,0) */
1498  if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1499  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1500  lck, gtid ));
1501 #ifdef DEBUG_QUEUING_LOCKS
1502  TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1503 #endif
1504  return;
1505  }
1506  dequeued = FALSE;
1507 
1508  }
1509  else {
1510 
1511  tail = *tail_id_p;
1512  if ( head == tail ) { /* only one thread on the queue */
1513 
1514 #ifdef DEBUG_QUEUING_LOCKS
1515  if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1516 #endif
1517  KMP_DEBUG_ASSERT( head > 0 );
1518 
1519  /* try (h,h)->(-1,0) */
1520  dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1521  KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1522 #ifdef DEBUG_QUEUING_LOCKS
1523  TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1524 #endif
1525 
1526  }
1527  else {
1528  volatile kmp_int32 *waiting_id_p;
1529  kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1530  KMP_DEBUG_ASSERT( head_thr != NULL );
1531  waiting_id_p = & head_thr->th.th_next_waiting;
1532 
1533  /* Does this require synchronous reads? */
1534 #ifdef DEBUG_QUEUING_LOCKS
1535  if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1536 #endif
1537  KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1538 
1539  /* try (h,t)->(h',t) or (t,t) */
1540 
1541  KMP_MB();
1542  /* make sure enqueuing thread has time to update next waiting thread field */
1543  *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
1544 #ifdef DEBUG_QUEUING_LOCKS
1545  TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1546 #endif
1547  dequeued = TRUE;
1548  }
1549  }
1550 
1551  if ( dequeued ) {
1552  kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1553  KMP_DEBUG_ASSERT( head_thr != NULL );
1554 
1555  /* Does this require synchronous reads? */
1556 #ifdef DEBUG_QUEUING_LOCKS
1557  if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1558 #endif
1559  KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1560 
1561  /* For clean code only.
1562  * The waiting thread is not actually released until the next statement; this ordering prevents a race with the acquire code.
1563  */
1564  head_thr->th.th_next_waiting = 0;
1565 #ifdef DEBUG_QUEUING_LOCKS
1566  TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1567 #endif
1568 
1569  KMP_MB();
1570  /* reset spin value */
1571  head_thr->th.th_spin_here = FALSE;
1572 
1573  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1574  lck, gtid ));
1575 #ifdef DEBUG_QUEUING_LOCKS
1576  TRACE_LOCK( gtid+1, "rel exit 2" );
1577 #endif
1578  return;
1579  }
1580  /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */
1581 
1582 #ifdef DEBUG_QUEUING_LOCKS
1583  TRACE_LOCK( gtid+1, "rel retry" );
1584 #endif
1585 
1586  } /* while */
1587  KMP_ASSERT2( 0, "should not get here" );
1588 }
1589 
1590 static void
1591 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1592  kmp_int32 gtid )
1593 {
1594  char const * const func = "omp_unset_lock";
1595  KMP_MB(); /* in case another processor initialized lock */
1596  if ( lck->lk.initialized != lck ) {
1597  KMP_FATAL( LockIsUninitialized, func );
1598  }
1599  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1600  KMP_FATAL( LockNestableUsedAsSimple, func );
1601  }
1602  if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1603  KMP_FATAL( LockUnsettingFree, func );
1604  }
1605  if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1606  KMP_FATAL( LockUnsettingSetByAnother, func );
1607  }
1608  lck->lk.owner_id = 0;
1609  __kmp_release_queuing_lock( lck, gtid );
1610 }
1611 
1612 void
1613 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1614 {
1615  lck->lk.location = NULL;
1616  lck->lk.head_id = 0;
1617  lck->lk.tail_id = 0;
1618  lck->lk.next_ticket = 0;
1619  lck->lk.now_serving = 0;
1620  lck->lk.owner_id = 0; // no thread owns the lock.
1621  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1622  lck->lk.initialized = lck;
1623 
1624  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1625 }
1626 
1627 static void
1628 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1629 {
1630  __kmp_init_queuing_lock( lck );
1631 }
1632 
1633 void
1634 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1635 {
1636  lck->lk.initialized = NULL;
1637  lck->lk.location = NULL;
1638  lck->lk.head_id = 0;
1639  lck->lk.tail_id = 0;
1640  lck->lk.next_ticket = 0;
1641  lck->lk.now_serving = 0;
1642  lck->lk.owner_id = 0;
1643  lck->lk.depth_locked = -1;
1644 }
1645 
1646 static void
1647 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1648 {
1649  char const * const func = "omp_destroy_lock";
1650  if ( lck->lk.initialized != lck ) {
1651  KMP_FATAL( LockIsUninitialized, func );
1652  }
1653  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1654  KMP_FATAL( LockNestableUsedAsSimple, func );
1655  }
1656  if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1657  KMP_FATAL( LockStillOwned, func );
1658  }
1659  __kmp_destroy_queuing_lock( lck );
1660 }
1661 
1662 
1663 //
1664 // nested queuing locks
1665 //
1666 
1667 void
1668 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1669 {
1670  KMP_DEBUG_ASSERT( gtid >= 0 );
1671 
1672  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1673  lck->lk.depth_locked += 1;
1674  }
1675  else {
1676  __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1677  KMP_MB();
1678  lck->lk.depth_locked = 1;
1679  KMP_MB();
1680  lck->lk.owner_id = gtid + 1;
1681  }
1682 }
1683 
1684 static void
1685 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1686 {
1687  char const * const func = "omp_set_nest_lock";
1688  if ( lck->lk.initialized != lck ) {
1689  KMP_FATAL( LockIsUninitialized, func );
1690  }
1691  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1692  KMP_FATAL( LockSimpleUsedAsNestable, func );
1693  }
1694  __kmp_acquire_nested_queuing_lock( lck, gtid );
1695 }
1696 
1697 int
1698 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1699 {
1700  int retval;
1701 
1702  KMP_DEBUG_ASSERT( gtid >= 0 );
1703 
1704  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1705  retval = ++lck->lk.depth_locked;
1706  }
1707  else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1708  retval = 0;
1709  }
1710  else {
1711  KMP_MB();
1712  retval = lck->lk.depth_locked = 1;
1713  KMP_MB();
1714  lck->lk.owner_id = gtid + 1;
1715  }
1716  return retval;
1717 }
1718 
1719 static int
1720 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1721  kmp_int32 gtid )
1722 {
1723  char const * const func = "omp_test_nest_lock";
1724  if ( lck->lk.initialized != lck ) {
1725  KMP_FATAL( LockIsUninitialized, func );
1726  }
1727  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1728  KMP_FATAL( LockSimpleUsedAsNestable, func );
1729  }
1730  return __kmp_test_nested_queuing_lock( lck, gtid );
1731 }
1732 
1733 void
1734 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1735 {
1736  KMP_DEBUG_ASSERT( gtid >= 0 );
1737 
1738  KMP_MB();
1739  if ( --(lck->lk.depth_locked) == 0 ) {
1740  KMP_MB();
1741  lck->lk.owner_id = 0;
1742  __kmp_release_queuing_lock( lck, gtid );
1743  }
1744 }
1745 
1746 static void
1747 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1748 {
1749  char const * const func = "omp_unset_nest_lock";
1750  KMP_MB(); /* in case another processor initialized lock */
1751  if ( lck->lk.initialized != lck ) {
1752  KMP_FATAL( LockIsUninitialized, func );
1753  }
1754  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1755  KMP_FATAL( LockSimpleUsedAsNestable, func );
1756  }
1757  if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1758  KMP_FATAL( LockUnsettingFree, func );
1759  }
1760  if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1761  KMP_FATAL( LockUnsettingSetByAnother, func );
1762  }
1763  __kmp_release_nested_queuing_lock( lck, gtid );
1764 }
1765 
1766 void
1767 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1768 {
1769  __kmp_init_queuing_lock( lck );
1770  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1771 }
1772 
1773 static void
1774 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1775 {
1776  __kmp_init_nested_queuing_lock( lck );
1777 }
1778 
1779 void
1780 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1781 {
1782  __kmp_destroy_queuing_lock( lck );
1783  lck->lk.depth_locked = 0;
1784 }
1785 
1786 static void
1787 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1788 {
1789  char const * const func = "omp_destroy_nest_lock";
1790  if ( lck->lk.initialized != lck ) {
1791  KMP_FATAL( LockIsUninitialized, func );
1792  }
1793  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1794  KMP_FATAL( LockSimpleUsedAsNestable, func );
1795  }
1796  if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1797  KMP_FATAL( LockStillOwned, func );
1798  }
1799  __kmp_destroy_nested_queuing_lock( lck );
1800 }
1801 
1802 
1803 //
1804 // access functions to fields which don't exist for all lock kinds.
1805 //
1806 
1807 static int
1808 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1809 {
1810  return lck == lck->lk.initialized;
1811 }
1812 
1813 static const ident_t *
1814 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1815 {
1816  return lck->lk.location;
1817 }
1818 
1819 static void
1820 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1821 {
1822  lck->lk.location = loc;
1823 }
1824 
1825 static kmp_lock_flags_t
1826 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1827 {
1828  return lck->lk.flags;
1829 }
1830 
1831 static void
1832 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1833 {
1834  lck->lk.flags = flags;
1835 }
1836 
1837 #if KMP_USE_ADAPTIVE_LOCKS
1838 
1839 /*
1840  RTM Adaptive locks
1841 */
1842 
1843 // TODO: Use the header for intrinsics below with the compiler 13.0
1844 //#include <immintrin.h>
1845 
1846 // Values from the status register after failed speculation.
1847 #define _XBEGIN_STARTED (~0u)
1848 #define _XABORT_EXPLICIT (1 << 0)
1849 #define _XABORT_RETRY (1 << 1)
1850 #define _XABORT_CONFLICT (1 << 2)
1851 #define _XABORT_CAPACITY (1 << 3)
1852 #define _XABORT_DEBUG (1 << 4)
1853 #define _XABORT_NESTED (1 << 5)
1854 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1855 
1856 // Aborts for which it's worth trying again immediately
1857 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1858 
1859 #define STRINGIZE_INTERNAL(arg) #arg
1860 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1861 
1862 // Access to RTM instructions
1863 
1864 /*
1865  A version of XBegin which returns -1 when speculation successfully starts, and the value of EAX on an abort.
1866  This is the same definition as the compiler intrinsic that will be supported at some point.
1867 */
1868 static __inline int _xbegin()
1869 {
1870  int res = -1;
1871 
1872 #if KMP_OS_WINDOWS
1873 #if KMP_ARCH_X86_64
1874  _asm {
1875  _emit 0xC7
1876  _emit 0xF8
1877  _emit 2
1878  _emit 0
1879  _emit 0
1880  _emit 0
1881  jmp L2
1882  mov res, eax
1883  L2:
1884  }
1885 #else /* IA32 */
1886  _asm {
1887  _emit 0xC7
1888  _emit 0xF8
1889  _emit 2
1890  _emit 0
1891  _emit 0
1892  _emit 0
1893  jmp L2
1894  mov res, eax
1895  L2:
1896  }
1897 #endif // KMP_ARCH_X86_64
1898 #else
1899  /* Note that %eax must be noted as killed (clobbered), because
1900  * the XSR is returned in %eax(%rax) on abort. Other register
1901  * values are restored, so don't need to be killed.
1902  *
1903  * We must also mark 'res' as an input and an output, since otherwise
1904  * 'res=-1' may be dropped as being dead, whereas we do need the
1905  * assignment on the successful (i.e., non-abort) path.
1906  */
1907  __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"
1908  " .long 1f-1b-6\n"
1909  " jmp 2f\n"
1910  "1: movl %%eax,%0\n"
1911  "2:"
1912  :"+r"(res)::"memory","%eax");
1913 #endif // KMP_OS_WINDOWS
1914  return res;
1915 }
1916 
1917 /*
1918  Transaction end
1919 */
1920 static __inline void _xend()
1921 {
1922 #if KMP_OS_WINDOWS
1923  __asm {
1924  _emit 0x0f
1925  _emit 0x01
1926  _emit 0xd5
1927  }
1928 #else
1929  __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
1930 #endif
1931 }
1932 
1933 /*
1934  This is a macro; the argument must be a single-byte constant that
1935  can be evaluated by the inline assembler, since it is emitted as a
1936  byte into the assembly code.
1937 */
1938 #if KMP_OS_WINDOWS
1939 #define _xabort(ARG) \
1940  _asm _emit 0xc6 \
1941  _asm _emit 0xf8 \
1942  _asm _emit ARG
1943 #else
1944 #define _xabort(ARG) \
1945  __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
1946 #endif
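//
// Illustrative sketch (annotation, not part of the original source): the canonical
// retry loop built from the primitives above, which the adaptive lock code below
// (__kmp_test_adaptive_lock_only / __kmp_release_adaptive_lock) follows. Here
// "retries" and "lock_looks_free()" are placeholders, not runtime symbols.
//
//   int retries = max_soft_retries;
//   do {
//       kmp_uint32 status = _xbegin();
//       if (status == _XBEGIN_STARTED) {
//           if (!lock_looks_free())
//               _xabort(0x01);     // jumps back to _xbegin with an abort status in EAX
//           // ... speculative critical section, finished later with _xend() ...
//           return 1;
//       }
//       if (!(status & SOFT_ABORT_MASK))
//           break;                 // hard failure: retrying immediately is pointless
//   } while (retries--);
//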
1947 
1948 //
1949 // Statistics are collected for testing purposes.
1950 //
1951 #if KMP_DEBUG_ADAPTIVE_LOCKS
1952 
1953 // We accumulate speculative lock statistics when the lock is destroyed.
1954 // We keep locks that haven't been destroyed in the liveLocks list
1955 // so that we can grab their statistics too.
1956 static kmp_adaptive_lock_statistics_t destroyedStats;
1957 
1958 // To hold the list of live locks.
1959 static kmp_adaptive_lock_info_t liveLocks;
1960 
1961 // A lock so we can safely update the list of locks.
1962 static kmp_bootstrap_lock_t chain_lock;
1963 
1964 // Initialize the list of stats.
1965 void
1966 __kmp_init_speculative_stats()
1967 {
1968  kmp_adaptive_lock_info_t *lck = &liveLocks;
1969 
1970  memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
1971  lck->stats.next = lck;
1972  lck->stats.prev = lck;
1973 
1974  KMP_ASSERT( lck->stats.next->stats.prev == lck );
1975  KMP_ASSERT( lck->stats.prev->stats.next == lck );
1976 
1977  __kmp_init_bootstrap_lock( &chain_lock );
1978 
1979 }
1980 
1981 // Insert the lock into the circular list
1982 static void
1983 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
1984 {
1985  __kmp_acquire_bootstrap_lock( &chain_lock );
1986 
1987  lck->stats.next = liveLocks.stats.next;
1988  lck->stats.prev = &liveLocks;
1989 
1990  liveLocks.stats.next = lck;
1991  lck->stats.next->stats.prev = lck;
1992 
1993  KMP_ASSERT( lck->stats.next->stats.prev == lck );
1994  KMP_ASSERT( lck->stats.prev->stats.next == lck );
1995 
1996  __kmp_release_bootstrap_lock( &chain_lock );
1997 }
1998 
1999 static void
2000 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
2001 {
2002  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2003  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2004 
2005  kmp_adaptive_lock_info_t * n = lck->stats.next;
2006  kmp_adaptive_lock_info_t * p = lck->stats.prev;
2007 
2008  n->stats.prev = p;
2009  p->stats.next = n;
2010 }
2011 
2012 static void
2013 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
2014 {
2015  memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2016  __kmp_remember_lock( lck );
2017 }
2018 
2019 static void
2020 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
2021 {
2022  kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2023 
2024  t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2025  t->successfulSpeculations += s->successfulSpeculations;
2026  t->hardFailedSpeculations += s->hardFailedSpeculations;
2027  t->softFailedSpeculations += s->softFailedSpeculations;
2028  t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2029  t->lemmingYields += s->lemmingYields;
2030 }
2031 
2032 static void
2033 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
2034 {
2035  kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2036 
2037  __kmp_acquire_bootstrap_lock( &chain_lock );
2038 
2039  __kmp_add_stats( t, lck );
2040  __kmp_forget_lock( lck );
2041 
2042  __kmp_release_bootstrap_lock( &chain_lock );
2043 }
2044 
2045 static float
2046 percent (kmp_uint32 count, kmp_uint32 total)
2047 {
2048  return (total == 0) ? 0.0: (100.0 * count)/total;
2049 }
2050 
2051 static
2052 FILE * __kmp_open_stats_file()
2053 {
2054  if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2055  return stdout;
2056 
2057  size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20;
2058  char buffer[buffLen];
2059  KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile,
2060  (kmp_int32)getpid());
2061  FILE * result = fopen(&buffer[0], "w");
2062 
2063  // Maybe we should issue a warning here...
2064  return result ? result : stdout;
2065 }
2066 
2067 void
2068 __kmp_print_speculative_stats()
2069 {
2070  if (__kmp_user_lock_kind != lk_adaptive)
2071  return;
2072 
2073  FILE * statsFile = __kmp_open_stats_file();
2074 
2075  kmp_adaptive_lock_statistics_t total = destroyedStats;
2076  kmp_adaptive_lock_info_t *lck;
2077 
2078  for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2079  __kmp_add_stats( &total, lck );
2080  }
2081  kmp_adaptive_lock_statistics_t *t = &total;
2082  kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations;
2083  kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2084  t->softFailedSpeculations;
2085 
2086  fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2087  fprintf ( statsFile, " Lock parameters: \n"
2088  " max_soft_retries : %10d\n"
2089  " max_badness : %10d\n",
2090  __kmp_adaptive_backoff_params.max_soft_retries,
2091  __kmp_adaptive_backoff_params.max_badness);
2092  fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2093  fprintf( statsFile, " Total critical sections : %10d\n", totalSections );
2094  fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n",
2095  t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2096  fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
2097  t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2098  fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields );
2099 
2100  fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations );
2101  fprintf( statsFile, " Successes : %10d (%5.1f%%)\n",
2102  t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2103  fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n",
2104  t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2105  fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n",
2106  t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2107 
2108  if (statsFile != stdout)
2109  fclose( statsFile );
2110 }
2111 
2112 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2113 #else
2114 # define KMP_INC_STAT(lck,stat)
2115 
2116 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
2117 
2118 static inline bool
2119 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2120 {
2121  // It is enough to check that the head_id is zero.
2122  // We don't also need to check the tail.
2123  bool res = lck->lk.head_id == 0;
2124 
2125  // We need a fence here, since we must ensure that no memory operations
2126  // from later in this thread float above that read.
2127 #if KMP_COMPILER_ICC
2128  _mm_mfence();
2129 #else
2130  __sync_synchronize();
2131 #endif
2132 
2133  return res;
2134 }
2135 
2136 // Functions for manipulating the badness
2137 static __inline void
2138 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
2139 {
2140  // Reset the badness to zero so we eagerly try to speculate again
2141  lck->lk.adaptive.badness = 0;
2142  KMP_INC_STAT(lck,successfulSpeculations);
2143 }
2144 
2145 // Create a bit mask with one more set bit.
2146 static __inline void
2147 __kmp_step_badness( kmp_adaptive_lock_t *lck )
2148 {
2149  kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2150  if ( newBadness > lck->lk.adaptive.max_badness) {
2151  return;
2152  } else {
2153  lck->lk.adaptive.badness = newBadness;
2154  }
2155 }
2156 
2157 // Check whether speculation should be attempted.
2158 static __inline int
2159 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2160 {
2161  kmp_uint32 badness = lck->lk.adaptive.badness;
2162  kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
2163  int res = (attempts & badness) == 0;
2164  return res;
2165 }
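//
// Worked example (annotation, not part of the original source): badness always has
// the form 2^k - 1 (0, 1, 3, 7, ...), so (acquire_attempts & badness) == 0 holds on
// one attempt in every badness+1. __kmp_step_badness doubles that interval after a
// failed speculation (capped at max_badness), and __kmp_update_badness_after_success
// collapses it back to "speculate on every attempt".
//
//   badness = 0 (binary 000) -> speculate on every attempt
//   badness = 1 (binary 001) -> speculate on every 2nd attempt
//   badness = 3 (binary 011) -> speculate on every 4th attempt
//   badness = 7 (binary 111) -> speculate on every 8th attempt
//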
2166 
2167 // Attempt to acquire only the speculative lock.
2168 // Does not back off to the non-speculative lock.
2169 //
2170 static int
2171 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2172 {
2173  int retries = lck->lk.adaptive.max_soft_retries;
2174 
2175  // We don't explicitly count the start of speculation; rather, we record
2176  // the results (success, hard fail, soft fail). The sum of all of those
2177  // is the total number of times we started speculation since all
2178  // speculations must end one of those ways.
2179  do
2180  {
2181  kmp_uint32 status = _xbegin();
2182  // Switch this in to disable actual speculation but exercise
2183  // at least some of the rest of the code. Useful for debugging...
2184  // kmp_uint32 status = _XABORT_NESTED;
2185 
2186  if (status == _XBEGIN_STARTED )
2187  { /* We have successfully started speculation
2188  * Check that no-one acquired the lock for real between when we last looked
2189  * and now. This also gets the lock cache line into our read-set,
2190  * which we need so that we'll abort if anyone later claims it for real.
2191  */
2192  if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2193  {
2194  // Lock is now visibly acquired, so someone beat us to it.
2195  // Abort the transaction so we'll restart from _xbegin with the
2196  // failure status.
2197  _xabort(0x01)
2198  KMP_ASSERT2( 0, "should not get here" );
2199  }
2200  return 1; // Lock has been acquired (speculatively)
2201  } else {
2202  // We have aborted, update the statistics
2203  if ( status & SOFT_ABORT_MASK)
2204  {
2205  KMP_INC_STAT(lck,softFailedSpeculations);
2206  // and loop round to retry.
2207  }
2208  else
2209  {
2210  KMP_INC_STAT(lck,hardFailedSpeculations);
2211  // Give up if we had a hard failure.
2212  break;
2213  }
2214  }
2215  } while( retries-- ); // Loop while we have retries, and didn't fail hard.
2216 
2217  // Either we had a hard failure or we didn't succeed softly after
2218  // the full set of attempts, so back off the badness.
2219  __kmp_step_badness( lck );
2220  return 0;
2221 }
2222 
2223 // Attempt to acquire the speculative lock, or back off to the non-speculative one
2224 // if the speculative lock cannot be acquired.
2225 // We can succeed speculatively, non-speculatively, or fail.
2226 static int
2227 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2228 {
2229  // First try to acquire the lock speculatively
2230  if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2231  return 1;
2232 
2233  // Speculative acquisition failed, so try to acquire it non-speculatively.
2234  // Count the non-speculative acquire attempt
2235  lck->lk.adaptive.acquire_attempts++;
2236 
2237  // Use base, non-speculative lock.
2238  if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
2239  {
2240  KMP_INC_STAT(lck,nonSpeculativeAcquires);
2241  return 1; // Lock is acquired (non-speculatively)
2242  }
2243  else
2244  {
2245  return 0; // Failed to acquire the lock, it's already visibly locked.
2246  }
2247 }
2248 
2249 static int
2250 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2251 {
2252  char const * const func = "omp_test_lock";
2253  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2254  KMP_FATAL( LockIsUninitialized, func );
2255  }
2256 
2257  int retval = __kmp_test_adaptive_lock( lck, gtid );
2258 
2259  if ( retval ) {
2260  lck->lk.qlk.owner_id = gtid + 1;
2261  }
2262  return retval;
2263 }
2264 
2265 // Block until we can acquire a speculative, adaptive lock.
2266 // We check whether we should be trying to speculate.
2267 // If we should be, we check the real lock to see if it is free,
2268 // and, if not, pause without attempting to acquire it until it is.
2269 // Then we try the speculative acquire.
2270 // This means that although we suffer from lemmings a little (because
2271 // we can't acquire the lock speculatively until the queue of waiting
2272 // threads has cleared), we don't get into a
2273 // state where we can never acquire the lock speculatively (because we
2274 // force the queue to clear by preventing new arrivals from entering the
2275 // queue).
2276 // This does mean that when we're trying to break lemmings, the lock
2277 // is no longer fair. However, OpenMP makes no guarantee that its
2278 // locks are fair, so this isn't a real problem.
2279 static void
2280 __kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2281 {
2282  if ( __kmp_should_speculate( lck, gtid ) )
2283  {
2284  if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2285  {
2286  if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2287  return;
2288  // We tried speculation and failed, so give up.
2289  }
2290  else
2291  {
2292  // We can't try speculation until the lock is free, so we
2293  // pause here (without suspending on the queuing lock) to allow
2294  // it to drain, and then try again.
2295  // All other threads will also see the same result for
2296  // shouldSpeculate, so will be doing the same if they
2297  // try to claim the lock from now on.
2298  while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2299  {
2300  KMP_INC_STAT(lck,lemmingYields);
2301  __kmp_yield (TRUE);
2302  }
2303 
2304  if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2305  return;
2306  }
2307  }
2308 
2309  // Speculative acquisition failed, so acquire it non-speculatively.
2310  // Count the non-speculative acquire attempt
2311  lck->lk.adaptive.acquire_attempts++;
2312 
2313  __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
2314  // We have acquired the base lock, so count that.
2315  KMP_INC_STAT(lck,nonSpeculativeAcquires );
2316 }
2317 
2318 static void
2319 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2320 {
2321  char const * const func = "omp_set_lock";
2322  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2323  KMP_FATAL( LockIsUninitialized, func );
2324  }
2325  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2326  KMP_FATAL( LockIsAlreadyOwned, func );
2327  }
2328 
2329  __kmp_acquire_adaptive_lock( lck, gtid );
2330 
2331  lck->lk.qlk.owner_id = gtid + 1;
2332 }
2333 
2334 static void
2335 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2336 {
2337  if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2338  { // If the lock doesn't look claimed we must be speculating.
2339  // (Or the user's code is buggy and they're releasing without locking;
2340  // if we had XTEST we'd be able to check that case...)
2341  _xend(); // Exit speculation
2342  __kmp_update_badness_after_success( lck );
2343  }
2344  else
2345  { // Since the lock *is* visibly locked we're not speculating,
2346  // so should use the underlying lock's release scheme.
2347  __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
2348  }
2349 }
2350 
2351 static void
2352 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2353 {
2354  char const * const func = "omp_unset_lock";
2355  KMP_MB(); /* in case another processor initialized lock */
2356  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2357  KMP_FATAL( LockIsUninitialized, func );
2358  }
2359  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2360  KMP_FATAL( LockUnsettingFree, func );
2361  }
2362  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2363  KMP_FATAL( LockUnsettingSetByAnother, func );
2364  }
2365  lck->lk.qlk.owner_id = 0;
2366  __kmp_release_adaptive_lock( lck, gtid );
2367 }
2368 
2369 static void
2370 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
2371 {
2372  __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
2373  lck->lk.adaptive.badness = 0;
2374  lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2375  lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2376  lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2377 #if KMP_DEBUG_ADAPTIVE_LOCKS
2378  __kmp_zero_speculative_stats( &lck->lk.adaptive );
2379 #endif
2380  KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2381 }
2382 
2383 static void
2384 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
2385 {
2386  __kmp_init_adaptive_lock( lck );
2387 }
2388 
2389 static void
2390 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
2391 {
2392 #if KMP_DEBUG_ADAPTIVE_LOCKS
2393  __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2394 #endif
2395  __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
2396  // Nothing needed for the speculative part.
2397 }
2398 
2399 static void
2400 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
2401 {
2402  char const * const func = "omp_destroy_lock";
2403  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2404  KMP_FATAL( LockIsUninitialized, func );
2405  }
2406  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2407  KMP_FATAL( LockStillOwned, func );
2408  }
2409  __kmp_destroy_adaptive_lock( lck );
2410 }
2411 
2412 
2413 #endif // KMP_USE_ADAPTIVE_LOCKS
2414 
2415 
2416 /* ------------------------------------------------------------------------ */
2417 /* DRDPA ticket locks */
2418 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2419 
2420 static kmp_int32
2421 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2422 {
2423  return TCR_4( lck->lk.owner_id ) - 1;
2424 }
2425 
2426 static inline bool
2427 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2428 {
2429  return lck->lk.depth_locked != -1;
2430 }
2431 
2432 __forceinline static void
2433 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2434 {
2435  kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2436  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2437  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2438  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2439  TCR_PTR(lck->lk.polls); // volatile load
2440 
2441 #ifdef USE_LOCK_PROFILE
2442  if (TCR_8(polls[ticket & mask].poll) != ticket)
2443  __kmp_printf("LOCK CONTENTION: %p\n", lck);
2444  /* else __kmp_printf( "." );*/
2445 #endif /* USE_LOCK_PROFILE */
2446 
2447  //
2448  // Now spin-wait, but reload the polls pointer and mask, in case the
2449  // polling area has been reconfigured. Unless it is reconfigured, the
2450  // reloads stay in L1 cache and are cheap.
2451  //
2452  // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2453  //
2454  // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2455  // and poll to be re-read every spin iteration.
2456  //
2457  kmp_uint32 spins;
2458 
2459  KMP_FSYNC_PREPARE(lck);
2460  KMP_INIT_YIELD(spins);
2461  while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load
2462  // If we are oversubscribed,
2463  // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
2464  // CPU Pause is in the macros for yield.
2465  //
2466  KMP_YIELD(TCR_4(__kmp_nth)
2467  > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2468  KMP_YIELD_SPIN(spins);
2469 
2470  // Re-read the mask and the poll pointer from the lock structure.
2471  //
2472  // Make certain that "mask" is read before "polls" !!!
2473  //
2474  // If another thread reconfigures the polling area and updates its
2475  // values, and we get the new value of mask but the old polls
2476  // pointer, we could access memory beyond the end of the old polling
2477  // area.
2478  //
2479  mask = TCR_8(lck->lk.mask); // volatile load
2480  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2481  TCR_PTR(lck->lk.polls); // volatile load
2482  }
2483 
2484  //
2485  // Critical section starts here
2486  //
2487  KMP_FSYNC_ACQUIRED(lck);
2488  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2489  ticket, lck));
2490  lck->lk.now_serving = ticket; // non-volatile store
2491 
2492  //
2493  // Deallocate a garbage polling area if we know that we are the last
2494  // thread that could possibly access it.
2495  //
2496  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2497  // ticket.
2498  //
2499  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2500  __kmp_free((void *)lck->lk.old_polls);
2501  lck->lk.old_polls = NULL;
2502  lck->lk.cleanup_ticket = 0;
2503  }
2504 
2505  //
2506  // Check to see if we should reconfigure the polling area.
2507  // If there is still a garbage polling area to be deallocated from a
2508  // previous reconfiguration, let a later thread reconfigure it.
2509  //
2510  if (lck->lk.old_polls == NULL) {
2511  bool reconfigure = false;
2512  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2513  kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2514 
2515  if (TCR_4(__kmp_nth)
2516  > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2517  //
2518  // We are in oversubscription mode. Contract the polling area
2519  // down to a single location, if that hasn't been done already.
2520  //
2521  if (num_polls > 1) {
2522  reconfigure = true;
2524  mask = 0;
2525  num_polls = 1;
2526  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2527  __kmp_allocate(num_polls * sizeof(*polls));
2528  polls[0].poll = ticket;
2529  }
2530  }
2531  else {
2532  //
2533  // We are in under/fully subscribed mode. Check the number of
2534  // threads waiting on the lock. The size of the polling area
2535  // should be at least the number of threads waiting.
2536  //
2537  kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2538  if (num_waiting > num_polls) {
2539  kmp_uint32 old_num_polls = num_polls;
2540  reconfigure = true;
2541  do {
2542  mask = (mask << 1) | 1;
2543  num_polls *= 2;
2544  } while (num_polls <= num_waiting);
2545 
2546  //
2547  // Allocate the new polling area, and copy the relevant portion
2548  // of the old polling area to the new area. __kmp_allocate()
2549  // zeroes the memory it allocates, and most of the old area is
2550  // just zero padding, so we only copy the release counters.
2551  //
2552  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2553  __kmp_allocate(num_polls * sizeof(*polls));
2554  kmp_uint32 i;
2555  for (i = 0; i < old_num_polls; i++) {
2556  polls[i].poll = old_polls[i].poll;
2557  }
2558  }
2559  }
2560 
2561  if (reconfigure) {
2562  //
2563  // Now write the updated fields back to the lock structure.
2564  //
2565  // Make certain that "polls" is written before "mask" !!!
2566  //
2567  // If another thread picks up the new value of mask and the old
2568  // polls pointer, it could access memory beyond the end of the
2569  // old polling area.
2570  //
2571  // On x86, we need memory fences.
2572  //
2573  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2574  ticket, lck, num_polls));
2575 
2576  lck->lk.old_polls = old_polls; // non-volatile store
2577  lck->lk.polls = polls; // volatile store
2578 
2579  KMP_MB();
2580 
2581  lck->lk.num_polls = num_polls; // non-volatile store
2582  lck->lk.mask = mask; // volatile store
2583 
2584  KMP_MB();
2585 
2586  //
2587  // Only after the new polling area and mask have been flushed
2588  // to main memory can we update the cleanup ticket field.
2589  //
2590  // volatile load / non-volatile store
2591  //
2592  lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2593  }
2594  }
2595 }
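//
// Worked example (annotation, not part of the original source): with num_polls = 4
// the mask is 3, so the holder of ticket t spins on polls[t & 3]; the release path
// of ticket t stores t+1 into polls[(t+1) & 3], which is exactly the slot the next
// waiter is watching. If the number of waiters ever exceeds num_polls, the
// reconfiguration above doubles num_polls and extends the mask (3 -> 7 -> 15 ...)
// so that concurrent waiters spin on distinct polling slots instead of one shared
// location; under oversubscription it contracts back to a single slot.
//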
2596 
2597 void
2598 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2599 {
2600  __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2601 }
2602 
2603 static void
2604 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2605 {
2606  char const * const func = "omp_set_lock";
2607  if ( lck->lk.initialized != lck ) {
2608  KMP_FATAL( LockIsUninitialized, func );
2609  }
2610  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2611  KMP_FATAL( LockNestableUsedAsSimple, func );
2612  }
2613  if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2614  KMP_FATAL( LockIsAlreadyOwned, func );
2615  }
2616 
2617  __kmp_acquire_drdpa_lock( lck, gtid );
2618 
2619  lck->lk.owner_id = gtid + 1;
2620 }
2621 
2622 int
2623 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2624 {
2625  //
2626  // First get a ticket, then read the polls pointer and the mask.
2627  // The polls pointer must be read before the mask!!! (See above)
2628  //
2629  kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load
2630  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2631  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2632  TCR_PTR(lck->lk.polls); // volatile load
2633  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2634  if (TCR_8(polls[ticket & mask].poll) == ticket) {
2635  kmp_uint64 next_ticket = ticket + 1;
2636  if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2637  ticket, next_ticket)) {
2638  KMP_FSYNC_ACQUIRED(lck);
2639  KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2640  ticket, lck));
2641  lck->lk.now_serving = ticket; // non-volatile store
2642 
2643  //
2644  // Since no threads are waiting, there is no possibility that
2645  // we would want to reconfigure the polling area. We might
2646  // have the cleanup ticket value (which says that it is now
2647  // safe to deallocate old_polls), but we'll let a later thread
2648  // which calls __kmp_acquire_lock do that - this routine
2649  // isn't supposed to block, and we would risk blocking if we
2650  // called __kmp_free() to do the deallocation.
2651  //
2652  return TRUE;
2653  }
2654  }
2655  return FALSE;
2656 }
2657 
2658 static int
2659 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2660 {
2661  char const * const func = "omp_test_lock";
2662  if ( lck->lk.initialized != lck ) {
2663  KMP_FATAL( LockIsUninitialized, func );
2664  }
2665  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2666  KMP_FATAL( LockNestableUsedAsSimple, func );
2667  }
2668 
2669  int retval = __kmp_test_drdpa_lock( lck, gtid );
2670 
2671  if ( retval ) {
2672  lck->lk.owner_id = gtid + 1;
2673  }
2674  return retval;
2675 }
2676 
2677 void
2678 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2679 {
2680  //
2681  // Read the ticket value from the lock data struct, then the polls
2682  // pointer and the mask. The polls pointer must be read before the
2683  // mask!!! (See above)
2684  //
2685  kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load
2686  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2687  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2688  TCR_PTR(lck->lk.polls); // volatile load
2689  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2690  KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2691  ticket - 1, lck));
2692  KMP_FSYNC_RELEASING(lck);
2693  KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
2694 }
2695 
2696 static void
2697 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2698 {
2699  char const * const func = "omp_unset_lock";
2700  KMP_MB(); /* in case another processor initialized lock */
2701  if ( lck->lk.initialized != lck ) {
2702  KMP_FATAL( LockIsUninitialized, func );
2703  }
2704  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2705  KMP_FATAL( LockNestableUsedAsSimple, func );
2706  }
2707  if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2708  KMP_FATAL( LockUnsettingFree, func );
2709  }
2710  if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2711  && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2712  KMP_FATAL( LockUnsettingSetByAnother, func );
2713  }
2714  lck->lk.owner_id = 0;
2715  __kmp_release_drdpa_lock( lck, gtid );
2716 }
2717 
2718 void
2719 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2720 {
2721  lck->lk.location = NULL;
2722  lck->lk.mask = 0;
2723  lck->lk.num_polls = 1;
2724  lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2725  __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2726  lck->lk.cleanup_ticket = 0;
2727  lck->lk.old_polls = NULL;
2728  lck->lk.next_ticket = 0;
2729  lck->lk.now_serving = 0;
2730  lck->lk.owner_id = 0; // no thread owns the lock.
2731  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2732  lck->lk.initialized = lck;
2733 
2734  KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2735 }
2736 
2737 static void
2738 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2739 {
2740  __kmp_init_drdpa_lock( lck );
2741 }
2742 
2743 void
2744 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2745 {
2746  lck->lk.initialized = NULL;
2747  lck->lk.location = NULL;
2748  if (lck->lk.polls != NULL) {
2749  __kmp_free((void *)lck->lk.polls);
2750  lck->lk.polls = NULL;
2751  }
2752  if (lck->lk.old_polls != NULL) {
2753  __kmp_free((void *)lck->lk.old_polls);
2754  lck->lk.old_polls = NULL;
2755  }
2756  lck->lk.mask = 0;
2757  lck->lk.num_polls = 0;
2758  lck->lk.cleanup_ticket = 0;
2759  lck->lk.next_ticket = 0;
2760  lck->lk.now_serving = 0;
2761  lck->lk.owner_id = 0;
2762  lck->lk.depth_locked = -1;
2763 }
2764 
2765 static void
2766 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2767 {
2768  char const * const func = "omp_destroy_lock";
2769  if ( lck->lk.initialized != lck ) {
2770  KMP_FATAL( LockIsUninitialized, func );
2771  }
2772  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2773  KMP_FATAL( LockNestableUsedAsSimple, func );
2774  }
2775  if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2776  KMP_FATAL( LockStillOwned, func );
2777  }
2778  __kmp_destroy_drdpa_lock( lck );
2779 }
2780 
2781 
2782 //
2783 // nested drdpa ticket locks
2784 //
2785 
2786 void
2787 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2788 {
2789  KMP_DEBUG_ASSERT( gtid >= 0 );
2790 
2791  if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2792  lck->lk.depth_locked += 1;
2793  }
2794  else {
2795  __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2796  KMP_MB();
2797  lck->lk.depth_locked = 1;
2798  KMP_MB();
2799  lck->lk.owner_id = gtid + 1;
2800  }
2801 }
2802 
2803 static void
2804 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2805 {
2806  char const * const func = "omp_set_nest_lock";
2807  if ( lck->lk.initialized != lck ) {
2808  KMP_FATAL( LockIsUninitialized, func );
2809  }
2810  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2811  KMP_FATAL( LockSimpleUsedAsNestable, func );
2812  }
2813  __kmp_acquire_nested_drdpa_lock( lck, gtid );
2814 }
2815 
2816 int
2817 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2818 {
2819  int retval;
2820 
2821  KMP_DEBUG_ASSERT( gtid >= 0 );
2822 
2823  if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2824  retval = ++lck->lk.depth_locked;
2825  }
2826  else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2827  retval = 0;
2828  }
2829  else {
2830  KMP_MB();
2831  retval = lck->lk.depth_locked = 1;
2832  KMP_MB();
2833  lck->lk.owner_id = gtid + 1;
2834  }
2835  return retval;
2836 }
2837 
2838 static int
2839 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2840 {
2841  char const * const func = "omp_test_nest_lock";
2842  if ( lck->lk.initialized != lck ) {
2843  KMP_FATAL( LockIsUninitialized, func );
2844  }
2845  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2846  KMP_FATAL( LockSimpleUsedAsNestable, func );
2847  }
2848  return __kmp_test_nested_drdpa_lock( lck, gtid );
2849 }
2850 
2851 void
2852 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2853 {
2854  KMP_DEBUG_ASSERT( gtid >= 0 );
2855 
2856  KMP_MB();
2857  if ( --(lck->lk.depth_locked) == 0 ) {
2858  KMP_MB();
2859  lck->lk.owner_id = 0;
2860  __kmp_release_drdpa_lock( lck, gtid );
2861  }
2862 }
2863 
2864 static void
2865 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2866 {
2867  char const * const func = "omp_unset_nest_lock";
2868  KMP_MB(); /* in case another processor initialized lock */
2869  if ( lck->lk.initialized != lck ) {
2870  KMP_FATAL( LockIsUninitialized, func );
2871  }
2872  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2873  KMP_FATAL( LockSimpleUsedAsNestable, func );
2874  }
2875  if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2876  KMP_FATAL( LockUnsettingFree, func );
2877  }
2878  if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2879  KMP_FATAL( LockUnsettingSetByAnother, func );
2880  }
2881  __kmp_release_nested_drdpa_lock( lck, gtid );
2882 }
2883 
2884 void
2885 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2886 {
2887  __kmp_init_drdpa_lock( lck );
2888  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2889 }
2890 
2891 static void
2892 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2893 {
2894  __kmp_init_nested_drdpa_lock( lck );
2895 }
2896 
2897 void
2898 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2899 {
2900  __kmp_destroy_drdpa_lock( lck );
2901  lck->lk.depth_locked = 0;
2902 }
2903 
2904 static void
2905 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2906 {
2907  char const * const func = "omp_destroy_nest_lock";
2908  if ( lck->lk.initialized != lck ) {
2909  KMP_FATAL( LockIsUninitialized, func );
2910  }
2911  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2912  KMP_FATAL( LockSimpleUsedAsNestable, func );
2913  }
2914  if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2915  KMP_FATAL( LockStillOwned, func );
2916  }
2917  __kmp_destroy_nested_drdpa_lock( lck );
2918 }
2919 
2920 
2921 //
2922 // access functions to fields which don't exist for all lock kinds.
2923 //
2924 
2925 static int
2926 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
2927 {
2928  return lck == lck->lk.initialized;
2929 }
2930 
2931 static const ident_t *
2932 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
2933 {
2934  return lck->lk.location;
2935 }
2936 
2937 static void
2938 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
2939 {
2940  lck->lk.location = loc;
2941 }
2942 
2943 static kmp_lock_flags_t
2944 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
2945 {
2946  return lck->lk.flags;
2947 }
2948 
2949 static void
2950 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
2951 {
2952  lck->lk.flags = flags;
2953 }
2954 
2955 #if KMP_USE_DYNAMIC_LOCK
2956 
2957 // Definitions of lock hints.
2958 # ifndef __OMP_H
2959 typedef enum kmp_lock_hint_t {
2960  kmp_lock_hint_none = 0,
2961  kmp_lock_hint_contended,
2962  kmp_lock_hint_uncontended,
2963  kmp_lock_hint_nonspeculative,
2964  kmp_lock_hint_speculative,
2965  kmp_lock_hint_adaptive,
2966 } kmp_lock_hint_t;
2967 # endif
2968 
2969 // Direct lock initializers. Each simply writes a tag to the low 8 bits of the lock word.
2970 #define expand_init_lock(l, a) \
2971 static void init_##l##_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) { \
2972  *lck = DYNA_LOCK_FREE(l); \
2973  KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck)); \
2974 }
2975 FOREACH_D_LOCK(expand_init_lock, 0)
2976 #undef expand_init_lock
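//
// Illustrative expansion (annotation, not part of the original source), assuming
// the test-and-set ("tas") lock is one of the kinds enumerated by FOREACH_D_LOCK:
//
//   static void init_tas_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) {
//       *lck = DYNA_LOCK_FREE(tas);    // store the tas tag into the lock word
//       KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck));
//   }
//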
2977 
2978 #if DYNA_HAS_HLE
2979 
2980 // HLE lock functions - imported from the testbed runtime.
2981 #if KMP_MIC
2982 # define machine_pause() _mm_delay_32(10) // TODO: find the right argument
2983 #else
2984 # define machine_pause() _mm_pause()
2985 #endif
2986 #define HLE_ACQUIRE ".byte 0xf2;"
2987 #define HLE_RELEASE ".byte 0xf3;"
2988 
2989 static inline kmp_uint32
2990 swap4(kmp_uint32 volatile *p, kmp_uint32 v)
2991 {
2992  __asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
2993  : "+r"(v), "+m"(*p)
2994  :
2995  : "memory");
2996  return v;
2997 }
2998 
2999 static void
3000 __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
3001 {
3002  *lck = 0;
3003 }
3004 
3005 static void
3006 __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3007 {
3008  // Use gtid for DYNA_LOCK_BUSY if necessary
3009  if (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle)) {
3010  int delay = 1;
3011  do {
3012  while (*(kmp_uint32 volatile *)lck != DYNA_LOCK_FREE(hle)) {
3013  for (int i = delay; i != 0; --i)
3014  machine_pause();
3015  delay = ((delay << 1) | 1) & 7;
3016  }
3017  } while (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle));
3018  }
3019 }
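//
// Annotation (not part of the original source): the retry delay above follows the
// capped sequence 1, 3, 7, 7, 7, ... pause iterations, because ((d << 1) | 1) & 7
// saturates at 7. That gives a short exponential backoff while the lock word stays
// busy before the HLE-elided exchange is attempted again.
//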
3020 
3021 static void
3022 __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3023 {
3024  __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
3025 }
3026 
3027 static void
3028 __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3029 {
3030  __asm__ volatile(HLE_RELEASE "movl %1,%0"
3031  : "=m"(*lck)
3032  : "r"(DYNA_LOCK_FREE(hle))
3033  : "memory");
3034 }
3035 
3036 static void
3037 __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3038 {
3039  __kmp_release_hle_lock(lck, gtid); // TODO: add checks
3040 }
3041 
3042 static int
3043 __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3044 {
3045  return swap4(lck, DYNA_LOCK_BUSY(1, hle)) == DYNA_LOCK_FREE(hle);
3046 }
3047 
3048 static int
3049 __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3050 {
3051  return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
3052 }
3053 
3054 #endif // DYNA_HAS_HLE
3055 
3056 // Entry functions for indirect locks (first element of direct_*_ops[]).
3057 static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
3058 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
3059 static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3060 static void __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3061 static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3062 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3063 static void __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3064 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3065 
3066 //
3067 // Jump tables for the indirect lock functions.
3068 // Only fill in the odd entries; that avoids the need to shift out the low bit.
3069 //
3070 #define expand_func0(l, op) 0,op##_##l##_##lock,
3071 void (*__kmp_direct_init_ops[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
3072  = { __kmp_init_indirect_lock, 0, FOREACH_D_LOCK(expand_func0, init) };
3073 
3074 #define expand_func1(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_##lock,
3075 void (*__kmp_direct_destroy_ops[])(kmp_dyna_lock_t *)
3076  = { __kmp_destroy_indirect_lock, 0, FOREACH_D_LOCK(expand_func1, destroy) };
3077 
3078 // Differentiates *lock and *lock_with_checks.
3079 #define expand_func2(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock,
3080 #define expand_func2c(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3081 static void (*direct_set_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3082  = { { __kmp_set_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, acquire) },
3083  { __kmp_set_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, acquire) } };
3084 static void (*direct_unset_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3085  = { { __kmp_unset_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, release) },
3086  { __kmp_unset_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, release) } };
3087 
3088 #define expand_func3(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock,
3089 #define expand_func3c(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3090 static int (*direct_test_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3091  = { { __kmp_test_indirect_lock, 0, FOREACH_D_LOCK(expand_func3, test) },
3092  { __kmp_test_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func3c, test) } };
3093 
3094 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3095 void (*(*__kmp_direct_set_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
3096 void (*(*__kmp_direct_unset_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
3097 int (*(*__kmp_direct_test_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
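//
// Annotation (not part of the original source): this layout pairs with the lock-word
// encoding used elsewhere in this file -- indirect lock words are kept even
// (index << 1, see __kmp_allocate_indirect_lock below), while a direct lock word
// carries its tag in the low bits with bit 0 set. The dispatch can therefore index
// these tables with the low bits of the lock word as-is: odd slots hold the direct
// lock functions, and slot 0 routes to the indirect entry functions declared above.
//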
3098 
3099 //
3100 // Jump tables for the indirect lock functions.
3101 //
3102 #define expand_func4(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
3103 void (*__kmp_indirect_init_ops[])(kmp_user_lock_p)
3104  = { FOREACH_I_LOCK(expand_func4, init) };
3105 void (*__kmp_indirect_destroy_ops[])(kmp_user_lock_p)
3106  = { FOREACH_I_LOCK(expand_func4, destroy) };
3107 
3108 // Differentiates *lock and *lock_with_checks.
3109 #define expand_func5(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3110 #define expand_func5c(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3111 static void (*indirect_set_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3112  = { { FOREACH_I_LOCK(expand_func5, acquire) },
3113  { FOREACH_I_LOCK(expand_func5c, acquire) } };
3114 static void (*indirect_unset_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3115  = { { FOREACH_I_LOCK(expand_func5, release) },
3116  { FOREACH_I_LOCK(expand_func5c, release) } };
3117 
3118 #define expand_func6(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3119 #define expand_func6c(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3120 static int (*indirect_test_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3121  = { { FOREACH_I_LOCK(expand_func6, test) },
3122  { FOREACH_I_LOCK(expand_func6c, test) } };
3123 
3124 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3125 void (*(*__kmp_indirect_set_ops))(kmp_user_lock_p, kmp_int32) = 0;
3126 void (*(*__kmp_indirect_unset_ops))(kmp_user_lock_p, kmp_int32) = 0;
3127 int (*(*__kmp_indirect_test_ops))(kmp_user_lock_p, kmp_int32) = 0;
3128 
3129 // Lock index table.
3130 kmp_indirect_lock_t **__kmp_indirect_lock_table;
3131 kmp_lock_index_t __kmp_indirect_lock_table_size;
3132 kmp_lock_index_t __kmp_indirect_lock_table_next;
3133 
3134 // Size of indirect locks.
3135 static kmp_uint32 __kmp_indirect_lock_size[DYNA_NUM_I_LOCKS] = {
3136  sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t),
3137 #if KMP_USE_ADAPTIVE_LOCKS
3138  sizeof(kmp_adaptive_lock_t),
3139 #endif
3140  sizeof(kmp_drdpa_lock_t),
3141  sizeof(kmp_tas_lock_t),
3142 #if DYNA_HAS_FUTEX
3143  sizeof(kmp_futex_lock_t),
3144 #endif
3145  sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t),
3146  sizeof(kmp_drdpa_lock_t)
3147 };
3148 
3149 // Jump tables for lock accessor/modifier.
3150 void (*__kmp_indirect_set_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
3151 void (*__kmp_indirect_set_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
3152 const ident_t * (*__kmp_indirect_get_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3153 kmp_lock_flags_t (*__kmp_indirect_get_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3154 
3155 // Use different lock pools for different lock types.
3156 static kmp_indirect_lock_t * __kmp_indirect_lock_pool[DYNA_NUM_I_LOCKS] = { 0 };
3157 
3158 // Inserts the given lock ptr into the lock table.
3159 kmp_lock_index_t
3160 __kmp_insert_indirect_lock(kmp_indirect_lock_t *lck)
3161 {
3162  kmp_lock_index_t next = __kmp_indirect_lock_table_next;
3163  // Check capacity and double the size if required
3164  if (next >= __kmp_indirect_lock_table_size) {
3165  // Double the table and copy over the existing entries.
3166  kmp_lock_index_t size = __kmp_indirect_lock_table_size;
3167  kmp_indirect_lock_t **old_table = __kmp_indirect_lock_table;
3168  __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(2*next*sizeof(kmp_indirect_lock_t *));
3169  KMP_MEMCPY(__kmp_indirect_lock_table, old_table, size*sizeof(kmp_indirect_lock_t *));
3170  __kmp_free(old_table);
3171  __kmp_indirect_lock_table_size = 2*next;
3172  }
3173  // Insert lck to the table and return the index.
3174  __kmp_indirect_lock_table[next] = lck;
3175  __kmp_indirect_lock_table_next++;
3176  return next;
3177 }
3178 
3179 // User lock allocator for dynamically dispatched locks.
3180 kmp_indirect_lock_t *
3181 __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
3182 {
3183  kmp_indirect_lock_t *lck;
3184  kmp_lock_index_t idx;
3185 
3186  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3187 
3188  if (__kmp_indirect_lock_pool[tag] != NULL) {
3189  lck = __kmp_indirect_lock_pool[tag];
3190  if (OMP_LOCK_T_SIZE < sizeof(void *))
3191  idx = lck->lock->pool.index;
3192  __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3193  } else {
3194  lck = (kmp_indirect_lock_t *)__kmp_allocate(sizeof(kmp_indirect_lock_t));
3195  lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3196  if (OMP_LOCK_T_SIZE < sizeof(void *))
3197  idx = __kmp_insert_indirect_lock(lck);
3198  }
3199 
3200  __kmp_release_lock(&__kmp_global_lock, gtid);
3201 
3202  lck->type = tag;
3203 
3204  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3205  *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
3206  } else {
3207  *((kmp_indirect_lock_t **)user_lock) = lck;
3208  }
3209 
3210  return lck;
3211 }
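//
// Illustrative sketch (annotation, not part of the original source): how a consumer
// decodes the user lock word written above; decode_user_lock is a hypothetical
// helper that mirrors what __kmp_lookup_indirect_lock / DYNA_EXTRACT_I_INDEX do.
//
//   static kmp_indirect_lock_t *decode_user_lock(void **user_lock) {
//       if (OMP_LOCK_T_SIZE < sizeof(void *)) {
//           // omp_lock_t is too small to hold a pointer: the word stores (index << 1),
//           // so shift the low bit back out and consult the lock table.
//           kmp_lock_index_t idx = *((kmp_lock_index_t *)user_lock) >> 1;
//           return __kmp_indirect_lock_table[idx];
//       }
//       // Otherwise the word is the kmp_indirect_lock_t pointer itself.
//       return *((kmp_indirect_lock_t **)user_lock);
//   }
//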
3212 
3213 // User lock lookup for dynamically dispatched locks.
3214 static __forceinline
3215 kmp_indirect_lock_t *
3216 __kmp_lookup_indirect_lock(void **user_lock, const char *func)
3217 {
3218  if (__kmp_env_consistency_check) {
3219  kmp_indirect_lock_t *lck = NULL;
3220  if (user_lock == NULL) {
3221  KMP_FATAL(LockIsUninitialized, func);
3222  }
3223  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3224  kmp_lock_index_t idx = DYNA_EXTRACT_I_INDEX(user_lock);
3225  if (idx < 0 || idx >= __kmp_indirect_lock_table_size) {
3226  KMP_FATAL(LockIsUninitialized, func);
3227  }
3228  lck = __kmp_indirect_lock_table[idx];
3229  } else {
3230  lck = *((kmp_indirect_lock_t **)user_lock);
3231  }
3232  if (lck == NULL) {
3233  KMP_FATAL(LockIsUninitialized, func);
3234  }
3235  return lck;
3236  } else {
3237  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3238  return __kmp_indirect_lock_table[DYNA_EXTRACT_I_INDEX(user_lock)];
3239  } else {
3240  return *((kmp_indirect_lock_t **)user_lock);
3241  }
3242  }
3243 }
3244 
3245 static void
3246 __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
3247 {
3248 #if KMP_USE_ADAPTIVE_LOCKS
3249  if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3250  KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3251  seq = lockseq_queuing;
3252  }
3253 #endif
3254  kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq);
3255  kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3256  DYNA_I_LOCK_FUNC(l, init)(l->lock);
3257  KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock, tag = %x\n", l->type));
3258 }
3259 
3260 static void
3261 __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
3262 {
3263  kmp_uint32 gtid = __kmp_entry_gtid();
3264  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3265  DYNA_I_LOCK_FUNC(l, destroy)(l->lock);
3266  kmp_indirect_locktag_t tag = l->type;
3267 
3268  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3269 
3270  // Use the base lock's space to keep the pool chain.
3271  l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3272  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3273  l->lock->pool.index = DYNA_EXTRACT_I_INDEX(lock);
3274  }
3275  __kmp_indirect_lock_pool[tag] = l;
3276 
3277  __kmp_release_lock(&__kmp_global_lock, gtid);
3278 }
3279 
3280 static void
3281 __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3282 {
3283  kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3284  DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid);
3285 }
3286 
3287 static void
3288 __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3289 {
3290  kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3291  DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3292 }
3293 
3294 static int
3295 __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3296 {
3297  kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3298  return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid);
3299 }
3300 
3301 static void
3302 __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3303 {
3304  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3305  DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid);
3306 }
3307 
3308 static void
3309 __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3310 {
3311  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3312  DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3313 }
3314 
3315 static int
3316 __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3317 {
3318  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3319  return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid);
3320 }
3321 
3322 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3323 
3324 // Initialize a hinted lock.
3325 void
3326 __kmp_init_lock_hinted(void **lock, int hint)
3327 {
3328  kmp_dyna_lockseq_t seq;
3329  switch (hint) {
3330  case kmp_lock_hint_uncontended:
3331  seq = lockseq_tas;
3332  break;
3333  case kmp_lock_hint_speculative:
3334 #if DYNA_HAS_HLE
3335  seq = lockseq_hle;
3336 #else
3337  seq = lockseq_tas;
3338 #endif
3339  break;
3340  case kmp_lock_hint_adaptive:
3341 #if KMP_USE_ADAPTIVE_LOCKS
3342  seq = lockseq_adaptive;
3343 #else
3344  seq = lockseq_queuing;
3345 #endif
3346  break;
3347  // Defaults to queuing locks.
3348  case kmp_lock_hint_contended:
3349  case kmp_lock_hint_nonspeculative:
3350  default:
3351  seq = lockseq_queuing;
3352  break;
3353  }
3354  if (DYNA_IS_D_LOCK(seq)) {
3355  DYNA_INIT_D_LOCK(lock, seq);
3356 #if USE_ITT_BUILD
3357  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
3358 #endif
3359  } else {
3360  DYNA_INIT_I_LOCK(lock, seq);
3361 #if USE_ITT_BUILD
3362  kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
3363  __kmp_itt_lock_creating(ilk->lock, NULL);
3364 #endif
3365  }
3366 }
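//
// Illustrative usage (annotation, not part of the original source): requesting a
// speculative lock through the hint interface. If HLE is not available the switch
// above quietly degrades the hint to a TAS lock, so the caller's code is unchanged.
// Afterwards the lock word is driven through the jump tables selected in
// __kmp_init_dynamic_user_locks().
//
//   void hinted_lock_example(void **lock_storage) {
//       __kmp_init_lock_hinted(lock_storage, kmp_lock_hint_speculative);
//   }
//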
3367 
3368 // This is used only in kmp_error.c when consistency checking is on.
3369 kmp_int32
3370 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
3371 {
3372  switch (seq) {
3373  case lockseq_tas:
3374  case lockseq_nested_tas:
3375  return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3376 #if DYNA_HAS_FUTEX
3377  case lockseq_futex:
3378  case lockseq_nested_futex:
3379  return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3380 #endif
3381  case lockseq_ticket:
3382  case lockseq_nested_ticket:
3383  return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3384  case lockseq_queuing:
3385  case lockseq_nested_queuing:
3386 #if KMP_USE_ADAPTIVE_LOCKS
3387  case lockseq_adaptive:
3388  return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3389 #endif
3390  case lockseq_drdpa:
3391  case lockseq_nested_drdpa:
3392  return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3393  default:
3394  return 0;
3395  }
3396 }
3397 
3398 // The value initialized from KMP_LOCK_KIND needs to be translated to its
3399 // nested version.
3400 void
3401 __kmp_init_nest_lock_hinted(void **lock, int hint)
3402 {
3403  kmp_dyna_lockseq_t seq;
3404  switch (hint) {
3405  case kmp_lock_hint_uncontended:
3406  seq = lockseq_nested_tas;
3407  break;
3408  // Defaults to queuing locks.
3409  case kmp_lock_hint_contended:
3410  case kmp_lock_hint_nonspeculative:
3411  default:
3412  seq = lockseq_nested_queuing;
3413  break;
3414  }
3415  DYNA_INIT_I_LOCK(lock, seq);
3416 #if USE_ITT_BUILD
3417  kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
3418  __kmp_itt_lock_creating(ilk->lock, NULL);
3419 #endif
3420 }
3421 
3422 // Initializes the lock table for indirect locks.
3423 static void
3424 __kmp_init_indirect_lock_table()
3425 {
3426  __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)*1024);
3427  __kmp_indirect_lock_table_size = 1024;
3428  __kmp_indirect_lock_table_next = 0;
3429 }
3430 
3431 #if KMP_USE_ADAPTIVE_LOCKS
3432 # define init_lock_func(table, expand) { \
3433  table[locktag_ticket] = expand(ticket); \
3434  table[locktag_queuing] = expand(queuing); \
3435  table[locktag_adaptive] = expand(queuing); \
3436  table[locktag_drdpa] = expand(drdpa); \
3437  table[locktag_nested_ticket] = expand(ticket); \
3438  table[locktag_nested_queuing] = expand(queuing); \
3439  table[locktag_nested_drdpa] = expand(drdpa); \
3440 }
3441 #else
3442 # define init_lock_func(table, expand) { \
3443  table[locktag_ticket] = expand(ticket); \
3444  table[locktag_queuing] = expand(queuing); \
3445  table[locktag_drdpa] = expand(drdpa); \
3446  table[locktag_nested_ticket] = expand(ticket); \
3447  table[locktag_nested_queuing] = expand(queuing); \
3448  table[locktag_nested_drdpa] = expand(drdpa); \
3449 }
3450 #endif // KMP_USE_ADAPTIVE_LOCKS
3451 
3452 // Initializes data for dynamic user locks.
3453 void
3454 __kmp_init_dynamic_user_locks()
3455 {
3456  // Initialize jump table location
3457  int offset = (__kmp_env_consistency_check)? 1: 0;
3458  __kmp_direct_set_ops = direct_set_tab[offset];
3459  __kmp_direct_unset_ops = direct_unset_tab[offset];
3460  __kmp_direct_test_ops = direct_test_tab[offset];
3461  __kmp_indirect_set_ops = indirect_set_tab[offset];
3462  __kmp_indirect_unset_ops = indirect_unset_tab[offset];
3463  __kmp_indirect_test_ops = indirect_test_tab[offset];
3464  __kmp_init_indirect_lock_table();
3465 
3466  // Initialize lock accessor/modifier
3467  // Could have used designated initializer, but -TP /Qstd=c99 did not work with icl.exe.
3468 #define expand_func(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
3469  init_lock_func(__kmp_indirect_set_location, expand_func);
3470 #undef expand_func
3471 #define expand_func(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
3472  init_lock_func(__kmp_indirect_set_flags, expand_func);
3473 #undef expand_func
3474 #define expand_func(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
3475  init_lock_func(__kmp_indirect_get_location, expand_func);
3476 #undef expand_func
3477 #define expand_func(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
3478  init_lock_func(__kmp_indirect_get_flags, expand_func);
3479 #undef expand_func
3480 
3481  __kmp_init_user_locks = TRUE;
3482 }
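
// Editor's note: a hedged sketch of the "pick a row of the jump table based on
// whether consistency checking is enabled" idiom used at the top of
// __kmp_init_dynamic_user_locks. Row 0 holds the fast entry points, row 1 the
// checked ones. All names here are hypothetical.
#if 0 // illustrative sketch only
typedef int (*set_fn_t)(void *lock, int gtid);

static int set_fast(void *l, int g)    { (void)l; (void)g; return 0; }
static int set_checked(void *l, int g) { (void)l; (void)g; return 0; }

static set_fn_t const set_tab[2] = { set_fast, set_checked };
static set_fn_t active_set_fn;

static void init_dispatch(int consistency_check)
{
    int offset = consistency_check ? 1 : 0;   // 0 = fast, 1 = with checks
    active_set_fn = set_tab[offset];
}
#endif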
3483 
3484 // Clean up the indirect lock pools and the indirect lock table.
3485 void
3486 __kmp_cleanup_indirect_user_locks()
3487 {
3488  kmp_lock_index_t i;
3489  int k;
3490 
3491  // Clean up locks in the pools first (they were already destroyed before going into the pools).
3492  for (k = 0; k < DYNA_NUM_I_LOCKS; ++k) {
3493  kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3494  while (l != NULL) {
3495  kmp_indirect_lock_t *ll = l;
3496  l = (kmp_indirect_lock_t *)l->lock->pool.next;
3497  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3498  __kmp_indirect_lock_table[ll->lock->pool.index] = NULL;
3499  }
3500  __kmp_free(ll->lock);
3501  __kmp_free(ll);
3502  }
3503  }
3504  // Clean up the remaining undestroyed locks.
3505  for (i = 0; i < __kmp_indirect_lock_table_next; i++) {
3506  kmp_indirect_lock_t *l = __kmp_indirect_lock_table[i];
3507  if (l != NULL) {
3508  // Locks not destroyed explicitly need to be destroyed here.
3509  DYNA_I_LOCK_FUNC(l, destroy)(l->lock);
3510  __kmp_free(l->lock);
3511  __kmp_free(l);
3512  }
3513  }
3514  // Free the table
3515  __kmp_free(__kmp_indirect_lock_table);
3516 
3517  __kmp_init_user_locks = FALSE;
3518 }
3519 
3520 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3521 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3522 
3523 #else // KMP_USE_DYNAMIC_LOCK
3524 
3525 /* ------------------------------------------------------------------------ */
3526 /* user locks
3527  *
3528  * They are implemented as a table of function pointers which are set to the
3529  * lock functions of the appropriate kind, once that has been determined.
3530  */
3531 
3532 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3533 
3534 size_t __kmp_base_user_lock_size = 0;
3535 size_t __kmp_user_lock_size = 0;
3536 
3537 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
3538 void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3539 
3540 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3541 void ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3542 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3543 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3544 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3545 void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3546 
3547 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3548 void ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3549 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3550 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3551 
3552 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3553 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3554 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3555 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3556 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
3557 
3558 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3559 {
3560  switch ( user_lock_kind ) {
3561  case lk_default:
3562  default:
3563  KMP_ASSERT( 0 );
3564 
3565  case lk_tas: {
3566  __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3567  __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3568 
3569  __kmp_get_user_lock_owner_ =
3570  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3571  ( &__kmp_get_tas_lock_owner );
3572 
3573  if ( __kmp_env_consistency_check ) {
3574  KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3575  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3576  }
3577  else {
3578  KMP_BIND_USER_LOCK(tas);
3579  KMP_BIND_NESTED_USER_LOCK(tas);
3580  }
3581 
3582  __kmp_destroy_user_lock_ =
3583  ( void ( * )( kmp_user_lock_p ) )
3584  ( &__kmp_destroy_tas_lock );
3585 
3586  __kmp_is_user_lock_initialized_ =
3587  ( int ( * )( kmp_user_lock_p ) ) NULL;
3588 
3589  __kmp_get_user_lock_location_ =
3590  ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3591 
3592  __kmp_set_user_lock_location_ =
3593  ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3594 
3595  __kmp_get_user_lock_flags_ =
3596  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3597 
3598  __kmp_set_user_lock_flags_ =
3599  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3600  }
3601  break;
3602 
3603 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3604 
3605  case lk_futex: {
3606  __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3607  __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3608 
3609  __kmp_get_user_lock_owner_ =
3610  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3611  ( &__kmp_get_futex_lock_owner );
3612 
3613  if ( __kmp_env_consistency_check ) {
3614  KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3615  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3616  }
3617  else {
3618  KMP_BIND_USER_LOCK(futex);
3619  KMP_BIND_NESTED_USER_LOCK(futex);
3620  }
3621 
3622  __kmp_destroy_user_lock_ =
3623  ( void ( * )( kmp_user_lock_p ) )
3624  ( &__kmp_destroy_futex_lock );
3625 
3626  __kmp_is_user_lock_initialized_ =
3627  ( int ( * )( kmp_user_lock_p ) ) NULL;
3628 
3629  __kmp_get_user_lock_location_ =
3630  ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3631 
3632  __kmp_set_user_lock_location_ =
3633  ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3634 
3635  __kmp_get_user_lock_flags_ =
3636  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3637 
3638  __kmp_set_user_lock_flags_ =
3639  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3640  }
3641  break;
3642 
3643 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3644 
3645  case lk_ticket: {
3646  __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3647  __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3648 
3649  __kmp_get_user_lock_owner_ =
3650  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3651  ( &__kmp_get_ticket_lock_owner );
3652 
3653  if ( __kmp_env_consistency_check ) {
3654  KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3655  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3656  }
3657  else {
3658  KMP_BIND_USER_LOCK(ticket);
3659  KMP_BIND_NESTED_USER_LOCK(ticket);
3660  }
3661 
3662  __kmp_destroy_user_lock_ =
3663  ( void ( * )( kmp_user_lock_p ) )
3664  ( &__kmp_destroy_ticket_lock );
3665 
3666  __kmp_is_user_lock_initialized_ =
3667  ( int ( * )( kmp_user_lock_p ) )
3668  ( &__kmp_is_ticket_lock_initialized );
3669 
3670  __kmp_get_user_lock_location_ =
3671  ( const ident_t * ( * )( kmp_user_lock_p ) )
3672  ( &__kmp_get_ticket_lock_location );
3673 
3674  __kmp_set_user_lock_location_ =
3675  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3676  ( &__kmp_set_ticket_lock_location );
3677 
3678  __kmp_get_user_lock_flags_ =
3679  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3680  ( &__kmp_get_ticket_lock_flags );
3681 
3682  __kmp_set_user_lock_flags_ =
3683  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3684  ( &__kmp_set_ticket_lock_flags );
3685  }
3686  break;
3687 
3688  case lk_queuing: {
3689  __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3690  __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3691 
3692  __kmp_get_user_lock_owner_ =
3693  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3694  ( &__kmp_get_queuing_lock_owner );
3695 
3696  if ( __kmp_env_consistency_check ) {
3697  KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3698  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3699  }
3700  else {
3701  KMP_BIND_USER_LOCK(queuing);
3702  KMP_BIND_NESTED_USER_LOCK(queuing);
3703  }
3704 
3705  __kmp_destroy_user_lock_ =
3706  ( void ( * )( kmp_user_lock_p ) )
3707  ( &__kmp_destroy_queuing_lock );
3708 
3709  __kmp_is_user_lock_initialized_ =
3710  ( int ( * )( kmp_user_lock_p ) )
3711  ( &__kmp_is_queuing_lock_initialized );
3712 
3713  __kmp_get_user_lock_location_ =
3714  ( const ident_t * ( * )( kmp_user_lock_p ) )
3715  ( &__kmp_get_queuing_lock_location );
3716 
3717  __kmp_set_user_lock_location_ =
3718  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3719  ( &__kmp_set_queuing_lock_location );
3720 
3721  __kmp_get_user_lock_flags_ =
3722  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3723  ( &__kmp_get_queuing_lock_flags );
3724 
3725  __kmp_set_user_lock_flags_ =
3726  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3727  ( &__kmp_set_queuing_lock_flags );
3728  }
3729  break;
3730 
3731 #if KMP_USE_ADAPTIVE_LOCKS
3732  case lk_adaptive: {
3733  __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3734  __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
3735 
3736  __kmp_get_user_lock_owner_ =
3737  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3738  ( &__kmp_get_queuing_lock_owner );
3739 
3740  if ( __kmp_env_consistency_check ) {
3741  KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3742  }
3743  else {
3744  KMP_BIND_USER_LOCK(adaptive);
3745  }
3746 
3747  __kmp_destroy_user_lock_ =
3748  ( void ( * )( kmp_user_lock_p ) )
3749  ( &__kmp_destroy_adaptive_lock );
3750 
3751  __kmp_is_user_lock_initialized_ =
3752  ( int ( * )( kmp_user_lock_p ) )
3753  ( &__kmp_is_queuing_lock_initialized );
3754 
3755  __kmp_get_user_lock_location_ =
3756  ( const ident_t * ( * )( kmp_user_lock_p ) )
3757  ( &__kmp_get_queuing_lock_location );
3758 
3759  __kmp_set_user_lock_location_ =
3760  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3761  ( &__kmp_set_queuing_lock_location );
3762 
3763  __kmp_get_user_lock_flags_ =
3764  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3765  ( &__kmp_get_queuing_lock_flags );
3766 
3767  __kmp_set_user_lock_flags_ =
3768  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3769  ( &__kmp_set_queuing_lock_flags );
3770 
3771  }
3772  break;
3773 #endif // KMP_USE_ADAPTIVE_LOCKS
3774 
3775  case lk_drdpa: {
3776  __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3777  __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3778 
3779  __kmp_get_user_lock_owner_ =
3780  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3781  ( &__kmp_get_drdpa_lock_owner );
3782 
3783  if ( __kmp_env_consistency_check ) {
3784  KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3785  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3786  }
3787  else {
3788  KMP_BIND_USER_LOCK(drdpa);
3789  KMP_BIND_NESTED_USER_LOCK(drdpa);
3790  }
3791 
3792  __kmp_destroy_user_lock_ =
3793  ( void ( * )( kmp_user_lock_p ) )
3794  ( &__kmp_destroy_drdpa_lock );
3795 
3796  __kmp_is_user_lock_initialized_ =
3797  ( int ( * )( kmp_user_lock_p ) )
3798  ( &__kmp_is_drdpa_lock_initialized );
3799 
3800  __kmp_get_user_lock_location_ =
3801  ( const ident_t * ( * )( kmp_user_lock_p ) )
3802  ( &__kmp_get_drdpa_lock_location );
3803 
3804  __kmp_set_user_lock_location_ =
3805  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3806  ( &__kmp_set_drdpa_lock_location );
3807 
3808  __kmp_get_user_lock_flags_ =
3809  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3810  ( &__kmp_get_drdpa_lock_flags );
3811 
3812  __kmp_set_user_lock_flags_ =
3813  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3814  ( &__kmp_set_drdpa_lock_flags );
3815  }
3816  break;
3817  }
3818 }
3819 
3820 
3821 // ----------------------------------------------------------------------------
3822 // User lock table & lock allocation
3823 
3824 kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3825 kmp_user_lock_p __kmp_lock_pool = NULL;
3826 
3827 // Lock block-allocation support.
3828 kmp_block_of_locks* __kmp_lock_blocks = NULL;
3829 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3830 
3831 static kmp_lock_index_t
3832 __kmp_lock_table_insert( kmp_user_lock_p lck )
3833 {
3834  // Assume that kmp_global_lock is held upon entry/exit.
3835  kmp_lock_index_t index;
3836  if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3837  kmp_lock_index_t size;
3838  kmp_user_lock_p *table;
3839  kmp_lock_index_t i;
3840  // Reallocate lock table.
3841  if ( __kmp_user_lock_table.allocated == 0 ) {
3842  size = 1024;
3843  }
3844  else {
3845  size = __kmp_user_lock_table.allocated * 2;
3846  }
3847  table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3848  KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3849  table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3850  // We cannot free the previous table now, since it may be in use by other
3851  // threads. So save the pointer to the previous table in the first element of the
3852  // new table. All the tables are chained into a list this way and can be freed
3853  // when the library shuts down.
3854  __kmp_user_lock_table.table = table;
3855  __kmp_user_lock_table.allocated = size;
3856  }
3857  KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3858  index = __kmp_user_lock_table.used;
3859  __kmp_user_lock_table.table[ index ] = lck;
3860  ++ __kmp_user_lock_table.used;
3861  return index;
3862 }
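
// Editor's note: a sketch of the growth scheme used by __kmp_lock_table_insert.
// The table doubles in size, live entries are copied starting at slot 1, and
// slot 0 of the new table stores a pointer to the old one, so readers that
// still hold the old pointer stay valid and all tables can be freed at
// shutdown by walking that chain. Locking is omitted here (the real code
// assumes the global lock is held); my_table_t and grow_table are hypothetical.
#if 0 // illustrative sketch only
#include <stdlib.h>
#include <string.h>

typedef struct {
    void   **table;      // slot [0] chains to the previous (smaller) table
    size_t   used;       // next free slot; slot 0 is reserved
    size_t   allocated;
} my_table_t;

static void grow_table(my_table_t *t)
{
    size_t size = (t->allocated == 0) ? 1024 : t->allocated * 2;
    void **bigger = (void **)malloc(sizeof(void *) * size);
    if (t->used > 1)
        memcpy(bigger + 1, t->table + 1, sizeof(void *) * (t->used - 1));
    bigger[0] = (void *)t->table;   // may be NULL for the very first table
    t->table = bigger;              // the old table is NOT freed here
    t->allocated = size;
}
#endif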
3863 
3864 static kmp_user_lock_p
3865 __kmp_lock_block_allocate()
3866 {
3867  // Assume that kmp_global_lock is held upon entry/exit.
3868  static int last_index = 0;
3869  if ( ( last_index >= __kmp_num_locks_in_block )
3870  || ( __kmp_lock_blocks == NULL ) ) {
3871  // Restart the index.
3872  last_index = 0;
3873  // Need to allocate a new block.
3874  KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3875  size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3876  char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
3877  // Set up the new block.
3878  kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3879  new_block->next_block = __kmp_lock_blocks;
3880  new_block->locks = (void *)buffer;
3881  // Publish the new block.
3882  KMP_MB();
3883  __kmp_lock_blocks = new_block;
3884  }
3885  kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
3886  [ last_index * __kmp_user_lock_size ] ) );
3887  last_index++;
3888  return ret;
3889 }
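
// Editor's note: a sketch of the block layout built by __kmp_lock_block_allocate.
// One allocation holds storage for N locks followed by the block descriptor
// itself, so freeing the buffer releases both. This assumes the allocator
// returns suitably aligned memory and that the lock size keeps the trailing
// header aligned, which the real code gets from __kmp_allocate. The names
// my_block_t and alloc_block are hypothetical.
#if 0 // illustrative sketch only
#include <stdlib.h>

typedef struct my_block {
    struct my_block *next_block;   // blocks form a singly linked list
    void            *locks;        // start of the lock storage
} my_block_t;

static my_block_t *alloc_block(size_t lock_size, size_t locks_per_block)
{
    size_t space_for_locks = lock_size * locks_per_block;
    char  *buffer = (char *)malloc(space_for_locks + sizeof(my_block_t));
    my_block_t *blk = (my_block_t *)(buffer + space_for_locks); // header at the end
    blk->locks      = (void *)buffer;
    blk->next_block = NULL;
    return blk;
}
#endif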
3890 
3891 //
3892 // Get memory for a lock. It may be freshly allocated memory or reused memory
3893 // from the lock pool.
3894 //
3895 kmp_user_lock_p
3896 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
3897  kmp_lock_flags_t flags )
3898 {
3899  kmp_user_lock_p lck;
3900  kmp_lock_index_t index;
3901  KMP_DEBUG_ASSERT( user_lock );
3902 
3903  __kmp_acquire_lock( &__kmp_global_lock, gtid );
3904 
3905  if ( __kmp_lock_pool == NULL ) {
3906  // Lock pool is empty. Allocate new memory.
3907  if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
3908  lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
3909  }
3910  else {
3911  lck = __kmp_lock_block_allocate();
3912  }
3913 
3914  // Insert the lock into the table so that it can be freed in __kmp_cleanup,
3915  // and so the debugger has info on all allocated locks.
3916  index = __kmp_lock_table_insert( lck );
3917  }
3918  else {
3919  // Pick up lock from pool.
3920  lck = __kmp_lock_pool;
3921  index = __kmp_lock_pool->pool.index;
3922  __kmp_lock_pool = __kmp_lock_pool->pool.next;
3923  }
3924 
3925  //
3926  // We could potentially differentiate between nested and regular locks
3927  // here, and do the lock table lookup for regular locks only.
3928  //
3929  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3930  * ( (kmp_lock_index_t *) user_lock ) = index;
3931  }
3932  else {
3933  * ( (kmp_user_lock_p *) user_lock ) = lck;
3934  }
3935 
3936  // Mark the lock if it is a critical section lock.
3937  __kmp_set_user_lock_flags( lck, flags );
3938 
3939  __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this release earlier
3940 
3941  return lck;
3942 }
3943 
3944 // Return the lock's memory to the pool for reuse.
3945 void
3946 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
3947 {
3948  kmp_lock_pool_t * lock_pool;
3949 
3950  KMP_DEBUG_ASSERT( user_lock != NULL );
3951  KMP_DEBUG_ASSERT( lck != NULL );
3952 
3953  __kmp_acquire_lock( & __kmp_global_lock, gtid );
3954 
3955  lck->pool.next = __kmp_lock_pool;
3956  __kmp_lock_pool = lck;
3957  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3958  kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
3959  KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
3960  lck->pool.index = index;
3961  }
3962 
3963  __kmp_release_lock( & __kmp_global_lock, gtid );
3964 }
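
// Editor's note: the lock pool used by __kmp_user_lock_allocate and
// __kmp_user_lock_free is an intrusive singly linked free list -- a released
// lock's own storage holds the "next" pointer and, when user lock objects only
// carry a table index, the saved index. A minimal sketch with hypothetical
// names follows; the real code performs these steps under the global lock.
#if 0 // illustrative sketch only
typedef struct my_lock {
    struct my_lock *next;    // valid only while the lock sits in the pool
    unsigned        index;   // saved table index, if indices are in use
} my_lock_t;

static my_lock_t *pool_head = NULL;

static void pool_push(my_lock_t *lck, unsigned index)
{
    lck->next  = pool_head;
    lck->index = index;
    pool_head  = lck;
}

static my_lock_t *pool_pop(void)
{
    my_lock_t *lck = pool_head;
    if (lck != NULL)
        pool_head = lck->next;
    return lck;              // NULL means the pool is empty
}
#endif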
3965 
3966 kmp_user_lock_p
3967 __kmp_lookup_user_lock( void **user_lock, char const *func )
3968 {
3969  kmp_user_lock_p lck = NULL;
3970 
3971  if ( __kmp_env_consistency_check ) {
3972  if ( user_lock == NULL ) {
3973  KMP_FATAL( LockIsUninitialized, func );
3974  }
3975  }
3976 
3977  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3978  kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
3979  if ( __kmp_env_consistency_check ) {
3980  if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
3981  KMP_FATAL( LockIsUninitialized, func );
3982  }
3983  }
3984  KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
3985  KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3986  lck = __kmp_user_lock_table.table[index];
3987  }
3988  else {
3989  lck = *( (kmp_user_lock_p *)user_lock );
3990  }
3991 
3992  if ( __kmp_env_consistency_check ) {
3993  if ( lck == NULL ) {
3994  KMP_FATAL( LockIsUninitialized, func );
3995  }
3996  }
3997 
3998  return lck;
3999 }
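
// Editor's note: a sketch of the two encodings resolved by __kmp_lookup_user_lock.
// When the user's lock object is smaller than a pointer, it stores an index into
// the lock table; otherwise it stores the lock pointer directly. The names
// resolve_user_lock, USER_LOCK_SIZE, and lock_table are hypothetical stand-ins.
#if 0 // illustrative sketch only
#include <stdint.h>

#define USER_LOCK_SIZE 4           /* assumed size of the user's lock type */

extern void *lock_table[];         /* slot 0 reserved, as in the real table */

static void *resolve_user_lock(void **user_lock)
{
    if (USER_LOCK_SIZE < sizeof(void *)) {
        uint32_t index = *(uint32_t *)user_lock;   // small object: table index
        return lock_table[index];
    }
    return *user_lock;                             // full-size object: pointer
}
#endif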
4000 
4001 void
4002 __kmp_cleanup_user_locks( void )
4003 {
4004  //
4005  // Reset the lock pool. Do not worry about the locks in the pool -- we will
4006  // free them when iterating through the lock table (it includes all the
4007  // locks, dead or alive).
4008  //
4009  __kmp_lock_pool = NULL;
4010 
4011 #define IS_CRITICAL(lck) \
4012  ( ( __kmp_get_user_lock_flags_ != NULL ) && \
4013  ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
4014 
4015  //
4016  // Loop through the lock table and free all the locks.
4017  //
4018  // Do not free item [0]; it is reserved for the list of lock tables.
4019  //
4020  // FIXME - we are iterating through a list of (pointers to) objects of
4021  // type union kmp_user_lock, but we have no way of knowing whether the
4022  // base type is currently "pool" or whatever the global user lock type
4023  // is.
4024  //
4025  // We are relying on the fact that for all of the user lock types
4026  // (except "tas"), the first field in the lock struct is the "initialized"
4027  // field, which is set to the address of the lock object itself when
4028  // the lock is initialized. When the union is of type "pool", the
4029  // first field is a pointer to the next object in the free list, which
4030  // will not be the same address as the object itself.
4031  //
4032  // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
4033  // will fail for "pool" objects on the free list. This must happen as
4034  // the "location" field of real user locks overlaps the "index" field
4035  // of "pool" objects.
4036  //
4037  // It would be better to run through the free list, and remove all "pool"
4038  // objects from the lock table before executing this loop. However,
4039  // "pool" objects do not always have their index field set (only on
4040  // lin_32e), and I don't want to search the lock table for the address
4041  // of every "pool" object on the free list.
4042  //
4043  while ( __kmp_user_lock_table.used > 1 ) {
4044  const ident *loc;
4045 
4046  //
4047  // Reduce __kmp_user_lock_table.used before freeing the lock,
4048  // so that the state of the locks remains consistent.
4049  //
4050  kmp_user_lock_p lck = __kmp_user_lock_table.table[
4051  --__kmp_user_lock_table.used ];
4052 
4053  if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
4054  ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
4055  //
4056  // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
4057  // initialized AND it is NOT a critical section (user is not
4058  // responsible for destroying criticals) AND we know source
4059  // location to report.
4060  //
4061  if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
4062  ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
4063  ( loc->psource != NULL ) ) {
4064  kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
4065  KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
4066  __kmp_str_loc_free( &str_loc);
4067  }
4068 
4069 #ifdef KMP_DEBUG
4070  if ( IS_CRITICAL( lck ) ) {
4071  KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
4072  }
4073  else {
4074  KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
4075  }
4076 #endif // KMP_DEBUG
4077 
4078  //
4079  // Cleanup internal lock dynamic resources
4080  // (for drdpa locks particularly).
4081  //
4082  __kmp_destroy_user_lock( lck );
4083  }
4084 
4085  //
4086  // Free the lock if block allocation of locks is not used.
4087  //
4088  if ( __kmp_lock_blocks == NULL ) {
4089  __kmp_free( lck );
4090  }
4091  }
4092 
4093 #undef IS_CRITICAL
4094 
4095  //
4096  // delete lock table(s).
4097  //
4098  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4099  __kmp_user_lock_table.table = NULL;
4100  __kmp_user_lock_table.allocated = 0;
4101 
4102  while ( table_ptr != NULL ) {
4103  //
4104  // In the first element we saved the pointer to the previous
4105  // (smaller) lock table.
4106  //
4107  kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
4108  __kmp_free( table_ptr );
4109  table_ptr = next;
4110  }
4111 
4112  //
4113  // Free buffers allocated for blocks of locks.
4114  //
4115  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4116  __kmp_lock_blocks = NULL;
4117 
4118  while ( block_ptr != NULL ) {
4119  kmp_block_of_locks_t *next = block_ptr->next_block;
4120  __kmp_free( block_ptr->locks );
4121  //
4122  // *block_ptr itself was allocated at the end of the locks vector.
4123  //
4124  block_ptr = next;
4125  }
4126 
4127  TCW_4(__kmp_init_user_locks, FALSE);
4128 }
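
// Editor's note: a minimal sketch of the discrimination trick described in the
// long comment inside __kmp_cleanup_user_locks. For the non-TAS lock types the
// first word of a live lock is a self-pointer set at initialization, while the
// first word of a pooled entry is the free list's "next" pointer, which can
// never equal the entry's own address, so one comparison tells them apart.
// The names my_slot_t, mark_initialized, and is_initialized are hypothetical.
#if 0 // illustrative sketch only
typedef union my_slot {
    struct { union my_slot *initialized; /* ... real lock fields ... */ } lock;
    struct { union my_slot *next;        /* ... pool bookkeeping ...  */ } pool;
} my_slot_t;

static void mark_initialized(my_slot_t *s)
{
    s->lock.initialized = s;            // self-pointer marks a live lock
}

static int is_initialized(my_slot_t *s)
{
    return s->lock.initialized == s;    // a pool entry's "next" never equals s
}
#endif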
4129 
4130 #endif // KMP_USE_DYNAMIC_LOCK