Intel® OpenMP* Runtime Library
kmp_wait_release.h
/*
 * kmp_wait_release.h -- Wait/Release implementation
 */

/* <copyright>
    Copyright (c) 1997-2015 Intel Corporation.  All Rights Reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

      * Redistributions of source code must retain the above copyright
        notice, this list of conditions and the following disclaimer.
      * Redistributions in binary form must reproduce the above copyright
        notice, this list of conditions and the following disclaimer in the
        documentation and/or other materials provided with the distribution.
      * Neither the name of Intel Corporation nor the names of its
        contributors may be used to endorse or promote products derived
        from this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

</copyright> */

#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"

enum flag_type {
    flag32,      // 32-bit flags
    flag64,      // 64-bit flags
    flag_oncore  // special 64-bit flag for the hierarchical on-core barrier
};

// Base class for wait/release volatile flags.
template <typename P>
class kmp_flag {
    volatile P * loc;  // Pointer to the flag storage, modified by another thread
    flag_type t;       // "Type" of the flag stored at loc
 public:
    typedef P flag_t;
    kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {}
    // Returns a pointer to the actual flag location.
    volatile P * get() { return loc; }
    // Returns the flag type.
    flag_type get_type() { return t; }
    // Derived classes must provide the following:
    /*
    kmp_info_t * get_waiter(kmp_uint32 i);
    kmp_uint32 get_num_waiters();
    bool done_check();
    bool done_check_val(P old_loc);
    bool notdone_check();
    P internal_release();
    P set_sleeping();
    P unset_sleeping();
    bool is_sleeping();
    bool is_sleeping_val(P old_loc);
    */
};

/* Spin wait loop that first does pause, then yield, then sleep. A thread that calls __kmp_wait_*
   must make certain that another thread calls __kmp_release to wake it back up to prevent deadlocks! */
template <class C>
static inline void __kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin
                                       USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
    // NOTE: We may not belong to a team at this point.
    volatile typename C::flag_t *spin = flag->get();
    kmp_uint32 spins;
    kmp_uint32 hibernate;
    int th_gtid;
    int tasks_completed = FALSE;

    KMP_FSYNC_SPIN_INIT(spin, NULL);
    if (flag->done_check()) {
        KMP_FSYNC_SPIN_ACQUIRED(spin);
        return;
    }
    th_gtid = this_thr->th.th_info.ds.ds_gtid;
    KA_TRACE(20, ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));

    // Setup for waiting
    KMP_INIT_YIELD(spins);

    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        // The worker threads cannot rely on the team struct existing at this point.
        // Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
        if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
            // Force immediate suspend if not set by user and more threads than available procs
            hibernate = 0;
        else
            hibernate = this_thr->th.th_team_bt_intervals;
#else
        hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

        /* If the blocktime is nonzero, we want to make sure that we spin wait for the entirety
           of the specified #intervals, plus up to one interval more. This increment makes
           certain that this thread doesn't go to sleep too soon. */
        if (hibernate != 0)
            hibernate++;

        // Add in the current time value.
        hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
        KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                      th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                      hibernate - __kmp_global.g.g_time.dt.t_value));
    }
    KMP_MB();

    // Main wait spin loop
    while (flag->notdone_check()) {
        int in_pool;

        /* If the task team is NULL, it means one of three things:
           1) A newly-created thread is first being released by __kmp_fork_barrier(), and
              its task team has not been set up yet.
           2) All tasks have been executed to completion; this thread has decremented the task
              team's ref count and possibly deallocated it, and should no longer reference it.
           3) Tasking is off for this region. This could be because we are in a serialized region
              (perhaps the outer one), or else tasking was manually disabled (KMP_TASKING=0). */
        kmp_task_team_t * task_team = NULL;
        if (__kmp_tasking_mode != tskm_immediate_exec) {
            task_team = this_thr->th.th_task_team;
            if (task_team != NULL) {
                if (!TCR_SYNC_4(task_team->tt.tt_active)) {
                    KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
                    __kmp_unref_task_team(task_team, this_thr);
                } else if (KMP_TASKING_ENABLED(task_team)) {
                    flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed
                                        USE_ITT_BUILD_ARG(itt_sync_obj), 0);
                }
            } // if
        } // if

        KMP_FSYNC_SPIN_PREPARE(spin);
        if (TCR_4(__kmp_global.g.g_done)) {
            if (__kmp_global.g.g_abort)
                __kmp_abort_thread();
            break;
        }

        // If we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield
        KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
        // TODO: Should it be number of cores instead of thread contexts? Like:
        // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
        // Need performance improvement data to make the change...
        KMP_YIELD_SPIN(spins);

        // Check if this thread was transferred from a team
        // to the thread pool (or vice-versa) while spinning.
        in_pool = !!TCR_4(this_thr->th.th_in_pool);
        if (in_pool != !!this_thr->th.th_active_in_pool) {
            if (in_pool) { // Recently transferred from team to pool
                KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
                this_thr->th.th_active_in_pool = TRUE;
                /* Here, we cannot assert that:
                   KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <= __kmp_thread_pool_nth);
                   __kmp_thread_pool_nth is inc/dec'd by the master thread while the fork/join
                   lock is held, whereas __kmp_thread_pool_active_nth is inc/dec'd asynchronously
                   by the workers. The two can get out of sync for brief periods of time. */
            }
            else { // Recently transferred from pool to team
                KMP_TEST_THEN_DEC32((kmp_int32 *) &__kmp_thread_pool_active_nth);
                KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
                this_thr->th.th_active_in_pool = FALSE;
            }
        }

        // Don't suspend if KMP_BLOCKTIME is set to "infinite"
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
            continue;

        // Don't suspend if there is a likelihood of new tasks being spawned.
        if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
            continue;

        // Fall asleep only once the hibernate deadline computed above has passed.
        if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
            continue;

        KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));

        flag->suspend(th_gtid);

        if (TCR_4(__kmp_global.g.g_done)) {
            if (__kmp_global.g.g_abort)
                __kmp_abort_thread();
            break;
        }
        // TODO: If thread is done with work and times out, disband/free
    }
    KMP_FSYNC_SPIN_ACQUIRED(spin);
}
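
/* Illustrative sketch (not part of the original header) of the suspend deadline
   computed near the top of __kmp_wait_template, assuming the monitor tick counter
   __kmp_global.g.g_time.dt.t_value increases monotonically. The helper name is
   hypothetical. */
#if 0
static kmp_uint32 example_hibernate_deadline(kmp_uint32 now_ticks, kmp_uint32 bt_intervals) {
    kmp_uint32 deadline = bt_intervals; // blocktime expressed in monitor ticks
    if (deadline != 0)
        deadline++;                     // spin the full blocktime, plus up to one extra tick
    return now_ticks + deadline;        // absolute tick at which the waiter may suspend
}
#endif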

/* Release any threads specified as waiting on the flag by releasing the flag and resuming the
   waiting threads if indicated by the sleep bit(s). A thread that calls __kmp_wait_template
   must call this function to wake up the potentially sleeping thread and prevent deadlocks! */
template <class C>
static inline void __kmp_release_template(C *flag)
{
#ifdef KMP_DEBUG
    // FIX ME
    kmp_info_t * wait_thr = flag->get_waiter(0);
    int target_gtid = wait_thr->th.th_info.ds.ds_gtid;
    int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
    KF_TRACE(20, ("__kmp_release: T#%d releasing T#%d spin(%p)\n", gtid, target_gtid, flag->get()));
    KMP_DEBUG_ASSERT(flag->get());
    KMP_FSYNC_RELEASING(flag->get());

    typename C::flag_t old_spin = flag->internal_release();

    KF_TRACE(100, ("__kmp_release: T#%d old spin(%p)=%d, set new spin=%d\n",
                   gtid, flag->get(), old_spin, *(flag->get())));

    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        // Only need to check sleep stuff if infinite block time not set
        if (flag->is_sleeping_val(old_spin)) {
            for (unsigned int i=0; i<flag->get_num_waiters(); ++i) {
                kmp_info_t * waiter = flag->get_waiter(i);
                int wait_gtid = waiter->th.th_info.ds.ds_gtid;
                // Wake up thread if needed
                KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep spin(%p) set\n",
                              gtid, wait_gtid, flag->get()));
                flag->resume(wait_gtid);
            }
        } else {
            KF_TRACE(50, ("__kmp_release: T#%d don't wake up thread T#%d since sleep spin(%p) not set\n",
                          gtid, target_gtid, flag->get()));
        }
    }
}
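
/* Illustrative sketch (not part of the original header) of the sleep-bit handshake
   that pairs with the release logic above: the waiter atomically advertises intent
   to sleep, then re-checks the flag using the pre-OR value returned by set_sleeping();
   the releaser, seeing the sleep bit in old_spin, calls resume(). The helper name is
   hypothetical, and FlagClass stands for any of the flag classes defined below. */
#if 0
template <class FlagClass>
static void example_sleep_handshake(FlagClass *flag, int my_gtid) {
    typename FlagClass::flag_t old_val = flag->set_sleeping(); // atomically set the sleep bit
    if (flag->done_check_val(old_val)) { // release happened before the bit was set
        flag->unset_sleeping();          // clear the bit and return without sleeping
        return;
    }
    flag->suspend(my_gtid);              // releaser will see the sleep bit and resume us
}
#endif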

template <typename FlagType>
struct flag_traits {};

template <>
struct flag_traits<kmp_uint32> {
    typedef kmp_uint32 flag_t;
    static const flag_type t = flag32;
    static inline flag_t tcr(flag_t f) { return TCR_4(f); }
    static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)f); }
    static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR32((volatile kmp_int32 *)f, v); }
    static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND32((volatile kmp_int32 *)f, v); }
};

template <>
struct flag_traits<kmp_uint64> {
    typedef kmp_uint64 flag_t;
    static const flag_type t = flag64;
    static inline flag_t tcr(flag_t f) { return TCR_8(f); }
    static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_64((volatile kmp_int64 *)f); }
    static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR64((volatile kmp_int64 *)f, v); }
    static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND64((volatile kmp_int64 *)f, v); }
};
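
/* Illustrative sketch (not part of the original header): the traits above let one
   template drive either flag width with the matching atomic primitive. The function
   name is hypothetical. */
#if 0
template <typename FlagType>
static FlagType example_bump(volatile FlagType *f) {
    typedef flag_traits<FlagType> traits_type;
    return traits_type::test_then_add4(f); // selects the 32- or 64-bit atomic add
}
#endif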

template <typename FlagType>
class kmp_basic_flag : public kmp_flag<FlagType> {
    typedef flag_traits<FlagType> traits_type;
    FlagType checker;                 // Value to compare the flag against to detect release
    kmp_info_t * waiting_threads[1];  // Threads sleeping on this flag
    kmp_uint32 num_waiting_threads;   // Number of threads sleeping on this flag
public:
    kmp_basic_flag(volatile FlagType *p) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
    kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
        waiting_threads[0] = thr;
    }
    kmp_basic_flag(volatile FlagType *p, FlagType c) : kmp_flag<FlagType>(p, traits_type::t), checker(c), num_waiting_threads(0) {}
    // Returns the thread waiting at index i.
    kmp_info_t * get_waiter(kmp_uint32 i) {
        KMP_DEBUG_ASSERT(i<num_waiting_threads);
        return waiting_threads[i];
    }
    // Returns the number of threads waiting on this flag.
    kmp_uint32 get_num_waiters() { return num_waiting_threads; }
    // Registers thr as the (single) waiter on this flag.
    void set_waiter(kmp_info_t *thr) {
        waiting_threads[0] = thr;
        num_waiting_threads = 1;
    }
    // True once the flag has reached the checker value, i.e. it has been released.
    bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
    // Same check, applied to a previously-read flag value.
    bool done_check_val(FlagType old_loc) { return old_loc == checker; }
    // True while the flag has not yet been released.
    bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
    // Releases the flag by atomically adding 4, stepping over the low status bits
    // (such as the sleep bit). Returns the previous flag value.
    FlagType internal_release() {
        return traits_type::test_then_add4((volatile FlagType *)this->get());
    }
    // Atomically sets the sleep bit; returns the previous flag value.
    FlagType set_sleeping() {
        return traits_type::test_then_or((volatile FlagType *)this->get(), KMP_BARRIER_SLEEP_STATE);
    }
    // Atomically clears the sleep bit; returns the previous flag value.
    FlagType unset_sleeping() {
        return traits_type::test_then_and((volatile FlagType *)this->get(), ~KMP_BARRIER_SLEEP_STATE);
    }
    // Tests whether a previously-read flag value has the sleep bit set.
    bool is_sleeping_val(FlagType old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
    // Tests whether the current flag value has the sleep bit set.
    bool is_sleeping() { return is_sleeping_val(*(this->get())); }
};

class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
    kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag<kmp_uint32>(p) {}
    kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint32>(p, thr) {}
    kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c) : kmp_basic_flag<kmp_uint32>(p, c) {}
    void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_32(this_thr, gtid, this, final_spin, thread_finished
                                      USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
};
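
/* Illustrative sketch (not part of the original header): a typical waiter-side use
   of kmp_flag_32, blocking until the flag is bumped to the next released state.
   The helper name and target computation are hypothetical, chosen for the example. */
#if 0
static void example_wait_32(kmp_info_t *this_thr, volatile kmp_uint32 *loc) {
    kmp_uint32 target = TCR_4(*loc) + KMP_BARRIER_STATE_BUMP; // next released state
    kmp_flag_32 flag(loc, target);  // done_check() succeeds when *loc reaches target
    flag.wait(this_thr, FALSE       // pauses, then yields, then sleeps (see __kmp_wait_template)
              USE_ITT_BUILD_ARG(NULL));
}
#endif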

class kmp_flag_64 : public kmp_basic_flag<kmp_uint64> {
public:
    kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag<kmp_uint64>(p) {}
    kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint64>(p, thr) {}
    kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) : kmp_basic_flag<kmp_uint64>(p, c) {}
    void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_64(this_thr, gtid, this, final_spin, thread_finished
                                      USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
};
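
/* Illustrative sketch (not part of the original header): the release side that pairs
   with a waiter blocked on a 64-bit flag. Registering the waiter in the flag object
   lets __kmp_release_template resume it if it has already gone to sleep. The helper
   name is hypothetical. */
#if 0
static void example_release_64(kmp_info_t *waiter_thr, volatile kmp_uint64 *loc) {
    kmp_flag_64 flag(loc, waiter_thr); // record which thread may be sleeping on loc
    flag.release();                    // bumps *loc by 4; wakes waiter_thr if its sleep bit was set
}
#endif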

// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag<kmp_uint64> {
    kmp_uint64 checker;               // Value the watched byte must reach to signal release
    kmp_info_t * waiting_threads[1];  // Threads sleeping on this flag
    kmp_uint32 num_waiting_threads;   // Number of threads sleeping on this flag
    kmp_uint32 offset;                // Index of this thread's byte within the 64-bit flag word
    bool flag_switch;                 // True when this thread must switch to its own b_go flag
    enum barrier_type bt;             // Barrier type
    kmp_info_t * this_thr;            // Thread that may switch to waiting on its own flag
#if USE_ITT_BUILD
    void *itt_sync_obj;               // ITT object to pass along when switching flag locations
#endif
    char& byteref(volatile kmp_uint64* loc, size_t offset) { return ((char *)loc)[offset]; }
public:
    kmp_flag_oncore(volatile kmp_uint64 *p)
        : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), flag_switch(false) {}
    kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
        : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), offset(idx), flag_switch(false) {}
    kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t,
                    kmp_info_t * thr
#if USE_ITT_BUILD
                    , void *itt
#endif
                    )
        : kmp_flag<kmp_uint64>(p, flag_oncore), checker(c), num_waiting_threads(0), offset(idx),
          flag_switch(false), bt(bar_t), this_thr(thr)
#if USE_ITT_BUILD
        , itt_sync_obj(itt)
#endif
        {}
    kmp_info_t * get_waiter(kmp_uint32 i) {
        KMP_DEBUG_ASSERT(i<num_waiting_threads);
        return waiting_threads[i];
    }
    kmp_uint32 get_num_waiters() { return num_waiting_threads; }
    void set_waiter(kmp_info_t *thr) {
        waiting_threads[0] = thr;
        num_waiting_threads = 1;
    }
    bool done_check_val(kmp_uint64 old_loc) { return byteref(&old_loc,offset) == checker; }
    bool done_check() { return done_check_val(*get()); }
    bool notdone_check() {
        // Calculate flag_switch
        if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
            flag_switch = true;
        if (byteref(get(),offset) != 1 && !flag_switch)
            return true;
        else if (flag_switch) {
            this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
            kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
            __kmp_wait_64(this_thr, &flag, TRUE
#if USE_ITT_BUILD
                          , itt_sync_obj
#endif
                          );
        }
        return false;
    }
    kmp_uint64 internal_release() {
        kmp_uint64 old_val;
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
            // No sleepers possible with infinite blocktime; a plain byte store suffices.
            old_val = *get();
            byteref(get(),offset) = 1;
        }
        else {
            // Atomically OR in a one-byte mask so concurrent sleep-bit traffic is not lost.
            kmp_uint64 mask=0;
            byteref(&mask,offset) = 1;
            old_val = KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask);
        }
        return old_val;
    }
    kmp_uint64 set_sleeping() {
        return KMP_TEST_THEN_OR64((kmp_int64 volatile *)get(), KMP_BARRIER_SLEEP_STATE);
    }
    kmp_uint64 unset_sleeping() {
        return KMP_TEST_THEN_AND64((kmp_int64 volatile *)get(), ~KMP_BARRIER_SLEEP_STATE);
    }
    bool is_sleeping_val(kmp_uint64 old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
    bool is_sleeping() { return is_sleeping_val(*get()); }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
    void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_oncore(this_thr, gtid, this, final_spin, thread_finished
                                          USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
};
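
/* Illustrative sketch (not part of the original header) of the byte-per-thread layout
   kmp_flag_oncore relies on: threads on one core share a single 64-bit word, and a
   release for the thread at byte index idx is the atomic OR performed in
   internal_release() above. The helper name is hypothetical. */
#if 0
static void example_oncore_mark_done(volatile kmp_uint64 *core_flag, kmp_uint32 idx) {
    kmp_uint64 mask = 0;
    ((char *)&mask)[idx] = 1;  // one-byte mask at offset idx, as byteref() builds it
    KMP_TEST_THEN_OR64((volatile kmp_int64 *)core_flag, mask); // set that thread's done byte
}
#endif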

#endif // KMP_WAIT_RELEASE_H