Intel® OpenMP* Runtime Library
kmp_wait_release.h
/*
 * kmp_wait_release.h -- Wait/Release implementation
 * $Revision: 43417 $
 * $Date: 2014-08-26 14:06:38 -0500 (Tue, 26 Aug 2014) $
 */

/* <copyright>
    Copyright (c) 1997-2014 Intel Corporation.  All Rights Reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

      * Redistributions of source code must retain the above copyright
        notice, this list of conditions and the following disclaimer.
      * Redistributions in binary form must reproduce the above copyright
        notice, this list of conditions and the following disclaimer in the
        documentation and/or other materials provided with the distribution.
      * Neither the name of Intel Corporation nor the names of its
        contributors may be used to endorse or promote products derived
        from this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

</copyright> */

#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
/*! The flag_type describes the storage used for the flag. */
enum flag_type {
    flag32,        /**< 32 bit flags */
    flag64,        /**< 64 bit flags */
    flag_oncore    /**< special 64-bit flag for on-core barrier (hierarchical) */
};
/*! Base class for wait/release volatile flag */
template <typename P>
class kmp_flag {
    volatile P * loc;  /**< Pointer to the flag storage that is modified by another thread */
    flag_type t;       /**< "Type" of the flag in loc */
 public:
    typedef P flag_t;
    kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {}
    /*! @result the pointer to the actual flag */
    volatile P * get() { return loc; }
    /*! @result the flag_type of the flag */
    flag_type get_type() { return t; }
    // Derived classes must provide the following:
    /*
    kmp_info_t * get_waiter(kmp_uint32 i);
    kmp_uint32 get_num_waiters();
    bool done_check();
    bool done_check_val(P old_loc);
    bool notdone_check();
    P internal_release();
    P set_sleeping();
    P unset_sleeping();
    bool is_sleeping();
    bool is_sleeping_val(P old_loc);
    */
};
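Derived classes are consumed purely through templates, with no virtual dispatch: __kmp_wait_template and __kmp_release_template below call done_check(), internal_release(), and the rest directly on the concrete type. The following is a minimal sketch of a conforming derived class, not part of the runtime; it assumes the TCR_4 and KMP_TEST_THEN_* primitives from kmp.h, and that the sleep flag occupies the low bit(s) covered by KMP_BARRIER_SLEEP_STATE. The real general-purpose implementation is kmp_basic_flag further below.

// Sketch only: a single-waiter 32-bit flag that is "done" once any state
// bit above the sleep bit is set.
class kmp_sketch_flag_32 : public kmp_flag<kmp_uint32> {
    kmp_info_t *waiter; // the one thread that may sleep on this flag
public:
    kmp_sketch_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr)
        : kmp_flag<kmp_uint32>(p, flag32), waiter(thr) {}
    kmp_info_t * get_waiter(kmp_uint32 i) { return waiter; }
    kmp_uint32 get_num_waiters() { return 1; }
    bool done_check_val(kmp_uint32 old_loc) { return (old_loc & ~KMP_BARRIER_SLEEP_STATE) != 0; }
    bool done_check() { return done_check_val(TCR_4(*get())); }
    bool notdone_check() { return !done_check(); }
    // Bump the state bits, leaving the sleep bit alone; returns the old value.
    kmp_uint32 internal_release() { return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)get()); }
    kmp_uint32 set_sleeping() { return KMP_TEST_THEN_OR32((volatile kmp_int32 *)get(), KMP_BARRIER_SLEEP_STATE); }
    kmp_uint32 unset_sleeping() { return KMP_TEST_THEN_AND32((volatile kmp_int32 *)get(), ~KMP_BARRIER_SLEEP_STATE); }
    bool is_sleeping_val(kmp_uint32 old_loc) { return (old_loc & KMP_BARRIER_SLEEP_STATE) != 0; }
    bool is_sleeping() { return is_sleeping_val(TCR_4(*get())); }
};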

/* Spin wait loop that first does pause, then yield, then sleep. A thread that calls __kmp_wait_*
   must make certain that another thread calls __kmp_release to wake it back up to prevent deadlocks! */
template <class C>
static inline void __kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin
                                       USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
    // NOTE: We may not belong to a team at this point.
    volatile typename C::flag_t *spin = flag->get();
    kmp_uint32 spins;
    kmp_uint32 hibernate;
    int th_gtid;
    int tasks_completed = FALSE;

    KMP_FSYNC_SPIN_INIT(spin, NULL);
    if (flag->done_check()) {
        KMP_FSYNC_SPIN_ACQUIRED(spin);
        return;
    }
    th_gtid = this_thr->th.th_info.ds.ds_gtid;
    KA_TRACE(20, ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));

    // Set up for waiting
    KMP_INIT_YIELD(spins);

    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        // The worker threads cannot rely on the team struct existing at this point.
        // Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
        if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
            // Force an immediate suspend if blocktime was not set by the user and
            // there are more threads than available procs.
            hibernate = 0;
        else
            hibernate = this_thr->th.th_team_bt_intervals;
#else
        hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

        /* If the blocktime is nonzero, we want to make sure that we spin wait for the entirety
           of the specified #intervals, plus up to one interval more. This increment makes
           certain that this thread doesn't go to sleep too soon. */
        if (hibernate != 0)
            hibernate++;

        // Add in the current time value.
        hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
        KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                      th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                      hibernate - __kmp_global.g.g_time.dt.t_value));
    }
    KMP_MB();

    // Main wait spin loop
    while (flag->notdone_check()) {
        int in_pool;

        /* If the task team is NULL, it means one of these things:
           1) A newly-created thread is first being released by __kmp_fork_barrier(), and
              its task team has not been set up yet.
           2) All tasks have been executed to completion; this thread has decremented the task
              team's ref ct and possibly deallocated it, and should no longer reference it.
           3) Tasking is off for this region. This could be because we are in a serialized region
              (perhaps the outer one), or else tasking was manually disabled (KMP_TASKING=0). */
        kmp_task_team_t * task_team = NULL;
        if (__kmp_tasking_mode != tskm_immediate_exec) {
            task_team = this_thr->th.th_task_team;
            if (task_team != NULL) {
                if (!TCR_SYNC_4(task_team->tt.tt_active)) {
                    KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
                    __kmp_unref_task_team(task_team, this_thr);
                } else if (KMP_TASKING_ENABLED(task_team, this_thr->th.th_task_state)) {
                    flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed
                                        USE_ITT_BUILD_ARG(itt_sync_obj), 0);
                }
            } // if
        } // if

        KMP_FSYNC_SPIN_PREPARE(spin);
        if (TCR_4(__kmp_global.g.g_done)) {
            if (__kmp_global.g.g_abort)
                __kmp_abort_thread();
            break;
        }

        // If we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield
        KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
        // TODO: Should it be number of cores instead of thread contexts? Like:
        // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
        // Need performance improvement data to make the change...
        KMP_YIELD_SPIN(spins);

        // Check if this thread was transferred from a team
        // to the thread pool (or vice-versa) while spinning.
        in_pool = !!TCR_4(this_thr->th.th_in_pool);
        if (in_pool != !!this_thr->th.th_active_in_pool) {
            if (in_pool) { // Recently transferred from team to pool
                KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
                this_thr->th.th_active_in_pool = TRUE;
                /* Here, we cannot assert that:
                   KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <= __kmp_thread_pool_nth);
                   __kmp_thread_pool_nth is inc/dec'd by the master thread while the fork/join
                   lock is held, whereas __kmp_thread_pool_active_nth is inc/dec'd asynchronously
                   by the workers. The two can get out of sync for brief periods of time. */
            }
            else { // Recently transferred from pool to team
                KMP_TEST_THEN_DEC32((kmp_int32 *) &__kmp_thread_pool_active_nth);
                KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
                this_thr->th.th_active_in_pool = FALSE;
            }
        }

        // Don't suspend if KMP_BLOCKTIME is set to "infinite"
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
            continue;

        // Don't suspend if there is a likelihood of new tasks being spawned.
        if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
            continue;

        // If we haven't waited long enough yet (hibernate time not reached), keep spinning.
        if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
            continue;

        KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));

        flag->suspend(th_gtid);

        if (TCR_4(__kmp_global.g.g_done)) {
            if (__kmp_global.g.g_abort)
                __kmp_abort_thread();
            break;
        }
        // TODO: If thread is done with work and times out, disband/free
    }
    KMP_FSYNC_SPIN_ACQUIRED(spin);
}

/* Release any threads specified as waiting on the flag by releasing the flag and resuming the
   waiting thread(s) if indicated by the sleep bit(s). A thread that calls __kmp_wait_template
   must call this function to wake up the potentially sleeping thread and prevent deadlocks! */
template <class C>
static inline void __kmp_release_template(C *flag)
{
#ifdef KMP_DEBUG
    // FIX ME
    kmp_info_t * wait_thr = flag->get_waiter(0);
    int target_gtid = wait_thr->th.th_info.ds.ds_gtid;
    int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
    KF_TRACE(20, ("__kmp_release: T#%d releasing T#%d spin(%p)\n", gtid, target_gtid, flag->get()));
    KMP_DEBUG_ASSERT(flag->get());
    KMP_FSYNC_RELEASING(flag->get());

    typename C::flag_t old_spin = flag->internal_release();

    KF_TRACE(100, ("__kmp_release: T#%d old spin(%p)=%d, set new spin=%d\n",
                   gtid, flag->get(), old_spin, *(flag->get())));

    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        // Only need to check the sleep bit(s) if the blocktime is not "infinite"
        if (flag->is_sleeping_val(old_spin)) {
            for (unsigned int i=0; i<flag->get_num_waiters(); ++i) {
                kmp_info_t * waiter = flag->get_waiter(i);
                int wait_gtid = waiter->th.th_info.ds.ds_gtid;
                // Wake up the thread if needed
                KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep spin(%p) set\n",
                              gtid, wait_gtid, flag->get()));
                flag->resume(wait_gtid);
            }
        } else {
            KF_TRACE(50, ("__kmp_release: T#%d don't wake up thread T#%d since sleep spin(%p) not set\n",
                          gtid, target_gtid, flag->get()));
        }
    }
}
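The ordering here is what makes the handshake race-free: internal_release() both publishes the new flag value and returns the value that was there before, so a sleep bit set by a waiter that has already committed to suspending cannot be missed. Roughly, as a sketch only (the suspend/resume internals live in the platform-specific __kmp_suspend_* / __kmp_resume_* routines):

// Timeline sketch (not runtime code); T0 waits, T1 releases.
//
//   T0: flag->set_sleeping();          // atomically sets sleep bit, gets old value
//   T0: if (!flag->done_check()) ...   // re-check before really blocking
//   T0: block inside flag->suspend(gtid);
//
//   T1: old_spin = flag->internal_release();   // bump flag, old value returned
//   T1: if (flag->is_sleeping_val(old_spin))   // T0's sleep bit is visible here
//   T1:     flag->resume(gtid);                // wake T0; resume clears the sleep bit
//
// If T0 sets the sleep bit only after T1's bump, T0's re-check sees the
// released value and T0 never blocks; either way no wakeup is lost.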

template <typename FlagType>
struct flag_traits {};

template <>
struct flag_traits<kmp_uint32> {
    typedef kmp_uint32 flag_t;
    static const flag_type t = flag32;
    static inline flag_t tcr(flag_t f) { return TCR_4(f); }
    static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)f); }
    static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR32((volatile kmp_int32 *)f, v); }
    static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND32((volatile kmp_int32 *)f, v); }
};

template <>
struct flag_traits<kmp_uint64> {
    typedef kmp_uint64 flag_t;
    static const flag_type t = flag64;
    static inline flag_t tcr(flag_t f) { return TCR_8(f); }
    static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_64((volatile kmp_int64 *)f); }
    static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR64((volatile kmp_int64 *)f, v); }
    static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND64((volatile kmp_int64 *)f, v); }
};
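These traits exist so the width-specific atomics from kmp.h appear behind a single name; kmp_basic_flag below is then written once and instantiated for both widths. A trivial illustration (the helper name is made up for this example):

// Hypothetical helper, for illustration only: works for kmp_uint32 and
// kmp_uint64 alike, expanding to the 32- or 64-bit atomic as appropriate.
template <typename FlagType>
static inline FlagType __kmp_bump_flag_example(volatile FlagType *f) {
    return flag_traits<FlagType>::test_then_add4(f); // returns the old value
}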

template <typename FlagType>
class kmp_basic_flag : public kmp_flag<FlagType> {
    typedef flag_traits<FlagType> traits_type;
    FlagType checker;                 /**< Flag value that signals "released" */
    kmp_info_t * waiting_threads[1];  /**< Threads sleeping on this flag */
    kmp_uint32 num_waiting_threads;   /**< Number of threads sleeping on this flag */
public:
    kmp_basic_flag(volatile FlagType *p) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
    kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
        waiting_threads[0] = thr;
    }
    kmp_basic_flag(volatile FlagType *p, FlagType c) : kmp_flag<FlagType>(p, traits_type::t), checker(c), num_waiting_threads(0) {}
    kmp_info_t * get_waiter(kmp_uint32 i) {
        KMP_DEBUG_ASSERT(i<num_waiting_threads);
        return waiting_threads[i];
    }
    kmp_uint32 get_num_waiters() { return num_waiting_threads; }
    void set_waiter(kmp_info_t *thr) {
        waiting_threads[0] = thr;
        num_waiting_threads = 1;
    }
    // The flag is "done" when the location reaches the checker value.
    bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
    bool done_check_val(FlagType old_loc) { return old_loc == checker; }
    bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
    // Release waiters by bumping the flag by 4, leaving the low sleep/unused
    // bits intact; returns the value prior to the bump.
    FlagType internal_release() {
        return traits_type::test_then_add4((volatile FlagType *)this->get());
    }
    // Atomically set/clear the sleep bit(s); both return the prior value.
    FlagType set_sleeping() {
        return traits_type::test_then_or((volatile FlagType *)this->get(), KMP_BARRIER_SLEEP_STATE);
    }
    FlagType unset_sleeping() {
        return traits_type::test_then_and((volatile FlagType *)this->get(), ~KMP_BARRIER_SLEEP_STATE);
    }
    bool is_sleeping_val(FlagType old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
    bool is_sleeping() { return is_sleeping_val(*(this->get())); }
};
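To see how the pieces interact, here is a worked sequence for a 64-bit basic flag. This is a sketch; the concrete numbers assume, per kmp.h in this version, that KMP_BARRIER_SLEEP_STATE == 1 and that the release bump is 4 (KMP_BARRIER_STATE_BUMP):

// b_go == 0, checker == 4:
//   waiter:   notdone_check()      -> 0 != 4, keep spinning
//   waiter:   set_sleeping()       -> b_go == 1, then suspends
//   releaser: internal_release()   -> adds 4: b_go == 5, returns old value 1
//   releaser: is_sleeping_val(1)   -> true, so resume() the waiter
//   resume:   unset_sleeping()     -> b_go == 4, waiter is woken
//   waiter:   done_check()         -> 4 == 4, wait is over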

class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
    kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag<kmp_uint32>(p) {}
    kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint32>(p, thr) {}
    kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c) : kmp_basic_flag<kmp_uint32>(p, c) {}
    void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_32(this_thr, gtid, this, final_spin, thread_finished
                                      USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
};

class kmp_flag_64 : public kmp_basic_flag<kmp_uint64> {
public:
    kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag<kmp_uint64>(p) {}
    kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint64>(p, thr) {}
    kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) : kmp_basic_flag<kmp_uint64>(p, c) {}
    void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_64(this_thr, gtid, this, final_spin, thread_finished
                                      USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
};
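A typical pairing, sketched from the barrier code's point of view. The call sites below are illustrative only (this_thr, other_thr, and itt_sync_obj are assumed to be in scope); note that the (p, thr) constructor registers the waiter that release() may need to resume:

// Worker side: wait until the master bumps this thread's b_go flag.
kmp_flag_64 my_go(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                  (kmp_uint64)KMP_BARRIER_STATE_BUMP);
my_go.wait(this_thr, TRUE
           USE_ITT_BUILD_ARG(itt_sync_obj));

// Master side: release one worker (and wake it if it went to sleep).
kmp_flag_64 other_go(&other_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go, other_thr);
other_go.release();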

// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag<kmp_uint64> {
    kmp_uint64 checker;
    kmp_info_t * waiting_threads[1];
    kmp_uint32 num_waiting_threads;
    kmp_uint32 offset;        /**< Byte offset of this thread's flag within the 64-bit word */
    bool flag_switch;         /**< Set when the thread is switched to its own b_go flag */
    enum barrier_type bt;     /**< Barrier type */
    kmp_info_t * this_thr;    /**< Thread that may be redirected to a different flag location */
#if USE_ITT_BUILD
    void *itt_sync_obj;       /**< ITT object to pass to the new flag location */
#endif
    char& byteref(volatile kmp_uint64* loc, size_t offset) { return ((char *)loc)[offset]; }
public:
    kmp_flag_oncore(volatile kmp_uint64 *p)
        : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), flag_switch(false) {}
    kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
        : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), offset(idx), flag_switch(false) {}
    kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t,
                    kmp_info_t * thr
#if USE_ITT_BUILD
                    , void *itt
#endif
                    )
        : kmp_flag<kmp_uint64>(p, flag_oncore), checker(c), num_waiting_threads(0), offset(idx),
          flag_switch(false), bt(bar_t), this_thr(thr)
#if USE_ITT_BUILD
        , itt_sync_obj(itt)
#endif
        {}
    kmp_info_t * get_waiter(kmp_uint32 i) {
        KMP_DEBUG_ASSERT(i<num_waiting_threads);
        return waiting_threads[i];
    }
    kmp_uint32 get_num_waiters() { return num_waiting_threads; }
    void set_waiter(kmp_info_t *thr) {
        waiting_threads[0] = thr;
        num_waiting_threads = 1;
    }
    bool done_check_val(kmp_uint64 old_loc) { return byteref(&old_loc,offset) == checker; }
    bool done_check() { return done_check_val(*get()); }
    bool notdone_check() {
        // Calculate flag_switch
        if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
            flag_switch = true;
        if (byteref(get(),offset) != 1 && !flag_switch)
            return true;
        else if (flag_switch) {
            this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
            kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
            __kmp_wait_64(this_thr, &flag, TRUE
#if USE_ITT_BUILD
                          , itt_sync_obj
#endif
                          );
        }
        return false;
    }
    kmp_uint64 internal_release() {
        kmp_uint64 old_val;
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
            // No sleep bit possible: a plain byte store suffices.
            old_val = *get();
            byteref(get(),offset) = 1;
        }
        else {
            // The sleep bit may be set concurrently, so write the byte
            // atomically: build a mask with 1 at this thread's byte and OR it in.
            kmp_uint64 mask=0;
            byteref(&mask,offset) = 1;
            old_val = KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask);
        }
        return old_val;
    }
    kmp_uint64 set_sleeping() {
        return KMP_TEST_THEN_OR64((kmp_int64 volatile *)get(), KMP_BARRIER_SLEEP_STATE);
    }
    kmp_uint64 unset_sleeping() {
        return KMP_TEST_THEN_AND64((kmp_int64 volatile *)get(), ~KMP_BARRIER_SLEEP_STATE);
    }
    bool is_sleeping_val(kmp_uint64 old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
    bool is_sleeping() { return is_sleeping_val(*get()); }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
    void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_oncore(this_thr, gtid, this, final_spin, thread_finished
                                          USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
};
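The point of the byte addressing: up to eight threads on one core share a single 64-bit word, each owning one byte, so they all spin on the same cache line, and a releasing thread can flag exactly one of them with a single byte store (or, when the sleep bit may be set concurrently, one atomic OR). A sketch, assuming a little-endian target where byte idx occupies bits 8*idx .. 8*idx+7:

// Releasing the waiter at byte offset 2 of a shared on-core word:
//   kmp_uint64 mask = 0;
//   byteref(&mask, 2) = 1;            // mask == 0x0000000000010000 (little-endian)
//   KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask);
// Afterwards byteref(get(), 2) == 1, so notdone_check() for the waiter with
// offset == 2 flips to false, while waiters on other bytes keep spinning.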

#endif // KMP_WAIT_RELEASE_H