Intel® OpenMP* Runtime Library
kmp_tasking.c
1 /*
2  * kmp_tasking.c -- OpenMP 3.0 tasking support.
3  */
4 
5 /* <copyright>
6  Copyright (c) 1997-2015 Intel Corporation. All Rights Reserved.
7 
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions
10  are met:
11 
12  * Redistributions of source code must retain the above copyright
13  notice, this list of conditions and the following disclaimer.
14  * Redistributions in binary form must reproduce the above copyright
15  notice, this list of conditions and the following disclaimer in the
16  documentation and/or other materials provided with the distribution.
17  * Neither the name of Intel Corporation nor the names of its
18  contributors may be used to endorse or promote products derived
19  from this software without specific prior written permission.
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 </copyright> */
34 
35 #include "kmp.h"
36 #include "kmp_i18n.h"
37 #include "kmp_itt.h"
38 #include "kmp_wait_release.h"
39 
40 
41 
42 /* ------------------------------------------------------------------------ */
43 /* ------------------------------------------------------------------------ */
44 
45 
46 /* forward declaration */
47 static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
48 static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
49 static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
50 
51 #ifdef OMP_41_ENABLED
52 static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
53 #endif
54 
55 static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
56  switch (((kmp_flag_64 *)flag)->get_type()) {
57  case flag32: __kmp_resume_32(gtid, NULL); break;
58  case flag64: __kmp_resume_64(gtid, NULL); break;
59  case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
60  }
61 }
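// The wrapper above erases the concrete flag type so generic code can wake a
// sleeping thread without knowing which flag variant it blocks on.  The cast
// to kmp_flag_64 is only used to read the type tag (the flag variants are
// assumed to expose get_type() compatibly); the matching __kmp_resume_*
// routine is then called with a NULL flag, which presumably makes the target
// thread consult its own recorded sleep flag.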
62 
63 #ifdef BUILD_TIED_TASK_STACK
64 
65 //---------------------------------------------------------------------------
66 // __kmp_trace_task_stack: print the tied tasks from the task stack in order
67 // from top to bottom
68 //
69 // gtid: global thread identifier for thread containing stack
70 // thread_data: thread data for task team thread containing stack
71 // threshold: value above which the trace statement triggers
72 // location: string identifying call site of this function (for trace)
73 
74 static void
75 __kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
76 {
77  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
78  kmp_taskdata_t **stack_top = task_stack -> ts_top;
79  kmp_int32 entries = task_stack -> ts_entries;
80  kmp_taskdata_t *tied_task;
81 
82  KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
83  "first_block = %p, stack_top = %p \n",
84  location, gtid, entries, task_stack->ts_first_block, stack_top ) );
85 
86  KMP_DEBUG_ASSERT( stack_top != NULL );
87  KMP_DEBUG_ASSERT( entries > 0 );
88 
89  while ( entries != 0 )
90  {
91  KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
92  // fix up ts_top if we need to pop from previous block
93  if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 ) // parentheses required: '==' binds tighter than '&'
94  {
95  kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
96 
97  stack_block = stack_block -> sb_prev;
98  stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
99  }
100 
101  // finish bookkeeping
102  stack_top--;
103  entries--;
104 
105  tied_task = * stack_top;
106 
107  KMP_DEBUG_ASSERT( tied_task != NULL );
108  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
109 
110  KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
111  "stack_top=%p, tied_task=%p\n",
112  location, gtid, entries, stack_top, tied_task ) );
113  }
114  KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
115 
116  KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
117  location, gtid ) );
118 }
119 
120 //---------------------------------------------------------------------------
121 // __kmp_init_task_stack: initialize the task stack for the first time
122 // after a thread_data structure is created.
123 // It should not be necessary to do this again (assuming the stack works).
124 //
125 // gtid: global thread identifier of calling thread
126 // thread_data: thread data for task team thread containing stack
127 
128 static void
129 __kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
130 {
131  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
132  kmp_stack_block_t *first_block;
133 
134  // set up the first block of the stack
135  first_block = & task_stack -> ts_first_block;
136  task_stack -> ts_top = (kmp_taskdata_t **) first_block;
137  memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
138 
139  // initialize the stack to be empty
140  task_stack -> ts_entries = TASK_STACK_EMPTY;
141  first_block -> sb_next = NULL;
142  first_block -> sb_prev = NULL;
143 }
144 
145 
146 //---------------------------------------------------------------------------
147 // __kmp_free_task_stack: free the task stack when thread_data is destroyed.
148 //
149 // gtid: global thread identifier for calling thread
150 // thread_data: thread info for thread containing stack
151 
152 static void
153 __kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
154 {
155  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
156  kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
157 
158  KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
159  // free from the second block of the stack
160  while ( stack_block != NULL ) {
161  kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
162 
163  stack_block -> sb_next = NULL;
164  stack_block -> sb_prev = NULL;
165  if (stack_block != & task_stack -> ts_first_block) {
166  __kmp_thread_free( __kmp_threads[ gtid ], stack_block ); // free the block, if not the first
167  }
168  stack_block = next_block;
169  }
170  // initialize the stack to be empty
171  task_stack -> ts_entries = 0;
172  task_stack -> ts_top = NULL;
173 }
174 
175 
176 //---------------------------------------------------------------------------
177 // __kmp_push_task_stack: Push the tied task onto the task stack.
178 // Grow the stack if necessary by allocating another block.
179 //
180 // gtid: global thread identifier for calling thread
181 // thread: thread info for thread containing stack
182 // tied_task: the task to push on the stack
183 
184 static void
185 __kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
186 {
187  // GEH - need to consider what to do if tt_threads_data not allocated yet
188  kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
189  tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
190  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
191 
192  if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
193  return; // Don't push anything on stack if team or team tasks are serialized
194  }
195 
196  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
197  KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
198 
199  KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
200  gtid, thread, tied_task ) );
201  // Store entry
202  * (task_stack -> ts_top) = tied_task;
203 
204  // Do bookkeeping for next push
205  task_stack -> ts_top++;
206  task_stack -> ts_entries++;
207 
208  if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
209  {
210  // Find beginning of this task block
211  kmp_stack_block_t *stack_block =
212  (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
213 
214  // Check if we already have a block
215  if ( stack_block -> sb_next != NULL )
216  { // reset ts_top to beginning of next block
217  task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
218  }
219  else
220  { // Alloc new block and link it up
221  kmp_stack_block_t *new_block = (kmp_stack_block_t *)
222  __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
223 
224  task_stack -> ts_top = & new_block -> sb_block[0];
225  stack_block -> sb_next = new_block;
226  new_block -> sb_prev = stack_block;
227  new_block -> sb_next = NULL;
228 
229  KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
230  gtid, tied_task, new_block ) );
231  }
232  }
233  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
234 }
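// A rough sketch of the structure grown above, assuming TASK_STACK_BLOCK_SIZE
// (N) entries per block and TASK_STACK_INDEX_MASK == N - 1:
//
//     ts_first_block                      block allocated on demand
//     +----------------------+           +----------------------+
//     | sb_block[0 .. N-1]   |  sb_next  | sb_block[0 .. N-1]   |
//     | sb_prev / sb_next    | <-------> | sb_prev / sb_next    |
//     +----------------------+           +----------------------+
//
// ts_top points at the next free slot and ts_entries counts pushed tasks; when
// a push crosses a block boundary the code above either reuses the next linked
// block or allocates a fresh one.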
235 
236 //---------------------------------------------------------------------------
237 // __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
238 // the task, just check to make sure it matches the ending task passed in.
239 //
240 // gtid: global thread identifier for the calling thread
241 // thread: thread info structure containing stack
242 // tied_task: the task popped off the stack
243 // ending_task: the task that is ending (should match popped task)
244 
245 static void
246 __kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
247 {
248  // GEH - need to consider what to do if tt_threads_data not allocated yet
249  kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
250  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
251  kmp_taskdata_t *tied_task;
252 
253  if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
254  return; // Don't pop anything from stack if team or team tasks are serialized
255  }
256 
257  KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
258  KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
259 
260  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
261 
262  // fix up ts_top if we need to pop from previous block
263  if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
264  {
265  kmp_stack_block_t *stack_block =
266  (kmp_stack_block_t *) (task_stack -> ts_top) ;
267 
268  stack_block = stack_block -> sb_prev;
269  task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
270  }
271 
272  // finish bookkeeping
273  task_stack -> ts_top--;
274  task_stack -> ts_entries--;
275 
276  tied_task = * (task_stack -> ts_top );
277 
278  KMP_DEBUG_ASSERT( tied_task != NULL );
279  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
280  KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly
281 
282  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
283  return;
284 }
285 #endif /* BUILD_TIED_TASK_STACK */
286 
287 //---------------------------------------------------
288 // __kmp_push_task: Add a task to the thread's deque
289 
290 static kmp_int32
291 __kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
292 {
293  kmp_info_t * thread = __kmp_threads[ gtid ];
294  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
295  kmp_task_team_t * task_team = thread->th.th_task_team;
296  kmp_int32 tid = __kmp_tid_from_gtid( gtid );
297  kmp_thread_data_t * thread_data;
298 
299  KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
300 
301  // The first check avoids building task_team thread data if serialized
302  if ( taskdata->td_flags.task_serial ) {
303  KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
304  gtid, taskdata ) );
305  return TASK_NOT_PUSHED;
306  }
307 
308  // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
309  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
310  if ( ! KMP_TASKING_ENABLED(task_team) ) {
311  __kmp_enable_tasking( task_team, thread );
312  }
313  KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
314  KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
315 
316  // Find tasking deque specific to encountering thread
317  thread_data = & task_team -> tt.tt_threads_data[ tid ];
318 
319  // No lock needed since only owner can allocate
320  if (thread_data -> td.td_deque == NULL ) {
321  __kmp_alloc_task_deque( thread, thread_data );
322  }
323 
324  // Check if deque is full
325  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
326  {
327  KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
328  gtid, taskdata ) );
329  return TASK_NOT_PUSHED;
330  }
331 
332  // Lock the deque for the task push operation
333  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
334 
335 #if OMP_41_ENABLED
336  // Need to recheck as we can get a proxy task from a thread outside of OpenMP
337  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
338  {
339  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
340  KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
341  gtid, taskdata ) );
342  return TASK_NOT_PUSHED;
343  }
344 #else
345  // Must have room, since no thread other than the calling thread can add tasks
346  KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
347 #endif
348 
349  thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata
350  // Wrap index.
351  thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
352  TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count
353 
354  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
355 
356  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
357  "task=%p ntasks=%d head=%u tail=%u\n",
358  gtid, taskdata, thread_data->td.td_deque_ntasks,
359  thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
360 
361  return TASK_SUCCESSFULLY_PUSHED;
362 }
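// A minimal sketch of the bounded ring-buffer discipline used above, assuming
// TASK_DEQUE_SIZE is a power of two and TASK_DEQUE_MASK == TASK_DEQUE_SIZE - 1:
//
//     deque[ tail ] = taskdata;                  // owner pushes at the tail
//     tail = ( tail + 1 ) & TASK_DEQUE_MASK;     // wrap without a modulo
//     ntasks++;                                  // under td_deque_lock
//
// The owner later pops from the tail in __kmp_remove_my_task(), while thieves
// take from the head, so the two ends of the deque rarely contend.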
363 
364 
365 //-----------------------------------------------------------------------------------------
366 // __kmp_pop_current_task_from_thread: restore the thread's current task to its parent when the team ends
367 // this_thr: thread structure to set current_task in.
368 
369 void
370 __kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
371 {
372  KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
373  "curtask_parent=%p\n",
374  0, this_thr, this_thr -> th.th_current_task,
375  this_thr -> th.th_current_task -> td_parent ) );
376 
377  this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
378 
379  KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
380  "curtask_parent=%p\n",
381  0, this_thr, this_thr -> th.th_current_task,
382  this_thr -> th.th_current_task -> td_parent ) );
383 }
384 
385 
386 //---------------------------------------------------------------------------------------
387 // __kmp_push_current_task_to_thread: set up current task in called thread for a new team
388 // this_thr: thread structure to set up
389 // team: team for implicit task data
390 // tid: thread within team to set up
391 
392 void
393 __kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
394 {
395  // the thread's current task becomes the parent of the newly created implicit tasks of the new team
396  KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
397  "parent_task=%p\n",
398  tid, this_thr, this_thr->th.th_current_task,
399  team->t.t_implicit_task_taskdata[tid].td_parent ) );
400 
401  KMP_DEBUG_ASSERT (this_thr != NULL);
402 
403  if( tid == 0 ) {
404  if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
405  team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
406  this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
407  }
408  } else {
409  team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
410  this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
411  }
412 
413  KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
414  "parent_task=%p\n",
415  tid, this_thr, this_thr->th.th_current_task,
416  team->t.t_implicit_task_taskdata[tid].td_parent ) );
417 }
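// In effect, every implicit task of the new team ends up with the same parent:
// the master (tid 0) splices its previous current task in as that parent, and
// the other threads copy the parent pointer recorded in the master's implicit
// task.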
418 
419 
420 //----------------------------------------------------------------------
421 // __kmp_task_start: bookkeeping for a task starting execution
422 // GTID: global thread id of calling thread
423 // task: task starting execution
424 // current_task: task suspending
425 
426 static void
427 __kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
428 {
429  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
430  kmp_info_t * thread = __kmp_threads[ gtid ];
431 
432  KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
433  gtid, taskdata, current_task) );
434 
435  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
436 
437  // mark currently executing task as suspended
438  // TODO: GEH - make sure root team implicit task is initialized properly.
439  // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
440  current_task -> td_flags.executing = 0;
441 
442  // Add task to stack if tied
443 #ifdef BUILD_TIED_TASK_STACK
444  if ( taskdata -> td_flags.tiedness == TASK_TIED )
445  {
446  __kmp_push_task_stack( gtid, thread, taskdata );
447  }
448 #endif /* BUILD_TIED_TASK_STACK */
449 
450  // mark starting task as executing and as current task
451  thread -> th.th_current_task = taskdata;
452 
453  KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
454  KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
455  taskdata -> td_flags.started = 1;
456  taskdata -> td_flags.executing = 1;
457  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
458  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
459 
460  // GEH TODO: shouldn't we pass some sort of location identifier here?
461  // APT: yes, we will pass location here.
462  // need to store current thread state (in a thread or taskdata structure)
463  // before setting work_state, otherwise wrong state is set after end of task
464 
465  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
466  gtid, taskdata ) );
467 
468  return;
469 }
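// In summary, __kmp_task_start() performs three state changes: the suspended
// task's executing flag goes 1 -> 0, the new task's started and executing
// flags go 0 -> 1, and th_current_task is switched to the new task.
// __kmp_task_finish() below reverses the switch and resumes the suspended task.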
470 
471 
472 //----------------------------------------------------------------------
473 // __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
474 // loc_ref: source location information; points to beginning of task block.
475 // gtid: global thread number.
476 // task: task thunk for the started task.
477 
478 void
479 __kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
480 {
481  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
482  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
483 
484  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
485  gtid, loc_ref, taskdata, current_task ) );
486 
487  taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred.
488  __kmp_task_start( gtid, task, current_task );
489 
490  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
491  gtid, loc_ref, taskdata ) );
492 
493  return;
494 }
495 
496 #ifdef TASK_UNUSED
497 //----------------------------------------------------------------------
498 // __kmpc_omp_task_begin: report that a given task has started execution
499 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
500 
501 void
502 __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
503 {
504  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
505 
506  KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
507  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
508 
509  __kmp_task_start( gtid, task, current_task );
510 
511  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
512  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
513 
514  return;
515 }
516 #endif // TASK_UNUSED
517 
518 
519 //-------------------------------------------------------------------------------------
520 // __kmp_free_task: free the current task space and the space for shareds
521 // gtid: Global thread ID of calling thread
522 // taskdata: task to free
523 // thread: thread data structure of caller
524 
525 static void
526 __kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
527 {
528  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
529  gtid, taskdata) );
530 
531  // Check to make sure all flags and counters have the correct values
532  KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
533  KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
534  KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
535  KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
536  KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
537  KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
538 
539  taskdata->td_flags.freed = 1;
540  // deallocate the taskdata and shared variable blocks associated with this task
541  #if USE_FAST_MEMORY
542  __kmp_fast_free( thread, taskdata );
543  #else /* ! USE_FAST_MEMORY */
544  __kmp_thread_free( thread, taskdata );
545  #endif
546 
547  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
548  gtid, taskdata) );
549 }
550 
551 //-------------------------------------------------------------------------------------
552 // __kmp_free_task_and_ancestors: free the current task and ancestors without children
553 //
554 // gtid: Global thread ID of calling thread
555 // taskdata: task to free
556 // thread: thread data structure of caller
557 
558 static void
559 __kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
560 {
561  kmp_int32 children = 0;
562  kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
563 
564  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
565 
566  if ( !team_or_tasking_serialized ) {
567  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
568  KMP_DEBUG_ASSERT( children >= 0 );
569  }
570 
571  // Now, go up the ancestor tree to see if any ancestors can now be freed.
572  while ( children == 0 )
573  {
574  kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
575 
576  KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
577  "and freeing itself\n", gtid, taskdata) );
578 
579  // --- Deallocate my ancestor task ---
580  __kmp_free_task( gtid, taskdata, thread );
581 
582  taskdata = parent_taskdata;
583 
584  // Stop checking ancestors at implicit task or if tasking serialized
585  // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
586  if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
587  return;
588 
589  if ( !team_or_tasking_serialized ) {
590  // Predecrement simulated by "- 1" calculation
591  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
592  KMP_DEBUG_ASSERT( children >= 0 );
593  }
594  }
595 
596  KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
597  "not freeing it yet\n", gtid, taskdata, children) );
598 }
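// td_allocated_child_tasks behaves like a reference count covering the task
// itself plus each not-yet-freed child: it starts at 1 in __kmp_task_alloc()
// and the parent's count is bumped for every explicit child allocated.  As a
// worked example, a task with two children carries a count of 3; finishing the
// task drops it to 2, so its storage survives until both children pass through
// the routine above and drop the count to 1 and then 0, at which point the
// walk up the ancestor chain reclaims it.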
599 
600 //---------------------------------------------------------------------
601 // __kmp_task_finish: bookkeeping to do when a task finishes execution
602 // gtid: global thread ID for calling thread
603 // task: task to be finished
604 // resumed_task: task to be resumed. (may be NULL if task is serialized)
605 
606 static void
607 __kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
608 {
609  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
610  kmp_info_t * thread = __kmp_threads[ gtid ];
611  kmp_int32 children = 0;
612 
613  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
614  gtid, taskdata, resumed_task) );
615 
616  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
617 
618  // Pop task from stack if tied
619 #ifdef BUILD_TIED_TASK_STACK
620  if ( taskdata -> td_flags.tiedness == TASK_TIED )
621  {
622  __kmp_pop_task_stack( gtid, thread, taskdata );
623  }
624 #endif /* BUILD_TIED_TASK_STACK */
625 
626  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
627  taskdata -> td_flags.complete = 1; // mark the task as completed
628  KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
629  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
630 
631  // Only need to keep track of count if team parallel and tasking not serialized
632  if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
633  // Predecrement simulated by "- 1" calculation
634  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
635  KMP_DEBUG_ASSERT( children >= 0 );
636 #if OMP_40_ENABLED
637  if ( taskdata->td_taskgroup )
638  KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
639  __kmp_release_deps(gtid,taskdata);
640 #endif
641  }
642 
643  // td_flags.executing must be marked as 0 after __kmp_release_deps has been called
644  // Otherwise, if a task is executed immediately from the release_deps code
645  // the flag will be reset to 1 again by this same function
646  KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
647  taskdata -> td_flags.executing = 0; // suspend the finishing task
648 
649  KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
650  gtid, taskdata, children) );
651 
652 #if OMP_40_ENABLED
653  /* If the task's destructor thunk flag has been set, we need to invoke the
654  destructor thunk that has been generated by the compiler.
655  The code is placed here, since at this point other tasks might have been released
656  hence overlapping the destructor invocations with some other work in the
657  released tasks. The OpenMP spec is not specific on when the destructors are
658  invoked, so we should be free to choose.
659  */
660  if (taskdata->td_flags.destructors_thunk) {
661  kmp_routine_entry_t destr_thunk = task->destructors;
662  KMP_ASSERT(destr_thunk);
663  destr_thunk(gtid, task);
664  }
665 #endif // OMP_40_ENABLED
666 
667  // bookkeeping for resuming task:
668  // GEH - note tasking_ser => task_serial
669  KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
670  taskdata->td_flags.task_serial);
671  if ( taskdata->td_flags.task_serial )
672  {
673  if (resumed_task == NULL) {
674  resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
675  }
676  else {
677  // verify resumed task passed in points to parent
678  KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
679  }
680  }
681  else {
682  KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that resumed task is passed as argument
683  }
684 
685  // Free this task and then ancestor tasks if they have no children.
686  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
687 
688  __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task
689 
690  // TODO: GEH - make sure root team implicit task is initialized properly.
691  // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
692  resumed_task->td_flags.executing = 1; // resume previous task
693 
694  KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
695  gtid, taskdata, resumed_task) );
696 
697  return;
698 }
699 
700 //---------------------------------------------------------------------
701 // __kmpc_omp_task_complete_if0: report that a task has completed execution
702 // loc_ref: source location information; points to end of task block.
703 // gtid: global thread number.
704 // task: task thunk for the completed task.
705 
706 void
707 __kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
708 {
709  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
710  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
711 
712  __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume
713 
714  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
715  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
716 
717  return;
718 }
719 
720 #ifdef TASK_UNUSED
721 //---------------------------------------------------------------------
722 // __kmpc_omp_task_complete: report that a task has completed execution
723 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
724 
725 void
726 __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
727 {
728  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
729  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
730 
731  __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume
732 
733  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
734  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
735  return;
736 }
737 #endif // TASK_UNUSED
738 
739 
740 //----------------------------------------------------------------------------------------------------
741 // __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
742 //
743 // loc_ref: reference to source location of parallel region
744 // this_thr: thread data structure corresponding to implicit task
745 // team: team for this_thr
746 // tid: thread id of given thread within team
747 // set_curr_task: TRUE if need to push current task to thread
748 // NOTE: Routine does not set up the implicit task ICVs. This is assumed to have already been done elsewhere.
749 // TODO: Get better loc_ref. Value passed in may be NULL
750 
751 void
752 __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
753 {
754  kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
755 
756  KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
757  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
758 
759  task->td_task_id = KMP_GEN_TASK_ID();
760  task->td_team = team;
761 // task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger)
762  task->td_ident = loc_ref;
763  task->td_taskwait_ident = NULL;
764  task->td_taskwait_counter = 0;
765  task->td_taskwait_thread = 0;
766 
767  task->td_flags.tiedness = TASK_TIED;
768  task->td_flags.tasktype = TASK_IMPLICIT;
769 #if OMP_41_ENABLED
770  task->td_flags.proxy = TASK_FULL;
771 #endif
772 
773  // All implicit tasks are executed immediately, not deferred
774  task->td_flags.task_serial = 1;
775  task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
776  task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
777 
778  task->td_flags.started = 1;
779  task->td_flags.executing = 1;
780  task->td_flags.complete = 0;
781  task->td_flags.freed = 0;
782 
783 #if OMP_40_ENABLED
784  task->td_dephash = NULL;
785  task->td_depnode = NULL;
786 #endif
787 
788  if (set_curr_task) { // only do this initialization the first time a thread is created
789  task->td_incomplete_child_tasks = 0;
790  task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task
791 #if OMP_40_ENABLED
792  task->td_taskgroup = NULL; // An implicit task does not have taskgroup
793 #endif
794  __kmp_push_current_task_to_thread( this_thr, team, tid );
795  } else {
796  KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
797  KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
798  }
799 
800  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
801  tid, team, task ) );
802 }
803 
804 // Round size up to the next multiple of val, where val is a power of two
805 // Used to insert padding between structures co-allocated using a single malloc() call
806 static size_t
807 __kmp_round_up_to_val( size_t size, size_t val ) {
808  if ( size & ( val - 1 ) ) {
809  size &= ~ ( val - 1 );
810  if ( size <= KMP_SIZE_T_MAX - val ) {
811  size += val; // Round up if there is no overflow.
812  }; // if
813  }; // if
814  return size;
815 } // __kmp_round_up_to_val
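// Worked example, assuming val = 8 (sizeof(void *) on a 64-bit build):
// __kmp_round_up_to_val( 0x53, 8 ) masks 0x53 down to 0x50 and then adds 8,
// returning 0x58; a size that is already a multiple of val is returned
// unchanged.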
816 
817 
818 //---------------------------------------------------------------------------------
819 // __kmp_task_alloc: Allocate the taskdata and task data structures for a task
820 //
821 // loc_ref: source location information
822 // gtid: global thread number.
823 // flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
824 // Converted from kmp_int32 to kmp_tasking_flags_t in routine.
825 // sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
826 // sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
827 // task_entry: Pointer to task code entry point generated by compiler.
828 // returns: a pointer to the allocated kmp_task_t structure (task).
829 
830 kmp_task_t *
831 __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
832  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
833  kmp_routine_entry_t task_entry )
834 {
835  kmp_task_t *task;
836  kmp_taskdata_t *taskdata;
837  kmp_info_t *thread = __kmp_threads[ gtid ];
838  kmp_team_t *team = thread->th.th_team;
839  kmp_taskdata_t *parent_task = thread->th.th_current_task;
840  size_t shareds_offset;
841 
842  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
843  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
844  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
845  sizeof_shareds, task_entry) );
846 
847  if ( parent_task->td_flags.final ) {
848  if (flags->merged_if0) {
849  }
850  flags->final = 1;
851  }
852 
853 #if OMP_41_ENABLED
854  if ( flags->proxy == TASK_PROXY ) {
855  flags->tiedness = TASK_UNTIED;
856  flags->merged_if0 = 1;
857 
858  /* If we are running in a serialized parallel region or in tskm_immediate_exec mode, tasking support must be enabled here */
859  if ( (thread->th.th_task_team) == NULL ) {
860  /* This should only happen if the team is serialized;
861  set up a task team and propagate it to the thread
862  */
863  KMP_DEBUG_ASSERT(team->t.t_serialized);
864  KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
865  __kmp_task_team_setup(thread,team,0,1);
866  thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
867  }
868  kmp_task_team_t * task_team = thread->th.th_task_team;
869 
870  /* tasking must be enabled now as the task might not be pushed */
871  if ( !KMP_TASKING_ENABLED( task_team ) ) {
872  KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
873  __kmp_enable_tasking( task_team, thread );
874  kmp_int32 tid = thread->th.th_info.ds.ds_tid;
875  kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
876  // No lock needed since only owner can allocate
877  if (thread_data -> td.td_deque == NULL ) {
878  __kmp_alloc_task_deque( thread, thread_data );
879  }
880  }
881 
882  if ( task_team->tt.tt_found_proxy_tasks == FALSE )
883  TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
884  }
885 #endif
886 
887  // Calculate shared structure offset including padding after kmp_task_t struct
888  // to align pointers in shared struct
889  shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
890  shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
891 
892  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
893  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
894  gtid, shareds_offset) );
895  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
896  gtid, sizeof_shareds) );
897 
898  // Avoid double allocation here by combining shareds with taskdata
899  #if USE_FAST_MEMORY
900  taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
901  #else /* ! USE_FAST_MEMORY */
902  taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
903  #endif /* USE_FAST_MEMORY */
904 
905  task = KMP_TASKDATA_TO_TASK(taskdata);
906 
907  // Make sure task & taskdata are aligned appropriately
908 #if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
909  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
910  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
911 #else
912  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
913  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
914 #endif
915  if (sizeof_shareds > 0) {
916  // Avoid double allocation here by combining shareds with taskdata
917  task->shareds = & ((char *) taskdata)[ shareds_offset ];
918  // Make sure shareds struct is aligned to pointer size
919  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
920  } else {
921  task->shareds = NULL;
922  }
923  task->routine = task_entry;
924  task->part_id = 0; // AC: Always start with 0 part id
925 
926  taskdata->td_task_id = KMP_GEN_TASK_ID();
927  taskdata->td_team = team;
928  taskdata->td_alloc_thread = thread;
929  taskdata->td_parent = parent_task;
930  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
931  taskdata->td_ident = loc_ref;
932  taskdata->td_taskwait_ident = NULL;
933  taskdata->td_taskwait_counter = 0;
934  taskdata->td_taskwait_thread = 0;
935  KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
936 #if OMP_41_ENABLED
937  // avoid copying icvs for proxy tasks
938  if ( flags->proxy == TASK_FULL )
939 #endif
940  copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
941 
942  taskdata->td_flags.tiedness = flags->tiedness;
943  taskdata->td_flags.final = flags->final;
944  taskdata->td_flags.merged_if0 = flags->merged_if0;
945 #if OMP_40_ENABLED
946  taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
947 #endif // OMP_40_ENABLED
948 #if OMP_41_ENABLED
949  taskdata->td_flags.proxy = flags->proxy;
950 #endif
951  taskdata->td_flags.tasktype = TASK_EXPLICIT;
952 
953  // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
954  taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
955 
956  // GEH - TODO: fix this to copy parent task's value of team_serial flag
957  taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
958 
959  // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
960  // tasks are not left until program termination to execute. Also, it helps locality to execute
961  // immediately.
962 
963  taskdata->td_flags.task_serial = ( parent_task->td_flags.final
964  || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
965 
966  taskdata->td_flags.started = 0;
967  taskdata->td_flags.executing = 0;
968  taskdata->td_flags.complete = 0;
969  taskdata->td_flags.freed = 0;
970 
971  taskdata->td_flags.native = flags->native;
972 
973  taskdata->td_incomplete_child_tasks = 0;
974  taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children
975 #if OMP_40_ENABLED
976  taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
977  taskdata->td_dephash = NULL;
978  taskdata->td_depnode = NULL;
979 #endif
980 
981  // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
982 #if OMP_41_ENABLED
983  if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
984 #else
985  if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
986 #endif
987  {
988  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
989 #if OMP_40_ENABLED
990  if ( parent_task->td_taskgroup )
991  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
992 #endif
993  // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
994  if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
995  KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
996  }
997  }
998 
999  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
1000  gtid, taskdata, taskdata->td_parent) );
1001 
1002  return task;
1003 }
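// The single allocation performed above lays the three pieces of a task out
// back to back:
//
//     +----------------+-----------------------------+---------+---------+
//     | kmp_taskdata_t | kmp_task_t + private copies | padding | shareds |
//     +----------------+-----------------------------+---------+---------+
//     ^ taskdata       ^ task                                  ^ task->shareds
//                                                      (= taskdata + shareds_offset)
//
// shareds_offset is sizeof(kmp_taskdata_t) + sizeof_kmp_task_t rounded up to a
// multiple of sizeof(void *) by __kmp_round_up_to_val(), so task->shareds is
// always pointer-aligned.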
1004 
1005 
1006 kmp_task_t *
1007 __kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
1008  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1009  kmp_routine_entry_t task_entry )
1010 {
1011  kmp_task_t *retval;
1012  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
1013 
1014  input_flags->native = FALSE;
1015  // __kmp_task_alloc() sets up all other runtime flags
1016 
1017  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
1018  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1019  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
1020 #if OMP_41_ENABLED
1021  input_flags->proxy ? "proxy" : "",
1022 #else
1023  "",
1024 #endif
1025  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
1026 
1027  retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
1028  sizeof_shareds, task_entry );
1029 
1030  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
1031 
1032  return retval;
1033 }
1034 
1035 //-----------------------------------------------------------
1036 // __kmp_invoke_task: invoke the specified task
1037 //
1038 // gtid: global thread ID of caller
1039 // task: the task to invoke
1040 // current_task: the task to resume after task invocation
1041 
1042 static void
1043 __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
1044 {
1045  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
1046 #if OMP_40_ENABLED
1047  int discard = 0 /* false */;
1048 #endif
1049  KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
1050  gtid, taskdata, current_task) );
1051 
1052 #if OMP_41_ENABLED
1053  if ( taskdata->td_flags.proxy == TASK_PROXY &&
1054  taskdata->td_flags.complete == 1)
1055  {
1056  // This is a proxy task that was already completed but it needs to run
1057  // its bottom-half finish
1058  KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
1059  gtid, taskdata) );
1060 
1061  __kmp_bottom_half_finish_proxy(gtid,task);
1062 
1063  KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );
1064 
1065  return;
1066  }
1067 #endif
1068 
1069 #if OMP_41_ENABLED
1070  // Proxy tasks are not handled by the runtime
1071  if ( taskdata->td_flags.proxy != TASK_PROXY )
1072 #endif
1073  __kmp_task_start( gtid, task, current_task );
1074 
1075 #if OMP_40_ENABLED
1076  // TODO: cancel tasks if the parallel region has also been cancelled
1077  // TODO: check if this sequence can be hoisted above __kmp_task_start
1078  // if cancellation has been enabled for this run ...
1079  if (__kmp_omp_cancellation) {
1080  kmp_info_t *this_thr = __kmp_threads [ gtid ];
1081  kmp_team_t * this_team = this_thr->th.th_team;
1082  kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1083  if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
1084  // the task belongs to a taskgroup that is being cancelled, or its parallel region has been cancelled
1085  discard = 1 /* true */;
1086  }
1087  }
1088 
1089  //
1090  // Invoke the task routine and pass in relevant data.
1091  // Thunks generated by gcc take a different argument list.
1092  //
1093  if (!discard) {
1094 #endif // OMP_40_ENABLED
1095 #ifdef KMP_GOMP_COMPAT
1096  if (taskdata->td_flags.native) {
1097  ((void (*)(void *))(*(task->routine)))(task->shareds);
1098  }
1099  else
1100 #endif /* KMP_GOMP_COMPAT */
1101  {
1102  (*(task->routine))(gtid, task);
1103  }
1104 #if OMP_40_ENABLED
1105  }
1106 #endif // OMP_40_ENABLED
1107 
1108 #if OMP_41_ENABLED
1109  // Proxy tasks are not handled by the runtime
1110  if ( taskdata->td_flags.proxy != TASK_PROXY )
1111 #endif
1112  __kmp_task_finish( gtid, task, current_task );
1113 
1114  KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
1115  gtid, taskdata, current_task) );
1116  return;
1117 }
1118 
1119 //-----------------------------------------------------------------------
1120 // __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1121 //
1122 // loc_ref: location of original task pragma (ignored)
1123 // gtid: Global Thread ID of encountering thread
1124 // new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1125 // Returns:
1126 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1127 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1128 
1129 kmp_int32
1130 __kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1131 {
1132  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1133 
1134  KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1135  gtid, loc_ref, new_taskdata ) );
1136 
1137  /* Should we execute the new task or queue it? For now, let's just always try to
1138  queue it. If the queue fills up, then we'll execute it. */
1139 
1140  if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1141  { // Execute this task immediately
1142  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1143  new_taskdata->td_flags.task_serial = 1;
1144  __kmp_invoke_task( gtid, new_task, current_task );
1145  }
1146 
1147  KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1148  "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1149  new_taskdata ) );
1150 
1151  return TASK_CURRENT_NOT_QUEUED;
1152 }
1153 
1154 //---------------------------------------------------------------------
1155 // __kmp_omp_task: Schedule a non-thread-switchable task for execution
1156 // gtid: Global Thread ID of encountering thread
1157 // new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1158 // serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
1159 // returns:
1160 //
1161 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1162 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1163 kmp_int32
1164 __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1165 {
1166  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1167 
1168  /* Should we execute the new task or queue it? For now, let's just always try to
1169  queue it. If the queue fills up, then we'll execute it. */
1170 #if OMP_41_ENABLED
1171  if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1172 #else
1173  if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1174 #endif
1175  { // Execute this task immediately
1176  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1177  if ( serialize_immediate )
1178  new_taskdata -> td_flags.task_serial = 1;
1179  __kmp_invoke_task( gtid, new_task, current_task );
1180  }
1181 
1182 
1183  return TASK_CURRENT_NOT_QUEUED;
1184 }
1185 
1186 //---------------------------------------------------------------------
1187 // __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
1188 // the parent thread only!
1189 // loc_ref: location of original task pragma (ignored)
1190 // gtid: Global Thread ID of encountering thread
1191 // new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1192 // returns:
1193 //
1194 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1195 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1196 
1197 kmp_int32
1198 __kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1199 {
1200  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1201  kmp_int32 res;
1202 
1203  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1204  gtid, loc_ref, new_taskdata ) );
1205 
1206  res = __kmp_omp_task(gtid,new_task,true);
1207 
1208  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1209  gtid, loc_ref, new_taskdata ) );
1210  return res;
1211 }
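// The guarded sketch below shows roughly what a compiler might emit for
// "#pragma omp task shared(x)" using __kmpc_omp_task_alloc() and
// __kmpc_omp_task() above.  The outlined routine, the shareds struct and the
// flag value (1 is assumed to request a tied task) are hypothetical
// illustrations, not part of this runtime.
#if 0
struct hypothetical_shareds { int *x; };

static kmp_int32
hypothetical_task_entry( kmp_int32 gtid, void *part )
{
    kmp_task_t *task = (kmp_task_t *) part;
    struct hypothetical_shareds *sh = (struct hypothetical_shareds *) task->shareds;
    *( sh->x ) += 1;                                   // the task body
    return 0;
}

static void
hypothetical_emit_task( ident_t *loc, kmp_int32 gtid, int *x )
{
    kmp_task_t *t = __kmpc_omp_task_alloc( loc, gtid, 1 /* tied */,
                                           sizeof( kmp_task_t ),
                                           sizeof( struct hypothetical_shareds ),
                                           (kmp_routine_entry_t) hypothetical_task_entry );
    ( (struct hypothetical_shareds *) t->shareds )->x = x;   // fill in shareds
    __kmpc_omp_task( loc, gtid, t );                   // defer or run immediately
}
#endif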
1212 
1213 //-------------------------------------------------------------------------------------
1214 // __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1215 
1216 kmp_int32
1217 __kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1218 {
1219  kmp_taskdata_t * taskdata;
1220  kmp_info_t * thread;
1221  int thread_finished = FALSE;
1222 
1223  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
1224  gtid, loc_ref) );
1225 
1226  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1227  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1228 
1229  thread = __kmp_threads[ gtid ];
1230  taskdata = thread -> th.th_current_task;
1231 #if USE_ITT_BUILD
1232  // Note: These values are used by ITT events as well.
1233 #endif /* USE_ITT_BUILD */
1234  taskdata->td_taskwait_counter += 1;
1235  taskdata->td_taskwait_ident = loc_ref;
1236  taskdata->td_taskwait_thread = gtid + 1;
1237 
1238 #if USE_ITT_BUILD
1239  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1240  if ( itt_sync_obj != NULL )
1241  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1242 #endif /* USE_ITT_BUILD */
1243 
1244 #if OMP_41_ENABLED
1245  if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1246 #else
1247  if ( ! taskdata->td_flags.team_serial )
1248 #endif
1249  {
1250  // GEH: if team serialized, avoid reading the volatile variable below.
1251  kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
1252  while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
1253  flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1254  USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1255  }
1256  }
1257 #if USE_ITT_BUILD
1258  if ( itt_sync_obj != NULL )
1259  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1260 #endif /* USE_ITT_BUILD */
1261 
1262  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1263  taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1264  }
1265 
1266  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1267  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1268 
1269  return TASK_CURRENT_NOT_QUEUED;
1270 }
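// A "#pragma omp taskwait" directive typically lowers to a single call of the
// routine above, roughly (loc supplied by the compiler):
//
//     __kmpc_omp_taskwait( &loc, __kmpc_global_thread_num( &loc ) );
//
// The waiting task keeps executing other tasks via flag.execute_tasks() until
// its td_incomplete_child_tasks counter drains to zero.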
1271 
1272 
1273 //-------------------------------------------------
1274 // __kmpc_omp_taskyield: switch to a different task
1275 
1276 kmp_int32
1277 __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1278 {
1279  kmp_taskdata_t * taskdata;
1280  kmp_info_t * thread;
1281  int thread_finished = FALSE;
1282 
1283  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1284  gtid, loc_ref, end_part) );
1285 
1286  if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
1287  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1288 
1289  thread = __kmp_threads[ gtid ];
1290  taskdata = thread -> th.th_current_task;
1291  // Should we model this as a task wait or not?
1292 #if USE_ITT_BUILD
1293  // Note: These values are used by ITT events as well.
1294 #endif /* USE_ITT_BUILD */
1295  taskdata->td_taskwait_counter += 1;
1296  taskdata->td_taskwait_ident = loc_ref;
1297  taskdata->td_taskwait_thread = gtid + 1;
1298 
1299 #if USE_ITT_BUILD
1300  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1301  if ( itt_sync_obj != NULL )
1302  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1303 #endif /* USE_ITT_BUILD */
1304  if ( ! taskdata->td_flags.team_serial ) {
1305  kmp_task_team_t * task_team = thread->th.th_task_team;
1306  if (task_team != NULL) {
1307  if (KMP_TASKING_ENABLED(task_team)) {
1308  __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1309  USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1310  }
1311  }
1312  }
1313 #if USE_ITT_BUILD
1314  if ( itt_sync_obj != NULL )
1315  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1316 #endif /* USE_ITT_BUILD */
1317 
1318  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1319  taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1320  }
1321 
1322  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1323  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1324 
1325  return TASK_CURRENT_NOT_QUEUED;
1326 }
1327 
1328 
1329 #if OMP_40_ENABLED
1330 //-------------------------------------------------------------------------------------
1331 // __kmpc_taskgroup: Start a new taskgroup
1332 
1333 void
1334 __kmpc_taskgroup( ident_t* loc, int gtid )
1335 {
1336  kmp_info_t * thread = __kmp_threads[ gtid ];
1337  kmp_taskdata_t * taskdata = thread->th.th_current_task;
1338  kmp_taskgroup_t * tg_new =
1339  (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1340  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1341  tg_new->count = 0;
1342  tg_new->cancel_request = cancel_noreq;
1343  tg_new->parent = taskdata->td_taskgroup;
1344  taskdata->td_taskgroup = tg_new;
1345 }
1346 
1347 
1348 //-------------------------------------------------------------------------------------
1349 // __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1350 // and its descendants are complete
1351 
1352 void
1353 __kmpc_end_taskgroup( ident_t* loc, int gtid )
1354 {
1355  kmp_info_t * thread = __kmp_threads[ gtid ];
1356  kmp_taskdata_t * taskdata = thread->th.th_current_task;
1357  kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1358  int thread_finished = FALSE;
1359 
1360  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1361  KMP_DEBUG_ASSERT( taskgroup != NULL );
1362 
1363  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1364 #if USE_ITT_BUILD
1365  // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1366  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1367  if ( itt_sync_obj != NULL )
1368  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1369 #endif /* USE_ITT_BUILD */
1370 
1371 #if OMP_41_ENABLED
1372  if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1373 #else
1374  if ( ! taskdata->td_flags.team_serial )
1375 #endif
1376  {
1377  kmp_flag_32 flag(&(taskgroup->count), 0U);
1378  while ( TCR_4(taskgroup->count) != 0 ) {
1379  flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1380  USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1381  }
1382  }
1383 
1384 #if USE_ITT_BUILD
1385  if ( itt_sync_obj != NULL )
1386  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1387 #endif /* USE_ITT_BUILD */
1388  }
1389  KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1390 
1391  // Restore parent taskgroup for the current task
1392  taskdata->td_taskgroup = taskgroup->parent;
1393  __kmp_thread_free( thread, taskgroup );
1394 
1395  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1396 }
1397 #endif
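//-------------------------------------------------------------------------------------
// Illustrative sketch (not part of the original sources, kept under "#if 0"):
// a user-level taskgroup such as
//     #pragma omp taskgroup
//     { /* ... #pragma omp task ... */ }
// is bracketed by the two entry points above. The exact arguments the compiler
// emits are simplified/assumed here.
#if 0
static void user_taskgroup_lowering_sketch( ident_t *loc, int gtid )
{
    __kmpc_taskgroup( loc, gtid );      // push a new kmp_taskgroup_t (count = 0)
    // ... child tasks created here increment the taskgroup count when queued
    //     and decrement it as they complete ...
    __kmpc_end_taskgroup( loc, gtid );  // execute/steal tasks until count reaches 0
}
#endif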
1398 
1399 
1400 //------------------------------------------------------
1401 // __kmp_remove_my_task: remove a task from my own deque
1402 
1403 static kmp_task_t *
1404 __kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1405  kmp_int32 is_constrained )
1406 {
1407  kmp_task_t * task;
1408  kmp_taskdata_t * taskdata;
1409  kmp_thread_data_t *thread_data;
1410  kmp_uint32 tail;
1411 
1412  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1413  KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1414 
1415  thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1416 
1417  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1418  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1419  thread_data->td.td_deque_tail) );
1420 
1421  if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1422  KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1423  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1424  thread_data->td.td_deque_tail) );
1425  return NULL;
1426  }
1427 
1428  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1429 
1430  if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1431  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1432  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1433  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1434  thread_data->td.td_deque_tail) );
1435  return NULL;
1436  }
1437 
1438  tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1439  taskdata = thread_data -> td.td_deque[ tail ];
1440 
1441  if (is_constrained) {
1442  // we need to check that the candidate obeys the task scheduling constraint:
1443  // only a child of the current task can be scheduled
1444  kmp_taskdata_t * current = thread->th.th_current_task;
1445  kmp_int32 level = current->td_level;
1446  kmp_taskdata_t * parent = taskdata->td_parent;
1447  while ( parent != current && parent->td_level > level ) {
1448  parent = parent->td_parent; // check generation up to the level of the current task
1449  KMP_DEBUG_ASSERT(parent != NULL);
1450  }
1451  if ( parent != current ) {
1452  // If the tail task is not a child, then no other children can appear in the deque.
1453  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1454  KA_TRACE(10, ("__kmp_remove_my_task(exit #3): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1455  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1456  thread_data->td.td_deque_tail) );
1457  return NULL;
1458  }
1459  }
1460 
1461  thread_data -> td.td_deque_tail = tail;
1462  TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1463 
1464  __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1465 
1466  KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1467  gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1468  thread_data->td.td_deque_tail) );
1469 
1470  task = KMP_TASKDATA_TO_TASK( taskdata );
1471  return task;
1472 }
1473 
1474 
1475 //-----------------------------------------------------------
1476 // __kmp_steal_task: remove a task from another thread's deque
1477 // Assume that calling thread has already checked existence of
1478 // task_team thread_data before calling this routine.
1479 
1480 static kmp_task_t *
1481 __kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1482  volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1483  kmp_int32 is_constrained )
1484 {
1485  kmp_task_t * task;
1486  kmp_taskdata_t * taskdata;
1487  kmp_thread_data_t *victim_td, *threads_data;
1488  kmp_int32 victim_tid, thread_tid;
1489 
1490  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1491 
1492  threads_data = task_team -> tt.tt_threads_data;
1493  KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1494 
1495  victim_tid = victim->th.th_info.ds.ds_tid;
1496  victim_td = & threads_data[ victim_tid ];
1497 
1498  KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1499  "head=%u tail=%u\n",
1500  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1501  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1502 
1503  if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1504  (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1505  {
1506  KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1507  "ntasks=%d head=%u tail=%u\n",
1508  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1509  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1510  return NULL;
1511  }
1512 
1513  __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1514 
1515  // Check again after we acquire the lock
1516  if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1517  (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1518  {
1519  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1520  KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1521  "ntasks=%d head=%u tail=%u\n",
1522  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1523  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1524  return NULL;
1525  }
1526 
1527  KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1528 
1529  if ( !is_constrained ) {
1530  taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1531  // Bump head pointer and Wrap.
1532  victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1533  } else {
1534  // While we have postponed tasks, steal from the tail of the deque (smaller tasks)
1535  kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1536  taskdata = victim_td -> td.td_deque[ tail ];
1537  // we need to check that the candidate obeys the task scheduling constraint:
1538  // only a child of the current task can be scheduled
1539  kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1540  kmp_int32 level = current->td_level;
1541  kmp_taskdata_t * parent = taskdata->td_parent;
1542  while ( parent != current && parent->td_level > level ) {
1543  parent = parent->td_parent; // check generation up to the level of the current task
1544  KMP_DEBUG_ASSERT(parent != NULL);
1545  }
1546  if ( parent != current ) {
1547  // If the tail task is not a child, then no other children can appear in the deque (?).
1548  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1549  KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from T#%d: task_team=%p "
1550  "ntasks=%d head=%u tail=%u\n",
1551  gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1552  task_team, victim_td->td.td_deque_ntasks,
1553  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1554  return NULL;
1555  }
1556  victim_td -> td.td_deque_tail = tail;
1557  }
1558  if (*thread_finished) {
1559  // We need to un-mark this victim as a finished victim. This must be done before
1560  // releasing the lock, or else other threads (starting with the master victim)
1561  // might be prematurely released from the barrier!!!
1562  kmp_uint32 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
1563 
1564  KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1565  gtid, count + 1, task_team) );
1566 
1567  *thread_finished = FALSE;
1568  }
1569  TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1570 
1571  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1572 
1573  KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d stole task %p from T#%d: task_team=%p "
1574  "ntasks=%d head=%u tail=%u\n",
1575  gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1576  victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1577  victim_td->td.td_deque_tail) );
1578 
1579  task = KMP_TASKDATA_TO_TASK( taskdata );
1580  return task;
1581 }
1582 
1583 
1584 //-----------------------------------------------------------------------------
1585 // __kmp_execute_tasks_template: Choose and execute tasks until either the condition
1586 // is satisfied (return true) or there are none left (return false).
1587 // final_spin is TRUE if this is the spin at the release barrier.
1588 // thread_finished indicates whether the thread is finished executing all
1589 // the tasks it has on its deque, and is at the release barrier.
1590 // spinner is the location on which to spin.
1591 // spinner == NULL means only execute a single task and return.
1592 // checker is the value to check to terminate the spin.
1593 template <class C>
1594 static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
1595  int *thread_finished
1596  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1597 {
1598  kmp_task_team_t * task_team;
1599  kmp_team_t * team;
1600  kmp_thread_data_t * threads_data;
1601  kmp_task_t * task;
1602  kmp_taskdata_t * current_task = thread -> th.th_current_task;
1603  volatile kmp_uint32 * unfinished_threads;
1604  kmp_int32 nthreads, last_stolen, k, tid;
1605 
1606  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1607  KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1608 
1609  task_team = thread -> th.th_task_team;
1610  KMP_DEBUG_ASSERT( task_team != NULL );
1611 
1612  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
1613  gtid, final_spin, *thread_finished) );
1614 
1615  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1616  KMP_DEBUG_ASSERT( threads_data != NULL );
1617 
1618  nthreads = task_team -> tt.tt_nproc;
1619  unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
1620 #if OMP_41_ENABLED
1621  KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
1622 #else
1623  KMP_DEBUG_ASSERT( nthreads > 1 );
1624 #endif
1625  KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1626 
1627  // Choose tasks from our own work queue.
1628  start:
1629  while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1630 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1631  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1632  if ( itt_sync_obj == NULL ) {
1633  // we are at fork barrier where we could not get the object reliably
1634  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1635  }
1636  __kmp_itt_task_starting( itt_sync_obj );
1637  }
1638 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1639  __kmp_invoke_task( gtid, task, current_task );
1640 #if USE_ITT_BUILD
1641  if ( itt_sync_obj != NULL )
1642  __kmp_itt_task_finished( itt_sync_obj );
1643 #endif /* USE_ITT_BUILD */
1644 
1645  // If this thread is only partway through the barrier and the condition
1646  // is met, then return now, so that the barrier gather/release pattern can proceed.
1647  // If this thread is in the last spin loop in the barrier, waiting to be
1648  // released, we know that the termination condition will not be satisfied,
1649  // so don't waste any cycles checking it.
1650  if (flag == NULL || (!final_spin && flag->done_check())) {
1651  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
1652  return TRUE;
1653  }
1654  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1655  }
1656 
1657  // This thread's work queue is empty. If we are in the final spin loop
1658  // of the barrier, check and see if the termination condition is satisfied.
1659 #if OMP_41_ENABLED
1660  // The work queue may be empty but there might be proxy tasks still executing
1661  if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1662 #else
1663  if (final_spin)
1664 #endif
1665  {
1666  // First, decrement the #unfinished threads, if that has not already
1667  // been done. This decrement might be to the spin location, and
1668  // result in the termination condition being satisfied.
1669  if (! *thread_finished) {
1670  kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1671  KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
1672  gtid, count, task_team) );
1673  *thread_finished = TRUE;
1674  }
1675 
1676  // It is now unsafe to reference thread->th.th_team !!!
1677  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1678  // thread to pass through the barrier, where it might reset each thread's
1679  // th.th_team field for the next parallel region.
1680  // If we can steal more work, we know that this has not happened yet.
1681  if (flag != NULL && flag->done_check()) {
1682  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
1683  return TRUE;
1684  }
1685  }
1686 
1687 #if OMP_41_ENABLED
1688  // check if there are other threads to steal from, otherwise go back
1689  if ( nthreads == 1 )
1690  goto start;
1691 #endif
1692 
1693  // Try to steal from the last place I stole from successfully.
1694  tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1695  last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1696 
1697  if (last_stolen != -1) {
1698  kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1699 
1700  while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1701  thread_finished, is_constrained )) != NULL)
1702  {
1703 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1704  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1705  if ( itt_sync_obj == NULL ) {
1706  // we are at fork barrier where we could not get the object reliably
1707  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1708  }
1709  __kmp_itt_task_starting( itt_sync_obj );
1710  }
1711 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1712  __kmp_invoke_task( gtid, task, current_task );
1713 #if USE_ITT_BUILD
1714  if ( itt_sync_obj != NULL )
1715  __kmp_itt_task_finished( itt_sync_obj );
1716 #endif /* USE_ITT_BUILD */
1717 
1718  // Check to see if this thread can proceed.
1719  if (flag == NULL || (!final_spin && flag->done_check())) {
1720  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
1721  gtid) );
1722  return TRUE;
1723  }
1724 
1725  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1726  // If the execution of the stolen task resulted in more tasks being
1727  // placed on our run queue, then restart the whole process.
1728  if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1729  KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
1730  gtid) );
1731  goto start;
1732  }
1733  }
1734 
1735  // Don't give priority to stealing from this thread anymore.
1736  threads_data[ tid ].td.td_deque_last_stolen = -1;
1737 
1738  // The victim's work queue is empty. If we are in the final spin loop
1739  // of the barrier, check and see if the termination condition is satisfied.
1740 #if OMP_41_ENABLED
1741  // The work queue may be empty but there might be proxy tasks still executing
1742  if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1743 #else
1744  if (final_spin)
1745 #endif
1746  {
1747  // First, decrement the #unfinished threads, if that has not already
1748  // been done. This decrement might be to the spin location, and
1749  // result in the termination condition being satisfied.
1750  if (! *thread_finished) {
1751  kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1752  KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
1753  "task_team=%p\n", gtid, count, task_team) );
1754  *thread_finished = TRUE;
1755  }
1756 
1757  // If __kmp_tasking_mode != tskm_immediate_exec
1758  // then it is now unsafe to reference thread->th.th_team !!!
1759  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1760  // thread to pass through the barrier, where it might reset each thread's
1761  // th.th_team field for the next parallel region.
1762  // If we can steal more work, we know that this has not happened yet.
1763  if (flag != NULL && flag->done_check()) {
1764  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
1765  gtid) );
1766  return TRUE;
1767  }
1768  }
1769  }
1770 
1771  // Find a different thread to steal work from. Pick a random thread.
1772  // My initial plan was to cycle through all the threads, and only return
1773  // if we tried to steal from every thread, and failed. Arch says that's
1774  // not such a great idea.
1775  // GEH - need yield code in this loop for throughput library mode?
1776  new_victim:
1777  k = __kmp_get_random( thread ) % (nthreads - 1);
1778  if ( k >= thread -> th.th_info.ds.ds_tid ) {
1779  ++k; // Adjusts random distribution to exclude self
1780  }
1781  {
1782  kmp_info_t *other_thread = threads_data[k].td.td_thr;
1783  int first;
1784 
1785  // There is a slight chance that __kmp_enable_tasking() did not wake up
1786  // all threads waiting at the barrier. If this thread is sleeping, then
1787  // wake it up. Since we were going to pay the cache miss penalty
1788  // for referencing another thread's kmp_info_t struct anyway, the check
1789  // shouldn't cost too much performance at this point.
1790  // In extra barrier mode, tasks do not sleep at the separate tasking
1791  // barrier, so this isn't a problem.
1792  if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1793  (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1794  (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1795  {
1796  __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
1797  // A sleeping thread should not have any tasks on its queue.
1798  // There is a slight possibility that it resumes, steals a task from
1799  // another thread, which spawns more tasks, all in the time that it takes
1800  // this thread to check => don't write an assertion that the victim's
1801  // queue is empty. Try stealing from a different thread.
1802  goto new_victim;
1803  }
1804 
1805  // Now try to steal work from the selected thread
1806  first = TRUE;
1807  while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1808  thread_finished, is_constrained )) != NULL)
1809  {
1810 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1811  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1812  if ( itt_sync_obj == NULL ) {
1813  // we are at fork barrier where we could not get the object reliably
1814  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1815  }
1816  __kmp_itt_task_starting( itt_sync_obj );
1817  }
1818 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1819  __kmp_invoke_task( gtid, task, current_task );
1820 #if USE_ITT_BUILD
1821  if ( itt_sync_obj != NULL )
1822  __kmp_itt_task_finished( itt_sync_obj );
1823 #endif /* USE_ITT_BUILD */
1824 
1825  // Try stealing from this victim again, in the future.
1826  if (first) {
1827  threads_data[ tid ].td.td_deque_last_stolen = k;
1828  first = FALSE;
1829  }
1830 
1831  // Check to see if this thread can proceed.
1832  if (flag == NULL || (!final_spin && flag->done_check())) {
1833  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
1834  gtid) );
1835  return TRUE;
1836  }
1837  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1838 
1839  // If the execution of the stolen task resulted in more tasks being
1840  // placed on our run queue, then restart the whole process.
1841  if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1842  KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
1843  gtid) );
1844  goto start;
1845  }
1846  }
1847 
1848  // The victim's work queue is empty. If we are in the final spin loop
1849  // of the barrier, check and see if the termination condition is satisfied.
1850  // Going on and finding a new victim to steal from is expensive, as it
1851  // involves a lot of cache misses, so we definitely want to re-check the
1852  // termination condition before doing that.
1853 #if OMP_41_ENABLED
1854  // The work queue may be empty but there might be proxy tasks still executing
1855  if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1856 #else
1857  if (final_spin)
1858 #endif
1859  {
1860  // First, decrement the #unfinished threads, if that has not already
1861  // been done. This decrement might be to the spin location, and
1862  // result in the termination condition being satisfied.
1863  if (! *thread_finished) {
1864  kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1865  KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
1866  "task_team=%p\n",
1867  gtid, count, task_team) );
1868  *thread_finished = TRUE;
1869  }
1870 
1871  // If __kmp_tasking_mode != tskm_immediate_exec,
1872  // then it is now unsafe to reference thread->th.th_team !!!
1873  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1874  // thread to pass through the barrier, where it might reset each thread's
1875  // th.th_team field for the next parallel region.
1876  // If we can steal more work, we know that this has not happened yet.
1877  if (flag != NULL && flag->done_check()) {
1878  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
1879  return TRUE;
1880  }
1881  }
1882  }
1883 
1884  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
1885  return FALSE;
1886 }
1887 
1888 int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
1889  int *thread_finished
1890  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1891 {
1892  return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1893  USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1894 }
1895 
1896 int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
1897  int *thread_finished
1898  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1899 {
1900  return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1901  USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1902 }
1903 
1904 int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
1905  int *thread_finished
1906  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1907 {
1908  return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1909  USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1910 }
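//-----------------------------------------------------------------------------
// Illustrative sketch (not part of the original sources, kept under "#if 0"):
// the usual calling pattern for the wrappers above, as seen in
// __kmpc_end_taskgroup() earlier in this file: spin on a counter and donate
// the waiting thread to task execution until the counter drains. The counter
// name below is hypothetical.
#if 0
static void wait_on_counter_sketch( kmp_info_t *thread, kmp_int32 gtid,
                                    volatile kmp_uint32 *some_counter )
{
    int thread_finished = FALSE;
    kmp_flag_32 flag( some_counter, 0U );        // "done" when *some_counter == 0
    while ( TCR_4(*some_counter) != 0 ) {
        // final_spin == FALSE: return as soon as the flag's done_check() passes
        flag.execute_tasks( thread, gtid, FALSE, &thread_finished
                            USE_ITT_BUILD_ARG(NULL), __kmp_task_stealing_constraint );
    }
}
#endif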
1911 
1912 
1913 
1914 //-----------------------------------------------------------------------------
1915 // __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
1916 // next barrier so they can assist in executing enqueued tasks.
1917 // First thread in allocates the task team atomically.
1918 
1919 static void
1920 __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
1921 {
1922  kmp_team_t *team = this_thr->th.th_team;
1923  kmp_thread_data_t *threads_data;
1924  int nthreads, i, is_init_thread;
1925 
1926  KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
1927  __kmp_gtid_from_thread( this_thr ) ) );
1928 
1929  KMP_DEBUG_ASSERT(task_team != NULL);
1930  KMP_DEBUG_ASSERT(team != NULL);
1931 
1932  nthreads = task_team->tt.tt_nproc;
1933  KMP_DEBUG_ASSERT(nthreads > 0);
1934  KMP_DEBUG_ASSERT(nthreads == team->t.t_nproc);
1935 
1936  // Allocate or increase the size of threads_data if necessary
1937  is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
1938 
1939  if (!is_init_thread) {
1940  // Some other thread already set up the array.
1941  KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
1942  __kmp_gtid_from_thread( this_thr ) ) );
1943  return;
1944  }
1945  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1946  KMP_DEBUG_ASSERT( threads_data != NULL );
1947 
1948  if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1949  ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
1950  {
1951  // Release any threads sleeping at the barrier, so that they can steal
1952  // tasks and execute them. In extra barrier mode, tasks do not sleep
1953  // at the separate tasking barrier, so this isn't a problem.
1954  for (i = 0; i < nthreads; i++) {
1955  volatile void *sleep_loc;
1956  kmp_info_t *thread = threads_data[i].td.td_thr;
1957 
1958  if (i == this_thr->th.th_info.ds.ds_tid) {
1959  continue;
1960  }
1961  // Since we haven't locked the thread's suspend mutex lock at this
1962  // point, there is a small window where a thread might be putting
1963  // itself to sleep, but hasn't set the th_sleep_loc field yet.
1964  // To work around this, __kmp_execute_tasks_template() periodically checks
1965  // to see if other threads are sleeping (using the same random
1966  // mechanism that is used for task stealing) and awakens them if
1967  // they are.
1968  if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
1969  {
1970  KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
1971  __kmp_gtid_from_thread( this_thr ),
1972  __kmp_gtid_from_thread( thread ) ) );
1973  __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
1974  }
1975  else {
1976  KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
1977  __kmp_gtid_from_thread( this_thr ),
1978  __kmp_gtid_from_thread( thread ) ) );
1979  }
1980  }
1981  }
1982 
1983  KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
1984  __kmp_gtid_from_thread( this_thr ) ) );
1985 }
1986 
1987 
1988 /* ------------------------------------------------------------------------ */
1989 /* // TODO: Check the comment consistency
1990  * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
1991  * like a shadow of the kmp_team_t data struct, with a different lifetime.
1992  * After a child thread checks into a barrier and calls __kmp_release() from
1993  * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
1994  * longer assume that the kmp_team_t structure is intact (at any moment, the
1995  * master thread may exit the barrier code and free the team data structure,
1996  * and return the threads to the thread pool).
1997  *
1998  * This does not work with the tasking code, as the thread is still
1999  * expected to participate in the execution of any tasks that may have been
2000  * spawned by a member of the team, and the thread still needs access
2001  * to each thread in the team, so that it can steal work from it.
2002  *
2003  * Enter the existence of the kmp_task_team_t struct. It employs a reference
2004  * counting mechanism, and is allocated by the master thread before calling
2005  * __kmp_<barrier_kind>_release, and then is released by the last thread to
2006  * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
2007  * of the kmp_task_team_t structs for consecutive barriers can overlap
2008  * (and will, unless the master thread is the last thread to exit the barrier
2009  * release phase, which is not typical).
2010  *
2011  * The existence of such a struct is useful outside the context of tasking,
2012  * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
2013  * so that any performance differences show up when comparing the 2.5 vs. 3.0
2014  * libraries.
2015  *
2016  * We currently use the existence of the threads array as an indicator that
2017  * tasks were spawned since the last barrier. If the structure is to be
2018  * useful outside the context of tasking, then this will have to change, but
2019  * not setting the field minimizes the performance impact of tasking on
2020  * barriers, when no explicit tasks were spawned (pushed, actually).
2021  */
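//-----------------------------------------------------------------------------
// Illustrative sketch of the reference-counting lifetime described above
// (not part of the original sources, kept under "#if 0"). It strings together
// the routines defined below: the master allocates the struct with
// tt_ref_ct == nthreads - 1, each worker drops its reference on the way out of
// the barrier, and the call that drives the count to zero frees the struct.
#if 0
static void task_team_lifetime_sketch( kmp_info_t *master, kmp_team_t *team )
{
    // Master thread, before the release phase of the barrier:
    kmp_task_team_t *tt = __kmp_allocate_task_team( master, team );
    // ... workers execute/steal tasks while spinning in the barrier ...

    // Each worker (the master is not counted in tt_ref_ct), on its way out:
    //     __kmp_unref_task_team( tt, worker );
    // The decrement that reaches zero returns the struct to __kmp_free_task_teams.
}
#endif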
2022 
2023 
2024 static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
2025 // Lock for task team data structures
2026 static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
2027 
2028 
2029 //------------------------------------------------------------------------------
2030 // __kmp_alloc_task_deque:
2031 // Allocates a task deque for a particular thread, and initializes the necessary
2032 // data structures relating to the deque. This only happens once per thread
2033 // per task team since task teams are recycled.
2034 // No lock is needed during allocation since each thread allocates its own
2035 // deque.
2036 
2037 static void
2038 __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2039 {
2040  __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
2041  KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
2042 
2043  // Initialize last stolen task field to "none"
2044  thread_data -> td.td_deque_last_stolen = -1;
2045 
2046  KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
2047  KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
2048  KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
2049 
2050  KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
2051  __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
2052  // Allocate space for task deque, and zero the deque
2053  // Cannot use __kmp_thread_calloc() because threads not around for
2054  // kmp_reap_task_team( ).
2055  thread_data -> td.td_deque = (kmp_taskdata_t **)
2056  __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
2057 }
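//------------------------------------------------------------------------------
// Illustrative sketch (not part of the original sources, kept under "#if 0"):
// the head/tail arithmetic in __kmp_remove_my_task and __kmp_steal_task assumes
// the deque size is a power of two, so that masking with TASK_DEQUE_MASK
// (assumed here to equal TASK_DEQUE_SIZE - 1, the usual definition in kmp.h)
// wraps an index cheaply in both directions.
#if 0
#include <assert.h>
static void deque_wrap_sketch( void )
{
    const unsigned size = 256u;               // stands in for TASK_DEQUE_SIZE
    const unsigned mask = size - 1u;          // stands in for TASK_DEQUE_MASK
    assert( ((255u + 1u) & mask) == 0u );     // tail advancing past the end wraps to 0
    assert( ((0u - 1u)   & mask) == 255u );   // the "tail - 1" pop case wraps backwards
}
#endif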
2058 
2059 
2060 //------------------------------------------------------------------------------
2061 // __kmp_free_task_deque:
2062 // Deallocates a task deque for a particular thread.
2063 // Happens at library deallocation so don't need to reset all thread data fields.
2064 
2065 static void
2066 __kmp_free_task_deque( kmp_thread_data_t *thread_data )
2067 {
2068  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2069 
2070  if ( thread_data -> td.td_deque != NULL ) {
2071  TCW_4(thread_data -> td.td_deque_ntasks, 0);
2072  __kmp_free( thread_data -> td.td_deque );
2073  thread_data -> td.td_deque = NULL;
2074  }
2075  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
2076 
2077 #ifdef BUILD_TIED_TASK_STACK
2078  // GEH: Figure out what to do here for td_susp_tied_tasks
2079  if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2080  __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
2081  }
2082 #endif // BUILD_TIED_TASK_STACK
2083 }
2084 
2085 
2086 //------------------------------------------------------------------------------
2087 // __kmp_realloc_task_threads_data:
2088 // Allocates a threads_data array for a task team, either by allocating an initial
2089 // array or enlarging an existing array. Only the first thread to get the lock
2090 // allocs or enlarges the array and re-initializes the array elements.
2091 // That thread returns "TRUE", the rest return "FALSE".
2092 // Assumes that the new array size is given by task_team -> tt.tt_nproc.
2093 // The current size is given by task_team -> tt.tt_max_threads.
2094 
2095 static int
2096 __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
2097 {
2098  kmp_thread_data_t ** threads_data_p;
2099  kmp_int32 nthreads, maxthreads;
2100  int is_init_thread = FALSE;
2101 
2102  if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
2103  // Already reallocated and initialized.
2104  return FALSE;
2105  }
2106 
2107  threads_data_p = & task_team -> tt.tt_threads_data;
2108  nthreads = task_team -> tt.tt_nproc;
2109  maxthreads = task_team -> tt.tt_max_threads;
2110 
2111  // All threads must lock when they encounter the first task of the implicit task
2112  // region to make sure threads_data fields are (re)initialized before they are used.
2113  __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2114 
2115  if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2116  // first thread to enable tasking
2117  kmp_team_t *team = thread -> th.th_team;
2118  int i;
2119 
2120  is_init_thread = TRUE;
2121  if ( maxthreads < nthreads ) {
2122 
2123  if ( *threads_data_p != NULL ) {
2124  kmp_thread_data_t *old_data = *threads_data_p;
2125  kmp_thread_data_t *new_data = NULL;
2126 
2127  KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
2128  "threads data for task_team %p, new_size = %d, old_size = %d\n",
2129  __kmp_gtid_from_thread( thread ), task_team,
2130  nthreads, maxthreads ) );
2131  // Reallocate threads_data to have more elements than current array
2132  // Cannot use __kmp_thread_realloc() because threads not around for
2133  // kmp_reap_task_team( ). Note all new array entries are initialized
2134  // to zero by __kmp_allocate().
2135  new_data = (kmp_thread_data_t *)
2136  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2137  // copy old data to new data
2138  KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
2139  (void *) old_data,
2140  maxthreads * sizeof(kmp_thread_data_t) ); // copy whole per-thread structs, not pointer-sized chunks
2141 
2142 #ifdef BUILD_TIED_TASK_STACK
2143  // GEH: Figure out if this is the right thing to do
2144  for (i = maxthreads; i < nthreads; i++) {
2145  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2146  __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2147  }
2148 #endif // BUILD_TIED_TASK_STACK
2149  // Install the new data and free the old data
2150  (*threads_data_p) = new_data;
2151  __kmp_free( old_data );
2152  }
2153  else {
2154  KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
2155  "threads data for task_team %p, size = %d\n",
2156  __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2157  // Make the initial allocate for threads_data array, and zero entries
2158  // Cannot use __kmp_thread_calloc() because threads not around for
2159  // kmp_reap_task_team( ).
2160  *threads_data_p = (kmp_thread_data_t *)
2161  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2162 #ifdef BUILD_TIED_TASK_STACK
2163  // GEH: Figure out if this is the right thing to do
2164  for (i = 0; i < nthreads; i++) {
2165  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2166  __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2167  }
2168 #endif // BUILD_TIED_TASK_STACK
2169  }
2170  task_team -> tt.tt_max_threads = nthreads;
2171  }
2172  else {
2173  // If array has (more than) enough elements, go ahead and use it
2174  KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2175  }
2176 
2177  // initialize threads_data pointers back to thread_info structures
2178  for (i = 0; i < nthreads; i++) {
2179  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2180  thread_data -> td.td_thr = team -> t.t_threads[i];
2181 
2182  if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2183  // The last stolen field survives across teams / barrier, and the number
2184  // of threads may have changed. It's possible (likely?) that a new
2185  // parallel region will exhibit the same behavior as the previous region.
2186  thread_data -> td.td_deque_last_stolen = -1;
2187  }
2188  }
2189 
2190  KMP_MB();
2191  TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2192  }
2193 
2194  __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2195  return is_init_thread;
2196 }
2197 
2198 
2199 //------------------------------------------------------------------------------
2200 // __kmp_free_task_threads_data:
2201 // Deallocates a threads_data array for a task team, including any attached
2202 // tasking deques. Only occurs at library shutdown.
2203 
2204 static void
2205 __kmp_free_task_threads_data( kmp_task_team_t *task_team )
2206 {
2207  __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2208  if ( task_team -> tt.tt_threads_data != NULL ) {
2209  int i;
2210  for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2211  __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2212  }
2213  __kmp_free( task_team -> tt.tt_threads_data );
2214  task_team -> tt.tt_threads_data = NULL;
2215  }
2216  __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2217 }
2218 
2219 
2220 //------------------------------------------------------------------------------
2221 // __kmp_allocate_task_team:
2222 // Allocates a task team associated with a specific team, taking it from
2223 // the global task team free list if possible. Also initializes data structures.
2224 
2225 static kmp_task_team_t *
2226 __kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2227 {
2228  kmp_task_team_t *task_team = NULL;
2229  int nthreads;
2230 
2231  KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2232  (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2233 
2234  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2235  // Take a task team from the task team pool
2236  __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2237  if (__kmp_free_task_teams != NULL) {
2238  task_team = __kmp_free_task_teams;
2239  TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2240  task_team -> tt.tt_next = NULL;
2241  }
2242  __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2243  }
2244 
2245  if (task_team == NULL) {
2246  KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2247  "task team for team %p\n",
2248  __kmp_gtid_from_thread( thread ), team ) );
2249  // Allocate a new task team if one is not available.
2250  // Cannot use __kmp_thread_malloc() because threads not around for
2251  // kmp_reap_task_team( ).
2252  task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2253  __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2254  //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2255  //task_team -> tt.tt_max_threads = 0;
2256  //task_team -> tt.tt_next = NULL;
2257  }
2258 
2259  TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2260 #if OMP_41_ENABLED
2261  TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
2262 #endif
2263  task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2264 
2265  TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2266  TCW_4( task_team -> tt.tt_active, TRUE );
2267  TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
2268 
2269  KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
2270  (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
2271  return task_team;
2272 }
2273 
2274 
2275 //------------------------------------------------------------------------------
2276 // __kmp_free_task_team:
2277 // Frees the task team associated with a specific thread, and adds it
2278 // to the global task team free list.
2279 //
2280 
2281 static void
2282 __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2283 {
2284  KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2285  thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2286 
2287  KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
2288 
2289  // Put task team back on free list
2290  __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2291 
2292  KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2293  task_team -> tt.tt_next = __kmp_free_task_teams;
2294  TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2295  TCW_PTR(__kmp_free_task_teams, task_team);
2296 
2297  __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2298 }
2299 
2300 
2301 //------------------------------------------------------------------------------
2302 // __kmp_reap_task_teams:
2303 // Free all the task teams on the task team free list.
2304 // Should only be done during library shutdown.
2305 // Cannot do anything that needs a thread structure or gtid since they are already gone.
2306 
2307 void
2308 __kmp_reap_task_teams( void )
2309 {
2310  kmp_task_team_t *task_team;
2311 
2312  if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2313  // Free all task_teams on the free list
2314  __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2315  while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2316  __kmp_free_task_teams = task_team -> tt.tt_next;
2317  task_team -> tt.tt_next = NULL;
2318 
2319  // Free threads_data if necessary
2320  if ( task_team -> tt.tt_threads_data != NULL ) {
2321  __kmp_free_task_threads_data( task_team );
2322  }
2323  __kmp_free( task_team );
2324  }
2325  __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2326  }
2327 }
2328 
2329 
2330 //------------------------------------------------------------------------------
2331 // __kmp_unref_task_team:
2332 // Remove one thread from referencing the task team structure by
2333 // decreasing the reference count, and deallocate the task team if no
2334 // references to it remain.
2335 //
2336 void
2337 __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
2338 {
2339  kmp_uint ref_ct;
2340 
2341  ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
2342 
2343  KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
2344  __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
2345 
2346 
2347  if ( ref_ct == 0 ) {
2348  __kmp_free_task_team( thread, task_team );
2349  }
2350 
2351  TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
2352 }
2353 
2354 
2355 //------------------------------------------------------------------------------
2356 // __kmp_wait_to_unref_task_teams:
2357 // Some threads could still be in the fork barrier release code, possibly
2358 // trying to steal tasks. Wait for each thread to unreference its task team.
2359 //
2360 void
2361 __kmp_wait_to_unref_task_teams(void)
2362 {
2363  kmp_info_t *thread;
2364  kmp_uint32 spins;
2365  int done;
2366 
2367  KMP_INIT_YIELD( spins );
2368 
2369 
2370  for (;;) {
2371  done = TRUE;
2372 
2373  // TODO: GEH - this may be wrong because some sync would be necessary
2374  // in case threads are added to the pool during the traversal.
2375  // Need to verify that lock for thread pool is held when calling
2376  // this routine.
2377  for (thread = (kmp_info_t *)__kmp_thread_pool;
2378  thread != NULL;
2379  thread = thread->th.th_next_pool)
2380  {
2381 #if KMP_OS_WINDOWS
2382  DWORD exit_val;
2383 #endif
2384  if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2385  KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2386  __kmp_gtid_from_thread( thread ) ) );
2387  continue;
2388  }
2389 #if KMP_OS_WINDOWS
2390  // TODO: GEH - add this check for Linux* OS / OS X* as well?
2391  if (!__kmp_is_thread_alive(thread, &exit_val)) {
2392  if (TCR_PTR(thread->th.th_task_team) != NULL) {
2393  __kmp_unref_task_team( thread->th.th_task_team, thread );
2394  }
2395  continue;
2396  }
2397 #endif
2398 
2399  done = FALSE; // Because th_task_team pointer is not NULL for this thread
2400 
2401  KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2402  __kmp_gtid_from_thread( thread ) ) );
2403 
2404  if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
2405  volatile void *sleep_loc;
2406  // If the thread is sleeping, awaken it.
2407  if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
2408  KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2409  __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
2410  __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
2411  }
2412  }
2413  }
2414  if (done) {
2415  break;
2416  }
2417 
2418  // If we are oversubscribed,
2419  // or have waited a bit (and library mode is throughput), yield.
2420  // Pause is in the following code.
2421  KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2422  KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2423  }
2424 
2425 
2426 }
2427 
2428 
2429 //------------------------------------------------------------------------------
2430 // __kmp_task_team_setup: Create a task_team for the current team, but use
2431 // an already created, unused one if it already exists.
2432 // This may be called by any thread, but only for teams with # threads >1.
2433 void
2434 __kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int both, int always )
2435 {
2436  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2437 
2438  if ( ( team->t.t_task_team[this_thr->th.th_task_state] == NULL ) && ( always || team->t.t_nproc > 1 ) ) {
2439  // Allocate a new task team, which will be propagated to
2440  // all of the worker threads after the barrier. As they
2441  // spin in the barrier release phase, they will continue
2442  // to use the previous task team struct, until they receive
2443  // the signal to stop checking for tasks (they can't safely
2444  // reference the kmp_team_t struct, which could be reallocated
2445  // by the master thread).
2446  team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
2447  KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
2448  __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
2449  ((team != NULL) ? team->t.t_id : -1)));
2450  }
2451  //else
2452  // All threads have reported in, and no tasks were spawned
2453  // for this release->gather region. Leave the old task
2454  // team struct in place for the upcoming region. No task
2455  // teams are formed for serialized teams.
2456  if (both) {
2457  int other_team = 1 - this_thr->th.th_task_state;
2458  if ( ( team->t.t_task_team[other_team] == NULL ) && ( team->t.t_nproc > 1 ) ) { // setup other team as well
2459  team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
2460  KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
2461  __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2462  ((team != NULL) ? team->t.t_id : -1)) );
2463  }
2464  }
2465 }
2466 
2467 
2468 //------------------------------------------------------------------------------
2469 // __kmp_task_team_sync: Propagation of task team data from team to threads
2470 // which happens just after the release phase of a team barrier. This may be
2471 // called by any thread, but only for teams with # threads > 1.
2472 void
2473 __kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2474 {
2475  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2476 
2477  // In case this thread never saw that the task team was no longer active, unref/deallocate it now.
2478  if ( this_thr->th.th_task_team != NULL ) {
2479  if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
2480  KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
2481  __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
2482  } else { // We are re-using a task team that was never enabled.
2483  KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
2484  }
2485  }
2486 
2487  // Toggle the th_task_state field, to switch which task_team this thread refers to
2488  this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
2489  // It is now safe to propagate the task team pointer from the team struct to the current thread.
2490  TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
2491  KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
2492  __kmp_gtid_from_thread( this_thr ), &this_thr->th.th_task_team,
2493  this_thr->th.th_task_team, ((team != NULL) ? (team->t.t_id) : -1) ) );
2494 }
2495 
2496 
2497 //------------------------------------------------------------------------------
2498 // __kmp_task_team_wait: Master thread waits for outstanding tasks after the
2499 // barrier gather phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created
2500 void
2501 __kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
2502  USE_ITT_BUILD_ARG(void * itt_sync_obj)
2503  )
2504 {
2505  kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
2506 
2507  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2508  KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2509 
2510  if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
2511  KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
2512  __kmp_gtid_from_thread( this_thr ), task_team ) );
2513  // All worker threads might have dropped through to the release phase, but could still
2514  // be executing tasks. Wait here for all tasks to complete. To avoid memory contention,
2515  // only the master thread checks for the termination condition.
2516  kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2517  flag.wait(this_thr, TRUE
2518  USE_ITT_BUILD_ARG(itt_sync_obj));
2519 
2520  // Kill the old task team, so that the worker threads will stop referencing it while spinning.
2521  // They will deallocate it when the reference count reaches zero.
2522  // The master thread is not included in the ref count.
2523  KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
2524  __kmp_gtid_from_thread( this_thr ), task_team ) );
2525 #if OMP_41_ENABLED
2526  KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
2527  TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
2528 #else
2529  KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
2530 #endif
2531  TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2532  KMP_MB();
2533 
2534  TCW_PTR(this_thr->th.th_task_team, NULL);
2535  team->t.t_task_team[this_thr->th.th_task_state] = NULL;
2536  }
2537 }
2538 
2539 
2540 //------------------------------------------------------------------------------
2541 // __kmp_tasking_barrier:
2542 // Internal function to execute all tasks prior to a regular barrier or a
2543 // join barrier. It is a full barrier itself, which unfortunately turns
2544 // regular barriers into double barriers and join barriers into 1 1/2
2545 // barriers.
2546 // This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
2547 
2548 void
2549 __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2550 {
2551  volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
2552  int flag = FALSE;
2553  KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2554 
2555 #if USE_ITT_BUILD
2556  KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2557 #endif /* USE_ITT_BUILD */
2558  kmp_flag_32 spin_flag(spin, 0U);
2559  while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2560  USE_ITT_BUILD_ARG(NULL), 0 ) ) {
2561 #if USE_ITT_BUILD
2562  // TODO: What about itt_sync_obj??
2563  KMP_FSYNC_SPIN_PREPARE( spin );
2564 #endif /* USE_ITT_BUILD */
2565 
2566  if( TCR_4(__kmp_global.g.g_done) ) {
2567  if( __kmp_global.g.g_abort )
2568  __kmp_abort_thread( );
2569  break;
2570  }
2571  KMP_YIELD( TRUE ); // GH: We always yield here
2572  }
2573 #if USE_ITT_BUILD
2574  KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2575 #endif /* USE_ITT_BUILD */
2576 }
2577 
2578 
2579 #if OMP_41_ENABLED
2580 
2581 /* __kmp_give_task puts a task into a given thread's queue if:
2582  - the queue for that thread was created
2583  - there's space in that queue
2584 
2585  Because of this, __kmp_push_task needs to check if there's space after getting the lock
2586  */
2587 static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task )
2588 {
2589  kmp_task_team_t * task_team = thread->th.th_task_team;
2590  kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
2591  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
2592  bool result = false;
2593 
2594  KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );
2595 
2596  // assert tasking is enabled? what if not?
2597  KMP_DEBUG_ASSERT( task_team != NULL );
2598 
2599  if (thread_data -> td.td_deque == NULL ) {
2600  // There's no queue in this thread, go find another one
2601  // We're guaranteed that at least one thread has a queue
2602  KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
2603  return result;
2604  }
2605 
2606  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
2607  {
2608  KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2609  return result;
2610  }
2611 
2612  __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2613 
2614  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
2615  {
2616  KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2617  goto release_and_exit;
2618  }
2619 
2620  thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
2621  // Wrap index.
2622  thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
2623  TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
2624 
2625  result = true;
2626  KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
2627 
2628 release_and_exit:
2629  __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
2630 
2631  return result;
2632 }
2633 
2634 
2635 /* The finishing of a proxy task is divided into two pieces:
2636  - the top half is the one that can be done from a thread outside the team
2637  - the bottom half must be run from a thread within the team
2638 
2639  In order to run the bottom half the task gets queued back into one of the threads of the team.
2640  Once the td_incomplete_child_tasks counter of the parent is decremented the threads can leave the barriers.
2641  So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided into two parts:
2642  - things that can be run before queuing the bottom half
2643  - things that must be run after queuing the bottom half
2644 
2645  This creates a second race as the bottom half can free the task before the second top half is executed. To avoid this
2646  we use the td_incomplete_child_tasks counter of the proxy task to synchronize the top and bottom halves.
2647 */
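//------------------------------------------------------------------------------
// Illustrative ordering sketch (not part of the original sources, kept under
// "#if 0"): the sequence the comment above describes, as implemented for the
// out-of-team case by __kmpc_proxy_task_completed_ooo() further below.
#if 0
static void proxy_finish_ordering_sketch( kmp_taskdata_t *taskdata, kmp_task_t *ptask )
{
    __kmp_first_top_half_finish_proxy( taskdata );   // mark complete, add the "imaginary child"
    /* queue the bottom half into a thread of the team, e.g. via __kmp_give_task(..., ptask) */
    __kmp_second_top_half_finish_proxy( taskdata );  // decrement the parent, drop the imaginary child
    /* a thread of the team later runs __kmp_bottom_half_finish_proxy(), which
       spins until the imaginary child is gone before releasing the task */
}
#endif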
2648 
2649 static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2650 {
2651  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
2652  KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2653  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
2654  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
2655 
2656  taskdata -> td_flags.complete = 1; // mark the task as completed
2657 
2658  if ( taskdata->td_taskgroup )
2659  KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
2660 
2661  // Create an imaginary child for this task so the bottom half cannot release the task before we have completed the second top half
2662  TCR_4(taskdata->td_incomplete_child_tasks++);
2663 }
2664 
2665 static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2666 {
2667  kmp_int32 children = 0;
2668 
2669  // Predecrement simulated by "- 1" calculation
2670  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
2671  KMP_DEBUG_ASSERT( children >= 0 );
2672 
2673  // Remove the imaginary children
2674  TCR_4(taskdata->td_incomplete_child_tasks--);
2675 }
2676 
2677 static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
2678 {
2679  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2680  kmp_info_t * thread = __kmp_threads[ gtid ];
2681 
2682  KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2683  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
2684 
2685  // We need to wait to make sure the top half is finished
2686  // Spinning here should be ok as this should happen quickly
2687  while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
2688 
2689  __kmp_release_deps(gtid,taskdata);
2690  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
2691 }
2692 
2700 void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
2701 {
2702  KMP_DEBUG_ASSERT( ptask != NULL );
2703  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2704  KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
2705 
2706  KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2707 
2708  __kmp_first_top_half_finish_proxy(taskdata);
2709  __kmp_second_top_half_finish_proxy(taskdata);
2710  __kmp_bottom_half_finish_proxy(gtid,ptask);
2711 
2712  KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
2713 }
2714 
2721 void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
2722 {
2723  KMP_DEBUG_ASSERT( ptask != NULL );
2724  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2725 
2726  KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
2727 
2728  KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2729 
2730  __kmp_first_top_half_finish_proxy(taskdata);
2731 
2732  // Enqueue the task so that the bottom half completion runs from a thread within the corresponding team
2733  kmp_team_t * team = taskdata->td_team;
2734  kmp_int32 nthreads = team->t.t_nproc;
2735  kmp_info_t *thread;
2736  kmp_int32 k = 0;
2737 
2738  do {
2739  //This should be similar to k = __kmp_get_random( thread ) % nthreads but we cannot use __kmp_get_random here
2740  //For now we're just linearly trying to find a thread
2741  k = (k+1) % nthreads;
2742  thread = team->t.t_threads[k];
2743  } while ( !__kmp_give_task( thread, k, ptask ) );
2744 
2745  __kmp_second_top_half_finish_proxy(taskdata);
2746 
2747  KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
2748 }
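//------------------------------------------------------------------------------
// One plausible use of the entry point above (illustrative only, kept under
// "#if 0"): an external/asynchronous runtime that fulfils a proxy task could
// signal completion from its own thread, which is not part of the OpenMP team;
// the function above then hands the bottom half back to a team thread via
// __kmp_give_task(). The callback name and its context argument are hypothetical.
#if 0
static void async_completion_callback_sketch( void *context )
{
    kmp_task_t *ptask = (kmp_task_t *)context;   // proxy task captured when the async work was launched
    __kmpc_proxy_task_completed_ooo( ptask );    // completes the proxy task from outside the team
}
#endif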
2749 
2750 #endif