Intel® OpenMP* Runtime Library
 All Classes Functions Variables Typedefs Enumerations Enumerator Groups Pages
kmp_tasking.c
1 /*
2  * kmp_tasking.c -- OpenMP 3.0 tasking support.
3  * $Revision: 42852 $
4  * $Date: 2013-12-04 10:50:49 -0600 (Wed, 04 Dec 2013) $
5  */
6 
7 /* <copyright>
8  Copyright (c) 1997-2013 Intel Corporation. All Rights Reserved.
9 
10  Redistribution and use in source and binary forms, with or without
11  modification, are permitted provided that the following conditions
12  are met:
13 
14  * Redistributions of source code must retain the above copyright
15  notice, this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright
17  notice, this list of conditions and the following disclaimer in the
18  documentation and/or other materials provided with the distribution.
19  * Neither the name of Intel Corporation nor the names of its
20  contributors may be used to endorse or promote products derived
21  from this software without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 </copyright> */
36 
37 #include "kmp.h"
38 #include "kmp_i18n.h"
39 #include "kmp_itt.h"
40 
41 
42 #if OMP_30_ENABLED
43 
44 /* ------------------------------------------------------------------------ */
45 /* ------------------------------------------------------------------------ */
46 
47 
48 /* forward declaration */
49 static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
50 static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
51 static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
52 
53 #ifndef KMP_DEBUG
54 # define __kmp_static_delay( arg ) /* nothing to do */
55 #else
56 
57 static void
58 __kmp_static_delay( int arg )
59 {
60 /* Work around weird code-gen bug that causes assert to trip */
61 # if KMP_ARCH_X86_64 && KMP_OS_LINUX
62  KMP_ASSERT( arg != 0 );
63 # else
64  KMP_ASSERT( arg >= 0 );
65 # endif
66 }
67 #endif /* KMP_DEBUG */
68 
69 static void
70 __kmp_static_yield( int arg )
71 {
72  __kmp_yield( arg );
73 }
74 
75 #ifdef BUILD_TIED_TASK_STACK
76 
77 //---------------------------------------------------------------------------
78 // __kmp_trace_task_stack: print the tied tasks from the task stack in order
79 // from top do bottom
80 //
81 // gtid: global thread identifier for thread containing stack
82 // thread_data: thread data for task team thread containing stack
83 // threshold: value above which the trace statement triggers
84 // location: string identifying call site of this function (for trace)
85 
86 static void
87 __kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
88 {
89  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
90  kmp_taskdata_t **stack_top = task_stack -> ts_top;
91  kmp_int32 entries = task_stack -> ts_entries;
92  kmp_taskdata_t *tied_task;
93 
94  KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
95  "first_block = %p, stack_top = %p \n",
96  location, gtid, entries, task_stack->ts_first_block, stack_top ) );
97 
98  KMP_DEBUG_ASSERT( stack_top != NULL );
99  KMP_DEBUG_ASSERT( entries > 0 );
100 
101  while ( entries != 0 )
102  {
103  KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
104  // fix up ts_top if we need to pop from previous block
105  if ( entries & TASK_STACK_INDEX_MASK == 0 )
106  {
107  kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
108 
109  stack_block = stack_block -> sb_prev;
110  stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
111  }
112 
113  // finish bookkeeping
114  stack_top--;
115  entries--;
116 
117  tied_task = * stack_top;
118 
119  KMP_DEBUG_ASSERT( tied_task != NULL );
120  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
121 
122  KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
123  "stack_top=%p, tied_task=%p\n",
124  location, gtid, entries, stack_top, tied_task ) );
125  }
126  KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
127 
128  KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
129  location, gtid ) );
130 }
131 
132 //---------------------------------------------------------------------------
133 // __kmp_init_task_stack: initialize the task stack for the first time
134 // after a thread_data structure is created.
135 // It should not be necessary to do this again (assuming the stack works).
136 //
137 // gtid: global thread identifier of calling thread
138 // thread_data: thread data for task team thread containing stack
139 
140 static void
141 __kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
142 {
143  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
144  kmp_stack_block_t *first_block;
145 
146  // set up the first block of the stack
147  first_block = & task_stack -> ts_first_block;
148  task_stack -> ts_top = (kmp_taskdata_t **) first_block;
149  memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
150 
151  // initialize the stack to be empty
152  task_stack -> ts_entries = TASK_STACK_EMPTY;
153  first_block -> sb_next = NULL;
154  first_block -> sb_prev = NULL;
155 }
156 
157 
158 //---------------------------------------------------------------------------
159 // __kmp_free_task_stack: free the task stack when thread_data is destroyed.
160 //
161 // gtid: global thread identifier for calling thread
162 // thread_data: thread info for thread containing stack
163 
164 static void
165 __kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
166 {
167  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
168  kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
169 
170  KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
171  // free from the second block of the stack
172  while ( stack_block != NULL ) {
173  kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
174 
175  stack_block -> sb_next = NULL;
176  stack_block -> sb_prev = NULL;
177  if (stack_block != & task_stack -> ts_first_block) {
178  __kmp_thread_free( thread, stack_block ); // free the block, if not the first
179  }
180  stack_block = next_block;
181  }
182  // initialize the stack to be empty
183  task_stack -> ts_entries = 0;
184  task_stack -> ts_top = NULL;
185 }
186 
187 
188 //---------------------------------------------------------------------------
189 // __kmp_push_task_stack: Push the tied task onto the task stack.
190 // Grow the stack if necessary by allocating another block.
191 //
192 // gtid: global thread identifier for calling thread
193 // thread: thread info for thread containing stack
194 // tied_task: the task to push on the stack
195 
196 static void
197 __kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
198 {
199  // GEH - need to consider what to do if tt_threads_data not allocated yet
200  kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
201  tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
202  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
203 
204  if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
205  return; // Don't push anything on stack if team or team tasks are serialized
206  }
207 
208  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
209  KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
210 
211  KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
212  gtid, thread, tied_task ) );
213  // Store entry
214  * (task_stack -> ts_top) = tied_task;
215 
216  // Do bookkeeping for next push
217  task_stack -> ts_top++;
218  task_stack -> ts_entries++;
219 
220  if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 )
221  {
222  // Find beginning of this task block
223  kmp_stack_block_t *stack_block =
224  (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
225 
226  // Check if we already have a block
227  if ( stack_block -> sb_next != NULL )
228  { // reset ts_top to beginning of next block
229  task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
230  }
231  else
232  { // Alloc new block and link it up
233  kmp_stack_block_t *new_block = (kmp_stack_block_t *)
234  __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
235 
236  task_stack -> ts_top = & new_block -> sb_block[0];
237  stack_block -> sb_next = new_block;
238  new_block -> sb_prev = stack_block;
239  new_block -> sb_next = NULL;
240 
241  KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
242  gtid, tied_task, new_block ) );
243  }
244  }
245  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
246 }
247 
248 //---------------------------------------------------------------------------
249 // __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
250 // the task, just check to make sure it matches the ending task passed in.
251 //
252 // gtid: global thread identifier for the calling thread
253 // thread: thread info structure containing stack
254 // tied_task: the task popped off the stack
255 // ending_task: the task that is ending (should match popped task)
256 
257 static void
258 __kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
259 {
260  // GEH - need to consider what to do if tt_threads_data not allocated yet
261  kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
262  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
263  kmp_taskdata_t *tied_task;
264 
265  if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
266  return; // Don't pop anything from stack if team or team tasks are serialized
267  }
268 
269  KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
270  KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
271 
272  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
273 
274  // fix up ts_top if we need to pop from previous block
275  if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 )
276  {
277  kmp_stack_block_t *stack_block =
278  (kmp_stack_block_t *) (task_stack -> ts_top) ;
279 
280  stack_block = stack_block -> sb_prev;
281  task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
282  }
283 
284  // finish bookkeeping
285  task_stack -> ts_top--;
286  task_stack -> ts_entries--;
287 
288  tied_task = * (task_stack -> ts_top );
289 
290  KMP_DEBUG_ASSERT( tied_task != NULL );
291  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
292  KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly
293 
294  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
295  return;
296 }
297 #endif /* BUILD_TIED_TASK_STACK */
298 
299 //---------------------------------------------------
300 // __kmp_push_task: Add a task to the thread's deque
301 
302 static kmp_int32
303 __kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
304 {
305  kmp_info_t * thread = __kmp_threads[ gtid ];
306  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
307  kmp_task_team_t * task_team = thread->th.th_task_team;
308  kmp_int32 tid = __kmp_tid_from_gtid( gtid );
309  kmp_thread_data_t * thread_data;
310 
311  KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
312 
313  // The first check avoids building task_team thread data if serialized
314  if ( taskdata->td_flags.task_serial ) {
315  KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
316  gtid, taskdata ) );
317  return TASK_NOT_PUSHED;
318  }
319 
320  // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
321  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
322  if ( ! KMP_TASKING_ENABLED( task_team, thread->th.th_task_state ) ) {
323  __kmp_enable_tasking( task_team, thread );
324  }
325  KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
326  KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
327 
328  // Find tasking deque specific to encountering thread
329  thread_data = & task_team -> tt.tt_threads_data[ tid ];
330 
331  // No lock needed since only owner can allocate
332  if (thread_data -> td.td_deque == NULL ) {
333  __kmp_alloc_task_deque( thread, thread_data );
334  }
335 
336  // Check if deque is full
337  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
338  {
339  KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
340  gtid, taskdata ) );
341  return TASK_NOT_PUSHED;
342  }
343 
344  // Lock the deque for the task push operation
345  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
346 
347  // Must have room since no thread can add tasks but calling thread
348  KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
349 
350  thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata
351  // Wrap index.
352  thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
353  TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count
354 
355  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
356 
357  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
358  "task=%p ntasks=%d head=%u tail=%u\n",
359  gtid, taskdata, thread_data->td.td_deque_ntasks,
360  thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
361 
362  return TASK_SUCCESSFULLY_PUSHED;
363 }
364 
365 
366 //-----------------------------------------------------------------------------------------
367 // __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
368 // this_thr: thread structure to set current_task in.
369 
370 void
371 __kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
372 {
373  KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
374  "curtask_parent=%p\n",
375  0, this_thr, this_thr -> th.th_current_task,
376  this_thr -> th.th_current_task -> td_parent ) );
377 
378  this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
379 
380  KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
381  "curtask_parent=%p\n",
382  0, this_thr, this_thr -> th.th_current_task,
383  this_thr -> th.th_current_task -> td_parent ) );
384 }
385 
386 
387 //---------------------------------------------------------------------------------------
388 // __kmp_push_current_task_to_thread: set up current task in called thread for a new team
389 // this_thr: thread structure to set up
390 // team: team for implicit task data
391 // tid: thread within team to set up
392 
393 void
394 __kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
395 {
396  // current task of the thread is a parent of the new just created implicit tasks of new team
397  KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
398  "parent_task=%p\n",
399  tid, this_thr, this_thr->th.th_current_task,
400  team->t.t_implicit_task_taskdata[tid].td_parent ) );
401 
402  KMP_DEBUG_ASSERT (this_thr != NULL);
403 
404  if( tid == 0 ) {
405  if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
406  team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
407  this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
408  }
409  } else {
410  team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
411  this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
412  }
413 
414  KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
415  "parent_task=%p\n",
416  tid, this_thr, this_thr->th.th_current_task,
417  team->t.t_implicit_task_taskdata[tid].td_parent ) );
418 }
419 
420 
421 //----------------------------------------------------------------------
422 // __kmp_task_start: bookkeeping for a task starting execution
423 // GTID: global thread id of calling thread
424 // task: task starting execution
425 // current_task: task suspending
426 
427 static void
428 __kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
429 {
430  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
431  kmp_info_t * thread = __kmp_threads[ gtid ];
432 
433  KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
434  gtid, taskdata, current_task) );
435 
436  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
437 
438  // mark currently executing task as suspended
439  // TODO: GEH - make sure root team implicit task is initialized properly.
440  // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
441  current_task -> td_flags.executing = 0;
442 
443  // Add task to stack if tied
444 #ifdef BUILD_TIED_TASK_STACK
445  if ( taskdata -> td_flags.tiedness == TASK_TIED )
446  {
447  __kmp_push_task_stack( gtid, thread, taskdata );
448  }
449 #endif /* BUILD_TIED_TASK_STACK */
450 
451  // mark starting task as executing and as current task
452  thread -> th.th_current_task = taskdata;
453 
454  KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
455  KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
456  taskdata -> td_flags.started = 1;
457  taskdata -> td_flags.executing = 1;
458  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
459  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
460 
461  // GEH TODO: shouldn't we pass some sort of location identifier here?
462  // APT: yes, we will pass location here.
463  // need to store current thread state (in a thread or taskdata structure)
464  // before setting work_state, otherwise wrong state is set after end of task
465 
466  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
467  gtid, taskdata ) );
468 
469  return;
470 }
471 
472 
473 //----------------------------------------------------------------------
474 // __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
475 // loc_ref: source location information; points to beginning of task block.
476 // gtid: global thread number.
477 // task: task thunk for the started task.
478 
479 void
480 __kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
481 {
482  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
483  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
484 
485  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
486  gtid, loc_ref, taskdata, current_task ) );
487 
488  taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred.
489  __kmp_task_start( gtid, task, current_task );
490 
491  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
492  gtid, loc_ref, taskdata ) );
493 
494  return;
495 }
496 
497 #ifdef TASK_UNUSED
498 //----------------------------------------------------------------------
499 // __kmpc_omp_task_begin: report that a given task has started execution
500 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
501 
502 void
503 __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
504 {
505  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
506 
507  KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
508  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
509 
510  __kmp_task_start( gtid, task, current_task );
511 
512  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
513  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
514 
515  return;
516 }
517 #endif // TASK_UNUSED
518 
519 
520 //-------------------------------------------------------------------------------------
521 // __kmp_free_task: free the current task space and the space for shareds
522 // gtid: Global thread ID of calling thread
523 // taskdata: task to free
524 // thread: thread data structure of caller
525 
526 static void
527 __kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
528 {
529  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
530  gtid, taskdata) );
531 
532  // Check to make sure all flags and counters have the correct values
533  KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
534  KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
535  KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
536  KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
537  KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
538  KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
539 
540  taskdata->td_flags.freed = 1;
541  // deallocate the taskdata and shared variable blocks associated with this task
542  #if USE_FAST_MEMORY
543  __kmp_fast_free( thread, taskdata );
544  #else /* ! USE_FAST_MEMORY */
545  __kmp_thread_free( thread, taskdata );
546  #endif
547 
548  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
549  gtid, taskdata) );
550 }
551 
552 //-------------------------------------------------------------------------------------
553 // __kmp_free_task_and_ancestors: free the current task and ancestors without children
554 //
555 // gtid: Global thread ID of calling thread
556 // taskdata: task to free
557 // thread: thread data structure of caller
558 
559 static void
560 __kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
561 {
562  kmp_int32 children = 0;
563  kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
564 
565  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
566 
567  if ( !team_or_tasking_serialized ) {
568  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
569  KMP_DEBUG_ASSERT( children >= 0 );
570  }
571 
572  // Now, go up the ancestor tree to see if any ancestors can now be freed.
573  while ( children == 0 )
574  {
575  kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
576 
577  KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
578  "and freeing itself\n", gtid, taskdata) );
579 
580  // --- Deallocate my ancestor task ---
581  __kmp_free_task( gtid, taskdata, thread );
582 
583  taskdata = parent_taskdata;
584 
585  // Stop checking ancestors at implicit task or if tasking serialized
586  // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
587  if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
588  return;
589 
590  if ( !team_or_tasking_serialized ) {
591  // Predecrement simulated by "- 1" calculation
592  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
593  KMP_DEBUG_ASSERT( children >= 0 );
594  }
595  }
596 
597  KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
598  "not freeing it yet\n", gtid, taskdata, children) );
599 }
600 
601 //---------------------------------------------------------------------
602 // __kmp_task_finish: bookkeeping to do when a task finishes execution
603 // gtid: global thread ID for calling thread
604 // task: task to be finished
605 // resumed_task: task to be resumed. (may be NULL if task is serialized)
606 
607 static void
608 __kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
609 {
610  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
611  kmp_info_t * thread = __kmp_threads[ gtid ];
612  kmp_int32 children = 0;
613 
614  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
615  gtid, taskdata, resumed_task) );
616 
617  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
618 
619  // Pop task from stack if tied
620 #ifdef BUILD_TIED_TASK_STACK
621  if ( taskdata -> td_flags.tiedness == TASK_TIED )
622  {
623  __kmp_pop_task_stack( gtid, thread, taskdata );
624  }
625 #endif /* BUILD_TIED_TASK_STACK */
626 
627  KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
628  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
629  taskdata -> td_flags.executing = 0; // suspend the finishing task
630  taskdata -> td_flags.complete = 1; // mark the task as completed
631  KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
632  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
633 
634  // Only need to keep track of count if team parallel and tasking not serialized
635  if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
636  // Predecrement simulated by "- 1" calculation
637  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
638  KMP_DEBUG_ASSERT( children >= 0 );
639 #if OMP_40_ENABLED
640  if ( taskdata->td_taskgroup )
641  KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
642  __kmp_release_deps(gtid,taskdata);
643 #endif
644  }
645 
646  KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
647  gtid, taskdata, children) );
648 
649 #if OMP_40_ENABLED
650  /* If the tasks' destructor thunk flag has been set, we need to invoke the
651  destructor thunk that has been generated by the compiler.
652  The code is placed here, since at this point other tasks might have been released
653  hence overlapping the destructor invokations with some other work in the
654  released tasks. The OpenMP spec is not specific on when the destructors are
655  invoked, so we should be free to choose.
656  */
657  if (taskdata->td_flags.destructors_thunk) {
658  kmp_routine_entry_t destr_thunk = task->destructors;
659  KMP_ASSERT(destr_thunk);
660  destr_thunk(gtid, task);
661  }
662 #endif // OMP_40_ENABLED
663 
664  // bookkeeping for resuming task:
665  // GEH - note tasking_ser => task_serial
666  KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
667  taskdata->td_flags.task_serial);
668  if ( taskdata->td_flags.task_serial )
669  {
670  if (resumed_task == NULL) {
671  resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
672  }
673  else {
674  // verify resumed task passed in points to parent
675  KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
676  }
677  }
678  else {
679  KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that resumed task is passed as arguemnt
680  }
681 
682  // Free this task and then ancestor tasks if they have no children.
683  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
684 
685  __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task
686 
687  // TODO: GEH - make sure root team implicit task is initialized properly.
688  // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
689  resumed_task->td_flags.executing = 1; // resume previous task
690 
691  KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
692  gtid, taskdata, resumed_task) );
693 
694  return;
695 }
696 
697 //---------------------------------------------------------------------
698 // __kmpc_omp_task_complete_if0: report that a task has completed execution
699 // loc_ref: source location information; points to end of task block.
700 // gtid: global thread number.
701 // task: task thunk for the completed task.
702 
703 void
704 __kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
705 {
706  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
707  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
708 
709  __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume
710 
711  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
712  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
713 
714  return;
715 }
716 
717 #ifdef TASK_UNUSED
718 //---------------------------------------------------------------------
719 // __kmpc_omp_task_complete: report that a task has completed execution
720 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
721 
722 void
723 __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
724 {
725  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
726  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
727 
728  __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume
729 
730  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
731  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
732  return;
733 }
734 #endif // TASK_UNUSED
735 
736 
737 //----------------------------------------------------------------------------------------------------
738 // __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
739 //
740 // loc_ref: reference to source location of parallel region
741 // this_thr: thread data structure corresponding to implicit task
742 // team: team for this_thr
743 // tid: thread id of given thread within team
744 // set_curr_task: TRUE if need to push current task to thread
745 // NOTE: Routine does not set up the implicit task ICVS. This is assumed to have already been done elsewhere.
746 // TODO: Get better loc_ref. Value passed in may be NULL
747 
748 void
749 __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
750 {
751  kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
752 
753  KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
754  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
755 
756  task->td_task_id = KMP_GEN_TASK_ID();
757  task->td_team = team;
758 // task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger)
759  task->td_ident = loc_ref;
760  task->td_taskwait_ident = NULL;
761  task->td_taskwait_counter = 0;
762  task->td_taskwait_thread = 0;
763 
764  task->td_flags.tiedness = TASK_TIED;
765  task->td_flags.tasktype = TASK_IMPLICIT;
766  // All implicit tasks are executed immediately, not deferred
767  task->td_flags.task_serial = 1;
768  task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
769  task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
770 
771  task->td_flags.started = 1;
772  task->td_flags.executing = 1;
773  task->td_flags.complete = 0;
774  task->td_flags.freed = 0;
775 
776 #if OMP_40_ENABLED
777  task->td_dephash = NULL;
778  task->td_depnode = NULL;
779 #endif
780 
781  if (set_curr_task) { // only do this initialization the first time a thread is created
782  task->td_incomplete_child_tasks = 0;
783  task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task
784 #if OMP_40_ENABLED
785  task->td_taskgroup = NULL; // An implicit task does not have taskgroup
786 #endif
787  __kmp_push_current_task_to_thread( this_thr, team, tid );
788  } else {
789  KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
790  KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
791  }
792 
793  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
794  tid, team, task ) );
795 }
796 
797 // Round up a size to a power of two specified by val
798 // Used to insert padding between structures co-allocated using a single malloc() call
799 static size_t
800 __kmp_round_up_to_val( size_t size, size_t val ) {
801  if ( size & ( val - 1 ) ) {
802  size &= ~ ( val - 1 );
803  if ( size <= KMP_SIZE_T_MAX - val ) {
804  size += val; // Round up if there is no overflow.
805  }; // if
806  }; // if
807  return size;
808 } // __kmp_round_up_to_va
809 
810 
811 //---------------------------------------------------------------------------------
812 // __kmp_task_alloc: Allocate the taskdata and task data structures for a task
813 //
814 // loc_ref: source location information
815 // gtid: global thread number.
816 // flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
817 // Converted from kmp_int32 to kmp_tasking_flags_t in routine.
818 // sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
819 // sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
820 // task_entry: Pointer to task code entry point generated by compiler.
821 // returns: a pointer to the allocated kmp_task_t structure (task).
822 
823 kmp_task_t *
824 __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
825  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
826  kmp_routine_entry_t task_entry )
827 {
828  kmp_task_t *task;
829  kmp_taskdata_t *taskdata;
830  kmp_info_t *thread = __kmp_threads[ gtid ];
831  kmp_team_t *team = thread->th.th_team;
832  kmp_taskdata_t *parent_task = thread->th.th_current_task;
833  size_t shareds_offset;
834 
835  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
836  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
837  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
838  sizeof_shareds, task_entry) );
839 
840  if ( parent_task->td_flags.final ) {
841  if (flags->merged_if0) {
842  }
843  flags->final = 1;
844  }
845 
846  // Calculate shared structure offset including padding after kmp_task_t struct
847  // to align pointers in shared struct
848  shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
849  shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
850 
851  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
852  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
853  gtid, shareds_offset) );
854  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
855  gtid, sizeof_shareds) );
856 
857  // Avoid double allocation here by combining shareds with taskdata
858  #if USE_FAST_MEMORY
859  taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
860  #else /* ! USE_FAST_MEMORY */
861  taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
862  #endif /* USE_FAST_MEMORY */
863 
864  task = KMP_TASKDATA_TO_TASK(taskdata);
865 
866  // Make sure task & taskdata are aligned appropriately
867 #if KMP_ARCH_X86
868  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
869  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
870 #else
871  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
872  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
873 #endif
874  if (sizeof_shareds > 0) {
875  // Avoid double allocation here by combining shareds with taskdata
876  task->shareds = & ((char *) taskdata)[ shareds_offset ];
877  // Make sure shareds struct is aligned to pointer size
878  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
879  } else {
880  task->shareds = NULL;
881  }
882  task->routine = task_entry;
883  task->part_id = 0; // AC: Always start with 0 part id
884 
885  taskdata->td_task_id = KMP_GEN_TASK_ID();
886  taskdata->td_team = team;
887  taskdata->td_alloc_thread = thread;
888  taskdata->td_parent = parent_task;
889  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
890  taskdata->td_ident = loc_ref;
891  taskdata->td_taskwait_ident = NULL;
892  taskdata->td_taskwait_counter = 0;
893  taskdata->td_taskwait_thread = 0;
894  KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
895  copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
896 
897  taskdata->td_flags.tiedness = flags->tiedness;
898  taskdata->td_flags.final = flags->final;
899  taskdata->td_flags.merged_if0 = flags->merged_if0;
900 #if OMP_40_ENABLED
901  taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
902 #endif // OMP_40_ENABLED
903  taskdata->td_flags.tasktype = TASK_EXPLICIT;
904 
905  // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
906  taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
907 
908  // GEH - TODO: fix this to copy parent task's value of team_serial flag
909  taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
910 
911  // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
912  // tasks are not left until program termination to execute. Also, it helps locality to execute
913  // immediately.
914  taskdata->td_flags.task_serial = ( taskdata->td_flags.final
915  || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
916 
917  taskdata->td_flags.started = 0;
918  taskdata->td_flags.executing = 0;
919  taskdata->td_flags.complete = 0;
920  taskdata->td_flags.freed = 0;
921 
922  taskdata->td_flags.native = flags->native;
923 
924  taskdata->td_incomplete_child_tasks = 0;
925  taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children
926 #if OMP_40_ENABLED
927  taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
928  taskdata->td_dephash = NULL;
929  taskdata->td_depnode = NULL;
930 #endif
931  // Only need to keep track of child task counts if team parallel and tasking not serialized
932  if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
933  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
934 #if OMP_40_ENABLED
935  if ( parent_task->td_taskgroup )
936  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
937 #endif
938  // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
939  if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
940  KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
941  }
942  }
943 
944  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
945  gtid, taskdata, taskdata->td_parent) );
946 
947  return task;
948 }
949 
950 
951 kmp_task_t *
952 __kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
953  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
954  kmp_routine_entry_t task_entry )
955 {
956  kmp_task_t *retval;
957  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
958 
959  input_flags->native = FALSE;
960  // __kmp_task_alloc() sets up all other runtime flags
961 
962  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
963  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
964  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
965  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
966 
967  retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
968  sizeof_shareds, task_entry );
969 
970  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
971 
972  return retval;
973 }
974 
975 //-----------------------------------------------------------
976 // __kmp_invoke_task: invoke the specified task
977 //
978 // gtid: global thread ID of caller
979 // task: the task to invoke
980 // current_task: the task to resume after task invokation
981 
982 static void
983 __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
984 {
985  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
986 #if OMP_40_ENABLED
987  int discard = 0 /* false */;
988 #endif
989  KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
990  gtid, taskdata, current_task) );
991 
992  __kmp_task_start( gtid, task, current_task );
993 
994 #if OMP_40_ENABLED
995  // TODO: cancel tasks if the parallel region has also been cancelled
996  // TODO: check if this sequence can be hoisted above __kmp_task_start
997  // if cancellation has been enabled for this run ...
998  if (__kmp_omp_cancellation) {
999  kmp_info_t *this_thr = __kmp_threads [ gtid ];
1000  kmp_team_t * this_team = this_thr->th.th_team;
1001  kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1002  if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
1003  // this task belongs to a task group and we need to cancel it
1004  discard = 1 /* true */;
1005  }
1006  }
1007 
1008  //
1009  // Invoke the task routine and pass in relevant data.
1010  // Thunks generated by gcc take a different argument list.
1011  //
1012  if (!discard) {
1013 #endif // OMP_40_ENABLED
1014 #ifdef KMP_GOMP_COMPAT
1015  if (taskdata->td_flags.native) {
1016  ((void (*)(void *))(*(task->routine)))(task->shareds);
1017  }
1018  else
1019 #endif /* KMP_GOMP_COMPAT */
1020  {
1021  (*(task->routine))(gtid, task);
1022  }
1023 #if OMP_40_ENABLED
1024  }
1025 #endif // OMP_40_ENABLED
1026 
1027  __kmp_task_finish( gtid, task, current_task );
1028 
1029  KA_TRACE(30, ("__kmp_inovke_task(exit): T#%d completed task %p, resuming task %p\n",
1030  gtid, taskdata, current_task) );
1031  return;
1032 }
1033 
1034 //-----------------------------------------------------------------------
1035 // __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1036 //
1037 // loc_ref: location of original task pragma (ignored)
1038 // gtid: Global Thread ID of encountering thread
1039 // new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1040 // Returns:
1041 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1042 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1043 
1044 kmp_int32
1045 __kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1046 {
1047  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1048 
1049  KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1050  gtid, loc_ref, new_taskdata ) );
1051 
1052  /* Should we execute the new task or queue it? For now, let's just always try to
1053  queue it. If the queue fills up, then we'll execute it. */
1054 
1055  if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1056  { // Execute this task immediately
1057  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1058  new_taskdata->td_flags.task_serial = 1;
1059  __kmp_invoke_task( gtid, new_task, current_task );
1060  }
1061 
1062  KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1063  "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1064  new_taskdata ) );
1065 
1066  return TASK_CURRENT_NOT_QUEUED;
1067 }
1068 
1069 
1070 //---------------------------------------------------------------------
1071 // __kmpc_omp_task: Schedule a non-thread-switchable task for execution
1072 // loc_ref: location of original task pragma (ignored)
1073 // gtid: Global Thread ID of encountering thread
1074 // new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1075 // returns:
1076 //
1077 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1078 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1079 
1080 kmp_int32
1081 __kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1082 {
1083  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1084  kmp_int32 rc;
1085 
1086  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1087  gtid, loc_ref, new_taskdata ) );
1088 
1089  /* Should we execute the new task or queue it? For now, let's just always try to
1090  queue it. If the queue fills up, then we'll execute it. */
1091 
1092  if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1093  { // Execute this task immediately
1094  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1095  new_taskdata -> td_flags.task_serial = 1;
1096  __kmp_invoke_task( gtid, new_task, current_task );
1097  }
1098 
1099  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1100  gtid, loc_ref, new_taskdata ) );
1101 
1102  return TASK_CURRENT_NOT_QUEUED;
1103 }
1104 
1105 
1106 //-------------------------------------------------------------------------------------
1107 // __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1108 
1109 kmp_int32
1110 __kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1111 {
1112  kmp_taskdata_t * taskdata;
1113  kmp_info_t * thread;
1114  int thread_finished = FALSE;
1115 
1116  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
1117  gtid, loc_ref) );
1118 
1119  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1120  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1121 
1122  thread = __kmp_threads[ gtid ];
1123  taskdata = thread -> th.th_current_task;
1124 #if USE_ITT_BUILD
1125  // Note: These values are used by ITT events as well.
1126 #endif /* USE_ITT_BUILD */
1127  taskdata->td_taskwait_counter += 1;
1128  taskdata->td_taskwait_ident = loc_ref;
1129  taskdata->td_taskwait_thread = gtid + 1;
1130 
1131 #if USE_ITT_BUILD
1132  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1133  if ( itt_sync_obj != NULL )
1134  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1135 #endif /* USE_ITT_BUILD */
1136 
1137  if ( ! taskdata->td_flags.team_serial ) {
1138  // GEH: if team serialized, avoid reading the volatile variable below.
1139  while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
1140  __kmp_execute_tasks( thread, gtid, &(taskdata->td_incomplete_child_tasks),
1141  0, FALSE, &thread_finished
1142  USE_ITT_BUILD_ARG(itt_sync_obj),
1143  __kmp_task_stealing_constraint );
1144  }
1145  }
1146 #if USE_ITT_BUILD
1147  if ( itt_sync_obj != NULL )
1148  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1149 #endif /* USE_ITT_BUILD */
1150 
1151  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1152  taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1153  }
1154 
1155  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1156  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1157 
1158  return TASK_CURRENT_NOT_QUEUED;
1159 }
1160 
1161 
1162 //-------------------------------------------------
1163 // __kmpc_omp_taskyield: switch to a different task
1164 
1165 kmp_int32
1166 __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1167 {
1168  kmp_taskdata_t * taskdata;
1169  kmp_info_t * thread;
1170  int thread_finished = FALSE;
1171 
1172  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1173  gtid, loc_ref, end_part) );
1174 
1175  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1176  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1177 
1178  thread = __kmp_threads[ gtid ];
1179  taskdata = thread -> th.th_current_task;
1180  // Should we model this as a task wait or not?
1181 #if USE_ITT_BUILD
1182  // Note: These values are used by ITT events as well.
1183 #endif /* USE_ITT_BUILD */
1184  taskdata->td_taskwait_counter += 1;
1185  taskdata->td_taskwait_ident = loc_ref;
1186  taskdata->td_taskwait_thread = gtid + 1;
1187 
1188 #if USE_ITT_BUILD
1189  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1190  if ( itt_sync_obj != NULL )
1191  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1192 #endif /* USE_ITT_BUILD */
1193  if ( ! taskdata->td_flags.team_serial ) {
1194  __kmp_execute_tasks( thread, gtid, NULL, 0, FALSE, &thread_finished
1195  USE_ITT_BUILD_ARG(itt_sync_obj),
1196  __kmp_task_stealing_constraint );
1197  }
1198 
1199 #if USE_ITT_BUILD
1200  if ( itt_sync_obj != NULL )
1201  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1202 #endif /* USE_ITT_BUILD */
1203 
1204  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1205  taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1206  }
1207 
1208  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1209  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1210 
1211  return TASK_CURRENT_NOT_QUEUED;
1212 }
1213 
1214 
1215 #if OMP_40_ENABLED
1216 //-------------------------------------------------------------------------------------
1217 // __kmpc_taskgroup: Start a new taskgroup
1218 
1219 void
1220 __kmpc_taskgroup( ident_t* loc, int gtid )
1221 {
1222  kmp_info_t * thread = __kmp_threads[ gtid ];
1223  kmp_taskdata_t * taskdata = thread->th.th_current_task;
1224  kmp_taskgroup_t * tg_new =
1225  (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1226  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1227  tg_new->count = 0;
1228  tg_new->cancel_request = cancel_noreq;
1229  tg_new->parent = taskdata->td_taskgroup;
1230  taskdata->td_taskgroup = tg_new;
1231 }
1232 
1233 
1234 //-------------------------------------------------------------------------------------
1235 // __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1236 // and its descendants are complete
1237 
1238 void
1239 __kmpc_end_taskgroup( ident_t* loc, int gtid )
1240 {
1241  kmp_info_t * thread = __kmp_threads[ gtid ];
1242  kmp_taskdata_t * taskdata = thread->th.th_current_task;
1243  kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1244  int thread_finished = FALSE;
1245 
1246  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1247  KMP_DEBUG_ASSERT( taskgroup != NULL );
1248 
1249  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1250 #if USE_ITT_BUILD
1251  // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1252  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1253  if ( itt_sync_obj != NULL )
1254  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1255 #endif /* USE_ITT_BUILD */
1256 
1257  if ( ! taskdata->td_flags.team_serial ) {
1258  while ( TCR_4(taskgroup->count) != 0 ) {
1259  __kmp_execute_tasks( thread, gtid, &(taskgroup->count),
1260  0, FALSE, &thread_finished
1261  USE_ITT_BUILD_ARG(itt_sync_obj),
1262  __kmp_task_stealing_constraint );
1263  }
1264  }
1265 
1266 #if USE_ITT_BUILD
1267  if ( itt_sync_obj != NULL )
1268  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1269 #endif /* USE_ITT_BUILD */
1270  }
1271  KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1272 
1273  // Restore parent taskgroup for the current task
1274  taskdata->td_taskgroup = taskgroup->parent;
1275  __kmp_thread_free( thread, taskgroup );
1276 
1277  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1278 }
1279 #endif
1280 
1281 
1282 //------------------------------------------------------
1283 // __kmp_remove_my_task: remove a task from my own deque
1284 
1285 static kmp_task_t *
1286 __kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1287  kmp_int32 is_constrained )
1288 {
1289  kmp_task_t * task;
1290  kmp_taskdata_t * taskdata;
1291  kmp_thread_data_t *thread_data;
1292  kmp_uint32 tail;
1293 
1294  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1295  KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1296 
1297  thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1298 
1299  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1300  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1301  thread_data->td.td_deque_tail) );
1302 
1303  if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1304  KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1305  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1306  thread_data->td.td_deque_tail) );
1307  return NULL;
1308  }
1309 
1310  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1311 
1312  if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1313  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1314  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1315  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1316  thread_data->td.td_deque_tail) );
1317  return NULL;
1318  }
1319 
1320  tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1321  taskdata = thread_data -> td.td_deque[ tail ];
1322 
1323  if (is_constrained) {
1324  // we need to check if the candidate obeys task scheduling constraint:
1325  // only child of current task can be scheduled
1326  kmp_taskdata_t * current = thread->th.th_current_task;
1327  kmp_int32 level = current->td_level;
1328  kmp_taskdata_t * parent = taskdata->td_parent;
1329  while ( parent != current && parent->td_level > level ) {
1330  parent = parent->td_parent; // check generation up to the level of the current task
1331  KMP_DEBUG_ASSERT(parent != NULL);
1332  }
1333  if ( parent != current ) {
1334  // If the tail task is not a child, then no other childs can appear in the deque.
1335  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1336  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1337  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1338  thread_data->td.td_deque_tail) );
1339  return NULL;
1340  }
1341  }
1342 
1343  thread_data -> td.td_deque_tail = tail;
1344  TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1345 
1346  __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1347 
1348  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1349  gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1350  thread_data->td.td_deque_tail) );
1351 
1352  task = KMP_TASKDATA_TO_TASK( taskdata );
1353  return task;
1354 }
1355 
1356 
1357 //-----------------------------------------------------------
1358 // __kmp_steal_task: remove a task from another thread's deque
1359 // Assume that calling thread has already checked existence of
1360 // task_team thread_data before calling this routine.
1361 
1362 static kmp_task_t *
1363 __kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1364  volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1365  kmp_int32 is_constrained )
1366 {
1367  kmp_task_t * task;
1368  kmp_taskdata_t * taskdata;
1369  kmp_thread_data_t *victim_td, *threads_data;
1370  kmp_int32 victim_tid, thread_tid;
1371 
1372  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1373 
1374  threads_data = task_team -> tt.tt_threads_data;
1375  KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1376 
1377  victim_tid = victim->th.th_info.ds.ds_tid;
1378  victim_td = & threads_data[ victim_tid ];
1379 
1380  KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1381  "head=%u tail=%u\n",
1382  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1383  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1384 
1385  if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1386  (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1387  {
1388  KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1389  "ntasks=%d head=%u tail=%u\n",
1390  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1391  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1392  return NULL;
1393  }
1394 
1395  __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1396 
1397  // Check again after we acquire the lock
1398  if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1399  (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1400  {
1401  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1402  KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1403  "ntasks=%d head=%u tail=%u\n",
1404  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1405  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1406  return NULL;
1407  }
1408 
1409  KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1410 
1411  if ( !is_constrained ) {
1412  taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1413  // Bump head pointer and Wrap.
1414  victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1415  } else {
1416  // While we have postponed tasks let's steal from tail of the deque (smaller tasks)
1417  kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1418  taskdata = victim_td -> td.td_deque[ tail ];
1419  // we need to check if the candidate obeys task scheduling constraint:
1420  // only child of current task can be scheduled
1421  kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1422  kmp_int32 level = current->td_level;
1423  kmp_taskdata_t * parent = taskdata->td_parent;
1424  while ( parent != current && parent->td_level > level ) {
1425  parent = parent->td_parent; // check generation up to the level of the current task
1426  KMP_DEBUG_ASSERT(parent != NULL);
1427  }
1428  if ( parent != current ) {
1429  // If the tail task is not a child, then no other childs can appear in the deque (?).
1430  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1431  KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1432  "ntasks=%d head=%u tail=%u\n",
1433  gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1434  task_team, victim_td->td.td_deque_ntasks,
1435  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1436  return NULL;
1437  }
1438  victim_td -> td.td_deque_tail = tail;
1439  }
1440  if (*thread_finished) {
1441  // We need to un-mark this victim as a finished victim. This must be done before
1442  // releasing the lock, or else other threads (starting with the master victim)
1443  // might be prematurely released from the barrier!!!
1444  kmp_uint32 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
1445 
1446  KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1447  gtid, count + 1, task_team) );
1448 
1449  *thread_finished = FALSE;
1450  }
1451  TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1452 
1453  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1454 
1455  KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#d: task_team=%p "
1456  "ntasks=%d head=%u tail=%u\n",
1457  gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1458  victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1459  victim_td->td.td_deque_tail) );
1460 
1461  task = KMP_TASKDATA_TO_TASK( taskdata );
1462  return task;
1463 }
1464 
1465 
1466 //-----------------------------------------------------------------------------
1467 // __kmp_execute_tasks: Choose and execute tasks until either the condition
1468 // is statisfied (return true) or there are none left (return false).
1469 // final_spin is TRUE if this is the spin at the release barrier.
1470 // thread_finished indicates whether the thread is finished executing all
1471 // the tasks it has on its deque, and is at the release barrier.
1472 // spinner is the location on which to spin.
1473 // spinner == NULL means only execute a single task and return.
1474 // checker is the value to check to terminate the spin.
1475 
1476 int
1477 __kmp_execute_tasks( kmp_info_t *thread,
1478  kmp_int32 gtid,
1479  volatile kmp_uint *spinner,
1480  kmp_uint checker,
1481  int final_spin,
1482  int *thread_finished
1483  USE_ITT_BUILD_ARG(void * itt_sync_obj),
1484  kmp_int32 is_constrained )
1485 {
1486  kmp_task_team_t * task_team;
1487  kmp_team_t * team;
1488  kmp_thread_data_t * threads_data;
1489  kmp_task_t * task;
1490  kmp_taskdata_t * current_task = thread -> th.th_current_task;
1491  volatile kmp_uint32 * unfinished_threads;
1492  kmp_int32 nthreads, last_stolen, k, tid;
1493 
1494  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1495  KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1496 
1497  task_team = thread -> th.th_task_team;
1498  KMP_DEBUG_ASSERT( task_team != NULL );
1499 
1500  KA_TRACE(15, ("__kmp_execute_tasks(enter): T#%d final_spin=%d *thread_finished=%d\n",
1501  gtid, final_spin, *thread_finished) );
1502 
1503  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1504  KMP_DEBUG_ASSERT( threads_data != NULL );
1505 
1506  nthreads = task_team -> tt.tt_nproc;
1507  unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
1508  KMP_DEBUG_ASSERT( nthreads > 1 );
1509  KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1510 
1511  // Choose tasks from our own work queue.
1512  start:
1513  while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1514 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1515  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1516  if ( itt_sync_obj == NULL ) {
1517  // we are at fork barrier where we could not get the object reliably
1518  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1519  }
1520  __kmp_itt_task_starting( itt_sync_obj );
1521  }
1522 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1523  __kmp_invoke_task( gtid, task, current_task );
1524 #if USE_ITT_BUILD
1525  if ( itt_sync_obj != NULL )
1526  __kmp_itt_task_finished( itt_sync_obj );
1527 #endif /* USE_ITT_BUILD */
1528 
1529  // If this thread is only partway through the barrier and the condition
1530  // is met, then return now, so that the barrier gather/release pattern can proceed.
1531  // If this thread is in the last spin loop in the barrier, waiting to be
1532  // released, we know that the termination condition will not be satisified,
1533  // so don't waste any cycles checking it.
1534  if ((spinner == NULL) || ((!final_spin) && (TCR_4(*spinner) == checker))) {
1535  KA_TRACE(15, ("__kmp_execute_tasks(exit #1): T#%d spin condition satisfied\n", gtid) );
1536  return TRUE;
1537  }
1538  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1539  }
1540 
1541  // This thread's work queue is empty. If we are in the final spin loop
1542  // of the barrier, check and see if the termination condition is satisfied.
1543  if (final_spin) {
1544  // First, decrement the #unfinished threads, if that has not already
1545  // been done. This decrement might be to the spin location, and
1546  // result in the termination condition being satisfied.
1547  if (! *thread_finished) {
1548  kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1549  KA_TRACE(20, ("__kmp_execute_tasks(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
1550  gtid, count, task_team) );
1551  *thread_finished = TRUE;
1552  }
1553 
1554  // It is now unsafe to reference thread->th.th_team !!!
1555  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1556  // thread to pass through the barrier, where it might reset each thread's
1557  // th.th_team field for the next parallel region.
1558  // If we can steal more work, we know that this has not happened yet.
1559  if ((spinner != NULL) && (TCR_4(*spinner) == checker)) {
1560  KA_TRACE(15, ("__kmp_execute_tasks(exit #2): T#%d spin condition satisfied\n", gtid) );
1561  return TRUE;
1562  }
1563  }
1564 
1565  // Try to steal from the last place I stole from successfully.
1566  tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1567  last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1568 
1569  if (last_stolen != -1) {
1570  kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1571 
1572  while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1573  thread_finished, is_constrained )) != NULL)
1574  {
1575 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1576  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1577  if ( itt_sync_obj == NULL ) {
1578  // we are at fork barrier where we could not get the object reliably
1579  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1580  }
1581  __kmp_itt_task_starting( itt_sync_obj );
1582  }
1583 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1584  __kmp_invoke_task( gtid, task, current_task );
1585 #if USE_ITT_BUILD
1586  if ( itt_sync_obj != NULL )
1587  __kmp_itt_task_finished( itt_sync_obj );
1588 #endif /* USE_ITT_BUILD */
1589 
1590  // Check to see if this thread can proceed.
1591  if ((spinner == NULL) || ((!final_spin) && (TCR_4(*spinner) == checker))) {
1592  KA_TRACE(15, ("__kmp_execute_tasks(exit #3): T#%d spin condition satisfied\n",
1593  gtid) );
1594  return TRUE;
1595  }
1596 
1597  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1598  // If the execution of the stolen task resulted in more tasks being
1599  // placed on our run queue, then restart the whole process.
1600  if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1601  KA_TRACE(20, ("__kmp_execute_tasks: T#%d stolen task spawned other tasks, restart\n",
1602  gtid) );
1603  goto start;
1604  }
1605  }
1606 
1607  // Don't give priority to stealing from this thread anymore.
1608  threads_data[ tid ].td.td_deque_last_stolen = -1;
1609 
1610  // The victims's work queue is empty. If we are in the final spin loop
1611  // of the barrier, check and see if the termination condition is satisfied.
1612  if (final_spin) {
1613  // First, decrement the #unfinished threads, if that has not already
1614  // been done. This decrement might be to the spin location, and
1615  // result in the termination condition being satisfied.
1616  if (! *thread_finished) {
1617  kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1618  KA_TRACE(20, ("__kmp_execute_tasks(dec #2): T#%d dec unfinished_threads to %d "
1619  "task_team=%p\n", gtid, count, task_team) );
1620  *thread_finished = TRUE;
1621  }
1622 
1623  // If __kmp_tasking_mode != tskm_immediate_exec
1624  // then it is now unsafe to reference thread->th.th_team !!!
1625  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1626  // thread to pass through the barrier, where it might reset each thread's
1627  // th.th_team field for the next parallel region.
1628  // If we can steal more work, we know that this has not happened yet.
1629  if ((spinner != NULL) && (TCR_4(*spinner) == checker)) {
1630  KA_TRACE(15, ("__kmp_execute_tasks(exit #4): T#%d spin condition satisfied\n",
1631  gtid) );
1632  return TRUE;
1633  }
1634  }
1635  }
1636 
1637  // Find a different thread to steal work from. Pick a random thread.
1638  // My initial plan was to cycle through all the threads, and only return
1639  // if we tried to steal from every thread, and failed. Arch says that's
1640  // not such a great idea.
1641  // GEH - need yield code in this loop for throughput library mode?
1642  new_victim:
1643  k = __kmp_get_random( thread ) % (nthreads - 1);
1644  if ( k >= thread -> th.th_info.ds.ds_tid ) {
1645  ++k; // Adjusts random distribution to exclude self
1646  }
1647  {
1648  kmp_info_t *other_thread = threads_data[k].td.td_thr;
1649  int first;
1650 
1651  // There is a slight chance that __kmp_enable_tasking() did not wake up
1652  // all threads waiting at the barrier. If this thread is sleeping, then
1653  // then wake it up. Since we weree going to pay the cache miss penalty
1654  // for referenceing another thread's kmp_info_t struct anyway, the check
1655  // shouldn't cost too much performance at this point.
1656  // In extra barrier mode, tasks do not sleep at the separate tasking
1657  // barrier, so this isn't a problem.
1658  if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1659  (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1660  (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1661  {
1662  __kmp_resume( __kmp_gtid_from_thread( other_thread ), NULL );
1663 
1664  // A sleeping thread should not have any tasks on it's queue.
1665  // There is a slight possiblility that it resumes, steals a task from
1666  // another thread, which spawns more tasks, all in the that it takes
1667  // this thread to check => don't write an assertion that the victim's
1668  // queue is empty. Try stealing from a different thread.
1669  goto new_victim;
1670  }
1671 
1672  // Now try to steal work from the selected thread
1673  first = TRUE;
1674  while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1675  thread_finished, is_constrained )) != NULL)
1676  {
1677 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1678  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1679  if ( itt_sync_obj == NULL ) {
1680  // we are at fork barrier where we could not get the object reliably
1681  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1682  }
1683  __kmp_itt_task_starting( itt_sync_obj );
1684  }
1685 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1686  __kmp_invoke_task( gtid, task, current_task );
1687 #if USE_ITT_BUILD
1688  if ( itt_sync_obj != NULL )
1689  __kmp_itt_task_finished( itt_sync_obj );
1690 #endif /* USE_ITT_BUILD */
1691 
1692  // Try stealing from this victim again, in the future.
1693  if (first) {
1694  threads_data[ tid ].td.td_deque_last_stolen = k;
1695  first = FALSE;
1696  }
1697 
1698  // Check to see if this thread can proceed.
1699  if ((spinner == NULL) || ((!final_spin) && (TCR_4(*spinner) == checker))) {
1700  KA_TRACE(15, ("__kmp_execute_tasks(exit #5): T#%d spin condition satisfied\n",
1701  gtid) );
1702  return TRUE;
1703  }
1704  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1705 
1706  // If the execution of the stolen task resulted in more tasks being
1707  // placed on our run queue, then restart the whole process.
1708  if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1709  KA_TRACE(20, ("__kmp_execute_tasks: T#%d stolen task spawned other tasks, restart\n",
1710  gtid) );
1711  goto start;
1712  }
1713  }
1714 
1715  // The victims's work queue is empty. If we are in the final spin loop
1716  // of the barrier, check and see if the termination condition is satisfied.
1717  // Going on and finding a new victim to steal from is expensive, as it
1718  // involves a lot of cache misses, so we definitely want to re-check the
1719  // termination condition before doing that.
1720  if (final_spin) {
1721  // First, decrement the #unfinished threads, if that has not already
1722  // been done. This decrement might be to the spin location, and
1723  // result in the termination condition being satisfied.
1724  if (! *thread_finished) {
1725  kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1726  KA_TRACE(20, ("__kmp_execute_tasks(dec #3): T#%d dec unfinished_threads to %d; "
1727  "task_team=%p\n",
1728  gtid, count, task_team) );
1729  *thread_finished = TRUE;
1730  }
1731 
1732  // If __kmp_tasking_mode != tskm_immediate_exec,
1733  // then it is now unsafe to reference thread->th.th_team !!!
1734  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1735  // thread to pass through the barrier, where it might reset each thread's
1736  // th.th_team field for the next parallel region.
1737  // If we can steal more work, we know that this has not happened yet.
1738  if ((spinner != NULL) && (TCR_4(*spinner) == checker)) {
1739  KA_TRACE(15, ("__kmp_execute_tasks(exit #6): T#%d spin condition satisfied\n",
1740  gtid) );
1741  return TRUE;
1742  }
1743  }
1744  }
1745 
1746  KA_TRACE(15, ("__kmp_execute_tasks(exit #7): T#%d can't find work\n", gtid) );
1747  return FALSE;
1748 }
1749 
1750 
1751 //-----------------------------------------------------------------------------
1752 // __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
1753 // next barrier so they can assist in executing enqueued tasks.
1754 // First thread in allocates the task team atomically.
1755 
1756 static void
1757 __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
1758 {
1759  kmp_team_t *team = this_thr->th.th_team;
1760  kmp_thread_data_t *threads_data;
1761  int nthreads, i, is_init_thread;
1762 
1763  KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
1764  __kmp_gtid_from_thread( this_thr ) ) );
1765 
1766  KMP_DEBUG_ASSERT(task_team != NULL);
1767  KMP_DEBUG_ASSERT(team != NULL);
1768 
1769  nthreads = task_team->tt.tt_nproc;
1770  KMP_DEBUG_ASSERT(nthreads > 0);
1771  KMP_DEBUG_ASSERT(nthreads == team->t.t_nproc);
1772 
1773  // Allocate or increase the size of threads_data if necessary
1774  is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
1775 
1776  if (!is_init_thread) {
1777  // Some other thread already set up the array.
1778  KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
1779  __kmp_gtid_from_thread( this_thr ) ) );
1780  return;
1781  }
1782  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1783  KMP_DEBUG_ASSERT( threads_data != NULL );
1784 
1785  if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1786  ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
1787  {
1788  // Release any threads sleeping at the barrier, so that they can steal
1789  // tasks and execute them. In extra barrier mode, tasks do not sleep
1790  // at the separate tasking barrier, so this isn't a problem.
1791  for (i = 0; i < nthreads; i++) {
1792  volatile kmp_uint *sleep_loc;
1793  kmp_info_t *thread = threads_data[i].td.td_thr;
1794 
1795  if (i == this_thr->th.th_info.ds.ds_tid) {
1796  continue;
1797  }
1798  // Since we haven't locked the thread's suspend mutex lock at this
1799  // point, there is a small window where a thread might be putting
1800  // itself to sleep, but hasn't set the th_sleep_loc field yet.
1801  // To work around this, __kmp_execute_tasks() periodically checks
1802  // see if other threads are sleeping (using the same random
1803  // mechanism that is used for task stealing) and awakens them if
1804  // they are.
1805  if ( ( sleep_loc = (volatile kmp_uint *)
1806  TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
1807  {
1808  KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
1809  __kmp_gtid_from_thread( this_thr ),
1810  __kmp_gtid_from_thread( thread ) ) );
1811  __kmp_resume( __kmp_gtid_from_thread( thread ), sleep_loc );
1812  }
1813  else {
1814  KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
1815  __kmp_gtid_from_thread( this_thr ),
1816  __kmp_gtid_from_thread( thread ) ) );
1817  }
1818  }
1819  }
1820 
1821  KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
1822  __kmp_gtid_from_thread( this_thr ) ) );
1823 }
1824 
1825 
1826 /* ------------------------------------------------------------------------ */
1827 /*
1828  * Utility routines for "task teams". A task team (kmp_task_t) is kind of
1829  * like a shadow of the kmp_team_t data struct, with a different lifetime.
1830  * After a child * thread checks into a barrier and calls __kmp_release() from
1831  * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
1832  * longer assume that the kmp_team_t structure is intact (at any moment, the
1833  * master thread may exit the barrier code and free the team data structure,
1834  * and return the threads to the thread pool).
1835  *
1836  * This does not work with the the tasking code, as the thread is still
1837  * expected to participate in the execution of any tasks that may have been
1838  * spawned my a member of the team, and the thread still needs access to all
1839  * to each thread in the team, so that it can steal work from it.
1840  *
1841  * Enter the existence of the kmp_task_team_t struct. It employs a reference
1842  * counting mechanims, and is allocated by the master thread before calling
1843  * __kmp_<barrier_kind>_release, and then is release by the last thread to
1844  * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
1845  * of the kmp_task_team_t structs for consecutive barriers can overlap
1846  * (and will, unless the master thread is the last thread to exit the barrier
1847  * release phase, which is not typical).
1848  *
1849  * The existence of such a struct is useful outside the context of tasking,
1850  * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
1851  * so that any performance differences show up when comparing the 2.5 vs. 3.0
1852  * libraries.
1853  *
1854  * We currently use the existence of the threads array as an indicator that
1855  * tasks were spawned since the last barrier. If the structure is to be
1856  * useful outside the context of tasking, then this will have to change, but
1857  * not settting the field minimizes the performance impact of tasking on
1858  * barriers, when no explicit tasks were spawned (pushed, actually).
1859  */
1860 
1861 static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
1862 // Lock for task team data structures
1863 static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
1864 
1865 
1866 //------------------------------------------------------------------------------
1867 // __kmp_alloc_task_deque:
1868 // Allocates a task deque for a particular thread, and initialize the necessary
1869 // data structures relating to the deque. This only happens once per thread
1870 // per task team since task teams are recycled.
1871 // No lock is needed during allocation since each thread allocates its own
1872 // deque.
1873 
1874 static void
1875 __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
1876 {
1877  __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
1878  KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
1879 
1880  // Initialize last stolen task field to "none"
1881  thread_data -> td.td_deque_last_stolen = -1;
1882 
1883  KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
1884  KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
1885  KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
1886 
1887  KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
1888  __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
1889  // Allocate space for task deque, and zero the deque
1890  // Cannot use __kmp_thread_calloc() because threads not around for
1891  // kmp_reap_task_team( ).
1892  thread_data -> td.td_deque = (kmp_taskdata_t **)
1893  __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
1894 }
1895 
1896 
1897 //------------------------------------------------------------------------------
1898 // __kmp_free_task_deque:
1899 // Deallocates a task deque for a particular thread.
1900 // Happens at library deallocation so don't need to reset all thread data fields.
1901 
1902 static void
1903 __kmp_free_task_deque( kmp_thread_data_t *thread_data )
1904 {
1905  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1906 
1907  if ( thread_data -> td.td_deque != NULL ) {
1908  TCW_4(thread_data -> td.td_deque_ntasks, 0);
1909  __kmp_free( thread_data -> td.td_deque );
1910  thread_data -> td.td_deque = NULL;
1911  }
1912  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1913 
1914 #ifdef BUILD_TIED_TASK_STACK
1915  // GEH: Figure out what to do here for td_susp_tied_tasks
1916  if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
1917  __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
1918  }
1919 #endif // BUILD_TIED_TASK_STACK
1920 }
1921 
1922 
1923 //------------------------------------------------------------------------------
1924 // __kmp_realloc_task_threads_data:
1925 // Allocates a threads_data array for a task team, either by allocating an initial
1926 // array or enlarging an existing array. Only the first thread to get the lock
1927 // allocs or enlarges the array and re-initializes the array eleemnts.
1928 // That thread returns "TRUE", the rest return "FALSE".
1929 // Assumes that the new array size is given by task_team -> tt.tt_nproc.
1930 // The current size is given by task_team -> tt.tt_max_threads.
1931 
1932 static int
1933 __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
1934 {
1935  kmp_thread_data_t ** threads_data_p;
1936  kmp_int32 nthreads, maxthreads;
1937  int is_init_thread = FALSE;
1938 
1939  if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
1940  // Already reallocated and initialized.
1941  return FALSE;
1942  }
1943 
1944  threads_data_p = & task_team -> tt.tt_threads_data;
1945  nthreads = task_team -> tt.tt_nproc;
1946  maxthreads = task_team -> tt.tt_max_threads;
1947 
1948  // All threads must lock when they encounter the first task of the implicit task
1949  // region to make sure threads_data fields are (re)initialized before used.
1950  __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
1951 
1952  if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
1953  // first thread to enable tasking
1954  kmp_team_t *team = thread -> th.th_team;
1955  int i;
1956 
1957  is_init_thread = TRUE;
1958  if ( maxthreads < nthreads ) {
1959 
1960  if ( *threads_data_p != NULL ) {
1961  kmp_thread_data_t *old_data = *threads_data_p;
1962  kmp_thread_data_t *new_data = NULL;
1963 
1964  KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
1965  "threads data for task_team %p, new_size = %d, old_size = %d\n",
1966  __kmp_gtid_from_thread( thread ), task_team,
1967  nthreads, maxthreads ) );
1968  // Reallocate threads_data to have more elements than current array
1969  // Cannot use __kmp_thread_realloc() because threads not around for
1970  // kmp_reap_task_team( ). Note all new array entries are initialized
1971  // to zero by __kmp_allocate().
1972  new_data = (kmp_thread_data_t *)
1973  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
1974  // copy old data to new data
1975  memcpy( (void *) new_data, (void *) old_data,
1976  maxthreads * sizeof(kmp_taskdata_t *) );
1977 
1978 #ifdef BUILD_TIED_TASK_STACK
1979  // GEH: Figure out if this is the right thing to do
1980  for (i = maxthreads; i < nthreads; i++) {
1981  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
1982  __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
1983  }
1984 #endif // BUILD_TIED_TASK_STACK
1985  // Install the new data and free the old data
1986  (*threads_data_p) = new_data;
1987  __kmp_free( old_data );
1988  }
1989  else {
1990  KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
1991  "threads data for task_team %p, size = %d\n",
1992  __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
1993  // Make the initial allocate for threads_data array, and zero entries
1994  // Cannot use __kmp_thread_calloc() because threads not around for
1995  // kmp_reap_task_team( ).
1996  *threads_data_p = (kmp_thread_data_t *)
1997  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
1998 #ifdef BUILD_TIED_TASK_STACK
1999  // GEH: Figure out if this is the right thing to do
2000  for (i = 0; i < nthreads; i++) {
2001  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2002  __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2003  }
2004 #endif // BUILD_TIED_TASK_STACK
2005  }
2006  task_team -> tt.tt_max_threads = nthreads;
2007  }
2008  else {
2009  // If array has (more than) enough elements, go ahead and use it
2010  KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2011  }
2012 
2013  // initialize threads_data pointers back to thread_info structures
2014  for (i = 0; i < nthreads; i++) {
2015  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2016  thread_data -> td.td_thr = team -> t.t_threads[i];
2017 
2018  if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2019  // The last stolen field survives across teams / barrier, and the number
2020  // of threads may have changed. It's possible (likely?) that a new
2021  // parallel region will exhibit the same behavior as the previous region.
2022  thread_data -> td.td_deque_last_stolen = -1;
2023  }
2024  }
2025 
2026  KMP_MB();
2027  TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2028  }
2029 
2030  __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2031  return is_init_thread;
2032 }
2033 
2034 
2035 //------------------------------------------------------------------------------
2036 // __kmp_free_task_threads_data:
2037 // Deallocates a threads_data array for a task team, including any attached
2038 // tasking deques. Only occurs at library shutdown.
2039 
2040 static void
2041 __kmp_free_task_threads_data( kmp_task_team_t *task_team )
2042 {
2043  __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2044  if ( task_team -> tt.tt_threads_data != NULL ) {
2045  int i;
2046  for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2047  __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2048  }
2049  __kmp_free( task_team -> tt.tt_threads_data );
2050  task_team -> tt.tt_threads_data = NULL;
2051  }
2052  __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2053 }
2054 
2055 
2056 //------------------------------------------------------------------------------
2057 // __kmp_allocate_task_team:
2058 // Allocates a task team associated with a specific team, taking it from
2059 // the global task team free list if possible. Also initializes data structures.
2060 
2061 static kmp_task_team_t *
2062 __kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2063 {
2064  kmp_task_team_t *task_team = NULL;
2065  int nthreads;
2066 
2067  KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2068  (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2069 
2070  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2071  // Take a task team from the task team pool
2072  __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2073  if (__kmp_free_task_teams != NULL) {
2074  task_team = __kmp_free_task_teams;
2075  TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2076  task_team -> tt.tt_next = NULL;
2077  }
2078  __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2079  }
2080 
2081  if (task_team == NULL) {
2082  KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2083  "task team for team %p\n",
2084  __kmp_gtid_from_thread( thread ), team ) );
2085  // Allocate a new task team if one is not available.
2086  // Cannot use __kmp_thread_malloc() because threads not around for
2087  // kmp_reap_task_team( ).
2088  task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2089  __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2090  //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2091  //task_team -> tt.tt_max_threads = 0;
2092  //task_team -> tt.tt_next = NULL;
2093  }
2094 
2095  TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2096  task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2097 
2098  task_team -> tt.tt_state = 0;
2099  TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2100  TCW_4( task_team -> tt.tt_active, TRUE );
2101  TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
2102 
2103  KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
2104  (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
2105  return task_team;
2106 }
2107 
2108 
2109 //------------------------------------------------------------------------------
2110 // __kmp_free_task_team:
2111 // Frees the task team associated with a specific thread, and adds it
2112 // to the global task team free list.
2113 //
2114 
2115 static void
2116 __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2117 {
2118  KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2119  thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2120 
2121  KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
2122 
2123  // Put task team back on free list
2124  __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2125 
2126  KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2127  task_team -> tt.tt_next = __kmp_free_task_teams;
2128  TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2129  TCW_PTR(__kmp_free_task_teams, task_team);
2130 
2131  __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2132 }
2133 
2134 
2135 //------------------------------------------------------------------------------
2136 // __kmp_reap_task_teams:
2137 // Free all the task teams on the task team free list.
2138 // Should only be done during library shutdown.
2139 // Cannot do anything that needs a thread structure or gtid since they are already gone.
2140 
2141 void
2142 __kmp_reap_task_teams( void )
2143 {
2144  kmp_task_team_t *task_team;
2145 
2146  if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2147  // Free all task_teams on the free list
2148  __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2149  while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2150  __kmp_free_task_teams = task_team -> tt.tt_next;
2151  task_team -> tt.tt_next = NULL;
2152 
2153  // Free threads_data if necessary
2154  if ( task_team -> tt.tt_threads_data != NULL ) {
2155  __kmp_free_task_threads_data( task_team );
2156  }
2157  __kmp_free( task_team );
2158  }
2159  __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2160  }
2161 }
2162 
2163 
2164 //------------------------------------------------------------------------------
2165 // __kmp_unref_task_teams:
2166 // Remove one thread from referencing the task team structure by
2167 // decreasing the reference count and deallocate task team if no more
2168 // references to it.
2169 //
2170 void
2171 __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
2172 {
2173  kmp_uint ref_ct;
2174 
2175  ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
2176 
2177  KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
2178  __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
2179 
2180 
2181  if ( ref_ct == 0 ) {
2182  __kmp_free_task_team( thread, task_team );
2183  }
2184 
2185  TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
2186 }
2187 
2188 
2189 //------------------------------------------------------------------------------
2190 // __kmp_wait_to_unref_task_teams:
2191 // Some threads could still be in the fork barrier release code, possibly
2192 // trying to steal tasks. Wait for each thread to unreference its task team.
2193 //
2194 void
2195 __kmp_wait_to_unref_task_teams(void)
2196 {
2197  kmp_info_t *thread;
2198  kmp_uint32 spins;
2199  int done;
2200 
2201  KMP_INIT_YIELD( spins );
2202 
2203 
2204  for (;;) {
2205  done = TRUE;
2206 
2207  // TODO: GEH - this may be is wrong because some sync would be necessary
2208  // in case threads are added to the pool during the traversal.
2209  // Need to verify that lock for thread pool is held when calling
2210  // this routine.
2211  for (thread = (kmp_info_t *)__kmp_thread_pool;
2212  thread != NULL;
2213  thread = thread->th.th_next_pool)
2214  {
2215  volatile kmp_uint *sleep_loc;
2216 #if KMP_OS_WINDOWS
2217  DWORD exit_val;
2218 #endif
2219  if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2220  KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2221  __kmp_gtid_from_thread( thread ) ) );
2222  continue;
2223  }
2224 #if KMP_OS_WINDOWS
2225  // TODO: GEH - add this check for Linux* OS / OS X* as well?
2226  if (!__kmp_is_thread_alive(thread, &exit_val)) {
2227  if (TCR_PTR(thread->th.th_task_team) != NULL) {
2228  __kmp_unref_task_team( thread->th.th_task_team, thread );
2229  }
2230  continue;
2231  }
2232 #endif
2233 
2234  done = FALSE; // Because th_task_team pointer is not NULL for this thread
2235 
2236  KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2237  __kmp_gtid_from_thread( thread ) ) );
2238 
2239  if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
2240  // If the thread is sleeping, awaken it.
2241  if ( ( sleep_loc = (volatile kmp_uint *) TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
2242  KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2243  __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
2244  __kmp_resume( __kmp_gtid_from_thread( thread ), sleep_loc );
2245  }
2246  }
2247  }
2248  if (done) {
2249  break;
2250  }
2251 
2252  // If we are oversubscribed,
2253  // or have waited a bit (and library mode is throughput), yield.
2254  // Pause is in the following code.
2255  KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2256  KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2257  }
2258 
2259 
2260 }
2261 
2262 
2263 //------------------------------------------------------------------------------
2264 // __kmp_task_team_setup: Create a task_team for the current team, but use
2265 // an already created, unused one if it already exists.
2266 // This may be called by any thread, but only for teams with # threads >1.
2267 
2268 void
2269 __kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team )
2270 {
2271  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2272 
2273  if ( ( team->t.t_task_team == NULL ) && ( team->t.t_nproc > 1 ) ) {
2274  // Allocate a new task team, which will be propagated to
2275  // all of the worker threads after the barrier. As they
2276  // spin in the barrier release phase, then will continue
2277  // to use the previous task team struct, until they receive
2278  // the signal to stop checking for tasks (they can't safely
2279  // reference the kmp_team_t struct, which could be reallocated
2280  // by the master thread).
2281  team->t.t_task_team = __kmp_allocate_task_team( this_thr, team );
2282  KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new "
2283  "task_team %p for team %d\n",
2284  __kmp_gtid_from_thread( this_thr ), team->t.t_task_team,
2285  ((team != NULL) ? team->t.t_id : -1)) );
2286  }
2287  else {
2288  // All threads have reported in, and no tasks were spawned
2289  // for this release->gather region. Leave the old task
2290  // team struct in place for the upcoming region. No task
2291  // teams are formed for serialized teams.
2292  }
2293  if ( team->t.t_task_team != NULL ) {
2294  // Toggle the state flag so that we can tell which side of
2295  // the barrier we are on.
2296  team->t.t_task_team->tt.tt_state = 1 - this_thr->th.th_task_state;
2297  }
2298 }
2299 
2300 
2301 //------------------------------------------------------------------------------
2302 // __kmp_task_team_sync: Propagation of task team data from team to threads
2303 // which happens just after the release phase of a team barrier. This may be
2304 // called by any thread, but only for teams with # threads > 1.
2305 
2306 void
2307 __kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2308 {
2309  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2310 
2311  // On the rare chance that this thread never saw that the task
2312  // team was no longer active, then unref/deallocate it now.
2313  if ( this_thr->th.th_task_team != NULL ) {
2314  if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
2315  KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
2316  __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
2317  } else {
2318  //
2319  // We are re-using a task team that was never enabled.
2320  //
2321  KMP_DEBUG_ASSERT( this_thr->th.th_task_team == team->t.t_task_team );
2322  }
2323  }
2324 
2325  //
2326  // It is now safe to propagate the task team pointer from the
2327  // team struct to the current thread.
2328  //
2329  TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team);
2330  if ( this_thr->th.th_task_team != NULL ) {
2331  //
2332  // Toggle the th_task_state field, instead of reading it from
2333  // the task team. Reading the tt_state field at this point
2334  // causes a 30% regression on EPCC parallel - toggling it
2335  // is much cheaper.
2336  //
2337  this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
2338  KMP_DEBUG_ASSERT( this_thr->th.th_task_state == TCR_4(team->t.t_task_team->tt.tt_state) );
2339  }
2340  KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
2341  __kmp_gtid_from_thread( this_thr ), &this_thr->th.th_task_team,
2342  this_thr->th.th_task_team, ((team != NULL) ? (team->t.t_id) : -1) ) );
2343 }
2344 
2345 
2346 //------------------------------------------------------------------------------
2347 // __kmp_task_team_wait: Master thread waits for outstanding tasks after
2348 // the barrier gather phase. Only called by master thread if #threads
2349 // in team > 1 !
2350 
2351 void
2352 __kmp_task_team_wait( kmp_info_t *this_thr,
2353  kmp_team_t *team
2354  USE_ITT_BUILD_ARG(void * itt_sync_obj)
2355  )
2356 {
2357  kmp_task_team_t *task_team = team->t.t_task_team;
2358 
2359  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2360  KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2361 
2362  if ( ( task_team != NULL ) && KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) {
2363  KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
2364  __kmp_gtid_from_thread( this_thr ), task_team ) );
2365  //
2366  // All worker threads might have dropped through to the
2367  // release phase, but could still be executing tasks.
2368  // Wait here for all tasks to complete. To avoid memory
2369  // contention, only the master thread checks for the
2370  // termination condition.
2371  //
2372  __kmp_wait_sleep( this_thr, &task_team->tt.tt_unfinished_threads, 0, TRUE
2373  USE_ITT_BUILD_ARG(itt_sync_obj)
2374  );
2375 
2376  //
2377  // Kill the old task team, so that the worker threads will
2378  // stop referencing it while spinning. They will
2379  // deallocate it when the reference count reaches zero.
2380  // The master thread is not included in the ref count.
2381  //
2382  KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
2383  __kmp_gtid_from_thread( this_thr ), task_team ) );
2384  KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
2385  TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2386  KMP_MB();
2387 
2388  TCW_PTR(this_thr->th.th_task_team, NULL);
2389  team->t.t_task_team = NULL;
2390  }
2391 }
2392 
2393 
2394 //------------------------------------------------------------------------------
2395 // __kmp_tasking_barrier:
2396 // Internal function to execute all tasks prior to a regular barrier or a
2397 // join barrier. It is a full barrier itself, which unfortunately turns
2398 // regular barriers into double barriers and join barriers into 1 1/2
2399 // barriers.
2400 // This routine may only called when __kmp_tasking_mode == tskm_extra_barrier.
2401 
2402 void
2403 __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2404 {
2405  volatile kmp_uint32 *spin = &team->t.t_task_team->tt.tt_unfinished_threads;
2406  int flag = FALSE;
2407  KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2408 
2409 #if USE_ITT_BUILD
2410  KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2411 #endif /* USE_ITT_BUILD */
2412  while (! __kmp_execute_tasks( thread, gtid, spin, 0, TRUE, &flag
2413  USE_ITT_BUILD_ARG(NULL), 0 ) ) {
2414 #if USE_ITT_BUILD
2415  // TODO: What about itt_sync_obj??
2416  KMP_FSYNC_SPIN_PREPARE( spin );
2417 #endif /* USE_ITT_BUILD */
2418 
2419  if( TCR_4(__kmp_global.g.g_done) ) {
2420  if( __kmp_global.g.g_abort )
2421  __kmp_abort_thread( );
2422  break;
2423  }
2424  KMP_YIELD( TRUE ); // GH: We always yield here
2425  }
2426 #if USE_ITT_BUILD
2427  KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2428 #endif /* USE_ITT_BUILD */
2429 }
2430 
2431 #endif // OMP_30_ENABLED
2432 
Definition: kmp.h:200