Intel® OpenMP* Runtime Library
kmp_tasking.c
1 /*
2  * kmp_tasking.c -- OpenMP 3.0 tasking support.
3  * $Revision: 43389 $
4  * $Date: 2014-08-11 10:54:01 -0500 (Mon, 11 Aug 2014) $
5  */
6 
7 /* <copyright>
8  Copyright (c) 1997-2014 Intel Corporation. All Rights Reserved.
9 
10  Redistribution and use in source and binary forms, with or without
11  modification, are permitted provided that the following conditions
12  are met:
13 
14  * Redistributions of source code must retain the above copyright
15  notice, this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright
17  notice, this list of conditions and the following disclaimer in the
18  documentation and/or other materials provided with the distribution.
19  * Neither the name of Intel Corporation nor the names of its
20  contributors may be used to endorse or promote products derived
21  from this software without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 </copyright> */
36 
37 #include "kmp.h"
38 #include "kmp_i18n.h"
39 #include "kmp_itt.h"
40 #include "kmp_wait_release.h"
41 
42 
43 
44 /* ------------------------------------------------------------------------ */
45 /* ------------------------------------------------------------------------ */
46 
47 
48 /* forward declaration */
49 static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
50 static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
51 static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
52 
53 static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
54  switch (((kmp_flag_64 *)flag)->get_type()) {
55  case flag32: __kmp_resume_32(gtid, NULL); break;
56  case flag64: __kmp_resume_64(gtid, NULL); break;
57  case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
58  }
59 }
60 
61 #ifdef BUILD_TIED_TASK_STACK
62 
63 //---------------------------------------------------------------------------
64 // __kmp_trace_task_stack: print the tied tasks from the task stack in order
65 // from top to bottom
66 //
67 // gtid: global thread identifier for thread containing stack
68 // thread_data: thread data for task team thread containing stack
69 // threshold: value above which the trace statement triggers
70 // location: string identifying call site of this function (for trace)
71 
72 static void
73 __kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
74 {
75  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
76  kmp_taskdata_t **stack_top = task_stack -> ts_top;
77  kmp_int32 entries = task_stack -> ts_entries;
78  kmp_taskdata_t *tied_task;
79 
80  KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
81  "first_block = %p, stack_top = %p \n",
82  location, gtid, entries, task_stack->ts_first_block, stack_top ) );
83 
84  KMP_DEBUG_ASSERT( stack_top != NULL );
85  KMP_DEBUG_ASSERT( entries > 0 );
86 
87  while ( entries != 0 )
88  {
89  KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
90  // fix up ts_top if we need to pop from previous block
91  if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
92  {
93  kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
94 
95  stack_block = stack_block -> sb_prev;
96  stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
97  }
98 
99  // finish bookkeeping
100  stack_top--;
101  entries--;
102 
103  tied_task = * stack_top;
104 
105  KMP_DEBUG_ASSERT( tied_task != NULL );
106  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
107 
108  KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
109  "stack_top=%p, tied_task=%p\n",
110  location, gtid, entries, stack_top, tied_task ) );
111  }
112  KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
113 
114  KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
115  location, gtid ) );
116 }
117 
118 //---------------------------------------------------------------------------
119 // __kmp_init_task_stack: initialize the task stack for the first time
120 // after a thread_data structure is created.
121 // It should not be necessary to do this again (assuming the stack works).
122 //
123 // gtid: global thread identifier of calling thread
124 // thread_data: thread data for task team thread containing stack
125 
126 static void
127 __kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
128 {
129  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
130  kmp_stack_block_t *first_block;
131 
132  // set up the first block of the stack
133  first_block = & task_stack -> ts_first_block;
134  task_stack -> ts_top = (kmp_taskdata_t **) first_block;
135  memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
136 
137  // initialize the stack to be empty
138  task_stack -> ts_entries = TASK_STACK_EMPTY;
139  first_block -> sb_next = NULL;
140  first_block -> sb_prev = NULL;
141 }
142 
143 
144 //---------------------------------------------------------------------------
145 // __kmp_free_task_stack: free the task stack when thread_data is destroyed.
146 //
147 // gtid: global thread identifier for calling thread
148 // thread_data: thread info for thread containing stack
149 
150 static void
151 __kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
152 {
153  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
154  kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
155 
156  KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
157  // free from the second block of the stack
158  while ( stack_block != NULL ) {
159  kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
160 
161  stack_block -> sb_next = NULL;
162  stack_block -> sb_prev = NULL;
163  if (stack_block != & task_stack -> ts_first_block) {
164  __kmp_thread_free( __kmp_threads[ gtid ], stack_block ); // free the block, if not the first
165  }
166  stack_block = next_block;
167  }
168  // initialize the stack to be empty
169  task_stack -> ts_entries = 0;
170  task_stack -> ts_top = NULL;
171 }
172 
173 
174 //---------------------------------------------------------------------------
175 // __kmp_push_task_stack: Push the tied task onto the task stack.
176 // Grow the stack if necessary by allocating another block.
177 //
178 // gtid: global thread identifier for calling thread
179 // thread: thread info for thread containing stack
180 // tied_task: the task to push on the stack
181 
182 static void
183 __kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
184 {
185  // GEH - need to consider what to do if tt_threads_data not allocated yet
186  kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
187  tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
188  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
189 
190  if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
191  return; // Don't push anything on stack if team or team tasks are serialized
192  }
193 
194  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
195  KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
196 
197  KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
198  gtid, thread, tied_task ) );
199  // Store entry
200  * (task_stack -> ts_top) = tied_task;
201 
202  // Do bookkeeping for next push
203  task_stack -> ts_top++;
204  task_stack -> ts_entries++;
205 
206  if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
207  {
208  // Find beginning of this task block
209  kmp_stack_block_t *stack_block =
210  (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
211 
212  // Check if we already have a block
213  if ( stack_block -> sb_next != NULL )
214  { // reset ts_top to beginning of next block
215  task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
216  }
217  else
218  { // Alloc new block and link it up
219  kmp_stack_block_t *new_block = (kmp_stack_block_t *)
220  __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
221 
222  task_stack -> ts_top = & new_block -> sb_block[0];
223  stack_block -> sb_next = new_block;
224  new_block -> sb_prev = stack_block;
225  new_block -> sb_next = NULL;
226 
227  KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
228  gtid, tied_task, new_block ) );
229  }
230  }
231  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
232 }
233 
234 //---------------------------------------------------------------------------
235 // __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
236 // the task, just check to make sure it matches the ending task passed in.
237 //
238 // gtid: global thread identifier for the calling thread
239 // thread: thread info structure containing stack
240 // tied_task: the task popped off the stack
241 // ending_task: the task that is ending (should match popped task)
242 
243 static void
244 __kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
245 {
246  // GEH - need to consider what to do if tt_threads_data not allocated yet
247  kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
248  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
249  kmp_taskdata_t *tied_task;
250 
251  if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
252  return; // Don't pop anything from stack if team or team tasks are serialized
253  }
254 
255  KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
256  KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
257 
258  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
259 
260  // fix up ts_top if we need to pop from previous block
261  if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
262  {
263  kmp_stack_block_t *stack_block =
264  (kmp_stack_block_t *) (task_stack -> ts_top) ;
265 
266  stack_block = stack_block -> sb_prev;
267  task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
268  }
269 
270  // finish bookkeeping
271  task_stack -> ts_top--;
272  task_stack -> ts_entries--;
273 
274  tied_task = * (task_stack -> ts_top );
275 
276  KMP_DEBUG_ASSERT( tied_task != NULL );
277  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
278  KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly
279 
280  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
281  return;
282 }
283 #endif /* BUILD_TIED_TASK_STACK */
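/*
 * A minimal stand-alone sketch of the block-boundary test used by the stack code
 * above, assuming a hypothetical block size of 8 (stand-ins for
 * TASK_STACK_BLOCK_SIZE / TASK_STACK_INDEX_MASK).  example_stack_block_boundary()
 * is illustrative only and not part of this file.
 */
#include <assert.h>

static void example_stack_block_boundary( void )
{
    const unsigned block_size = 8;               /* stand-in for TASK_STACK_BLOCK_SIZE */
    const unsigned index_mask = block_size - 1;  /* stand-in for TASK_STACK_INDEX_MASK */

    /* An entry count that is a multiple of the block size marks a block boundary,
       so the push/pop code must hop to the next/previous linked block. */
    assert( ( 8u  & index_mask ) == 0 );   /* boundary: first slot of the 2nd block */
    assert( ( 16u & index_mask ) == 0 );   /* boundary: first slot of the 3rd block */
    assert( ( 9u  & index_mask ) != 0 );   /* interior slot, stay in the same block */
}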
284 
285 //---------------------------------------------------
286 // __kmp_push_task: Add a task to the thread's deque
287 
288 static kmp_int32
289 __kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
290 {
291  kmp_info_t * thread = __kmp_threads[ gtid ];
292  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
293  kmp_task_team_t * task_team = thread->th.th_task_team;
294  kmp_int32 tid = __kmp_tid_from_gtid( gtid );
295  kmp_thread_data_t * thread_data;
296 
297  KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
298 
299  // The first check avoids building task_team thread data if serialized
300  if ( taskdata->td_flags.task_serial ) {
301  KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
302  gtid, taskdata ) );
303  return TASK_NOT_PUSHED;
304  }
305 
306  // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
307  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
308  if ( ! KMP_TASKING_ENABLED( task_team, thread->th.th_task_state ) ) {
309  __kmp_enable_tasking( task_team, thread );
310  }
311  KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
312  KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
313 
314  // Find tasking deque specific to encountering thread
315  thread_data = & task_team -> tt.tt_threads_data[ tid ];
316 
317  // No lock needed since only owner can allocate
318  if (thread_data -> td.td_deque == NULL ) {
319  __kmp_alloc_task_deque( thread, thread_data );
320  }
321 
322  // Check if deque is full
323  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
324  {
325  KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
326  gtid, taskdata ) );
327  return TASK_NOT_PUSHED;
328  }
329 
330  // Lock the deque for the task push operation
331  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
332 
333  // Must have room since no thread other than the calling thread can add tasks
334  KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
335 
336  thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata
337  // Wrap index.
338  thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
339  TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count
340 
341  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
342 
343  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
344  "task=%p ntasks=%d head=%u tail=%u\n",
345  gtid, taskdata, thread_data->td.td_deque_ntasks,
346  thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
347 
348  return TASK_SUCCESSFULLY_PUSHED;
349 }
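/*
 * A minimal stand-alone sketch of the ring-buffer index arithmetic used by the
 * deque above: because the deque capacity is a power of two, "(i + 1) & mask"
 * wraps an index without a divide.  The capacity of 4 is a hypothetical
 * stand-in for TASK_DEQUE_SIZE / TASK_DEQUE_MASK; example_deque_wrap() is
 * illustrative only and not part of this file.
 */
#include <assert.h>

static void example_deque_wrap( void )
{
    const unsigned capacity = 4;             /* stand-in for TASK_DEQUE_SIZE */
    const unsigned mask     = capacity - 1;  /* stand-in for TASK_DEQUE_MASK */
    unsigned tail = 3;                       /* last slot in the buffer      */

    tail = ( tail + 1 ) & mask;              /* push advances tail and wraps */
    assert( tail == 0 );

    tail = ( tail - 1 ) & mask;              /* owner pop backs up and wraps */
    assert( tail == 3 );
}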
350 
351 
352 //-----------------------------------------------------------------------------------------
353 // __kmp_pop_current_task_from_thread: restore the thread's current task to its parent when a team ends
354 // this_thr: thread structure to set current_task in.
355 
356 void
357 __kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
358 {
359  KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
360  "curtask_parent=%p\n",
361  0, this_thr, this_thr -> th.th_current_task,
362  this_thr -> th.th_current_task -> td_parent ) );
363 
364  this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
365 
366  KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
367  "curtask_parent=%p\n",
368  0, this_thr, this_thr -> th.th_current_task,
369  this_thr -> th.th_current_task -> td_parent ) );
370 }
371 
372 
373 //---------------------------------------------------------------------------------------
374 // __kmp_push_current_task_to_thread: set up current task in called thread for a new team
375 // this_thr: thread structure to set up
376 // team: team for implicit task data
377 // tid: thread within team to set up
378 
379 void
380 __kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
381 {
382  // the thread's current task becomes the parent of the newly created implicit tasks of the new team
383  KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
384  "parent_task=%p\n",
385  tid, this_thr, this_thr->th.th_current_task,
386  team->t.t_implicit_task_taskdata[tid].td_parent ) );
387 
388  KMP_DEBUG_ASSERT (this_thr != NULL);
389 
390  if( tid == 0 ) {
391  if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
392  team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
393  this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
394  }
395  } else {
396  team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
397  this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
398  }
399 
400  KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
401  "parent_task=%p\n",
402  tid, this_thr, this_thr->th.th_current_task,
403  team->t.t_implicit_task_taskdata[tid].td_parent ) );
404 }
405 
406 
407 //----------------------------------------------------------------------
408 // __kmp_task_start: bookkeeping for a task starting execution
409 // GTID: global thread id of calling thread
410 // task: task starting execution
411 // current_task: task suspending
412 
413 static void
414 __kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
415 {
416  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
417  kmp_info_t * thread = __kmp_threads[ gtid ];
418 
419  KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
420  gtid, taskdata, current_task) );
421 
422  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
423 
424  // mark currently executing task as suspended
425  // TODO: GEH - make sure root team implicit task is initialized properly.
426  // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
427  current_task -> td_flags.executing = 0;
428 
429  // Add task to stack if tied
430 #ifdef BUILD_TIED_TASK_STACK
431  if ( taskdata -> td_flags.tiedness == TASK_TIED )
432  {
433  __kmp_push_task_stack( gtid, thread, taskdata );
434  }
435 #endif /* BUILD_TIED_TASK_STACK */
436 
437  // mark starting task as executing and as current task
438  thread -> th.th_current_task = taskdata;
439 
440  KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
441  KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
442  taskdata -> td_flags.started = 1;
443  taskdata -> td_flags.executing = 1;
444  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
445  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
446 
447  // GEH TODO: shouldn't we pass some sort of location identifier here?
448  // APT: yes, we will pass location here.
449  // need to store current thread state (in a thread or taskdata structure)
450  // before setting work_state, otherwise wrong state is set after end of task
451 
452  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
453  gtid, taskdata ) );
454 
455  return;
456 }
457 
458 
459 //----------------------------------------------------------------------
460 // __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
461 // loc_ref: source location information; points to beginning of task block.
462 // gtid: global thread number.
463 // task: task thunk for the started task.
464 
465 void
466 __kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
467 {
468  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
469  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
470 
471  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
472  gtid, loc_ref, taskdata, current_task ) );
473 
474  taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred.
475  __kmp_task_start( gtid, task, current_task );
476 
477  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
478  gtid, loc_ref, taskdata ) );
479 
480  return;
481 }
482 
483 #ifdef TASK_UNUSED
484 //----------------------------------------------------------------------
485 // __kmpc_omp_task_begin: report that a given task has started execution
486 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
487 
488 void
489 __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
490 {
491  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
492 
493  KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
494  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
495 
496  __kmp_task_start( gtid, task, current_task );
497 
498  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
499  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
500 
501  return;
502 }
503 #endif // TASK_UNUSED
504 
505 
506 //-------------------------------------------------------------------------------------
507 // __kmp_free_task: free the current task space and the space for shareds
508 // gtid: Global thread ID of calling thread
509 // taskdata: task to free
510 // thread: thread data structure of caller
511 
512 static void
513 __kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
514 {
515  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
516  gtid, taskdata) );
517 
518  // Check to make sure all flags and counters have the correct values
519  KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
520  KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
521  KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
522  KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
523  KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
524  KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
525 
526  taskdata->td_flags.freed = 1;
527  // deallocate the taskdata and shared variable blocks associated with this task
528  #if USE_FAST_MEMORY
529  __kmp_fast_free( thread, taskdata );
530  #else /* ! USE_FAST_MEMORY */
531  __kmp_thread_free( thread, taskdata );
532  #endif
533 
534  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
535  gtid, taskdata) );
536 }
537 
538 //-------------------------------------------------------------------------------------
539 // __kmp_free_task_and_ancestors: free the current task and ancestors without children
540 //
541 // gtid: Global thread ID of calling thread
542 // taskdata: task to free
543 // thread: thread data structure of caller
544 
545 static void
546 __kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
547 {
548  kmp_int32 children = 0;
549  kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
550 
551  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
552 
553  if ( !team_or_tasking_serialized ) {
554  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
555  KMP_DEBUG_ASSERT( children >= 0 );
556  }
557 
558  // Now, go up the ancestor tree to see if any ancestors can now be freed.
559  while ( children == 0 )
560  {
561  kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
562 
563  KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
564  "and freeing itself\n", gtid, taskdata) );
565 
566  // --- Deallocate my ancestor task ---
567  __kmp_free_task( gtid, taskdata, thread );
568 
569  taskdata = parent_taskdata;
570 
571  // Stop checking ancestors at implicit task or if tasking serialized
572  // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
573  if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
574  return;
575 
576  if ( !team_or_tasking_serialized ) {
577  // Predecrement simulated by "- 1" calculation
578  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
579  KMP_DEBUG_ASSERT( children >= 0 );
580  }
581  }
582 
583  KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
584  "not freeing it yet\n", gtid, taskdata, children) );
585 }
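/*
 * Worked example (illustrative only, not part of this file) of the
 * td_allocated_child_tasks accounting that drives the loop above, assuming a
 * parent task P with a single explicit child C:
 *
 *   - at allocation, P's count is 1 (itself) and becomes 2 once C is allocated;
 *     C starts at 1 (see __kmp_task_alloc below).
 *   - when C finishes, C's own count drops 1 -> 0, so C is freed, and P's count
 *     drops 2 -> 1, so P is kept.
 *   - once P has also completed and its count finally drops to 0, P's storage is
 *     freed and the walk continues with P's parent.
 */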
586 
587 //---------------------------------------------------------------------
588 // __kmp_task_finish: bookkeeping to do when a task finishes execution
589 // gtid: global thread ID for calling thread
590 // task: task to be finished
591 // resumed_task: task to be resumed. (may be NULL if task is serialized)
592 
593 static void
594 __kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
595 {
596  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
597  kmp_info_t * thread = __kmp_threads[ gtid ];
598  kmp_int32 children = 0;
599 
600  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
601  gtid, taskdata, resumed_task) );
602 
603  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
604 
605  // Pop task from stack if tied
606 #ifdef BUILD_TIED_TASK_STACK
607  if ( taskdata -> td_flags.tiedness == TASK_TIED )
608  {
609  __kmp_pop_task_stack( gtid, thread, taskdata );
610  }
611 #endif /* BUILD_TIED_TASK_STACK */
612 
613  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
614  taskdata -> td_flags.complete = 1; // mark the task as completed
615  KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
616  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
617 
618  // Only need to keep track of count if team parallel and tasking not serialized
619  if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
620  // Predecrement simulated by "- 1" calculation
621  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
622  KMP_DEBUG_ASSERT( children >= 0 );
623 #if OMP_40_ENABLED
624  if ( taskdata->td_taskgroup )
625  KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
626  __kmp_release_deps(gtid,taskdata);
627 #endif
628  }
629 
630  // td_flags.executing must be marked as 0 after __kmp_release_deps has been called
631  // Otherwise, if a task is executed immediately from the release_deps code
632  // the flag will be reset to 1 again by this same function
633  KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
634  taskdata -> td_flags.executing = 0; // suspend the finishing task
635 
636  KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
637  gtid, taskdata, children) );
638 
639 #if OMP_40_ENABLED
640  /* If the task's destructor thunk flag has been set, we need to invoke the
641  destructor thunk that has been generated by the compiler.
642  The code is placed here, since at this point other tasks might have been released,
643  hence overlapping the destructor invocations with some other work in the
644  released tasks. The OpenMP spec is not specific on when the destructors are
645  invoked, so we should be free to choose.
646  */
647  if (taskdata->td_flags.destructors_thunk) {
648  kmp_routine_entry_t destr_thunk = task->destructors;
649  KMP_ASSERT(destr_thunk);
650  destr_thunk(gtid, task);
651  }
652 #endif // OMP_40_ENABLED
653 
654  // bookkeeping for resuming task:
655  // GEH - note tasking_ser => task_serial
656  KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
657  taskdata->td_flags.task_serial);
658  if ( taskdata->td_flags.task_serial )
659  {
660  if (resumed_task == NULL) {
661  resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
662  }
663  else {
664  // verify resumed task passed in points to parent
665  KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
666  }
667  }
668  else {
669  KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that the resumed task is passed as an argument
670  }
671 
672  // Free this task and then ancestor tasks if they have no children.
673  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
674 
675  __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task
676 
677  // TODO: GEH - make sure root team implicit task is initialized properly.
678  // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
679  resumed_task->td_flags.executing = 1; // resume previous task
680 
681  KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
682  gtid, taskdata, resumed_task) );
683 
684  return;
685 }
686 
687 //---------------------------------------------------------------------
688 // __kmpc_omp_task_complete_if0: report that a task has completed execution
689 // loc_ref: source location information; points to end of task block.
690 // gtid: global thread number.
691 // task: task thunk for the completed task.
692 
693 void
694 __kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
695 {
696  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
697  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
698 
699  __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume
700 
701  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
702  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
703 
704  return;
705 }
706 
707 #ifdef TASK_UNUSED
708 //---------------------------------------------------------------------
709 // __kmpc_omp_task_complete: report that a task has completed execution
710 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
711 
712 void
713 __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
714 {
715  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
716  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
717 
718  __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume
719 
720  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
721  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
722  return;
723 }
724 #endif // TASK_UNUSED
725 
726 
727 //----------------------------------------------------------------------------------------------------
728 // __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
729 //
730 // loc_ref: reference to source location of parallel region
731 // this_thr: thread data structure corresponding to implicit task
732 // team: team for this_thr
733 // tid: thread id of given thread within team
734 // set_curr_task: TRUE if need to push current task to thread
735 // NOTE: Routine does not set up the implicit task ICVs. This is assumed to have already been done elsewhere.
736 // TODO: Get better loc_ref. Value passed in may be NULL
737 
738 void
739 __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
740 {
741  kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
742 
743  KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
744  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
745 
746  task->td_task_id = KMP_GEN_TASK_ID();
747  task->td_team = team;
748 // task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger)
749  task->td_ident = loc_ref;
750  task->td_taskwait_ident = NULL;
751  task->td_taskwait_counter = 0;
752  task->td_taskwait_thread = 0;
753 
754  task->td_flags.tiedness = TASK_TIED;
755  task->td_flags.tasktype = TASK_IMPLICIT;
756  // All implicit tasks are executed immediately, not deferred
757  task->td_flags.task_serial = 1;
758  task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
759  task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
760 
761  task->td_flags.started = 1;
762  task->td_flags.executing = 1;
763  task->td_flags.complete = 0;
764  task->td_flags.freed = 0;
765 
766 #if OMP_40_ENABLED
767  task->td_dephash = NULL;
768  task->td_depnode = NULL;
769 #endif
770 
771  if (set_curr_task) { // only do this initialization the first time a thread is created
772  task->td_incomplete_child_tasks = 0;
773  task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task
774 #if OMP_40_ENABLED
775  task->td_taskgroup = NULL; // An implicit task does not have taskgroup
776 #endif
777  __kmp_push_current_task_to_thread( this_thr, team, tid );
778  } else {
779  KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
780  KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
781  }
782 
783  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
784  tid, team, task ) );
785 }
786 
787 // Round up a size to a multiple of val, which must be a power of two
788 // Used to insert padding between structures co-allocated using a single malloc() call
789 static size_t
790 __kmp_round_up_to_val( size_t size, size_t val ) {
791  if ( size & ( val - 1 ) ) {
792  size &= ~ ( val - 1 );
793  if ( size <= KMP_SIZE_T_MAX - val ) {
794  size += val; // Round up if there is no overflow.
795  }; // if
796  }; // if
797  return size;
798 } // __kmp_round_up_to_val
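/*
 * Worked example (illustrative only): with an 8-byte alignment,
 *     __kmp_round_up_to_val( 52, 8 ) == 56   // 52 is not 8-aligned, round up
 *     __kmp_round_up_to_val( 56, 8 ) == 56   // already aligned, unchanged
 * This is what pads the taskdata+task block so the shareds pointers that follow
 * it stay pointer-aligned.
 */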
799 
800 
801 //---------------------------------------------------------------------------------
802 // __kmp_task_alloc: Allocate the taskdata and task data structures for a task
803 //
804 // loc_ref: source location information
805 // gtid: global thread number.
806 // flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
807 // Converted from kmp_int32 to kmp_tasking_flags_t in routine.
808 // sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
809 // sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
810 // task_entry: Pointer to task code entry point generated by compiler.
811 // returns: a pointer to the allocated kmp_task_t structure (task).
812 
813 kmp_task_t *
814 __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
815  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
816  kmp_routine_entry_t task_entry )
817 {
818  kmp_task_t *task;
819  kmp_taskdata_t *taskdata;
820  kmp_info_t *thread = __kmp_threads[ gtid ];
821  kmp_team_t *team = thread->th.th_team;
822  kmp_taskdata_t *parent_task = thread->th.th_current_task;
823  size_t shareds_offset;
824 
825  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
826  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
827  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
828  sizeof_shareds, task_entry) );
829 
830  if ( parent_task->td_flags.final ) {
831  if (flags->merged_if0) {
832  }
833  flags->final = 1;
834  }
835 
836  // Calculate shared structure offset including padding after kmp_task_t struct
837  // to align pointers in shared struct
838  shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
839  shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
840 
841  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
842  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
843  gtid, shareds_offset) );
844  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
845  gtid, sizeof_shareds) );
846 
847  // Avoid double allocation here by combining shareds with taskdata
848  #if USE_FAST_MEMORY
849  taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
850  #else /* ! USE_FAST_MEMORY */
851  taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
852  #endif /* USE_FAST_MEMORY */
853 
854  task = KMP_TASKDATA_TO_TASK(taskdata);
855 
856  // Make sure task & taskdata are aligned appropriately
857 #if KMP_ARCH_X86
858  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
859  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
860 #else
861  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
862  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
863 #endif
864  if (sizeof_shareds > 0) {
865  // Avoid double allocation here by combining shareds with taskdata
866  task->shareds = & ((char *) taskdata)[ shareds_offset ];
867  // Make sure shareds struct is aligned to pointer size
868  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
869  } else {
870  task->shareds = NULL;
871  }
872  task->routine = task_entry;
873  task->part_id = 0; // AC: Always start with 0 part id
874 
875  taskdata->td_task_id = KMP_GEN_TASK_ID();
876  taskdata->td_team = team;
877  taskdata->td_alloc_thread = thread;
878  taskdata->td_parent = parent_task;
879  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
880  taskdata->td_ident = loc_ref;
881  taskdata->td_taskwait_ident = NULL;
882  taskdata->td_taskwait_counter = 0;
883  taskdata->td_taskwait_thread = 0;
884  KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
885  copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
886 
887  taskdata->td_flags.tiedness = flags->tiedness;
888  taskdata->td_flags.final = flags->final;
889  taskdata->td_flags.merged_if0 = flags->merged_if0;
890 #if OMP_40_ENABLED
891  taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
892 #endif // OMP_40_ENABLED
893  taskdata->td_flags.tasktype = TASK_EXPLICIT;
894 
895  // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
896  taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
897 
898  // GEH - TODO: fix this to copy parent task's value of team_serial flag
899  taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
900 
901  // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
902  // tasks are not left until program termination to execute. Also, it helps locality to execute
903  // immediately.
904  taskdata->td_flags.task_serial = ( taskdata->td_flags.final
905  || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
906 
907  taskdata->td_flags.started = 0;
908  taskdata->td_flags.executing = 0;
909  taskdata->td_flags.complete = 0;
910  taskdata->td_flags.freed = 0;
911 
912  taskdata->td_flags.native = flags->native;
913 
914  taskdata->td_incomplete_child_tasks = 0;
915  taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children
916 #if OMP_40_ENABLED
917  taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
918  taskdata->td_dephash = NULL;
919  taskdata->td_depnode = NULL;
920 #endif
921  // Only need to keep track of child task counts if team parallel and tasking not serialized
922  if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
923  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
924 #if OMP_40_ENABLED
925  if ( parent_task->td_taskgroup )
926  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
927 #endif
928  // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
929  if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
930  KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
931  }
932  }
933 
934  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
935  gtid, taskdata, taskdata->td_parent) );
936 
937  return task;
938 }
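/*
 * Layout sketch (illustrative only) of the single allocation made above:
 *
 *     +---------------------+  <- taskdata (start of the allocation)
 *     | kmp_taskdata_t      |
 *     +---------------------+  <- task = KMP_TASKDATA_TO_TASK(taskdata)
 *     | kmp_task_t +        |
 *     | private variables   |     sizeof_kmp_task_t bytes
 *     +---------------------+
 *     | padding             |     __kmp_round_up_to_val(..., sizeof(void *))
 *     +---------------------+  <- task->shareds (NULL if sizeof_shareds == 0)
 *     | shared var pointers |     sizeof_shareds bytes
 *     +---------------------+
 */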
939 
940 
941 kmp_task_t *
942 __kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
943  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
944  kmp_routine_entry_t task_entry )
945 {
946  kmp_task_t *retval;
947  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
948 
949  input_flags->native = FALSE;
950  // __kmp_task_alloc() sets up all other runtime flags
951 
952  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
953  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
954  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
955  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
956 
957  retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
958  sizeof_shareds, task_entry );
959 
960  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
961 
962  return retval;
963 }
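/*
 * A hedged sketch (not taken from this codebase) of how a compiler might drive
 * the two entry points above for a construct such as
 *
 *     #pragma omp task shared(x)
 *         x++;
 *
 * The outlined routine name, the flag encoding and the shareds layout below are
 * assumptions for illustration only:
 *
 *     kmp_int32   gtid  = __kmpc_global_thread_num( &loc );
 *     kmp_int32   flags = 1;               // tiedness bit set, i.e. a tied task
 *     kmp_task_t *t = __kmpc_omp_task_alloc( &loc, gtid, flags,
 *                                            sizeof(kmp_task_t), sizeof(void *),
 *                                            &outlined_task_entry );
 *     ((void **)t->shareds)[0] = &x;       // publish the shared variable
 *     __kmpc_omp_task( &loc, gtid, t );    // defer, or run now if the deque is full
 */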
964 
965 //-----------------------------------------------------------
966 // __kmp_invoke_task: invoke the specified task
967 //
968 // gtid: global thread ID of caller
969 // task: the task to invoke
970 // current_task: the task to resume after task invocation
971 
972 static void
973 __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
974 {
975  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
976 #if OMP_40_ENABLED
977  int discard = 0 /* false */;
978 #endif
979  KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
980  gtid, taskdata, current_task) );
981 
982  __kmp_task_start( gtid, task, current_task );
983 
984 #if OMP_40_ENABLED
985  // TODO: cancel tasks if the parallel region has also been cancelled
986  // TODO: check if this sequence can be hoisted above __kmp_task_start
987  // if cancellation has been enabled for this run ...
988  if (__kmp_omp_cancellation) {
989  kmp_info_t *this_thr = __kmp_threads [ gtid ];
990  kmp_team_t * this_team = this_thr->th.th_team;
991  kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
992  if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
993  // cancellation was requested for this task's taskgroup or its parallel region, so discard the task
994  discard = 1 /* true */;
995  }
996  }
997 
998  //
999  // Invoke the task routine and pass in relevant data.
1000  // Thunks generated by gcc take a different argument list.
1001  //
1002  if (!discard) {
1003 #endif // OMP_40_ENABLED
1004 #ifdef KMP_GOMP_COMPAT
1005  if (taskdata->td_flags.native) {
1006  ((void (*)(void *))(*(task->routine)))(task->shareds);
1007  }
1008  else
1009 #endif /* KMP_GOMP_COMPAT */
1010  {
1011  (*(task->routine))(gtid, task);
1012  }
1013 #if OMP_40_ENABLED
1014  }
1015 #endif // OMP_40_ENABLED
1016 
1017  __kmp_task_finish( gtid, task, current_task );
1018 
1019  KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
1020  gtid, taskdata, current_task) );
1021  return;
1022 }
1023 
1024 //-----------------------------------------------------------------------
1025 // __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1026 //
1027 // loc_ref: location of original task pragma (ignored)
1028 // gtid: Global Thread ID of encountering thread
1029 // new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1030 // Returns:
1031 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1032 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1033 
1034 kmp_int32
1035 __kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1036 {
1037  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1038 
1039  KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1040  gtid, loc_ref, new_taskdata ) );
1041 
1042  /* Should we execute the new task or queue it? For now, let's just always try to
1043  queue it. If the queue fills up, then we'll execute it. */
1044 
1045  if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1046  { // Execute this task immediately
1047  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1048  new_taskdata->td_flags.task_serial = 1;
1049  __kmp_invoke_task( gtid, new_task, current_task );
1050  }
1051 
1052  KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1053  "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1054  new_taskdata ) );
1055 
1056  return TASK_CURRENT_NOT_QUEUED;
1057 }
1058 
1059 //---------------------------------------------------------------------
1060 // __kmp_omp_task: Schedule a non-thread-switchable task for execution
1061 // gtid: Global Thread ID of encountering thread
1062 // new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1063 // serialize_immediate: if TRUE and the task is executed immediately, its execution is serialized
1064 // returns:
1065 //
1066 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1067 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1068 kmp_int32
1069 __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1070 {
1071  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1072 
1073  /* Should we execute the new task or queue it? For now, let's just always try to
1074  queue it. If the queue fills up, then we'll execute it. */
1075 
1076  if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1077  { // Execute this task immediately
1078  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1079  if ( serialize_immediate )
1080  new_taskdata -> td_flags.task_serial = 1;
1081  __kmp_invoke_task( gtid, new_task, current_task );
1082  }
1083 
1084 
1085  return TASK_CURRENT_NOT_QUEUED;
1086 }
1087 
1088 //---------------------------------------------------------------------
1089 // __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
1090 // the parent thread only!
1091 // loc_ref: location of original task pragma (ignored)
1092 // gtid: Global Thread ID of encountering thread
1093 // new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1094 // returns:
1095 //
1096 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1097 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1098 
1099 kmp_int32
1100 __kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1101 {
1102  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1103  kmp_int32 res;
1104 
1105  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1106  gtid, loc_ref, new_taskdata ) );
1107 
1108  res = __kmp_omp_task(gtid,new_task,true);
1109 
1110  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1111  gtid, loc_ref, new_taskdata ) );
1112  return res;
1113 }
1114 
1115 //-------------------------------------------------------------------------------------
1116 // __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1117 
1118 kmp_int32
1119 __kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1120 {
1121  kmp_taskdata_t * taskdata;
1122  kmp_info_t * thread;
1123  int thread_finished = FALSE;
1124 
1125  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
1126  gtid, loc_ref) );
1127 
1128  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1129  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1130 
1131  thread = __kmp_threads[ gtid ];
1132  taskdata = thread -> th.th_current_task;
1133 #if USE_ITT_BUILD
1134  // Note: These values are used by ITT events as well.
1135 #endif /* USE_ITT_BUILD */
1136  taskdata->td_taskwait_counter += 1;
1137  taskdata->td_taskwait_ident = loc_ref;
1138  taskdata->td_taskwait_thread = gtid + 1;
1139 
1140 #if USE_ITT_BUILD
1141  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1142  if ( itt_sync_obj != NULL )
1143  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1144 #endif /* USE_ITT_BUILD */
1145 
1146  if ( ! taskdata->td_flags.team_serial ) {
1147  // GEH: if team serialized, avoid reading the volatile variable below.
1148  kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
1149  while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
1150  flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1151  USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1152  }
1153  }
1154 #if USE_ITT_BUILD
1155  if ( itt_sync_obj != NULL )
1156  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1157 #endif /* USE_ITT_BUILD */
1158 
1159  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1160  taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1161  }
1162 
1163  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1164  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1165 
1166  return TASK_CURRENT_NOT_QUEUED;
1167 }
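/*
 * Source-level mapping (illustrative only): a bare
 *
 *     #pragma omp taskwait
 *
 * is typically lowered to a single call of the entry point above, roughly
 *     __kmpc_omp_taskwait( &loc, __kmpc_global_thread_num( &loc ) );
 * and the caller is parked in the execute_tasks loop until its direct children
 * have completed.
 */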
1168 
1169 
1170 //-------------------------------------------------
1171 // __kmpc_omp_taskyield: switch to a different task
1172 
1173 kmp_int32
1174 __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1175 {
1176  kmp_taskdata_t * taskdata;
1177  kmp_info_t * thread;
1178  int thread_finished = FALSE;
1179 
1180  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1181  gtid, loc_ref, end_part) );
1182 
1183  if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
1184  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1185 
1186  thread = __kmp_threads[ gtid ];
1187  taskdata = thread -> th.th_current_task;
1188  // Should we model this as a task wait or not?
1189 #if USE_ITT_BUILD
1190  // Note: These values are used by ITT events as well.
1191 #endif /* USE_ITT_BUILD */
1192  taskdata->td_taskwait_counter += 1;
1193  taskdata->td_taskwait_ident = loc_ref;
1194  taskdata->td_taskwait_thread = gtid + 1;
1195 
1196 #if USE_ITT_BUILD
1197  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1198  if ( itt_sync_obj != NULL )
1199  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1200 #endif /* USE_ITT_BUILD */
1201  if ( ! taskdata->td_flags.team_serial ) {
1202  kmp_task_team_t * task_team = thread->th.th_task_team;
1203  if (task_team != NULL) {
1204  if (KMP_TASKING_ENABLED(task_team, thread->th.th_task_state)) {
1205  __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1206  USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1207  }
1208  }
1209  }
1210 #if USE_ITT_BUILD
1211  if ( itt_sync_obj != NULL )
1212  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1213 #endif /* USE_ITT_BUILD */
1214 
1215  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1216  taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1217  }
1218 
1219  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1220  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1221 
1222  return TASK_CURRENT_NOT_QUEUED;
1223 }
1224 
1225 
1226 #if OMP_40_ENABLED
1227 //-------------------------------------------------------------------------------------
1228 // __kmpc_taskgroup: Start a new taskgroup
1229 
1230 void
1231 __kmpc_taskgroup( ident_t* loc, int gtid )
1232 {
1233  kmp_info_t * thread = __kmp_threads[ gtid ];
1234  kmp_taskdata_t * taskdata = thread->th.th_current_task;
1235  kmp_taskgroup_t * tg_new =
1236  (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1237  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1238  tg_new->count = 0;
1239  tg_new->cancel_request = cancel_noreq;
1240  tg_new->parent = taskdata->td_taskgroup;
1241  taskdata->td_taskgroup = tg_new;
1242 }
1243 
1244 
1245 //-------------------------------------------------------------------------------------
1246 // __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1247 // and its descendants are complete
1248 
1249 void
1250 __kmpc_end_taskgroup( ident_t* loc, int gtid )
1251 {
1252  kmp_info_t * thread = __kmp_threads[ gtid ];
1253  kmp_taskdata_t * taskdata = thread->th.th_current_task;
1254  kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1255  int thread_finished = FALSE;
1256 
1257  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1258  KMP_DEBUG_ASSERT( taskgroup != NULL );
1259 
1260  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1261 #if USE_ITT_BUILD
1262  // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1263  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1264  if ( itt_sync_obj != NULL )
1265  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1266 #endif /* USE_ITT_BUILD */
1267 
1268  if ( ! taskdata->td_flags.team_serial ) {
1269  kmp_flag_32 flag(&(taskgroup->count), 0U);
1270  while ( TCR_4(taskgroup->count) != 0 ) {
1271  flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1272  USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1273  }
1274  }
1275 
1276 #if USE_ITT_BUILD
1277  if ( itt_sync_obj != NULL )
1278  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1279 #endif /* USE_ITT_BUILD */
1280  }
1281  KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1282 
1283  // Restore parent taskgroup for the current task
1284  taskdata->td_taskgroup = taskgroup->parent;
1285  __kmp_thread_free( thread, taskgroup );
1286 
1287  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1288 }
1289 #endif
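/*
 * Source-level mapping (illustrative only, not taken from this codebase): a
 * taskgroup region such as
 *
 *     #pragma omp taskgroup
 *     {
 *         #pragma omp task
 *             work();
 *     }
 *
 * is typically bracketed by the two entry points above:
 *
 *     __kmpc_taskgroup( &loc, gtid );
 *     ...                 // child tasks created here bump td_taskgroup->count
 *     __kmpc_end_taskgroup( &loc, gtid );   // spins in execute_tasks until count == 0
 */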
1290 
1291 
1292 //------------------------------------------------------
1293 // __kmp_remove_my_task: remove a task from my own deque
1294 
1295 static kmp_task_t *
1296 __kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1297  kmp_int32 is_constrained )
1298 {
1299  kmp_task_t * task;
1300  kmp_taskdata_t * taskdata;
1301  kmp_thread_data_t *thread_data;
1302  kmp_uint32 tail;
1303 
1304  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1305  KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1306 
1307  thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1308 
1309  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1310  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1311  thread_data->td.td_deque_tail) );
1312 
1313  if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1314  KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1315  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1316  thread_data->td.td_deque_tail) );
1317  return NULL;
1318  }
1319 
1320  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1321 
1322  if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1323  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1324  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1325  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1326  thread_data->td.td_deque_tail) );
1327  return NULL;
1328  }
1329 
1330  tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1331  taskdata = thread_data -> td.td_deque[ tail ];
1332 
1333  if (is_constrained) {
1334  // we need to check if the candidate obeys task scheduling constraint:
1335  // only child of current task can be scheduled
1336  kmp_taskdata_t * current = thread->th.th_current_task;
1337  kmp_int32 level = current->td_level;
1338  kmp_taskdata_t * parent = taskdata->td_parent;
1339  while ( parent != current && parent->td_level > level ) {
1340  parent = parent->td_parent; // check generation up to the level of the current task
1341  KMP_DEBUG_ASSERT(parent != NULL);
1342  }
1343  if ( parent != current ) {
1344  // If the tail task is not a child, then no other children can appear in the deque.
1345  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1346  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1347  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1348  thread_data->td.td_deque_tail) );
1349  return NULL;
1350  }
1351  }
1352 
1353  thread_data -> td.td_deque_tail = tail;
1354  TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1355 
1356  __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1357 
1358  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1359  gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1360  thread_data->td.td_deque_tail) );
1361 
1362  task = KMP_TASKDATA_TO_TASK( taskdata );
1363  return task;
1364 }
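
/*
 * Editor's note -- illustrative sketch only, not part of the library source:
 * the constrained-scheduling check above (and the identical one in
 * __kmp_steal_task below) can be read as a single predicate that walks the
 * candidate's ancestor chain.  The helper name is hypothetical; td_parent and
 * td_level are the kmp_taskdata_t fields the loops themselves use.
 */
static int
__sketch_obeys_scheduling_constraint( kmp_taskdata_t *candidate, kmp_taskdata_t *current )
{
    kmp_taskdata_t *parent = candidate->td_parent;
    // Climb towards the root, but never above the nesting level of 'current';
    // the candidate is schedulable only if 'current' is one of its ancestors.
    while ( parent != current && parent->td_level > current->td_level ) {
        parent = parent->td_parent;
    }
    return ( parent == current );
}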
1365 
1366 
1367 //-----------------------------------------------------------
1368 // __kmp_steal_task: remove a task from another thread's deque
1369 // Assumes that the calling thread has already checked the existence of
1370 // the task_team thread_data before calling this routine.
1371 
1372 static kmp_task_t *
1373 __kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1374  volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1375  kmp_int32 is_constrained )
1376 {
1377  kmp_task_t * task;
1378  kmp_taskdata_t * taskdata;
1379  kmp_thread_data_t *victim_td, *threads_data;
1380  kmp_int32 victim_tid, thread_tid;
1381 
1382  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1383 
1384  threads_data = task_team -> tt.tt_threads_data;
1385  KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1386 
1387  victim_tid = victim->th.th_info.ds.ds_tid;
1388  victim_td = & threads_data[ victim_tid ];
1389 
1390  KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1391  "head=%u tail=%u\n",
1392  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1393  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1394 
1395  if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1396  (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1397  {
1398  KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1399  "ntasks=%d head=%u tail=%u\n",
1400  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1401  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1402  return NULL;
1403  }
1404 
1405  __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1406 
1407  // Check again after we acquire the lock
1408  if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1409  (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1410  {
1411  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1412  KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1413  "ntasks=%d head=%u tail=%u\n",
1414  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1415  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1416  return NULL;
1417  }
1418 
1419  KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1420 
1421  if ( !is_constrained ) {
1422  taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1423  // Bump head pointer and Wrap.
1424  victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1425  } else {
1426  // While there are postponed tasks, steal from the tail of the deque (likely smaller tasks)
1427  kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1428  taskdata = victim_td -> td.td_deque[ tail ];
1429  // we need to check if the candidate obeys the task scheduling constraint:
1430  // only a descendant of the current task can be scheduled
1431  kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1432  kmp_int32 level = current->td_level;
1433  kmp_taskdata_t * parent = taskdata->td_parent;
1434  while ( parent != current && parent->td_level > level ) {
1435  parent = parent->td_parent; // check generation up to the level of the current task
1436  KMP_DEBUG_ASSERT(parent != NULL);
1437  }
1438  if ( parent != current ) {
1439  // If the tail task is not a descendant of the current task, then no other descendants can appear in the deque (?).
1440  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1441  KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1442  "ntasks=%d head=%u tail=%u\n",
1443  gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1444  task_team, victim_td->td.td_deque_ntasks,
1445  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1446  return NULL;
1447  }
1448  victim_td -> td.td_deque_tail = tail;
1449  }
1450  if (*thread_finished) {
1451  // The stealing thread had marked itself finished; un-mark it before it takes on
1452  // new work. This must be done before releasing the lock, or else other threads
1453  // (starting with the master) might be prematurely released from the barrier!!!
1454  kmp_uint32 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
1455 
1456  KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1457  gtid, count + 1, task_team) );
1458 
1459  *thread_finished = FALSE;
1460  }
1461  TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1462 
1463  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1464 
1465  KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
1466  "ntasks=%d head=%u tail=%u\n",
1467  gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1468  victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1469  victim_td->td.td_deque_tail) );
1470 
1471  task = KMP_TASKDATA_TO_TASK( taskdata );
1472  return task;
1473 }
1474 
1475 
1476 //-----------------------------------------------------------------------------
1477 // __kmp_execute_tasks_template: Choose and execute tasks until either the condition
1478 // is satisfied (return true) or there are none left (return false).
1479 // final_spin is TRUE if this is the spin at the release barrier.
1480 // thread_finished indicates whether the thread is finished executing all
1481 // the tasks it has on its deque, and is at the release barrier.
1482 // flag wraps the spin location and the value to check against; its done_check()
1483 // method reports when the spin can terminate.
1484 // flag == NULL means only execute a single task and return.
1485 template <class C>
1486 static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
1487  int *thread_finished
1488  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1489 {
1490  kmp_task_team_t * task_team;
1491  kmp_team_t * team;
1492  kmp_thread_data_t * threads_data;
1493  kmp_task_t * task;
1494  kmp_taskdata_t * current_task = thread -> th.th_current_task;
1495  volatile kmp_uint32 * unfinished_threads;
1496  kmp_int32 nthreads, last_stolen, k, tid;
1497 
1498  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1499  KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1500 
1501  task_team = thread -> th.th_task_team;
1502  KMP_DEBUG_ASSERT( task_team != NULL );
1503 
1504  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
1505  gtid, final_spin, *thread_finished) );
1506 
1507  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1508  KMP_DEBUG_ASSERT( threads_data != NULL );
1509 
1510  nthreads = task_team -> tt.tt_nproc;
1511  unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
1512  KMP_DEBUG_ASSERT( nthreads > 1 );
1513  KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1514 
1515  // Choose tasks from our own work queue.
1516  start:
1517  while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1518 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1519  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1520  if ( itt_sync_obj == NULL ) {
1521  // we are at fork barrier where we could not get the object reliably
1522  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1523  }
1524  __kmp_itt_task_starting( itt_sync_obj );
1525  }
1526 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1527  __kmp_invoke_task( gtid, task, current_task );
1528 #if USE_ITT_BUILD
1529  if ( itt_sync_obj != NULL )
1530  __kmp_itt_task_finished( itt_sync_obj );
1531 #endif /* USE_ITT_BUILD */
1532 
1533  // If this thread is only partway through the barrier and the condition
1534  // is met, then return now, so that the barrier gather/release pattern can proceed.
1535  // If this thread is in the last spin loop in the barrier, waiting to be
1536  // released, we know that the termination condition will not be satisfied,
1537  // so don't waste any cycles checking it.
1538  if (flag == NULL || (!final_spin && flag->done_check())) {
1539  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
1540  return TRUE;
1541  }
1542  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1543  }
1544 
1545  // This thread's work queue is empty. If we are in the final spin loop
1546  // of the barrier, check and see if the termination condition is satisfied.
1547  if (final_spin) {
1548  // First, decrement the #unfinished threads, if that has not already
1549  // been done. This decrement might be to the spin location, and
1550  // result in the termination condition being satisfied.
1551  if (! *thread_finished) {
1552  kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1553  KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
1554  gtid, count, task_team) );
1555  *thread_finished = TRUE;
1556  }
1557 
1558  // It is now unsafe to reference thread->th.th_team !!!
1559  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1560  // thread to pass through the barrier, where it might reset each thread's
1561  // th.th_team field for the next parallel region.
1562  // If we can steal more work, we know that this has not happened yet.
1563  if (flag != NULL && flag->done_check()) {
1564  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
1565  return TRUE;
1566  }
1567  }
1568 
1569  // Try to steal from the last place I stole from successfully.
1570  tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1571  last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1572 
1573  if (last_stolen != -1) {
1574  kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1575 
1576  while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1577  thread_finished, is_constrained )) != NULL)
1578  {
1579 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1580  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1581  if ( itt_sync_obj == NULL ) {
1582  // we are at fork barrier where we could not get the object reliably
1583  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1584  }
1585  __kmp_itt_task_starting( itt_sync_obj );
1586  }
1587 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1588  __kmp_invoke_task( gtid, task, current_task );
1589 #if USE_ITT_BUILD
1590  if ( itt_sync_obj != NULL )
1591  __kmp_itt_task_finished( itt_sync_obj );
1592 #endif /* USE_ITT_BUILD */
1593 
1594  // Check to see if this thread can proceed.
1595  if (flag == NULL || (!final_spin && flag->done_check())) {
1596  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
1597  gtid) );
1598  return TRUE;
1599  }
1600 
1601  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1602  // If the execution of the stolen task resulted in more tasks being
1603  // placed on our run queue, then restart the whole process.
1604  if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1605  KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
1606  gtid) );
1607  goto start;
1608  }
1609  }
1610 
1611  // Don't give priority to stealing from this thread anymore.
1612  threads_data[ tid ].td.td_deque_last_stolen = -1;
1613 
1614  // The victim's work queue is empty. If we are in the final spin loop
1615  // of the barrier, check and see if the termination condition is satisfied.
1616  if (final_spin) {
1617  // First, decrement the #unfinished threads, if that has not already
1618  // been done. This decrement might be to the spin location, and
1619  // result in the termination condition being satisfied.
1620  if (! *thread_finished) {
1621  kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1622  KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
1623  "task_team=%p\n", gtid, count, task_team) );
1624  *thread_finished = TRUE;
1625  }
1626 
1627  // If __kmp_tasking_mode != tskm_immediate_exec
1628  // then it is now unsafe to reference thread->th.th_team !!!
1629  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1630  // thread to pass through the barrier, where it might reset each thread's
1631  // th.th_team field for the next parallel region.
1632  // If we can steal more work, we know that this has not happened yet.
1633  if (flag != NULL && flag->done_check()) {
1634  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
1635  gtid) );
1636  return TRUE;
1637  }
1638  }
1639  }
1640 
1641  // Find a different thread to steal work from. Pick a random thread.
1642  // My initial plan was to cycle through all the threads, and only return
1643  // if we tried to steal from every thread, and failed. Arch says that's
1644  // not such a great idea.
1645  // GEH - need yield code in this loop for throughput library mode?
1646  new_victim:
1647  k = __kmp_get_random( thread ) % (nthreads - 1);
1648  if ( k >= thread -> th.th_info.ds.ds_tid ) {
1649  ++k; // Adjusts random distribution to exclude self
1650  }
1651  {
1652  kmp_info_t *other_thread = threads_data[k].td.td_thr;
1653  int first;
1654 
1655  // There is a slight chance that __kmp_enable_tasking() did not wake up
1656  // all threads waiting at the barrier. If this thread is sleeping,
1657  // then wake it up. Since we were going to pay the cache miss penalty
1658  // for referencing another thread's kmp_info_t struct anyway, the check
1659  // shouldn't cost too much performance at this point.
1660  // In extra barrier mode, tasks do not sleep at the separate tasking
1661  // barrier, so this isn't a problem.
1662  if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1663  (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1664  (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1665  {
1666  __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
1667  // A sleeping thread should not have any tasks on its queue.
1668  // There is a slight possibility that it resumes, steals a task from
1669  // another thread, which spawns more tasks, all in the time that it takes
1670  // this thread to check, so don't write an assertion that the victim's
1671  // queue is empty. Try stealing from a different thread.
1672  goto new_victim;
1673  }
1674 
1675  // Now try to steal work from the selected thread
1676  first = TRUE;
1677  while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1678  thread_finished, is_constrained )) != NULL)
1679  {
1680 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1681  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1682  if ( itt_sync_obj == NULL ) {
1683  // we are at fork barrier where we could not get the object reliably
1684  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1685  }
1686  __kmp_itt_task_starting( itt_sync_obj );
1687  }
1688 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1689  __kmp_invoke_task( gtid, task, current_task );
1690 #if USE_ITT_BUILD
1691  if ( itt_sync_obj != NULL )
1692  __kmp_itt_task_finished( itt_sync_obj );
1693 #endif /* USE_ITT_BUILD */
1694 
1695  // Try stealing from this victim again, in the future.
1696  if (first) {
1697  threads_data[ tid ].td.td_deque_last_stolen = k;
1698  first = FALSE;
1699  }
1700 
1701  // Check to see if this thread can proceed.
1702  if (flag == NULL || (!final_spin && flag->done_check())) {
1703  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
1704  gtid) );
1705  return TRUE;
1706  }
1707  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1708 
1709  // If the execution of the stolen task resulted in more tasks being
1710  // placed on our run queue, then restart the whole process.
1711  if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1712  KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
1713  gtid) );
1714  goto start;
1715  }
1716  }
1717 
1718  // The victim's work queue is empty. If we are in the final spin loop
1719  // of the barrier, check and see if the termination condition is satisfied.
1720  // Going on and finding a new victim to steal from is expensive, as it
1721  // involves a lot of cache misses, so we definitely want to re-check the
1722  // termination condition before doing that.
1723  if (final_spin) {
1724  // First, decrement the #unfinished threads, if that has not already
1725  // been done. This decrement might be to the spin location, and
1726  // result in the termination condition being satisfied.
1727  if (! *thread_finished) {
1728  kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1729  KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
1730  "task_team=%p\n",
1731  gtid, count, task_team) );
1732  *thread_finished = TRUE;
1733  }
1734 
1735  // If __kmp_tasking_mode != tskm_immediate_exec,
1736  // then it is now unsafe to reference thread->th.th_team !!!
1737  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1738  // thread to pass through the barrier, where it might reset each thread's
1739  // th.th_team field for the next parallel region.
1740  // If we can steal more work, we know that this has not happened yet.
1741  if (flag != NULL && flag->done_check()) {
1742  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
1743  return TRUE;
1744  }
1745  }
1746  }
1747 
1748  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
1749  return FALSE;
1750 }
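
/*
 * Editor's note -- orientation summary of the scheduling loop above (comments
 * only, with trace and ITT markup omitted; this is a paraphrase, not source):
 *
 *   start:
 *     while ( a task can be popped from my own deque )
 *         execute it; return TRUE as soon as the flag is satisfied
 *     if ( final_spin )  decrement unfinished_threads (first time only);
 *                        return TRUE if the flag is now satisfied
 *     if ( a last-stolen victim is remembered )
 *         drain its deque the same way, re-checking the flag after each task
 *     pick a random victim, waking it if it sleeps, and drain its deque likewise
 *     if executing a stolen task refilled my own deque, goto start
 *     return FALSE   // no work found anywhere
 */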
1751 
1752 int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
1753  int *thread_finished
1754  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1755 {
1756  return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1757  USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1758 }
1759 
1760 int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
1761  int *thread_finished
1762  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1763 {
1764  return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1765  USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1766 }
1767 
1768 int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
1769  int *thread_finished
1770  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1771 {
1772  return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1773  USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1774 }
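
/*
 * Editor's note -- minimal usage sketch (hypothetical helper, not library
 * source), mirroring how __kmp_tasking_barrier at the end of this file drives
 * these entry points: the flag object bundles the spin location and the value
 * that satisfies the wait, and the caller keeps helping with tasks until the
 * flag is satisfied.  Shutdown/abort checks are omitted here.
 */
static void
__sketch_spin_and_help( kmp_info_t *thread, kmp_int32 gtid, volatile kmp_uint32 *spin )
{
    int thread_finished = FALSE;
    kmp_flag_32 flag( spin, 0U );               // done_check() succeeds when *spin reaches 0
    while ( ! __kmp_execute_tasks_32( thread, gtid, &flag, TRUE, &thread_finished
                                      USE_ITT_BUILD_ARG(NULL), 0 ) ) {
        KMP_YIELD( TRUE );                      // no task found this pass; yield and retry
    }
}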
1775 
1776 
1777 
1778 //-----------------------------------------------------------------------------
1779 // __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
1780 // next barrier so they can assist in executing enqueued tasks.
1781 // First thread in allocates the task team atomically.
1782 
1783 static void
1784 __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
1785 {
1786  kmp_team_t *team = this_thr->th.th_team;
1787  kmp_thread_data_t *threads_data;
1788  int nthreads, i, is_init_thread;
1789 
1790  KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
1791  __kmp_gtid_from_thread( this_thr ) ) );
1792 
1793  KMP_DEBUG_ASSERT(task_team != NULL);
1794  KMP_DEBUG_ASSERT(team != NULL);
1795 
1796  nthreads = task_team->tt.tt_nproc;
1797  KMP_DEBUG_ASSERT(nthreads > 0);
1798  KMP_DEBUG_ASSERT(nthreads == team->t.t_nproc);
1799 
1800  // Allocate or increase the size of threads_data if necessary
1801  is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
1802 
1803  if (!is_init_thread) {
1804  // Some other thread already set up the array.
1805  KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
1806  __kmp_gtid_from_thread( this_thr ) ) );
1807  return;
1808  }
1809  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1810  KMP_DEBUG_ASSERT( threads_data != NULL );
1811 
1812  if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1813  ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
1814  {
1815  // Release any threads sleeping at the barrier, so that they can steal
1816  // tasks and execute them. In extra barrier mode, tasks do not sleep
1817  // at the separate tasking barrier, so this isn't a problem.
1818  for (i = 0; i < nthreads; i++) {
1819  volatile void *sleep_loc;
1820  kmp_info_t *thread = threads_data[i].td.td_thr;
1821 
1822  if (i == this_thr->th.th_info.ds.ds_tid) {
1823  continue;
1824  }
1825  // Since we haven't locked the thread's suspend mutex lock at this
1826  // point, there is a small window where a thread might be putting
1827  // itself to sleep, but hasn't set the th_sleep_loc field yet.
1828  // To work around this, __kmp_execute_tasks_template() periodically checks
1829  // to see if other threads are sleeping (using the same random
1830  // mechanism that is used for task stealing) and awakens them if
1831  // they are.
1832  if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
1833  {
1834  KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
1835  __kmp_gtid_from_thread( this_thr ),
1836  __kmp_gtid_from_thread( thread ) ) );
1837  __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
1838  }
1839  else {
1840  KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
1841  __kmp_gtid_from_thread( this_thr ),
1842  __kmp_gtid_from_thread( thread ) ) );
1843  }
1844  }
1845  }
1846 
1847  KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
1848  __kmp_gtid_from_thread( this_thr ) ) );
1849 }
1850 
1851 
1852 /* ------------------------------------------------------------------------ */
1853 /* // TODO: Check the comment consistency
1854  * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
1855  * like a shadow of the kmp_team_t data struct, with a different lifetime.
1856  * After a child thread checks into a barrier and calls __kmp_release() from
1857  * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
1858  * longer assume that the kmp_team_t structure is intact (at any moment, the
1859  * master thread may exit the barrier code and free the team data structure,
1860  * and return the threads to the thread pool).
1861  *
1862  * This does not work with the tasking code, as the thread is still
1863  * expected to participate in the execution of any tasks that may have been
1864  * spawned by a member of the team, and the thread still needs access
1865  * to each thread in the team, so that it can steal work from it.
1866  *
1867  * Enter the existence of the kmp_task_team_t struct. It employs a reference
1868  * counting mechanism, and is allocated by the master thread before calling
1869  * __kmp_<barrier_kind>_release, and then is released by the last thread to
1870  * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
1871  * of the kmp_task_team_t structs for consecutive barriers can overlap
1872  * (and will, unless the master thread is the last thread to exit the barrier
1873  * release phase, which is not typical).
1874  *
1875  * The existence of such a struct is useful outside the context of tasking,
1876  * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
1877  * so that any performance differences show up when comparing the 2.5 vs. 3.0
1878  * libraries.
1879  *
1880  * We currently use the existence of the threads array as an indicator that
1881  * tasks were spawned since the last barrier. If the structure is to be
1882  * useful outside the context of tasking, then this will have to change, but
1883  * not setting the field minimizes the performance impact of tasking on
1884  * barriers, when no explicit tasks were spawned (pushed, actually).
1885  */
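
/*
 * Editor's note -- illustrative sketch (hypothetical helper, not library
 * source) of the reference-counting lifetime described above: the master
 * seeds tt_ref_ct with nthreads-1 in __kmp_allocate_task_team() below, each
 * worker drops one reference on its way out of the barrier, and whichever
 * thread drops the last reference returns the struct to the free list via
 * __kmp_free_task_team().
 */
static void
__sketch_worker_done_with_task_team( kmp_info_t *thread )
{
    kmp_task_team_t *task_team = thread->th.th_task_team;
    if ( task_team != NULL ) {
        // Decrements tt_ref_ct, frees the task team when the count reaches zero,
        // and clears this thread's th_task_team pointer (see __kmp_unref_task_team below).
        __kmp_unref_task_team( task_team, thread );
    }
}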
1886 
1887 
1888 static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
1889 // Lock for task team data structures
1890 static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
1891 
1892 
1893 //------------------------------------------------------------------------------
1894 // __kmp_alloc_task_deque:
1895 // Allocates a task deque for a particular thread, and initializes the necessary
1896 // data structures relating to the deque. This only happens once per thread
1897 // per task team since task teams are recycled.
1898 // No lock is needed during allocation since each thread allocates its own
1899 // deque.
1900 
1901 static void
1902 __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
1903 {
1904  __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
1905  KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
1906 
1907  // Initialize last stolen task field to "none"
1908  thread_data -> td.td_deque_last_stolen = -1;
1909 
1910  KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
1911  KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
1912  KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
1913 
1914  KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
1915  __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
1916  // Allocate space for task deque, and zero the deque
1917  // Cannot use __kmp_thread_calloc() because threads not around for
1918  // kmp_reap_task_team( ).
1919  thread_data -> td.td_deque = (kmp_taskdata_t **)
1920  __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
1921 }
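
/*
 * Editor's note -- assumed indexing scheme: TASK_DEQUE_MASK is taken to be
 * TASK_DEQUE_SIZE - 1 with TASK_DEQUE_SIZE a power of two, so the deque is a
 * fixed-size ring buffer and wrapping an index is a single AND:
 *
 *   owner pops its own work at the tail:   tail = (tail - 1) & TASK_DEQUE_MASK;  task = deque[tail];
 *   thieves steal from the head:           task = deque[head];  head = (head + 1) & TASK_DEQUE_MASK;
 *
 * (the push side, which appends at the tail, appears earlier in this file)
 */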
1922 
1923 
1924 //------------------------------------------------------------------------------
1925 // __kmp_free_task_deque:
1926 // Deallocates a task deque for a particular thread.
1927 // Happens at library deallocation so don't need to reset all thread data fields.
1928 
1929 static void
1930 __kmp_free_task_deque( kmp_thread_data_t *thread_data )
1931 {
1932  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1933 
1934  if ( thread_data -> td.td_deque != NULL ) {
1935  TCW_4(thread_data -> td.td_deque_ntasks, 0);
1936  __kmp_free( thread_data -> td.td_deque );
1937  thread_data -> td.td_deque = NULL;
1938  }
1939  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1940 
1941 #ifdef BUILD_TIED_TASK_STACK
1942  // GEH: Figure out what to do here for td_susp_tied_tasks
1943  if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
1944  __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
1945  }
1946 #endif // BUILD_TIED_TASK_STACK
1947 }
1948 
1949 
1950 //------------------------------------------------------------------------------
1951 // __kmp_realloc_task_threads_data:
1952 // Allocates a threads_data array for a task team, either by allocating an initial
1953 // array or enlarging an existing array. Only the first thread to get the lock
1954 // allocates or enlarges the array and re-initializes the array elements.
1955 // That thread returns "TRUE", the rest return "FALSE".
1956 // Assumes that the new array size is given by task_team -> tt.tt_nproc.
1957 // The current size is given by task_team -> tt.tt_max_threads.
1958 
1959 static int
1960 __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
1961 {
1962  kmp_thread_data_t ** threads_data_p;
1963  kmp_int32 nthreads, maxthreads;
1964  int is_init_thread = FALSE;
1965 
1966  if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
1967  // Already reallocated and initialized.
1968  return FALSE;
1969  }
1970 
1971  threads_data_p = & task_team -> tt.tt_threads_data;
1972  nthreads = task_team -> tt.tt_nproc;
1973  maxthreads = task_team -> tt.tt_max_threads;
1974 
1975  // All threads must lock when they encounter the first task of the implicit task
1976  // region to make sure threads_data fields are (re)initialized before they are used.
1977  __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
1978 
1979  if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
1980  // first thread to enable tasking
1981  kmp_team_t *team = thread -> th.th_team;
1982  int i;
1983 
1984  is_init_thread = TRUE;
1985  if ( maxthreads < nthreads ) {
1986 
1987  if ( *threads_data_p != NULL ) {
1988  kmp_thread_data_t *old_data = *threads_data_p;
1989  kmp_thread_data_t *new_data = NULL;
1990 
1991  KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
1992  "threads data for task_team %p, new_size = %d, old_size = %d\n",
1993  __kmp_gtid_from_thread( thread ), task_team,
1994  nthreads, maxthreads ) );
1995  // Reallocate threads_data to have more elements than current array
1996  // Cannot use __kmp_thread_realloc() because threads not around for
1997  // kmp_reap_task_team( ). Note all new array entries are initialized
1998  // to zero by __kmp_allocate().
1999  new_data = (kmp_thread_data_t *)
2000  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2001  // copy old data to new data
2002  memcpy( (void *) new_data, (void *) old_data,
2003  maxthreads * sizeof(kmp_thread_data_t) ); // copy whole entries, not just pointers
2004 
2005 #ifdef BUILD_TIED_TASK_STACK
2006  // GEH: Figure out if this is the right thing to do
2007  for (i = maxthreads; i < nthreads; i++) {
2008  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2009  __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2010  }
2011 #endif // BUILD_TIED_TASK_STACK
2012  // Install the new data and free the old data
2013  (*threads_data_p) = new_data;
2014  __kmp_free( old_data );
2015  }
2016  else {
2017  KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
2018  "threads data for task_team %p, size = %d\n",
2019  __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2020  // Make the initial allocate for threads_data array, and zero entries
2021  // Cannot use __kmp_thread_calloc() because threads not around for
2022  // kmp_reap_task_team( ).
2023  *threads_data_p = (kmp_thread_data_t *)
2024  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2025 #ifdef BUILD_TIED_TASK_STACK
2026  // GEH: Figure out if this is the right thing to do
2027  for (i = 0; i < nthreads; i++) {
2028  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2029  __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2030  }
2031 #endif // BUILD_TIED_TASK_STACK
2032  }
2033  task_team -> tt.tt_max_threads = nthreads;
2034  }
2035  else {
2036  // If array has (more than) enough elements, go ahead and use it
2037  KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2038  }
2039 
2040  // initialize threads_data pointers back to thread_info structures
2041  for (i = 0; i < nthreads; i++) {
2042  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2043  thread_data -> td.td_thr = team -> t.t_threads[i];
2044 
2045  if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2046  // The last stolen field survives across teams / barrier, and the number
2047  // of threads may have changed. It's possible (likely?) that a new
2048  // parallel region will exhibit the same behavior as the previous region.
2049  thread_data -> td.td_deque_last_stolen = -1;
2050  }
2051  }
2052 
2053  KMP_MB();
2054  TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2055  }
2056 
2057  __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2058  return is_init_thread;
2059 }
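
/*
 * Editor's note -- skeleton of the check / lock / re-check pattern used above
 * (hypothetical helper, not library source; TCR_4, TCW_SYNC_4, KMP_MB and the
 * bootstrap-lock primitives are the ones already used in this file).
 */
static int
__sketch_first_to_initialize( kmp_task_team_t *task_team )
{
    int is_first = FALSE;

    if ( TCR_4( task_team->tt.tt_found_tasks ) ) {
        return FALSE;                           // fast path: another thread already did the work
    }
    __kmp_acquire_bootstrap_lock( & task_team->tt.tt_threads_lock );
    if ( ! TCR_4( task_team->tt.tt_found_tasks ) ) {
        is_first = TRUE;
        /* ... allocate or grow tt_threads_data here ... */
        KMP_MB();                               // publish the data before setting the flag
        TCW_SYNC_4( task_team->tt.tt_found_tasks, TRUE );
    }
    __kmp_release_bootstrap_lock( & task_team->tt.tt_threads_lock );
    return is_first;
}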
2060 
2061 
2062 //------------------------------------------------------------------------------
2063 // __kmp_free_task_threads_data:
2064 // Deallocates a threads_data array for a task team, including any attached
2065 // tasking deques. Only occurs at library shutdown.
2066 
2067 static void
2068 __kmp_free_task_threads_data( kmp_task_team_t *task_team )
2069 {
2070  __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2071  if ( task_team -> tt.tt_threads_data != NULL ) {
2072  int i;
2073  for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2074  __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2075  }
2076  __kmp_free( task_team -> tt.tt_threads_data );
2077  task_team -> tt.tt_threads_data = NULL;
2078  }
2079  __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2080 }
2081 
2082 
2083 //------------------------------------------------------------------------------
2084 // __kmp_allocate_task_team:
2085 // Allocates a task team associated with a specific team, taking it from
2086 // the global task team free list if possible. Also initializes data structures.
2087 
2088 static kmp_task_team_t *
2089 __kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2090 {
2091  kmp_task_team_t *task_team = NULL;
2092  int nthreads;
2093 
2094  KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2095  (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2096 
2097  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2098  // Take a task team from the task team pool
2099  __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2100  if (__kmp_free_task_teams != NULL) {
2101  task_team = __kmp_free_task_teams;
2102  TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2103  task_team -> tt.tt_next = NULL;
2104  }
2105  __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2106  }
2107 
2108  if (task_team == NULL) {
2109  KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2110  "task team for team %p\n",
2111  __kmp_gtid_from_thread( thread ), team ) );
2112  // Allocate a new task team if one is not available.
2113  // Cannot use __kmp_thread_malloc() because threads not around for
2114  // kmp_reap_task_team( ).
2115  task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2116  __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2117  //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2118  //task_team -> tt.tt_max_threads = 0;
2119  //task_team -> tt.tt_next = NULL;
2120  }
2121 
2122  TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2123  task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2124 
2125  task_team -> tt.tt_state = 0;
2126  TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2127  TCW_4( task_team -> tt.tt_active, TRUE );
2128  TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
2129 
2130  KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
2131  (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
2132  return task_team;
2133 }
2134 
2135 
2136 //------------------------------------------------------------------------------
2137 // __kmp_free_task_team:
2138 // Frees the task team associated with a specific thread, and adds it
2139 // to the global task team free list.
2140 //
2141 
2142 static void
2143 __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2144 {
2145  KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2146  thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2147 
2148  KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
2149 
2150  // Put task team back on free list
2151  __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2152 
2153  KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2154  task_team -> tt.tt_next = __kmp_free_task_teams;
2155  TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2156  TCW_PTR(__kmp_free_task_teams, task_team);
2157 
2158  __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2159 }
2160 
2161 
2162 //------------------------------------------------------------------------------
2163 // __kmp_reap_task_teams:
2164 // Free all the task teams on the task team free list.
2165 // Should only be done during library shutdown.
2166 // Cannot do anything that needs a thread structure or gtid since they are already gone.
2167 
2168 void
2169 __kmp_reap_task_teams( void )
2170 {
2171  kmp_task_team_t *task_team;
2172 
2173  if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2174  // Free all task_teams on the free list
2175  __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2176  while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2177  __kmp_free_task_teams = task_team -> tt.tt_next;
2178  task_team -> tt.tt_next = NULL;
2179 
2180  // Free threads_data if necessary
2181  if ( task_team -> tt.tt_threads_data != NULL ) {
2182  __kmp_free_task_threads_data( task_team );
2183  }
2184  __kmp_free( task_team );
2185  }
2186  __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2187  }
2188 }
2189 
2190 
2191 //------------------------------------------------------------------------------
2192 // __kmp_unref_task_team:
2193 // Remove one thread from referencing the task team structure by
2194 // decreasing the reference count, and deallocate the task team if there are
2195 // no more references to it.
2196 //
2197 void
2198 __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
2199 {
2200  kmp_uint ref_ct;
2201 
2202  ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
2203 
2204  KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
2205  __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
2206 
2207 
2208  if ( ref_ct == 0 ) {
2209  __kmp_free_task_team( thread, task_team );
2210  }
2211 
2212  TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
2213 }
2214 
2215 
2216 //------------------------------------------------------------------------------
2217 // __kmp_wait_to_unref_task_teams:
2218 // Some threads could still be in the fork barrier release code, possibly
2219 // trying to steal tasks. Wait for each thread to unreference its task team.
2220 //
2221 void
2222 __kmp_wait_to_unref_task_teams(void)
2223 {
2224  kmp_info_t *thread;
2225  kmp_uint32 spins;
2226  int done;
2227 
2228  KMP_INIT_YIELD( spins );
2229 
2230 
2231  for (;;) {
2232  done = TRUE;
2233 
2234  // TODO: GEH - this may be wrong because some sync would be necessary
2235  // in case threads are added to the pool during the traversal.
2236  // Need to verify that lock for thread pool is held when calling
2237  // this routine.
2238  for (thread = (kmp_info_t *)__kmp_thread_pool;
2239  thread != NULL;
2240  thread = thread->th.th_next_pool)
2241  {
2242 #if KMP_OS_WINDOWS
2243  DWORD exit_val;
2244 #endif
2245  if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2246  KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2247  __kmp_gtid_from_thread( thread ) ) );
2248  continue;
2249  }
2250 #if KMP_OS_WINDOWS
2251  // TODO: GEH - add this check for Linux* OS / OS X* as well?
2252  if (!__kmp_is_thread_alive(thread, &exit_val)) {
2253  if (TCR_PTR(thread->th.th_task_team) != NULL) {
2254  __kmp_unref_task_team( thread->th.th_task_team, thread );
2255  }
2256  continue;
2257  }
2258 #endif
2259 
2260  done = FALSE; // Because th_task_team pointer is not NULL for this thread
2261 
2262  KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2263  __kmp_gtid_from_thread( thread ) ) );
2264 
2265  if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
2266  volatile void *sleep_loc;
2267  // If the thread is sleeping, awaken it.
2268  if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
2269  KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2270  __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
2271  __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
2272  }
2273  }
2274  }
2275  if (done) {
2276  break;
2277  }
2278 
2279  // If we are oversubscribed,
2280  // or have waited a bit (and library mode is throughput), yield.
2281  // Pause is in the following code.
2282  KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2283  KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2284  }
2285 
2286 
2287 }
2288 
2289 
2290 //------------------------------------------------------------------------------
2291 // __kmp_task_team_setup: Create a task_team for the current team, but use
2292 // an already created, unused one if it already exists.
2293 // This may be called by any thread, but only for teams with # threads >1.
2294 
2295 void
2296 __kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team )
2297 {
2298  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2299 
2300  if ( ( team->t.t_task_team == NULL ) && ( team->t.t_nproc > 1 ) ) {
2301  // Allocate a new task team, which will be propagated to
2302  // all of the worker threads after the barrier. As they
2303  // spin in the barrier release phase, they will continue
2304  // to use the previous task team struct, until they receive
2305  // the signal to stop checking for tasks (they can't safely
2306  // reference the kmp_team_t struct, which could be reallocated
2307  // by the master thread).
2308  team->t.t_task_team = __kmp_allocate_task_team( this_thr, team );
2309  KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new "
2310  "task_team %p for team %d\n",
2311  __kmp_gtid_from_thread( this_thr ), team->t.t_task_team,
2312  ((team != NULL) ? team->t.t_id : -1)) );
2313  }
2314  else {
2315  // All threads have reported in, and no tasks were spawned
2316  // for this release->gather region. Leave the old task
2317  // team struct in place for the upcoming region. No task
2318  // teams are formed for serialized teams.
2319  }
2320  if ( team->t.t_task_team != NULL ) {
2321  // Toggle the state flag so that we can tell which side of
2322  // the barrier we are on.
2323  team->t.t_task_team->tt.tt_state = 1 - this_thr->th.th_task_state;
2324  }
2325 }
2326 
2327 
2328 //------------------------------------------------------------------------------
2329 // __kmp_task_team_sync: Propagation of task team data from team to threads
2330 // which happens just after the release phase of a team barrier. This may be
2331 // called by any thread, but only for teams with # threads > 1.
2332 
2333 void
2334 __kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2335 {
2336  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2337 
2338  // On the rare chance that this thread never saw that the task
2339  // team was no longer active, unref/deallocate it now.
2340  if ( this_thr->th.th_task_team != NULL ) {
2341  if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
2342  KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
2343  __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
2344  } else {
2345  //
2346  // We are re-using a task team that was never enabled.
2347  //
2348  KMP_DEBUG_ASSERT( this_thr->th.th_task_team == team->t.t_task_team );
2349  }
2350  }
2351 
2352  //
2353  // It is now safe to propagate the task team pointer from the
2354  // team struct to the current thread.
2355  //
2356  TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team);
2357  if ( this_thr->th.th_task_team != NULL ) {
2358  //
2359  // Toggle the th_task_state field, instead of reading it from
2360  // the task team. Reading the tt_state field at this point
2361  // causes a 30% regression on EPCC parallel - toggling it
2362  // is much cheaper.
2363  //
2364  this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
2365  KMP_DEBUG_ASSERT( this_thr->th.th_task_state == TCR_4(team->t.t_task_team->tt.tt_state) );
2366  }
2367  KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
2368  __kmp_gtid_from_thread( this_thr ), &this_thr->th.th_task_team,
2369  this_thr->th.th_task_team, ((team != NULL) ? (team->t.t_id) : -1) ) );
2370 }
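
/*
 * Editor's note -- the parity handshake between __kmp_task_team_setup and
 * __kmp_task_team_sync, assuming th_task_state and tt_state only ever hold
 * 0 or 1 and that all threads enter the barrier with the same value:
 *
 *   master, in __kmp_task_team_setup:   tt_state      = 1 - th_task_state;
 *   worker, in __kmp_task_team_sync:    th_task_state = 1 - th_task_state;
 *
 * After the toggle each thread's th_task_state matches tt_state (the
 * KMP_DEBUG_ASSERT above checks exactly this) without re-reading the shared
 * tt_state field on the hot path.
 */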
2371 
2372 
2373 //------------------------------------------------------------------------------
2374 // __kmp_task_team_wait: Master thread waits for outstanding tasks after
2375 // the barrier gather phase. Only called by master thread if #threads
2376 // in team > 1 !
2377 
2378 void
2379 __kmp_task_team_wait( kmp_info_t *this_thr,
2380  kmp_team_t *team
2381  USE_ITT_BUILD_ARG(void * itt_sync_obj)
2382  )
2383 {
2384  kmp_task_team_t *task_team = team->t.t_task_team;
2385 
2386  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2387  KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2388 
2389  if ( ( task_team != NULL ) && KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) {
2390  KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
2391  __kmp_gtid_from_thread( this_thr ), task_team ) );
2392  //
2393  // All worker threads might have dropped through to the
2394  // release phase, but could still be executing tasks.
2395  // Wait here for all tasks to complete. To avoid memory
2396  // contention, only the master thread checks for the
2397  // termination condition.
2398  //
2399  kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2400  flag.wait(this_thr, TRUE
2401  USE_ITT_BUILD_ARG(itt_sync_obj));
2402 
2403  //
2404  // Kill the old task team, so that the worker threads will
2405  // stop referencing it while spinning. They will
2406  // deallocate it when the reference count reaches zero.
2407  // The master thread is not included in the ref count.
2408  //
2409  KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
2410  __kmp_gtid_from_thread( this_thr ), task_team ) );
2411  KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
2412  TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2413  KMP_MB();
2414 
2415  TCW_PTR(this_thr->th.th_task_team, NULL);
2416  team->t.t_task_team = NULL;
2417  }
2418 }
2419 
2420 
2421 //------------------------------------------------------------------------------
2422 // __kmp_tasking_barrier:
2423 // Internal function to execute all tasks prior to a regular barrier or a
2424 // join barrier. It is a full barrier itself, which unfortunately turns
2425 // regular barriers into double barriers and join barriers into 1 1/2
2426 // barriers.
2427 // This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
2428 
2429 void
2430 __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2431 {
2432  volatile kmp_uint32 *spin = &team->t.t_task_team->tt.tt_unfinished_threads;
2433  int flag = FALSE;
2434  KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2435 
2436 #if USE_ITT_BUILD
2437  KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2438 #endif /* USE_ITT_BUILD */
2439  kmp_flag_32 spin_flag(spin, 0U);
2440  while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2441  USE_ITT_BUILD_ARG(NULL), 0 ) ) {
2442 #if USE_ITT_BUILD
2443  // TODO: What about itt_sync_obj??
2444  KMP_FSYNC_SPIN_PREPARE( spin );
2445 #endif /* USE_ITT_BUILD */
2446 
2447  if( TCR_4(__kmp_global.g.g_done) ) {
2448  if( __kmp_global.g.g_abort )
2449  __kmp_abort_thread( );
2450  break;
2451  }
2452  KMP_YIELD( TRUE ); // GH: We always yield here
2453  }
2454 #if USE_ITT_BUILD
2455  KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2456 #endif /* USE_ITT_BUILD */
2457 }
2458 