#include "kmp_wait_release.h"

static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
static int  __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
    switch (((kmp_flag_64 *)flag)->get_type()) {
    case flag32: __kmp_resume_32(gtid, NULL); break;
    case flag64: __kmp_resume_64(gtid, NULL); break;
    case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
    }
}
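// The BUILD_TIED_TASK_STACK routines below maintain a per-thread stack of
// suspended tied tasks (td_susp_tied_tasks).  The stack is kept in blocks of
// TASK_STACK_BLOCK_SIZE entries chained through sb_next/sb_prev; the
// trace/init/free/push/pop helpers are only compiled for this debug build.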
63 #ifdef BUILD_TIED_TASK_STACK
__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
77 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
78 kmp_taskdata_t **stack_top = task_stack -> ts_top;
79 kmp_int32 entries = task_stack -> ts_entries;
80 kmp_taskdata_t *tied_task;
    KA_TRACE(threshold, ( "__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
                          "first_block = %p, stack_top = %p \n",
                          location, gtid, entries, task_stack->ts_first_block, stack_top ) );
86 KMP_DEBUG_ASSERT( stack_top != NULL );
87 KMP_DEBUG_ASSERT( entries > 0 );
89 while ( entries != 0 )
91 KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
        if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
95 kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
97 stack_block = stack_block -> sb_prev;
98 stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
105 tied_task = * stack_top;
107 KMP_DEBUG_ASSERT( tied_task != NULL );
108 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
110 KA_TRACE(threshold, (
"__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
111 "stack_top=%p, tied_task=%p\n",
112 location, gtid, entries, stack_top, tied_task ) );
114 KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
    KA_TRACE(threshold, ( "__kmp_trace_task_stack(exit): location = %s, gtid = %d\n", location, gtid ) );
129 __kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
131 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
132 kmp_stack_block_t *first_block;
135 first_block = & task_stack -> ts_first_block;
136 task_stack -> ts_top = (kmp_taskdata_t **) first_block;
    memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *) );
140 task_stack -> ts_entries = TASK_STACK_EMPTY;
141 first_block -> sb_next = NULL;
142 first_block -> sb_prev = NULL;
153 __kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
155 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
156 kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
158 KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
160 while ( stack_block != NULL ) {
161 kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
163 stack_block -> sb_next = NULL;
164 stack_block -> sb_prev = NULL;
165 if (stack_block != & task_stack -> ts_first_block) {
166 __kmp_thread_free( thread, stack_block );
168 stack_block = next_block;
171 task_stack -> ts_entries = 0;
172 task_stack -> ts_top = NULL;
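// __kmp_push_task_stack stores the tied task at ts_top and advances
// ts_top/ts_entries.  When ts_entries crosses a block boundary
// (TASK_STACK_INDEX_MASK), it either reuses an already-linked sb_next block
// or calloc's a fresh one and links it in, so the stack grows block by block.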
__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
190 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
192 if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
196 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
197 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
199 KA_TRACE(20, (
"__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
200 gtid, thread, tied_task ) );
202 * (task_stack -> ts_top) = tied_task;
205 task_stack -> ts_top++;
206 task_stack -> ts_entries++;
    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
215 if ( stack_block -> sb_next != NULL )
217 task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
            kmp_stack_block_t *new_block = (kmp_stack_block_t *) __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
224 task_stack -> ts_top = & new_block -> sb_block[0];
225 stack_block -> sb_next = new_block;
226 new_block -> sb_prev = stack_block;
227 new_block -> sb_next = NULL;
229 KA_TRACE(30, (
"__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
230 gtid, tied_task, new_block ) );
233 KA_TRACE(20, (
"__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
250 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
251 kmp_taskdata_t *tied_task;
253 if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
257 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
258 KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
260 KA_TRACE(20, (
"__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top) ;
268 stack_block = stack_block -> sb_prev;
269 task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
273 task_stack -> ts_top--;
274 task_stack -> ts_entries--;
276 tied_task = * (task_stack -> ts_top );
278 KMP_DEBUG_ASSERT( tied_task != NULL );
279 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
280 KMP_DEBUG_ASSERT( tied_task == ending_task );
282 KA_TRACE(20, (
"__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
291 __kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
293 kmp_info_t * thread = __kmp_threads[ gtid ];
294 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
295 kmp_task_team_t * task_team = thread->th.th_task_team;
296 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
297 kmp_thread_data_t * thread_data;
299 KA_TRACE(20, (
"__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
302 if ( taskdata->td_flags.task_serial ) {
        KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
309 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
310 if ( ! KMP_TASKING_ENABLED(task_team) ) {
311 __kmp_enable_tasking( task_team, thread );
313 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
314 KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
317 thread_data = & task_team -> tt.tt_threads_data[ tid ];
320 if (thread_data -> td.td_deque == NULL ) {
321 __kmp_alloc_task_deque( thread, thread_data );
325 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
333 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
337 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
339 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
346 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
349 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
351 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
352 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
354 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
    KA_TRACE(20, ( "__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                   "task=%p ntasks=%d head=%u tail=%u\n",
                   gtid, taskdata, thread_data->td.td_deque_ntasks,
                   thread_data->td.td_deque_head, thread_data->td.td_deque_tail) );
361 return TASK_SUCCESSFULLY_PUSHED;
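// Illustrative note: the deque is a power-of-two ring buffer, so masking with
// TASK_DEQUE_MASK is the modulo by TASK_DEQUE_SIZE.  For example:
//
//     tail = ( tail + 1 ) & TASK_DEQUE_MASK;   // wraps to 0 after the last slot
//
// The owning thread pops at the tail (__kmp_remove_my_task) while thieves
// normally take from the head (__kmp_steal_task).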
370 __kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
372 KF_TRACE( 10, (
"__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
373 "curtask_parent=%p\n",
374 0, this_thr, this_thr -> th.th_current_task,
375 this_thr -> th.th_current_task -> td_parent ) );
377 this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
379 KF_TRACE( 10, (
"__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
380 "curtask_parent=%p\n",
381 0, this_thr, this_thr -> th.th_current_task,
382 this_thr -> th.th_current_task -> td_parent ) );
__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );
401 KMP_DEBUG_ASSERT (this_thr != NULL);
404 if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
405 team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
406 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
409 team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
410 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );
427 __kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
429 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
430 kmp_info_t * thread = __kmp_threads[ gtid ];
432 KA_TRACE(10, (
"__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
433 gtid, taskdata, current_task) );
435 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
440 current_task -> td_flags.executing = 0;
443 #ifdef BUILD_TIED_TASK_STACK
444 if ( taskdata -> td_flags.tiedness == TASK_TIED )
446 __kmp_push_task_stack( gtid, thread, taskdata );
451 thread -> th.th_current_task = taskdata;
453 KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
454 KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
455 taskdata -> td_flags.started = 1;
456 taskdata -> td_flags.executing = 1;
457 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
458 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
    KA_TRACE(10, ( "__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata ) );
__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
481 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
482 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
484 KA_TRACE(10, (
"__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
485 gtid, loc_ref, taskdata, current_task ) );
487 taskdata -> td_flags.task_serial = 1;
488 __kmp_task_start( gtid, task, current_task );
490 KA_TRACE(10, (
"__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
491 gtid, loc_ref, taskdata ) );
502 __kmpc_omp_task_begin(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
504 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
506 KA_TRACE(10, (
"__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
507 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
509 __kmp_task_start( gtid, task, current_task );
511 KA_TRACE(10, (
"__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
512 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
516 #endif // TASK_UNUSED
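// Task deallocation: __kmp_free_task releases a single completed task, while
// __kmp_free_task_and_ancestors walks up td_parent as long as each ancestor's
// td_allocated_child_tasks count also drops to zero, stopping at implicit
// tasks or when the team/tasking is serialized.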
526 __kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
    KA_TRACE(30, ( "__kmp_free_task: T#%d freeing data from task %p\n", gtid, taskdata) );
532 KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
533 KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
534 KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
535 KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
536 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
537 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
539 taskdata->td_flags.freed = 1;
542 __kmp_fast_free( thread, taskdata );
544 __kmp_thread_free( thread, taskdata );
    KA_TRACE(20, ( "__kmp_free_task: T#%d freed task %p\n", gtid, taskdata) );
559 __kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
561 kmp_int32 children = 0;
562 kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
564 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
566 if ( !team_or_tasking_serialized ) {
567 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
568 KMP_DEBUG_ASSERT( children >= 0 );
572 while ( children == 0 )
574 kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
576 KA_TRACE(20, (
"__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
577 "and freeing itself\n", gtid, taskdata) );
580 __kmp_free_task( gtid, taskdata, thread );
582 taskdata = parent_taskdata;
586 if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
589 if ( !team_or_tasking_serialized ) {
591 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
592 KMP_DEBUG_ASSERT( children >= 0 );
596 KA_TRACE(20, (
"__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
597 "not freeing it yet\n", gtid, taskdata, children) );
607 __kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
609 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
610 kmp_info_t * thread = __kmp_threads[ gtid ];
611 kmp_int32 children = 0;
613 KA_TRACE(10, (
"__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
614 gtid, taskdata, resumed_task) );
616 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
619 #ifdef BUILD_TIED_TASK_STACK
620 if ( taskdata -> td_flags.tiedness == TASK_TIED )
622 __kmp_pop_task_stack( gtid, thread, taskdata );
626 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
627 taskdata -> td_flags.complete = 1;
628 KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
629 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
632 if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
634 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
635 KMP_DEBUG_ASSERT( children >= 0 );
637 if ( taskdata->td_taskgroup )
638 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
639 __kmp_release_deps(gtid,taskdata);
646 KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
647 taskdata -> td_flags.executing = 0;
649 KA_TRACE(20, (
"__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
650 gtid, taskdata, children) );
660 if (taskdata->td_flags.destructors_thunk) {
661 kmp_routine_entry_t destr_thunk = task->destructors;
662 KMP_ASSERT(destr_thunk);
663 destr_thunk(gtid, task);
665 #endif // OMP_40_ENABLED
669 KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
670 taskdata->td_flags.task_serial);
671 if ( taskdata->td_flags.task_serial )
673 if (resumed_task == NULL) {
674 resumed_task = taskdata->td_parent;
678 KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
682 KMP_DEBUG_ASSERT( resumed_task != NULL );
686 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
688 __kmp_threads[ gtid ] -> th.th_current_task = resumed_task;
692 resumed_task->td_flags.executing = 1;
694 KA_TRACE(10, (
"__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
695 gtid, taskdata, resumed_task) );
707 __kmpc_omp_task_complete_if0(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
709 KA_TRACE(10, (
"__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
710 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
712 __kmp_task_finish( gtid, task, NULL );
714 KA_TRACE(10, (
"__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
715 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
726 __kmpc_omp_task_complete(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
728 KA_TRACE(10, (
"__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
729 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
731 __kmp_task_finish( gtid, task, NULL );
733 KA_TRACE(10, (
"__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
734 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
737 #endif // TASK_UNUSED
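// __kmp_init_implicit_task (re)initializes the implicit task that each thread
// of a team owns.  It is marked TASK_IMPLICIT and TASK_TIED, already
// started/executing, and serves as the parent of explicit tasks created in
// the parallel region.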
__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
754 kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
    KF_TRACE(10, ( "__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
                   tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
759 task->td_task_id = KMP_GEN_TASK_ID();
760 task->td_team = team;
762 task->td_ident = loc_ref;
763 task->td_taskwait_ident = NULL;
764 task->td_taskwait_counter = 0;
765 task->td_taskwait_thread = 0;
767 task->td_flags.tiedness = TASK_TIED;
768 task->td_flags.tasktype = TASK_IMPLICIT;
770 task->td_flags.proxy = TASK_FULL;
774 task->td_flags.task_serial = 1;
775 task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
776 task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
778 task->td_flags.started = 1;
779 task->td_flags.executing = 1;
780 task->td_flags.complete = 0;
781 task->td_flags.freed = 0;
784 task->td_dephash = NULL;
785 task->td_depnode = NULL;
789 task->td_incomplete_child_tasks = 0;
790 task->td_allocated_child_tasks = 0;
792 task->td_taskgroup = NULL;
794 __kmp_push_current_task_to_thread( this_thr, team, tid );
796 KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
797 KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
    KF_TRACE(10, ( "__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid, team, task ) );
__kmp_round_up_to_val( size_t size, size_t val ) {
    if ( size & ( val - 1 ) ) {
        size &= ~ ( val - 1 );
        if ( size <= KMP_SIZE_T_MAX - val ) {
            size += val;    // Round up only if the addition cannot overflow.
        }
    }
    return size;
}
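// Quick illustration (values chosen here only for exposition): with
// val == sizeof(void *) == 8, __kmp_round_up_to_val(45, 8) yields 48 and
// __kmp_round_up_to_val(48, 8) stays 48.  The mask arithmetic assumes val is
// a power of two, which holds for the pointer-size alignment used below.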
__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
                  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                  kmp_routine_entry_t task_entry )
836 kmp_taskdata_t *taskdata;
837 kmp_info_t *thread = __kmp_threads[ gtid ];
838 kmp_team_t *team = thread->th.th_team;
839 kmp_taskdata_t *parent_task = thread->th.th_current_task;
840 size_t shareds_offset;
842 KA_TRACE(10, (
"__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
843 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
844 gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
845 sizeof_shareds, task_entry) );
847 if ( parent_task->td_flags.final ) {
848 if (flags->merged_if0) {
854 if ( flags->proxy == TASK_PROXY ) {
855 flags->tiedness = TASK_UNTIED;
856 flags->merged_if0 = 1;
859 if ( (thread->th.th_task_team) == NULL ) {
863 KMP_DEBUG_ASSERT(team->t.t_serialized);
864 KA_TRACE(30,(
"T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
865 __kmp_task_team_setup(thread,team,0,1);
866 thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
868 kmp_task_team_t * task_team = thread->th.th_task_team;
871 if ( !KMP_TASKING_ENABLED( task_team ) ) {
872 KA_TRACE(30,(
"T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
873 __kmp_enable_tasking( task_team, thread );
874 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
875 kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
877 if (thread_data -> td.td_deque == NULL ) {
878 __kmp_alloc_task_deque( thread, thread_data );
882 if ( task_team->tt.tt_found_proxy_tasks == FALSE )
883 TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
    shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
    shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ) );
893 KA_TRACE(30, (
"__kmp_task_alloc: T#%d First malloc size: %ld\n",
894 gtid, shareds_offset) );
895 KA_TRACE(30, (
"__kmp_task_alloc: T#%d Second malloc size: %ld\n",
896 gtid, sizeof_shareds) );
900 taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
902 taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
905 task = KMP_TASKDATA_TO_TASK(taskdata);
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
#else
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
#endif
    if (sizeof_shareds > 0) {
        task->shareds = & ((char *) taskdata)[ shareds_offset ];
        // Make sure the shareds block is aligned to pointer size.
        KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
    } else {
        task->shareds = NULL;
    }
923 task->routine = task_entry;
926 taskdata->td_task_id = KMP_GEN_TASK_ID();
927 taskdata->td_team = team;
928 taskdata->td_alloc_thread = thread;
929 taskdata->td_parent = parent_task;
930 taskdata->td_level = parent_task->td_level + 1;
931 taskdata->td_ident = loc_ref;
932 taskdata->td_taskwait_ident = NULL;
933 taskdata->td_taskwait_counter = 0;
934 taskdata->td_taskwait_thread = 0;
935 KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
938 if ( flags->proxy == TASK_FULL )
940 copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
942 taskdata->td_flags.tiedness = flags->tiedness;
943 taskdata->td_flags.final = flags->final;
944 taskdata->td_flags.merged_if0 = flags->merged_if0;
946 taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
947 #endif // OMP_40_ENABLED
949 taskdata->td_flags.proxy = flags->proxy;
951 taskdata->td_flags.tasktype = TASK_EXPLICIT;
954 taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
957 taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
963 taskdata->td_flags.task_serial = ( parent_task->td_flags.final
964 || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
966 taskdata->td_flags.started = 0;
967 taskdata->td_flags.executing = 0;
968 taskdata->td_flags.complete = 0;
969 taskdata->td_flags.freed = 0;
971 taskdata->td_flags.native = flags->native;
973 taskdata->td_incomplete_child_tasks = 0;
974 taskdata->td_allocated_child_tasks = 1;
976 taskdata->td_taskgroup = parent_task->td_taskgroup;
977 taskdata->td_dephash = NULL;
978 taskdata->td_depnode = NULL;
983 if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
985 if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
988 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
990 if ( parent_task->td_taskgroup )
991 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
994 if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
995 KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
999 KA_TRACE(20, (
"__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
1000 gtid, taskdata, taskdata->td_parent) );
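// Resulting layout of a single task allocation, as set up above (sketch):
//
//   taskdata --> | kmp_taskdata_t | kmp_task_t (sizeof_kmp_task_t) | shareds |
//                                 ^ task = KMP_TASKDATA_TO_TASK(taskdata)
//   task->shareds points at the tail block, which starts at shareds_offset
//   rounded up to pointer alignment.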
__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
                       size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                       kmp_routine_entry_t task_entry )
1012 kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
1014 input_flags->native = FALSE;
    KA_TRACE(10, ( "__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
                   "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                   gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                   input_flags->proxy ? "proxy" : "",
                   sizeof_kmp_task_t, sizeof_shareds, task_entry) );
1027 retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
1028 sizeof_shareds, task_entry );
1030 KA_TRACE(20, (
"__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
1043 __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
1045 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
1049 KA_TRACE(30, (
"__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
1050 gtid, taskdata, current_task) );
1053 if ( taskdata->td_flags.proxy == TASK_PROXY &&
1054 taskdata->td_flags.complete == 1)
1058 KA_TRACE(30, (
"__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
1061 __kmp_bottom_half_finish_proxy(gtid,task);
1063 KA_TRACE(30, (
"__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );
1071 if ( taskdata->td_flags.proxy != TASK_PROXY )
1073 __kmp_task_start( gtid, task, current_task );
1079 if (__kmp_omp_cancellation) {
1080 kmp_info_t *this_thr = __kmp_threads [ gtid ];
1081 kmp_team_t * this_team = this_thr->th.th_team;
1082 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1083 if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
1094 #endif // OMP_40_ENABLED
1095 #ifdef KMP_GOMP_COMPAT
1096 if (taskdata->td_flags.native) {
        ((void (*)(void *))(*(task->routine)))(task->shareds);
1102 (*(task->routine))(gtid, task);
1106 #endif // OMP_40_ENABLED
1110 if ( taskdata->td_flags.proxy != TASK_PROXY )
1112 __kmp_task_finish( gtid, task, current_task );
1114 KA_TRACE(30, (
"__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
1115 gtid, taskdata, current_task) );
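// __kmp_invoke_task: a proxy task that is already complete only needs its
// bottom-half finish; any other task is started, its routine is invoked
// (through the GOMP-compatible void(void*) signature when td_flags.native is
// set), and then finished, unless cancellation cuts the invocation short.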
1130 __kmpc_omp_task_parts(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1132 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1134 KA_TRACE(10, (
"__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1135 gtid, loc_ref, new_taskdata ) );
1140 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED )
1142 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1143 new_taskdata->td_flags.task_serial = 1;
1144 __kmp_invoke_task( gtid, new_task, current_task );
    KA_TRACE(10, ( "__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
                   "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
                   new_taskdata ) );

    return TASK_CURRENT_NOT_QUEUED;
1164 __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task,
bool serialize_immediate )
1166 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1171 if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED )
1173 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED )
1176 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1177 if ( serialize_immediate )
1178 new_taskdata -> td_flags.task_serial = 1;
1179 __kmp_invoke_task( gtid, new_task, current_task );
1183 return TASK_CURRENT_NOT_QUEUED;
1198 __kmpc_omp_task(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1200 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1203 KA_TRACE(10, (
"__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1204 gtid, loc_ref, new_taskdata ) );
    res = __kmp_omp_task(gtid, new_task, true);
1208 KA_TRACE(10, (
"__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1209 gtid, loc_ref, new_taskdata ) );
1217 __kmpc_omp_taskwait(
ident_t *loc_ref, kmp_int32 gtid )
1219 kmp_taskdata_t * taskdata;
1220 kmp_info_t * thread;
1221 int thread_finished = FALSE;
    KA_TRACE(10, ( "__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );
1226 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1229 thread = __kmp_threads[ gtid ];
1230 taskdata = thread -> th.th_current_task;
1234 taskdata->td_taskwait_counter += 1;
1235 taskdata->td_taskwait_ident = loc_ref;
1236 taskdata->td_taskwait_thread = gtid + 1;
1239 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1240 if ( itt_sync_obj != NULL )
1241 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1245 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1247 if ( ! taskdata->td_flags.team_serial )
1251 kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
1252 while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
1253 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1254 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1258 if ( itt_sync_obj != NULL )
1259 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1263 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1266 KA_TRACE(10, (
"__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1267 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1269 return TASK_CURRENT_NOT_QUEUED;
1277 __kmpc_omp_taskyield(
ident_t *loc_ref, kmp_int32 gtid,
int end_part )
1279 kmp_taskdata_t * taskdata;
1280 kmp_info_t * thread;
1281 int thread_finished = FALSE;
1283 KA_TRACE(10, (
"__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1284 gtid, loc_ref, end_part) );
1286 if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
1289 thread = __kmp_threads[ gtid ];
1290 taskdata = thread -> th.th_current_task;
1295 taskdata->td_taskwait_counter += 1;
1296 taskdata->td_taskwait_ident = loc_ref;
1297 taskdata->td_taskwait_thread = gtid + 1;
1300 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1301 if ( itt_sync_obj != NULL )
1302 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1304 if ( ! taskdata->td_flags.team_serial ) {
1305 kmp_task_team_t * task_team = thread->th.th_task_team;
1306 if (task_team != NULL) {
1307 if (KMP_TASKING_ENABLED(task_team)) {
1308 __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1309 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1314 if ( itt_sync_obj != NULL )
1315 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1319 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1322 KA_TRACE(10, (
"__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1323 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1325 return TASK_CURRENT_NOT_QUEUED;
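// Taskgroup support: __kmpc_taskgroup pushes a new kmp_taskgroup_t (count,
// cancel_request, parent) onto the current task's td_taskgroup chain;
// __kmpc_end_taskgroup executes or steals tasks until count reaches zero,
// then pops and frees the record.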
1334 __kmpc_taskgroup(
ident_t* loc,
int gtid )
1336 kmp_info_t * thread = __kmp_threads[ gtid ];
1337 kmp_taskdata_t * taskdata = thread->th.th_current_task;
    kmp_taskgroup_t * tg_new = (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1340 KA_TRACE(10, (
"__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1342 tg_new->cancel_request = cancel_noreq;
1343 tg_new->parent = taskdata->td_taskgroup;
1344 taskdata->td_taskgroup = tg_new;
1353 __kmpc_end_taskgroup(
ident_t* loc,
int gtid )
1355 kmp_info_t * thread = __kmp_threads[ gtid ];
1356 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1357 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1358 int thread_finished = FALSE;
1360 KA_TRACE(10, (
"__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1361 KMP_DEBUG_ASSERT( taskgroup != NULL );
1363 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1366 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1367 if ( itt_sync_obj != NULL )
1368 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1372 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1374 if ( ! taskdata->td_flags.team_serial )
1377 kmp_flag_32 flag(&(taskgroup->count), 0U);
1378 while ( TCR_4(taskgroup->count) != 0 ) {
1379 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1380 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1385 if ( itt_sync_obj != NULL )
1386 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1389 KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1392 taskdata->td_taskgroup = taskgroup->parent;
1393 __kmp_thread_free( thread, taskgroup );
1395 KA_TRACE(10, (
"__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1404 __kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1405 kmp_int32 is_constrained )
1408 kmp_taskdata_t * taskdata;
1409 kmp_thread_data_t *thread_data;
1412 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1413 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL );
1415 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1417 KA_TRACE(10, (
"__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1418 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1419 thread_data->td.td_deque_tail) );
1421 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1422 KA_TRACE(10, (
"__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1423 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1424 thread_data->td.td_deque_tail) );
1428 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1430 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1431 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1432 KA_TRACE(10, (
"__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1433 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1434 thread_data->td.td_deque_tail) );
1438 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK;
1439 taskdata = thread_data -> td.td_deque[ tail ];
1441 if (is_constrained) {
1444 kmp_taskdata_t * current = thread->th.th_current_task;
1445 kmp_int32 level = current->td_level;
1446 kmp_taskdata_t * parent = taskdata->td_parent;
1447 while ( parent != current && parent->td_level > level ) {
1448 parent = parent->td_parent;
1449 KMP_DEBUG_ASSERT(parent != NULL);
1451 if ( parent != current ) {
1453 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1454 KA_TRACE(10, (
"__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1455 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1456 thread_data->td.td_deque_tail) );
1461 thread_data -> td.td_deque_tail = tail;
1462 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1464 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1466 KA_TRACE(10, (
"__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1467 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1468 thread_data->td.td_deque_tail) );
1470 task = KMP_TASKDATA_TO_TASK( taskdata );
1481 __kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1482 volatile kmp_uint32 *unfinished_threads,
int *thread_finished,
1483 kmp_int32 is_constrained )
1486 kmp_taskdata_t * taskdata;
1487 kmp_thread_data_t *victim_td, *threads_data;
1488 kmp_int32 victim_tid, thread_tid;
1490 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1492 threads_data = task_team -> tt.tt_threads_data;
1493 KMP_DEBUG_ASSERT( threads_data != NULL );
1495 victim_tid = victim->th.th_info.ds.ds_tid;
1496 victim_td = & threads_data[ victim_tid ];
1498 KA_TRACE(10, (
"__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1499 "head=%u tail=%u\n",
1500 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1501 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1503 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1504 (TCR_PTR(victim->th.th_task_team) != task_team))
1506 KA_TRACE(10, (
"__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1507 "ntasks=%d head=%u tail=%u\n",
1508 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1509 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1513 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1516 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1517 (TCR_PTR(victim->th.th_task_team) != task_team))
1519 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1520 KA_TRACE(10, (
"__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1521 "ntasks=%d head=%u tail=%u\n",
1522 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1523 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1527 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1529 if ( !is_constrained ) {
1530 taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1532 victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1535 kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK;
1536 taskdata = victim_td -> td.td_deque[ tail ];
1539 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1540 kmp_int32 level = current->td_level;
1541 kmp_taskdata_t * parent = taskdata->td_parent;
1542 while ( parent != current && parent->td_level > level ) {
1543 parent = parent->td_parent;
1544 KMP_DEBUG_ASSERT(parent != NULL);
1546 if ( parent != current ) {
1548 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1549 KA_TRACE(10, (
"__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1550 "ntasks=%d head=%u tail=%u\n",
1551 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1552 task_team, victim_td->td.td_deque_ntasks,
1553 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1556 victim_td -> td.td_deque_tail = tail;
1558 if (*thread_finished) {
1562 kmp_uint32 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
1564 KA_TRACE(20, (
"__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1565 gtid, count + 1, task_team) );
1567 *thread_finished = FALSE;
1569 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1571 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1573 KA_TRACE(10, (
"__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
1574 "ntasks=%d head=%u tail=%u\n",
1575 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1576 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1577 victim_td->td.td_deque_tail) );
1579 task = KMP_TASKDATA_TO_TASK( taskdata );
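// __kmp_execute_tasks_template is the scheduling loop run by a waiting
// thread: drain the thread's own deque, then retry the last successful
// victim, then probe random victims.  Between phases it re-checks the wait
// flag, and on a final spin it decrements tt_unfinished_threads once its own
// outstanding work is done so the barrier can complete.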
template <class C>
static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
                                               int *thread_finished
                                               USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1598 kmp_task_team_t * task_team;
1600 kmp_thread_data_t * threads_data;
1602 kmp_taskdata_t * current_task = thread -> th.th_current_task;
1603 volatile kmp_uint32 * unfinished_threads;
1604 kmp_int32 nthreads, last_stolen, k, tid;
1606 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1607 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1609 task_team = thread -> th.th_task_team;
1610 KMP_DEBUG_ASSERT( task_team != NULL );
1612 KA_TRACE(15, (
"__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
1613 gtid, final_spin, *thread_finished) );
1615 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1616 KMP_DEBUG_ASSERT( threads_data != NULL );
1618 nthreads = task_team -> tt.tt_nproc;
1619 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
1621 KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
1623 KMP_DEBUG_ASSERT( nthreads > 1 );
    KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1629 while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1630 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1631 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1632 if ( itt_sync_obj == NULL ) {
1634 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1636 __kmp_itt_task_starting( itt_sync_obj );
1639 __kmp_invoke_task( gtid, task, current_task );
1641 if ( itt_sync_obj != NULL )
1642 __kmp_itt_task_finished( itt_sync_obj );
1650 if (flag == NULL || (!final_spin && flag->done_check())) {
1651 KA_TRACE(15, (
"__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
1654 KMP_YIELD( __kmp_library == library_throughput );
1661 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1669 if (! *thread_finished) {
1670 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1671 KA_TRACE(20, (
"__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
1672 gtid, count, task_team) );
1673 *thread_finished = TRUE;
1681 if (flag != NULL && flag->done_check()) {
1682 KA_TRACE(15, (
"__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
1689 if ( nthreads == 1 )
1694 tid = thread -> th.th_info.ds.ds_tid;
1695 last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1697 if (last_stolen != -1) {
1698 kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1700 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1701 thread_finished, is_constrained )) != NULL)
1703 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1704 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1705 if ( itt_sync_obj == NULL ) {
1707 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1709 __kmp_itt_task_starting( itt_sync_obj );
1712 __kmp_invoke_task( gtid, task, current_task );
1714 if ( itt_sync_obj != NULL )
1715 __kmp_itt_task_finished( itt_sync_obj );
1719 if (flag == NULL || (!final_spin && flag->done_check())) {
1720 KA_TRACE(15, (
"__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
1725 KMP_YIELD( __kmp_library == library_throughput );
1728 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1729 KA_TRACE(20, (
"__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
1736 threads_data[ tid ].td.td_deque_last_stolen = -1;
1742 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1750 if (! *thread_finished) {
1751 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1752 KA_TRACE(20, (
"__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
1753 "task_team=%p\n", gtid, count, task_team) );
1754 *thread_finished = TRUE;
1763 if (flag != NULL && flag->done_check()) {
1764 KA_TRACE(15, (
"__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
1777 k = __kmp_get_random( thread ) % (nthreads - 1);
1778 if ( k >= thread -> th.th_info.ds.ds_tid ) {
1782 kmp_info_t *other_thread = threads_data[k].td.td_thr;
1792 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1793 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1794 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1796 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
1807 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1808 thread_finished, is_constrained )) != NULL)
1810 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1811 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1812 if ( itt_sync_obj == NULL ) {
1814 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1816 __kmp_itt_task_starting( itt_sync_obj );
1819 __kmp_invoke_task( gtid, task, current_task );
1821 if ( itt_sync_obj != NULL )
1822 __kmp_itt_task_finished( itt_sync_obj );
1827 threads_data[ tid ].td.td_deque_last_stolen = k;
1832 if (flag == NULL || (!final_spin && flag->done_check())) {
1833 KA_TRACE(15, (
"__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
1837 KMP_YIELD( __kmp_library == library_throughput );
1841 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1842 KA_TRACE(20, (
"__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
1855 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1863 if (! *thread_finished) {
1864 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1865 KA_TRACE(20, (
"__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
1867 gtid, count, task_team) );
1868 *thread_finished = TRUE;
1877 if (flag != NULL && flag->done_check()) {
1878 KA_TRACE(15, (
"__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
1884 KA_TRACE(15, (
"__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
                           int *thread_finished
                           USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
{
    return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
                                        USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
                           int *thread_finished
                           USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
{
    return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
                                        USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
                               int *thread_finished
                               USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
{
    return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
                                        USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
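// __kmp_enable_tasking is called the first time a task is actually pushed.
// It (re)allocates the per-thread data array for the task team and, when the
// blocktime is finite, wakes any team members that are already sleeping so
// they can start executing tasks.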
1920 __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
1922 kmp_team_t *team = this_thr->th.th_team;
1923 kmp_thread_data_t *threads_data;
1924 int nthreads, i, is_init_thread;
1926 KA_TRACE( 10, (
"__kmp_enable_tasking(enter): T#%d\n",
1927 __kmp_gtid_from_thread( this_thr ) ) );
1929 KMP_DEBUG_ASSERT(task_team != NULL);
1930 KMP_DEBUG_ASSERT(team != NULL);
1932 nthreads = task_team->tt.tt_nproc;
1933 KMP_DEBUG_ASSERT(nthreads > 0);
1934 KMP_DEBUG_ASSERT(nthreads == team->t.t_nproc);
1937 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
1939 if (!is_init_thread) {
1941 KA_TRACE( 20, (
"__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
1942 __kmp_gtid_from_thread( this_thr ) ) );
1945 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1946 KMP_DEBUG_ASSERT( threads_data != NULL );
1948 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1949 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
1954 for (i = 0; i < nthreads; i++) {
1955 volatile void *sleep_loc;
1956 kmp_info_t *thread = threads_data[i].td.td_thr;
1958 if (i == this_thr->th.th_info.ds.ds_tid) {
1968 if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
1970 KF_TRACE( 50, (
"__kmp_enable_tasking: T#%d waking up thread T#%d\n",
1971 __kmp_gtid_from_thread( this_thr ),
1972 __kmp_gtid_from_thread( thread ) ) );
1973 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
1976 KF_TRACE( 50, (
"__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
1977 __kmp_gtid_from_thread( this_thr ),
1978 __kmp_gtid_from_thread( thread ) ) );
1983 KA_TRACE( 10, (
"__kmp_enable_tasking(exit): T#%d\n",
1984 __kmp_gtid_from_thread( this_thr ) ) );
2024 static kmp_task_team_t *__kmp_free_task_teams = NULL;
2026 static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
2038 __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2040 __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
2041 KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
2044 thread_data -> td.td_deque_last_stolen = -1;
2046 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
2047 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
2048 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
2050 KE_TRACE( 10, (
"__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
2051 __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
    thread_data -> td.td_deque = (kmp_taskdata_t **)
        __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *) );
2066 __kmp_free_task_deque( kmp_thread_data_t *thread_data )
2068 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2070 if ( thread_data -> td.td_deque != NULL ) {
2071 TCW_4(thread_data -> td.td_deque_ntasks, 0);
2072 __kmp_free( thread_data -> td.td_deque );
2073 thread_data -> td.td_deque = NULL;
2075 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
2077 #ifdef BUILD_TIED_TASK_STACK
2079 if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2080 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
2082 #endif // BUILD_TIED_TASK_STACK
2096 __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
2098 kmp_thread_data_t ** threads_data_p;
2099 kmp_int32 nthreads, maxthreads;
2100 int is_init_thread = FALSE;
2102 if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
2107 threads_data_p = & task_team -> tt.tt_threads_data;
2108 nthreads = task_team -> tt.tt_nproc;
2109 maxthreads = task_team -> tt.tt_max_threads;
2113 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2115 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2117 kmp_team_t *team = thread -> th.th_team;
2120 is_init_thread = TRUE;
2121 if ( maxthreads < nthreads ) {
2123 if ( *threads_data_p != NULL ) {
2124 kmp_thread_data_t *old_data = *threads_data_p;
2125 kmp_thread_data_t *new_data = NULL;
2127 KE_TRACE( 10, (
"__kmp_realloc_task_threads_data: T#%d reallocating "
2128 "threads data for task_team %p, new_size = %d, old_size = %d\n",
2129 __kmp_gtid_from_thread( thread ), task_team,
2130 nthreads, maxthreads ) );
            new_data = (kmp_thread_data_t *)
                __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
            // Copy the old per-thread data into the new, larger array.
            KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
                          (void *) old_data,
                          maxthreads * sizeof(kmp_taskdata_t *) );
2142 #ifdef BUILD_TIED_TASK_STACK
2144 for (i = maxthreads; i < nthreads; i++) {
2145 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2146 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2148 #endif // BUILD_TIED_TASK_STACK
2150 (*threads_data_p) = new_data;
2151 __kmp_free( old_data );
2154 KE_TRACE( 10, (
"__kmp_realloc_task_threads_data: T#%d allocating "
2155 "threads data for task_team %p, size = %d\n",
2156 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
            *threads_data_p = (kmp_thread_data_t *)
                __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2162 #ifdef BUILD_TIED_TASK_STACK
2164 for (i = 0; i < nthreads; i++) {
2165 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2166 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2168 #endif // BUILD_TIED_TASK_STACK
2170 task_team -> tt.tt_max_threads = nthreads;
2174 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2178 for (i = 0; i < nthreads; i++) {
2179 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2180 thread_data -> td.td_thr = team -> t.t_threads[i];
2182 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2186 thread_data -> td.td_deque_last_stolen = -1;
2191 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2194 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2195 return is_init_thread;
2205 __kmp_free_task_threads_data( kmp_task_team_t *task_team )
2207 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2208 if ( task_team -> tt.tt_threads_data != NULL ) {
2210 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2211 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2213 __kmp_free( task_team -> tt.tt_threads_data );
2214 task_team -> tt.tt_threads_data = NULL;
2216 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
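// Task teams are recycled: __kmp_free_task_team puts them on the global
// __kmp_free_task_teams freelist under __kmp_task_team_lock,
// __kmp_allocate_task_team pops from that list before falling back to
// __kmp_allocate, and __kmp_reap_task_teams releases the remaining entries.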
2225 static kmp_task_team_t *
2226 __kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2228 kmp_task_team_t *task_team = NULL;
2231 KA_TRACE( 20, (
"__kmp_allocate_task_team: T#%d entering; team = %p\n",
2232 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2234 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2236 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2237 if (__kmp_free_task_teams != NULL) {
2238 task_team = __kmp_free_task_teams;
2239 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2240 task_team -> tt.tt_next = NULL;
2242 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2245 if (task_team == NULL) {
2246 KE_TRACE( 10, (
"__kmp_allocate_task_team: T#%d allocating "
2247 "task team for team %p\n",
2248 __kmp_gtid_from_thread( thread ), team ) );
2252 task_team = (kmp_task_team_t *) __kmp_allocate(
sizeof(kmp_task_team_t) );
2253 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2259 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2261 TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
2263 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2265 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2266 TCW_4( task_team -> tt.tt_active, TRUE );
2267 TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
2269 KA_TRACE( 20, (
"__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
2270 (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
2282 __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2284 KA_TRACE( 20, (
"__kmp_free_task_team: T#%d task_team = %p\n",
2285 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2287 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
2290 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2292 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2293 task_team -> tt.tt_next = __kmp_free_task_teams;
2294 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2295 TCW_PTR(__kmp_free_task_teams, task_team);
2297 __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2308 __kmp_reap_task_teams(
void )
2310 kmp_task_team_t *task_team;
2312 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2314 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2315 while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2316 __kmp_free_task_teams = task_team -> tt.tt_next;
2317 task_team -> tt.tt_next = NULL;
2320 if ( task_team -> tt.tt_threads_data != NULL ) {
2321 __kmp_free_task_threads_data( task_team );
2323 __kmp_free( task_team );
2325 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2337 __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
2341 ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
2343 KA_TRACE( 20, (
"__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
2344 __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
2347 if ( ref_ct == 0 ) {
2348 __kmp_free_task_team( thread, task_team );
    TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
2361 __kmp_wait_to_unref_task_teams(
void)
2367 KMP_INIT_YIELD( spins );
        for (thread = (kmp_info_t *)__kmp_thread_pool;
             thread != NULL;
             thread = thread->th.th_next_pool)
2384 if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2385 KA_TRACE( 10, (
"__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2386 __kmp_gtid_from_thread( thread ) ) );
2391 if (!__kmp_is_thread_alive(thread, &exit_val)) {
2392 if (TCR_PTR(thread->th.th_task_team) != NULL) {
2393 __kmp_unref_task_team( thread->th.th_task_team, thread );
2401 KA_TRACE( 10, (
"__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2402 __kmp_gtid_from_thread( thread ) ) );
2404 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
2405 volatile void *sleep_loc;
2407 if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
2408 KA_TRACE( 10, (
"__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2409 __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
2410 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
2421 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2422 KMP_YIELD_SPIN( spins );
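// Barrier-time protocol: __kmp_task_team_setup lets the master create the
// team's task team(s), __kmp_task_team_sync switches each thread's
// th_task_state/th_task_team to the other slot at the barrier, and
// __kmp_task_team_wait has the master wait for tt_unfinished_threads to
// reach zero before deactivating and detaching the task team.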
__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int both, int always )
2436 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2438 if ( ( team->t.t_task_team[this_thr->th.th_task_state] == NULL ) && ( always || team->t.t_nproc > 1 ) ) {
2446 team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
2447 KA_TRACE(20, (
"__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
2448 __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
2449 ((team != NULL) ? team->t.t_id : -1)));
2457 int other_team = 1 - this_thr->th.th_task_state;
2458 if ( ( team->t.t_task_team[other_team] == NULL ) && ( team->t.t_nproc > 1 ) ) {
2459 team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
2460 KA_TRACE( 20, (
"__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
2461 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2462 ((team != NULL) ? team->t.t_id : -1)) );
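
//------------------------------------------------------------------------------
// __kmp_task_team_sync:
// Called just after the release phase of a team barrier: toggle this thread's
// th_task_state and pick up the team's task team pointer for the new parity,
// unreferencing a no-longer-active task team first if necessary.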
void
__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
{
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );

    // If this thread's task team is no longer active, unreference it now.
    if ( this_thr->th.th_task_team != NULL ) {
        if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
            KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
            __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
        } else {
            KMP_DEBUG_ASSERT( this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state] );
        }
    }

    // Toggle th_task_state and pick up the task team pointer for the new parity.
    this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
    TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
    KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
                    __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
                    ((team != NULL) ? team->t.t_id : -1) ) );
}
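
//------------------------------------------------------------------------------
// __kmp_task_team_wait:
// Called by the master after the barrier gather phase: wait until all
// outstanding tasks have completed (tt_unfinished_threads drains to zero),
// then deactivate the task team and detach it from the team and the thread.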
void
__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
                      USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
    kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];

    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );

    if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
        KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
                        __kmp_gtid_from_thread( this_thr ), task_team ) );
        // Worker threads may still be executing tasks; wait for all of them to finish.
        kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
        flag.wait(this_thr, TRUE
                  USE_ITT_BUILD_ARG(itt_sync_obj));

        // Deactivate the old task team so spinning workers stop referencing it.
        KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
                        __kmp_gtid_from_thread( this_thr ), task_team ) );
#if OMP_41_ENABLED
        KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
        TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
#else
        KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
#endif
        TCW_SYNC_4( task_team->tt.tt_active, FALSE );

        TCW_PTR(this_thr->th.th_task_team, NULL);
        team->t.t_task_team[this_thr->th.th_task_state] = NULL;
    }
}
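
//------------------------------------------------------------------------------
// __kmp_tasking_barrier:
// Only used when __kmp_tasking_mode == tskm_extra_barrier: execute tasks until
// tt_unfinished_threads drains to zero, effectively adding an extra barrier
// phase before the regular barrier proceeds.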
void
__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
{
    volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
    int flag = FALSE;

    KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );

    KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
    kmp_flag_32 spin_flag(spin, 0U);
    while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
                                     USE_ITT_BUILD_ARG(NULL), 0 ) ) {
        KMP_FSYNC_SPIN_PREPARE( spin );

        if( TCR_4(__kmp_global.g.g_done) ) {
            if( __kmp_global.g.g_abort )
                __kmp_abort_thread( );
            break;
        }
        KMP_YIELD( TRUE );
    }
    KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
}
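
// __kmp_give_task: try to enqueue a task onto the deque of the thread
// identified by tid.  Returns false if that thread has no deque or the deque
// is full, in which case the caller is expected to try another thread.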
static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task )
{
    kmp_task_team_t *   task_team   = thread->th.th_task_team;
    kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
    kmp_taskdata_t *    taskdata    = KMP_TASK_TO_TASKDATA(task);
    bool result = false;

    KA_TRACE(20, ( "__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );

    KMP_DEBUG_ASSERT( task_team != NULL );

    if ( thread_data -> td.td_deque == NULL ) {
        // The target thread has no deque; the caller must try another thread.
        KA_TRACE(30, ( "__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
        return result;
    }

    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE ) {
        KA_TRACE(30, ( "__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
        return result;
    }

    __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );

    // Re-check under the lock; the deque may have filled in the meantime.
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE ) {
        KA_TRACE(30, ( "__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
        goto release_and_exit;
    }

    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
    // Wrap the tail index around the power-of-two deque size.
    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);

    result = true;
    KA_TRACE(30, ( "__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );

release_and_exit:
    __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );

    return result;
}
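
/* The finish of a proxy task is divided into two halves:
   - the top half can run on any thread, even one outside the team;
   - the bottom half must run on a thread within the team.

   To run the bottom half, the task is queued back onto one of the team's
   threads.  The bottom half must be queued before the parent's
   td_incomplete_child_tasks counter is decremented, so the top half is itself
   split into a first part (run before queuing the bottom half) and a second
   part (run after).  Because the bottom half could otherwise free the task
   before the second top half has run, the proxy task's own
   td_incomplete_child_tasks counter is used to synchronize the two halves. */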
static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
{
    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    taskdata -> td_flags.complete = 1;   // mark the task as completed

    if ( taskdata->td_taskgroup )
        KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );

    // Create an imaginary child so the bottom half cannot release the task
    // before the second top half has run.
    TCR_4(taskdata->td_incomplete_child_tasks++);
}
static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
{
    kmp_int32 children = 0;

    children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
    KMP_DEBUG_ASSERT( children >= 0 );

    // Remove the imaginary child created by the first top half.
    TCR_4(taskdata->td_incomplete_child_tasks--);
}
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
    kmp_info_t * thread = __kmp_threads[ gtid ];

    KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 );  // top half must have run first

    // Wait until the second top half has removed the imaginary child; this should be brief.
    while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 )
        ;

    __kmp_release_deps(gtid,taskdata);
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);
}
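
/*!
@ingroup TASKING
@param gtid Global Thread ID of the encountering thread
@param ptask Task whose execution is complete

Complete a proxy task from a thread that is part of the task's team: both top
halves and the bottom half can run directly on this thread.
*/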
void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
{
    KMP_DEBUG_ASSERT( ptask != NULL );
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);

    KA_TRACE(10, ( "__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );

    KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );

    __kmp_first_top_half_finish_proxy(taskdata);
    __kmp_second_top_half_finish_proxy(taskdata);
    __kmp_bottom_half_finish_proxy(gtid,ptask);

    KA_TRACE(10, ( "__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
}
void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
{
    KMP_DEBUG_ASSERT( ptask != NULL );
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);

    KA_TRACE(10, ( "__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );

    KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );

    __kmp_first_top_half_finish_proxy(taskdata);

    // Enqueue the bottom half so that a thread of the task's team completes it.
    kmp_team_t * team = taskdata->td_team;
    kmp_int32 nthreads = team->t.t_nproc;
    kmp_info_t *thread;
    kmp_int32 k = 0;

    do {
        // Linearly probe for a thread whose deque can accept the task.
        k = (k+1) % nthreads;
        thread = team->t.t_threads[k];
    } while ( !__kmp_give_task( thread, k, ptask ) );

    __kmp_second_top_half_finish_proxy(taskdata);

    KA_TRACE(10, ( "__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
}