#include "kmp_error.h"

#define MAX_MESSAGE 512

#define KMP_DEBUG_REF_CTS(x)    KF_TRACE(1, x);

#define THREAD_ALLOC_FOR_TASKQ
static int
in_parallel_context( kmp_team_t *team )
{
    return ! team -> t.t_serialized;
}
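/* Ordered-section entry hook for TASKQ constructs: the calling thread waits
   until its current thunk's th_tasknum matches the queue's tq_tasknum_serving
   counter, so ordered tasks execute in queuing order. */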
static void
__kmp_taskq_eo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
    int tid  = __kmp_tid_from_gtid( gtid );
    kmp_uint32 my_token;
    kmpc_task_queue_t *taskq;
    kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq;

    if ( __kmp_env_consistency_check )
#if KMP_USE_DYNAMIC_LOCK
        __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL, 0 );
#else
        __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL );
#endif

    if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) {
        my_token = tq->tq_curr_thunk[ tid ]-> th_tasknum;

        taskq = tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue;

        KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
    }
}
static void
__kmp_taskq_xo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
    int tid  = __kmp_tid_from_gtid( gtid );
    kmp_uint32 my_token;
    kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq;

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( gtid, ct_ordered_in_taskq, loc_ref );

    if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) {
        my_token = tq->tq_curr_thunk[ tid ]->th_tasknum;

        tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue -> tq_tasknum_serving = my_token + 1;
    }
}
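/* Waits, if necessary, until this thunk's turn in the ordered sequence
   arrives, then advances the queue's tq_tasknum_serving counter past it. */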
static void
__kmp_taskq_check_ordered( kmp_int32 gtid, kmpc_thunk_t *thunk )
{
    kmp_uint32 my_token;
    kmpc_task_queue_t *taskq;

    my_token = thunk -> th_tasknum;

    taskq = thunk -> th.th_shareds -> sv_queue;

    if (taskq->tq_tasknum_serving <= my_token) {
        KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
        taskq->tq_tasknum_serving = my_token + 1;
    }
}
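/* Debug dump helpers: print the TQF_* flag bits, a single thunk, the
   per-thread thunk stack, one task queue, and the whole queue tree. */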
static void
__kmp_dump_TQF(kmp_int32 flags)
{
    if (flags & TQF_IS_ORDERED)
        __kmp_printf("ORDERED ");
    if (flags & TQF_IS_LASTPRIVATE)
        __kmp_printf("LAST_PRIV ");
    if (flags & TQF_IS_NOWAIT)
        __kmp_printf("NOWAIT ");
    if (flags & TQF_HEURISTICS)
        __kmp_printf("HEURIST ");
    if (flags & TQF_INTERFACE_RESERVED1)
        __kmp_printf("RESERV1 ");
    if (flags & TQF_INTERFACE_RESERVED2)
        __kmp_printf("RESERV2 ");
    if (flags & TQF_INTERFACE_RESERVED3)
        __kmp_printf("RESERV3 ");
    if (flags & TQF_INTERFACE_RESERVED4)
        __kmp_printf("RESERV4 ");
    if (flags & TQF_IS_LAST_TASK)
        __kmp_printf("LAST_TASK ");
    if (flags & TQF_TASKQ_TASK)
        __kmp_printf("TASKQ_TASK ");
    if (flags & TQF_RELEASE_WORKERS)
        __kmp_printf("RELEASE ");
    if (flags & TQF_ALL_TASKS_QUEUED)
        __kmp_printf("ALL_QUEUED ");
    if (flags & TQF_PARALLEL_CONTEXT)
        __kmp_printf("PARALLEL ");
    if (flags & TQF_DEALLOCATED)
        __kmp_printf("DEALLOC ");
    if (!(flags & (TQF_INTERNAL_FLAGS|TQF_INTERFACE_FLAGS)))
        __kmp_printf("(NONE)");
}
static void
__kmp_dump_thunk( kmp_taskq_t *tq, kmpc_thunk_t *thunk, kmp_int32 global_tid )
{
    int i;
    int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;

    __kmp_printf("\tThunk at %p on (%d): ", thunk, global_tid);

    for (i = 0; i < nproc; i++) {
        if( tq->tq_curr_thunk[i] == thunk ) {
            __kmp_printf("[%i] ", i);
        }
    }

    __kmp_printf("th_shareds=%p, ",    thunk->th.th_shareds);
    __kmp_printf("th_task=%p, ",       thunk->th_task);
    __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk);
    __kmp_printf("th_status=%d, ",     thunk->th_status);
    __kmp_printf("th_tasknum=%u, ",    thunk->th_tasknum);
    __kmp_printf("th_flags=");  __kmp_dump_TQF(thunk->th_flags);
}
static void
__kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num)
{
    kmpc_thunk_t *th;

    __kmp_printf(" Thunk stack for T#%d: ", thread_num);

    for (th = thunk; th != NULL; th = th->th_encl_thunk )
        __kmp_printf("%p ", th);
}
static void
__kmp_dump_task_queue( kmp_taskq_t *tq, kmpc_task_queue_t *queue, kmp_int32 global_tid )
{
    int qs, count, i;
    kmpc_thunk_t *thunk;
    kmpc_task_queue_t *taskq;

    __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid);

    int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT;

    if ( __kmp_env_consistency_check ) {
        __kmp_printf("    tq_loc             : ");
    }

    __kmp_printf("    tq_parent          : %p\n", queue->tq.tq_parent);
    __kmp_printf("    tq_first_child     : %p\n", queue->tq_first_child);
    __kmp_printf("    tq_next_child      : %p\n", queue->tq_next_child);
    __kmp_printf("    tq_prev_child      : %p\n", queue->tq_prev_child);
    __kmp_printf("    tq_ref_count       : %d\n", queue->tq_ref_count);

    __kmp_printf("    tq_shareds         : ");
    for (i=0; i<((queue == tq->tq_root) ? queue->tq_nproc : 1); i++)
        __kmp_printf("%p ", queue->tq_shareds[i].ai_data);

    __kmp_printf("    tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing);
    __kmp_printf("    tq_tasknum_serving : %u\n", queue->tq_tasknum_serving);

    __kmp_printf("    tq_queue           : %p\n", queue->tq_queue);
    __kmp_printf("    tq_thunk_space     : %p\n", queue->tq_thunk_space);
    __kmp_printf("    tq_taskq_slot      : %p\n", queue->tq_taskq_slot);

    __kmp_printf("    tq_free_thunks     : ");
    for (thunk = queue->tq_free_thunks; thunk != NULL; thunk = thunk->th.th_next_free )
        __kmp_printf("%p ", thunk);

    __kmp_printf("    tq_nslots          : %d\n", queue->tq_nslots);
    __kmp_printf("    tq_head            : %d\n", queue->tq_head);
    __kmp_printf("    tq_tail            : %d\n", queue->tq_tail);
    __kmp_printf("    tq_nfull           : %d\n", queue->tq_nfull);
    __kmp_printf("    tq_hiwat           : %d\n", queue->tq_hiwat);
    __kmp_printf("    tq_flags           : ");  __kmp_dump_TQF(queue->tq_flags);

    __kmp_printf("    tq_th_thunks       : ");
    for (i = 0; i < queue->tq_nproc; i++) {
        __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data);
    }

    __kmp_printf("    Queue slots:\n");

    qs = queue->tq_tail;
    for ( count = 0; count < queue->tq_nfull; ++count ) {
        __kmp_printf("(%d)", qs);
        __kmp_dump_thunk( tq, queue->tq_queue[qs].qs_thunk, global_tid );
        qs = (qs+1) % queue->tq_nslots;
    }

    if (queue->tq_taskq_slot != NULL) {
        __kmp_printf("    TaskQ slot:\n");
        __kmp_dump_thunk( tq, (kmpc_thunk_t *) queue->tq_taskq_slot, global_tid );
    }

    __kmp_printf("    Taskq freelist: ");

    for( taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free )
        __kmp_printf("%p ", taskq);

    __kmp_printf("\n\n");
}
static void
__kmp_aux_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *curr_queue, kmp_int32 level, kmp_int32 global_tid )
{
    int i, count, qs;
    int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;
    kmpc_task_queue_t *queue = curr_queue;

    if (curr_queue == NULL)
        return;

    for (i=0; i<level; i++)
        __kmp_printf("  ");

    __kmp_printf("%p", curr_queue);

    for (i = 0; i < nproc; i++) {
        if( tq->tq_curr_thunk[i] && tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue ) {
            __kmp_printf(" [%i]", i);
        }
    }

    qs = curr_queue->tq_tail;

    for ( count = 0; count < curr_queue->tq_nfull; ++count ) {
        __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk);
        qs = (qs+1) % curr_queue->tq_nslots;
    }

    if (curr_queue->tq_first_child) {
        for (queue = (kmpc_task_queue_t *)curr_queue->tq_first_child;
             queue != NULL;
             queue = queue->tq_next_child) {
            __kmp_aux_dump_task_queue_tree( tq, queue, level+1, global_tid );
        }
    }
}
static void
__kmp_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *tqroot, kmp_int32 global_tid)
{
    __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid);

    __kmp_aux_dump_task_queue_tree( tq, tqroot, 0, global_tid );
}
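/* Cache-aligned allocator used by the TASKQ code: it over-allocates by
   sizeof(void *) + CACHE_LINE, rounds the usable region up to a cache-line
   boundary, and stores the original malloc pointer in the word just before
   the returned address so __kmpc_taskq_free() can recover it. */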
static void *
__kmp_taskq_allocate( size_t size, kmp_int32 global_tid )
{
    void *addr, *orig_addr;
    size_t bytes;

    KB_TRACE( 5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int) size, global_tid ) );

    bytes = sizeof(void *) + CACHE_LINE + size;

#ifdef THREAD_ALLOC_FOR_TASKQ
    orig_addr = (void *) __kmp_thread_malloc( __kmp_thread_from_gtid(global_tid), bytes );
#else
    KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", bytes ) );
    orig_addr = (void *) KMP_INTERNAL_MALLOC( bytes );
#endif /* THREAD_ALLOC_FOR_TASKQ */

    if (orig_addr == 0)
        KMP_FATAL( OutOfHeapMemory );

    addr = orig_addr;

    if (((kmp_uintptr_t) addr & ( CACHE_LINE - 1 )) != 0) {
        KB_TRACE( 50, ("__kmp_taskq_allocate: adjust for cache alignment\n" ) );
        addr = (void *) (((kmp_uintptr_t) addr + CACHE_LINE) & ~( CACHE_LINE - 1 ));
    }

    (* (void **) addr) = orig_addr;

    KB_TRACE( 10, ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, gtid: %d\n",
              orig_addr, ((void **) addr) + 1, ((char *)(((void **) addr) + 1)) + size-1,
              (int) size, global_tid ));

    return ( ((void **) addr) + 1 );
}
static void
__kmpc_taskq_free( void *p, kmp_int32 global_tid )
{
    KB_TRACE( 5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid ) );

    KB_TRACE(10, ("__kmpc_taskq_free: freeing: %p, gtid: %d\n", (*( ((void **) p)-1)), global_tid ));

#ifdef THREAD_ALLOC_FOR_TASKQ
    __kmp_thread_free( __kmp_thread_from_gtid(global_tid), *( ((void **) p)-1) );
#else
    KMP_INTERNAL_FREE( *( ((void **) p)-1) );
#endif /* THREAD_ALLOC_FOR_TASKQ */
}
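/* Allocates (or recycles from tq_freelist) a task queue descriptor together
   with its thunk space, queue slots, shared-variable storage, and per-thread
   outstanding-thunk counters.  Thunks are carved from one cache-aligned chunk
   and threaded onto the queue's free list; the last thunk in the chunk is
   handed back to the caller as the taskq thunk. */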
static kmpc_task_queue_t *
__kmp_alloc_taskq ( kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots, kmp_int32 nthunks,
                    kmp_int32 nshareds, kmp_int32 nproc, size_t sizeof_thunk,
                    size_t sizeof_shareds, kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid )
{
    kmp_int32 i;
    size_t bytes;
    char *pt;
    kmpc_task_queue_t *new_queue;
    kmpc_aligned_shared_vars_t *shared_var_array;
    char *shared_var_storage;

    __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );

    if( tq->tq_freelist ) {
        new_queue = tq -> tq_freelist;
        tq -> tq_freelist = tq -> tq_freelist -> tq.tq_next_free;

        KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED);

        new_queue->tq_flags = 0;

        __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
    }
    else {
        __kmp_release_lock( & tq->tq_freelist_lck, global_tid );

        new_queue = (kmpc_task_queue_t *) __kmp_taskq_allocate (sizeof (kmpc_task_queue_t), global_tid);
        new_queue->tq_flags = 0;
    }

    sizeof_thunk += (CACHE_LINE - (sizeof_thunk % CACHE_LINE));
    pt = (char *) __kmp_taskq_allocate (nthunks * sizeof_thunk, global_tid);
    new_queue->tq_thunk_space = (kmpc_thunk_t *)pt;
    *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk);

    new_queue->tq_free_thunks = (kmpc_thunk_t *)pt;

    for (i = 0; i < (nthunks - 2); i++) {
        ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th.th_next_free = (kmpc_thunk_t *)(pt + (i+1)*sizeof_thunk);
        ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th_flags = TQF_DEALLOCATED;
    }

    ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th.th_next_free = NULL;
    ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th_flags = TQF_DEALLOCATED;

    __kmp_init_lock( & new_queue->tq_link_lck );
    __kmp_init_lock( & new_queue->tq_free_thunks_lck );
    __kmp_init_lock( & new_queue->tq_queue_lck );

    bytes = nslots * sizeof (kmpc_aligned_queue_slot_t);
    new_queue->tq_queue = (kmpc_aligned_queue_slot_t *) __kmp_taskq_allocate( bytes, global_tid );

    sizeof_shareds += sizeof(kmpc_task_queue_t *);
    sizeof_shareds += (CACHE_LINE - (sizeof_shareds % CACHE_LINE));

    bytes = nshareds * sizeof (kmpc_aligned_shared_vars_t);
    shared_var_array = (kmpc_aligned_shared_vars_t *) __kmp_taskq_allocate ( bytes, global_tid);

    bytes = nshareds * sizeof_shareds;
    shared_var_storage = (char *) __kmp_taskq_allocate ( bytes, global_tid);

    for (i=0; i<nshareds; i++) {
        shared_var_array[i].ai_data = (kmpc_shared_vars_t *) (shared_var_storage + i*sizeof_shareds);
        shared_var_array[i].ai_data->sv_queue = new_queue;
    }

    new_queue->tq_shareds = shared_var_array;

    bytes = nproc * sizeof(kmpc_aligned_int32_t);
    new_queue->tq_th_thunks = (kmpc_aligned_int32_t *) __kmp_taskq_allocate ( bytes, global_tid);
    new_queue->tq_nproc = nproc;

    for (i=0; i<nproc; i++)
        new_queue->tq_th_thunks[i].ai_data = 0;

    return new_queue;
}
static void
__kmp_free_taskq (kmp_taskq_t *tq, kmpc_task_queue_t *p, int in_parallel, kmp_int32 global_tid)
{
    int i;

    __kmpc_taskq_free(p->tq_thunk_space, global_tid);
    __kmpc_taskq_free(p->tq_queue, global_tid);

    __kmpc_taskq_free((void *) p->tq_shareds[0].ai_data, global_tid);

    __kmpc_taskq_free(p->tq_shareds, global_tid);

    p->tq_first_child = NULL;
    p->tq_next_child = NULL;
    p->tq_prev_child = NULL;
    p->tq_ref_count = -10;
    p->tq_shareds = NULL;
    p->tq_tasknum_queuing = 0;
    p->tq_tasknum_serving = 0;

    p->tq_thunk_space = NULL;
    p->tq_taskq_slot = NULL;
    p->tq_free_thunks = NULL;

    for (i=0; i<p->tq_nproc; i++)
        p->tq_th_thunks[i].ai_data = 0;

    if ( __kmp_env_consistency_check )
        p->tq_loc = NULL;

    KMP_DEBUG_ASSERT( p->tq_flags & TQF_DEALLOCATED );
    p->tq_flags = TQF_DEALLOCATED;

    __kmpc_taskq_free(p->tq_th_thunks, global_tid);
    __kmp_destroy_lock(& p->tq_link_lck);
    __kmp_destroy_lock(& p->tq_queue_lck);
    __kmp_destroy_lock(& p->tq_free_thunks_lck);

    p->tq_th_thunks = NULL;

    __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
    p->tq.tq_next_free = tq->tq_freelist;
    tq->tq_freelist = p;
    __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
}
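/* Thunk free-list management: thunks are popped from and pushed back onto
   queue->tq_free_thunks under tq_free_thunks_lck. */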
static kmpc_thunk_t *
__kmp_alloc_thunk (kmpc_task_queue_t *queue, int in_parallel, kmp_int32 global_tid)
{
    kmpc_thunk_t *fl;

    __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);

    fl = queue->tq_free_thunks;

    KMP_DEBUG_ASSERT (fl != NULL);

    queue->tq_free_thunks = fl->th.th_next_free;

    __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);

    return fl;
}
static void
__kmp_free_thunk (kmpc_task_queue_t *queue, kmpc_thunk_t *p, int in_parallel, kmp_int32 global_tid)
{
    p->th_encl_thunk = 0;

    __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);

    p->th.th_next_free = queue->tq_free_thunks;
    queue->tq_free_thunks = p;

    p->th_flags = TQF_DEALLOCATED;

    __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
}
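/* Enqueues a thunk into the circular buffer tq_queue under tq_queue_lck; in
   the parallel case the return value reports whether the queue is now full. */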
static kmp_int32
__kmp_enqueue_task ( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, kmpc_thunk_t *thunk,
                     int in_parallel )
{
    kmp_int32 ret;

    __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);

    KMP_DEBUG_ASSERT (queue->tq_nfull < queue->tq_nslots);

    queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk;

    if (queue->tq_head >= queue->tq_nslots)
        queue->tq_head = 0;

    (queue->tq_nfull)++;

    ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE;

    __kmp_release_lock(& queue->tq_queue_lck, global_tid);

    if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) {
        tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;
    }

    return ret;
}
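/* Dequeues the next thunk from the circular buffer and charges it to the
   calling thread's outstanding-thunk counter; a non-root queue's reference
   count is bumped while the thunk is outstanding. */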
static kmpc_thunk_t *
__kmp_dequeue_task (kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel)
{
    kmpc_thunk_t *pt;
    int tid = __kmp_tid_from_gtid( global_tid );

    KMP_DEBUG_ASSERT (queue->tq_nfull > 0);

    if (queue->tq.tq_parent != NULL && in_parallel) {
        int ct;
        __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
        ct = ++(queue->tq_ref_count);
        __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
          __LINE__, global_tid, queue, ct));
    }

    pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk;

    if (queue->tq_tail >= queue->tq_nslots)
        queue->tq_tail = 0;

    queue->tq_th_thunks[tid].ai_data++;

    KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding thunks from queue %p\n",
        global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue));

    (queue->tq_nfull)--;

    KMP_DEBUG_ASSERT(queue->tq_nfull >= 0);

    KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <= __KMP_TASKQ_THUNKS_PER_TH);

    return pt;
}
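/* Selects work from a single queue: the taskq thunk in tq_taskq_slot is
   preferred once the queue has drained to its high-water mark; otherwise a
   regular thunk is dequeued, except that the last task of a lastprivate queue
   is held back until TQF_IS_LAST_TASK has been set. */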
static kmpc_thunk_t *
__kmp_find_task_in_queue (kmp_int32 global_tid, kmpc_task_queue_t *queue)
{
    kmpc_thunk_t *pt = NULL;
    int tid = __kmp_tid_from_gtid( global_tid );

    if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {

        __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);

        if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {

            if ((queue->tq_taskq_slot != NULL) && (queue->tq_nfull <= queue->tq_hiwat)) {
                pt = (kmpc_thunk_t *) queue->tq_taskq_slot;
                queue->tq_taskq_slot = NULL;
            }
            else if (queue->tq_nfull == 0 ||
                     queue->tq_th_thunks[tid].ai_data >= __KMP_TASKQ_THUNKS_PER_TH) {
                pt = NULL;
            }
            else if (queue->tq_nfull > 1) {
                pt = __kmp_dequeue_task (global_tid, queue, TRUE);
            }
            else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) {
                pt = __kmp_dequeue_task (global_tid, queue, TRUE);
            }
            else if (queue->tq_flags & TQF_IS_LAST_TASK) {
                pt = __kmp_dequeue_task (global_tid, queue, TRUE);
                pt->th_flags |= TQF_IS_LAST_TASK;
            }
        }

        __kmp_release_lock(& queue->tq_queue_lck, global_tid);
    }

    return pt;
}
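/* Work stealing downward: walks the child queues (recursively) looking for a
   thunk, using tq_ref_count to keep a child alive while it is examined. */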
static kmpc_thunk_t *
__kmp_find_task_in_descendant_queue (kmp_int32 global_tid, kmpc_task_queue_t *curr_queue)
{
    kmpc_thunk_t *pt = NULL;
    kmpc_task_queue_t *queue = curr_queue;

    if (curr_queue->tq_first_child != NULL) {
        __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

        queue = (kmpc_task_queue_t *) curr_queue->tq_first_child;
        if (queue == NULL) {
            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
            return NULL;
        }

        while (queue != NULL) {
            int ct;
            kmpc_task_queue_t *next;

            ct = ++(queue->tq_ref_count);
            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
              __LINE__, global_tid, queue, ct));

            pt = __kmp_find_task_in_queue (global_tid, queue);

            if (pt != NULL) {
                __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

                ct = --(queue->tq_ref_count);
                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
                  __LINE__, global_tid, queue, ct));
                KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 );

                __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);

                return pt;
            }

            pt = __kmp_find_task_in_descendant_queue (global_tid, queue);

            if (pt != NULL) {
                __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

                ct = --(queue->tq_ref_count);
                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
                  __LINE__, global_tid, queue, ct));
                KMP_DEBUG_ASSERT( ct >= 0 );

                __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);

                return pt;
            }

            __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

            next = queue->tq_next_child;

            ct = --(queue->tq_ref_count);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
              __LINE__, global_tid, queue, ct));
            KMP_DEBUG_ASSERT( ct >= 0 );

            queue = next;
        }

        __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
    }

    return pt;
}
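/* Work stealing upward: walks the ancestor queues looking for a thunk and,
   failing that, falls back to a scan of the tree below the root queue. */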
static kmpc_thunk_t *
__kmp_find_task_in_ancestor_queue (kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue)
{
    kmpc_task_queue_t *queue;
    kmpc_thunk_t *pt = NULL;

    if (curr_queue->tq.tq_parent != NULL) {
        queue = curr_queue->tq.tq_parent;

        while (queue != NULL) {
            if (queue->tq.tq_parent != NULL) {
                int ct;
                __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

                ct = ++(queue->tq_ref_count);
                __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
                  __LINE__, global_tid, queue, ct));
            }

            pt = __kmp_find_task_in_queue (global_tid, queue);

            if (pt != NULL) {
                if (queue->tq.tq_parent != NULL) {
                    int ct;
                    __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

                    ct = --(queue->tq_ref_count);
                    KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
                      __LINE__, global_tid, queue, ct));
                    KMP_DEBUG_ASSERT( ct >= 0 );

                    __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
                }

                return pt;
            }

            if (queue->tq.tq_parent != NULL) {
                int ct;
                __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

                ct = --(queue->tq_ref_count);
                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
                  __LINE__, global_tid, queue, ct));
                KMP_DEBUG_ASSERT( ct >= 0 );
            }
            queue = queue->tq.tq_parent;

            if (queue != NULL)
                __kmp_release_lock(& queue->tq_link_lck, global_tid);
        }
    }

    pt = __kmp_find_task_in_descendant_queue( global_tid, tq->tq_root );

    return pt;
}
static int
__kmp_taskq_tasks_finished (kmpc_task_queue_t *queue)
{
    int i;

    for (i=0; i<queue->tq_nproc; i++) {
        if (queue->tq_th_thunks[i].ai_data != 0)
            return FALSE;
    }

    return TRUE;
}

static int
__kmp_taskq_has_any_children (kmpc_task_queue_t *queue)
{
    return (queue->tq_first_child != NULL);
}
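/* Unlinks a finished, childless sub-queue from its parent, waits for its
   reference count to drop to one, and returns its storage to the free list. */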
static void
__kmp_remove_queue_from_tree( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel )
{
    int i;
    kmpc_thunk_t *thunk;

    KF_TRACE(50, ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
    KF_DUMP(50, __kmp_dump_task_queue( tq, queue, global_tid ));

    KMP_DEBUG_ASSERT (queue->tq.tq_parent != NULL);

    __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

    KMP_DEBUG_ASSERT (queue->tq_first_child == NULL);

    if (queue->tq_prev_child != NULL)
        queue->tq_prev_child->tq_next_child = queue->tq_next_child;
    if (queue->tq_next_child != NULL)
        queue->tq_next_child->tq_prev_child = queue->tq_prev_child;
    if (queue->tq.tq_parent->tq_first_child == queue)
        queue->tq.tq_parent->tq_first_child = queue->tq_next_child;

    queue->tq_prev_child = NULL;
    queue->tq_next_child = NULL;

    KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n",
      __LINE__, global_tid, queue, queue->tq_ref_count));

    while (queue->tq_ref_count > 1) {
        __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

        KMP_WAIT_YIELD((volatile kmp_uint32*)&queue->tq_ref_count, 1, KMP_LE, NULL);

        __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
    }

    __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

    KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p freeing queue\n",
      __LINE__, global_tid, queue));

    KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED);
    KMP_DEBUG_ASSERT(queue->tq_nfull == 0);

    for (i=0; i<queue->tq_nproc; i++) {
        KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
    }

    i = 0;
    for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free)
        ++i;

    KMP_ASSERT (i == queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH));

    __kmp_free_taskq ( tq, queue, TRUE, global_tid );

    KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
    KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));
}
static void
__kmp_find_and_remove_finished_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue )
{
    kmpc_task_queue_t *queue = curr_queue;

    if (curr_queue->tq_first_child != NULL) {
        __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

        queue = (kmpc_task_queue_t *) curr_queue->tq_first_child;
        if (queue != NULL) {
            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
            return;
        }

        while (queue != NULL) {
            kmpc_task_queue_t *next;
            int ct = ++(queue->tq_ref_count);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
              __LINE__, global_tid, queue, ct));

            if (queue->tq_flags & TQF_IS_NOWAIT) {
                __kmp_find_and_remove_finished_child_taskq ( tq, global_tid, queue );

                if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) && (queue->tq_nfull == 0) &&
                    __kmp_taskq_tasks_finished(queue) && ! __kmp_taskq_has_any_children(queue)) {

                    if ( __kmp_test_lock(& queue->tq_queue_lck, global_tid) ) {
                        if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
                            queue->tq_flags |= TQF_DEALLOCATED;
                            __kmp_release_lock(& queue->tq_queue_lck, global_tid);

                            __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE );

                            return;
                        }

                        __kmp_release_lock(& queue->tq_queue_lck, global_tid);
                    }
                }
            }

            __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

            next = queue->tq_next_child;

            ct = --(queue->tq_ref_count);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
              __LINE__, global_tid, queue, ct));
            KMP_DEBUG_ASSERT( ct >= 0 );

            queue = next;
        }

        __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
    }
}
static void
__kmp_remove_all_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue )
{
    kmpc_task_queue_t *next_child;

    queue = (kmpc_task_queue_t *) queue->tq_first_child;

    while (queue != NULL) {
        __kmp_remove_all_child_taskq ( tq, global_tid, queue );

        next_child = queue->tq_next_child;
        queue->tq_flags |= TQF_DEALLOCATED;
        __kmp_remove_queue_from_tree ( tq, global_tid, queue, FALSE );

        queue = next_child;
    }
}
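/* Runs one thunk: binds the proper shared-variable copy, pushes the thunk on
   the per-thread thunk stack, invokes th_task(), then handles ordered
   completion, frees the thunk, and drops the outstanding-thunk and reference
   counts. */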
static void
__kmp_execute_task_from_queue( kmp_taskq_t *tq, ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, int in_parallel )
{
    kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue;
    kmp_int32 tid = __kmp_tid_from_gtid( global_tid );

    KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));
    KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid));
    KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

    if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
        kmp_int32 index = (queue == tq->tq_root) ? tid : 0;
        thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[index].ai_data;

        if ( __kmp_env_consistency_check ) {
            __kmp_push_workshare( global_tid,
                    (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task,
                    queue->tq_loc );
        }
    }
    else {
        if ( __kmp_env_consistency_check )
            __kmp_push_workshare( global_tid, ct_taskq, queue->tq_loc );
    }

    thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
    tq->tq_curr_thunk[tid] = thunk;

    KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));

    KF_TRACE( 50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid));
    thunk->th_task (global_tid, thunk);
    KF_TRACE( 50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid));

    if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
        if ( __kmp_env_consistency_check )
            __kmp_pop_workshare( global_tid, (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task,
                                 queue->tq_loc );

        tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
        thunk->th_encl_thunk = NULL;
        KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));

        if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) {
            __kmp_taskq_check_ordered(global_tid, thunk);
        }

        __kmp_free_thunk (queue, thunk, in_parallel, global_tid);

        KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n", global_tid, thunk));
        KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

        KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1);

        KF_TRACE( 200, ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n",
            global_tid, queue->tq_th_thunks[tid].ai_data-1, queue));

        queue->tq_th_thunks[tid].ai_data--;

        if (queue->tq.tq_parent != NULL && in_parallel) {
            int ct;
            __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
            ct = --(queue->tq_ref_count);
            __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
              __LINE__, global_tid, queue, ct));
            KMP_DEBUG_ASSERT( ct >= 0 );
        }
    }
}
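/* Compiler interface: starts a taskq.  Sizes the queue (2*nproc slots in
   parallel, one slot when serialized), allocates it, links it into the queue
   tree, and returns the thunk that will run the taskq task itself. */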
kmpc_thunk_t *
__kmpc_taskq( ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task,
              size_t sizeof_thunk, size_t sizeof_shareds,
              kmp_int32 flags, kmpc_shared_vars_t **shareds )
{
    int in_parallel;
    kmp_int32 nslots, nthunks, nshareds, nproc;
    kmpc_task_queue_t *new_queue, *curr_queue;
    kmpc_thunk_t *new_taskq_thunk;
    kmp_info_t *th;
    kmp_team_t *team;
    kmp_taskq_t *tq;
    kmp_int32 tid;

    KE_TRACE( 10, ("__kmpc_taskq called (%d)\n", global_tid));

    th = __kmp_threads[ global_tid ];
    team = th -> th.th_team;
    tq = & team -> t.t_taskq;
    nproc = team -> t.t_nproc;
    tid = __kmp_tid_from_gtid( global_tid );

    in_parallel = in_parallel_context( team );

    if( ! tq->tq_root ) {
        if (in_parallel) {
            th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;

            th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;

            if ( __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) {

                *shareds = (kmpc_shared_vars_t *) tq->tq_root->tq_shareds[tid].ai_data;

                KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid));

                return NULL;
            }
        }

        if( tq->tq_curr_thunk_capacity < nproc ) {
            if(tq->tq_curr_thunk)
                __kmp_free(tq->tq_curr_thunk);
            else
                __kmp_init_lock( & tq->tq_freelist_lck );

            tq->tq_curr_thunk = (kmpc_thunk_t **) __kmp_allocate( nproc * sizeof(kmpc_thunk_t *) );
            tq -> tq_curr_thunk_capacity = nproc;
        }

        tq->tq_global_flags = TQF_RELEASE_WORKERS;
    }

    nslots = (in_parallel) ? (2 * nproc) : 1;

    nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1) : nslots + 2;

    nshareds = ( !tq->tq_root && in_parallel) ? nproc : 1;

    new_queue = __kmp_alloc_taskq ( tq, in_parallel, nslots, nthunks, nshareds, nproc,
        sizeof_thunk, sizeof_shareds, &new_taskq_thunk, global_tid );

    new_queue->tq_flags = flags & TQF_INTERFACE_FLAGS;

    if (in_parallel) {
        new_queue->tq_tasknum_queuing = 0;
        new_queue->tq_tasknum_serving = 0;
        new_queue->tq_flags |= TQF_PARALLEL_CONTEXT;
    }

    new_queue->tq_taskq_slot = NULL;
    new_queue->tq_nslots = nslots;
    new_queue->tq_hiwat = HIGH_WATER_MARK (nslots);
    new_queue->tq_nfull = 0;
    new_queue->tq_head = 0;
    new_queue->tq_tail = 0;
    new_queue->tq_loc = loc;

    if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) {
        new_queue->tq_tasknum_serving = 1;

        th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;

        th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
    }

    *shareds = (kmpc_shared_vars_t *) new_queue->tq_shareds[0].ai_data;

    new_taskq_thunk->th.th_shareds = *shareds;
    new_taskq_thunk->th_task       = taskq_task;
    new_taskq_thunk->th_flags      = new_queue->tq_flags | TQF_TASKQ_TASK;
    new_taskq_thunk->th_status     = 0;

    KMP_DEBUG_ASSERT (new_taskq_thunk->th_flags & TQF_TASKQ_TASK);

    if (in_parallel) {
        if( ! tq->tq_root ) {
            new_queue->tq.tq_parent   = NULL;
            new_queue->tq_first_child = NULL;
            new_queue->tq_next_child  = NULL;
            new_queue->tq_prev_child  = NULL;
            new_queue->tq_ref_count   = 1;
            tq->tq_root = new_queue;
        }
        else {
            curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
            new_queue->tq.tq_parent   = curr_queue;
            new_queue->tq_first_child = NULL;
            new_queue->tq_prev_child  = NULL;
            new_queue->tq_ref_count   = 1;

            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n",
              __LINE__, global_tid, new_queue, new_queue->tq_ref_count));

            __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

            new_queue->tq_next_child = (struct kmpc_task_queue_t *) curr_queue->tq_first_child;

            if (curr_queue->tq_first_child != NULL)
                curr_queue->tq_first_child->tq_prev_child = new_queue;

            curr_queue->tq_first_child = new_queue;

            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
        }

        new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
        tq->tq_curr_thunk[tid] = new_taskq_thunk;

        KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
    }
    else {
        new_taskq_thunk->th_encl_thunk = 0;
        new_queue->tq.tq_parent   = NULL;
        new_queue->tq_first_child = NULL;
        new_queue->tq_next_child  = NULL;
        new_queue->tq_prev_child  = NULL;
        new_queue->tq_ref_count   = 1;
    }

    KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid));
    KF_DUMP(150, __kmp_dump_thunk( tq, new_taskq_thunk, global_tid ));

    if (in_parallel) {
        KF_TRACE(25, ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
    } else {
        KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
    }

    KF_DUMP(25, __kmp_dump_task_queue( tq, new_queue, global_tid ));

    KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));

    if ( __kmp_env_consistency_check )
        __kmp_push_workshare( global_tid, ct_taskq, new_queue->tq_loc );

    KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid));

    return new_taskq_thunk;
}
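/* Compiler interface: ends a taskq.  Threads loop here executing local work
   and stealing from descendants (and, for inner queues, ancestors) until all
   tasks are queued and finished; the owning thread then tears the queue down. */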
void
__kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk)
{
    kmp_taskq_t *tq;
    int in_parallel;
    kmp_info_t *th;
    kmp_int32 is_outermost;
    kmpc_task_queue_t *queue;
    kmpc_thunk_t *thunk;
    int nproc;
    kmp_int32 i;
    kmp_uint32 spins;

    KE_TRACE( 10, ("__kmpc_end_taskq called (%d)\n", global_tid));

    tq = & __kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;

    queue = (taskq_thunk == NULL) ? tq->tq_root : taskq_thunk->th.th_shareds->sv_queue;

    KE_TRACE( 50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid));
    is_outermost = (queue == tq->tq_root);
    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

    if (in_parallel) {
        if (is_outermost && (KMP_MASTER_GTID( global_tid ))) {
            if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) {
                tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;

                __kmp_end_split_barrier( bs_plain_barrier, global_tid );
            }
        }

        do {
            KMP_INIT_YIELD(spins);

            while ( (queue->tq_nfull == 0)
                 && (queue->tq_taskq_slot == NULL)
                 && (! __kmp_taskq_has_any_children(queue) )
                 && (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED) )
                  ) {
                KMP_YIELD_WHEN( TRUE, spins );
            }

            while ( ( (queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL) )
                 && (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL
                  ) {
                KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk, queue, global_tid));
                __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
            }

            if ( (__kmp_taskq_has_any_children(queue))
              && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL
               ) {
                KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
                    thunk, thunk->th.th_shareds->sv_queue, queue, global_tid ));

                __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
            }

        } while ( (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED))
               || (queue->tq_nfull != 0)
                );

        KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue, global_tid));

        while ( (!__kmp_taskq_tasks_finished(queue))
             && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL
              ) {
            KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
                thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));

            __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
        }

        KF_TRACE(50, ("No work found in descendent queues or all work finished in queue: %p (%d)\n", queue, global_tid));

        if (!is_outermost) {
            if (queue->tq_flags & TQF_IS_NOWAIT) {
                __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
                queue->tq_ref_count--;
                KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 );
                __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

                KE_TRACE( 10, ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid));

                return;
            }

            __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue );

            KMP_INIT_YIELD(spins);

            while (!__kmp_taskq_tasks_finished(queue) || __kmp_taskq_has_any_children(queue)) {
                thunk = __kmp_find_task_in_ancestor_queue( tq, global_tid, queue );

                if (thunk != NULL) {
                    KF_TRACE(50, ("Stole thunk: %p in ancestor queue: %p while waiting in queue: %p (%d)\n",
                        thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
                    __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
                }

                KMP_YIELD_WHEN( thunk == NULL, spins );

                __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue );
            }

            __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
            if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
                queue->tq_flags |= TQF_DEALLOCATED;
            }
            __kmp_release_lock(& queue->tq_queue_lck, global_tid);

            if (taskq_thunk != NULL) {
                __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE );
            }

            KE_TRACE( 10, ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n", global_tid));

            return;
        }

        KMP_INIT_YIELD(spins);

        while (!__kmp_taskq_tasks_finished(queue)) {
            thunk = __kmp_find_task_in_descendant_queue(global_tid, queue);

            if (thunk != NULL) {
                KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
                    thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));

                __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
            }

            KMP_YIELD_WHEN( thunk == NULL, spins );
        }

        if ( !__kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) {
            __kmp_remove_all_child_taskq( tq, global_tid, queue );

            KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n", global_tid, queue ));
            KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

            KMP_DEBUG_ASSERT ((queue->tq.tq_parent == NULL) && (queue->tq_next_child == NULL));

            KMP_DEBUG_ASSERT (queue->tq_first_child == NULL);

            for (i=0; i<nproc; i++) {
                KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
            }

            for (i=0, thunk=queue->tq_free_thunks; thunk != NULL; i++, thunk=thunk->th.th_next_free);

            KMP_DEBUG_ASSERT (i == queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH));

            for (i = 0; i < nproc; i++) {
                KMP_DEBUG_ASSERT( ! tq->tq_curr_thunk[i] );
            }

            tq -> tq_root = NULL;

            KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue, global_tid));

            queue->tq_flags |= TQF_DEALLOCATED;
            __kmp_free_taskq ( tq, queue, in_parallel, global_tid );

            KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));

            __kmp_end_split_barrier( bs_plain_barrier, global_tid );
        }

        th = __kmp_threads[ global_tid ];

        th->th.th_dispatch->th_deo_fcn = 0;

        th->th.th_dispatch->th_dxo_fcn = 0;
    }
    else {
        if (queue->tq_nfull > 0) {
            KMP_DEBUG_ASSERT(queue->tq_nfull == 1);

            thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);

            if (queue->tq_flags & TQF_IS_LAST_TASK) {
                thunk->th_flags |= TQF_IS_LAST_TASK;
            }

            KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, thunk, queue));

            __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
        }

        KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n", queue, global_tid));
        KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

        i = 0;
        for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free)
            ++i;
        KMP_DEBUG_ASSERT (i == queue->tq_nslots + 1);

        KF_TRACE(50, ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid));

        queue->tq_flags |= TQF_DEALLOCATED;
        __kmp_free_taskq ( tq, queue, in_parallel, global_tid );
    }

    KE_TRACE( 10, ("__kmpc_end_taskq return (%d)\n", global_tid));
}

/* Compiler interface: enqueues one regular task thunk.  In the serialized
   case the previously buffered task is executed first, since the serial queue
   holds at most one pending thunk. */
kmp_int32
__kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk)
{
    kmp_taskq_t *tq;
    kmpc_task_queue_t *queue;
    int in_parallel;
    kmp_int32 ret;

    KE_TRACE( 10, ("__kmpc_task called (%d)\n", global_tid));

    KMP_DEBUG_ASSERT (!(thunk->th_flags & TQF_TASKQ_TASK));

    tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    queue = thunk->th.th_shareds->sv_queue;
    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

    if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED))
        thunk->th_tasknum = ++queue->tq_tasknum_queuing;

    if (!in_parallel && queue->tq_nfull > 0) {
        kmpc_thunk_t *prev_thunk;

        KMP_DEBUG_ASSERT(queue->tq_nfull == 1);

        prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);

        KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, prev_thunk, queue));

        __kmp_execute_task_from_queue( tq, loc, global_tid, prev_thunk, in_parallel );
    }

    KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));

    ret = __kmp_enqueue_task ( tq, global_tid, queue, thunk, in_parallel );

    KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

    KE_TRACE( 10, ("__kmpc_task return (%d)\n", global_tid));

    return ret;
}

/* Compiler interface: re-enqueues the taskq thunk itself (with its status)
   into the queue's dedicated tq_taskq_slot so it can be dispatched again. */
void
__kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status)
{
    kmpc_task_queue_t *queue;
    kmp_taskq_t *tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    int tid = __kmp_tid_from_gtid( global_tid );

    KE_TRACE( 10, ("__kmpc_taskq_task called (%d)\n", global_tid));
    KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));

    queue = thunk->th.th_shareds->sv_queue;

    if ( __kmp_env_consistency_check )
        __kmp_pop_workshare( global_tid, ct_taskq, loc );

    KMP_DEBUG_ASSERT (thunk->th_flags & TQF_TASKQ_TASK);

    KMP_DEBUG_ASSERT (queue->tq_taskq_slot == NULL);

    tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
    thunk->th_encl_thunk = NULL;

    KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));

    thunk->th_status = status;

    queue->tq_taskq_slot = thunk;

    KE_TRACE( 10, ("__kmpc_taskq_task return (%d)\n", global_tid));
}
void
__kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk)
{
    kmp_taskq_t *tq;
    kmpc_task_queue_t *queue;
    int in_parallel;
    int tid;

    KE_TRACE( 10, ("__kmpc_end_taskq_task called (%d)\n", global_tid));

    tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    queue = thunk->th.th_shareds->sv_queue;
    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
    tid = __kmp_tid_from_gtid( global_tid );

    if ( __kmp_env_consistency_check )
        __kmp_pop_workshare( global_tid, ct_taskq, loc );

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_ALL_TASKS_QUEUED );
#else
    {
        __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);

        queue->tq_flags |= TQF_ALL_TASKS_QUEUED;

        __kmp_release_lock(& queue->tq_queue_lck, global_tid);
    }
#endif

    if (thunk->th_flags & TQF_IS_LASTPRIVATE) {
        if (! in_parallel) {
            queue->tq_flags |= TQF_IS_LAST_TASK;
        }
        else {
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
            KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_IS_LAST_TASK );
#else
            {
                __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);

                queue->tq_flags |= TQF_IS_LAST_TASK;

                __kmp_release_lock(& queue->tq_queue_lck, global_tid);
            }
#endif
        }
    }

    tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
    thunk->th_encl_thunk = NULL;

    KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));

    KE_TRACE( 10, ("__kmpc_end_taskq_task return (%d)\n", global_tid));
}

/* Compiler interface: allocates and initializes the thunk that the compiler
   will fill in before calling __kmpc_task(). */
kmpc_thunk_t *
__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task)
{
    kmp_taskq_t *tq;
    kmpc_task_queue_t *queue;
    kmpc_thunk_t *new_thunk;
    int in_parallel;

    KE_TRACE( 10, ("__kmpc_task_buffer called (%d)\n", global_tid));

    KMP_DEBUG_ASSERT (taskq_thunk->th_flags & TQF_TASKQ_TASK);

    tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    queue = taskq_thunk->th.th_shareds->sv_queue;
    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

    new_thunk = __kmp_alloc_thunk (queue, in_parallel, global_tid);
    new_thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[0].ai_data;
    new_thunk->th_encl_thunk = NULL;
    new_thunk->th_task       = task;

    new_thunk->th_flags      = queue->tq_flags & TQF_INTERFACE_FLAGS;

    new_thunk->th_status     = 0;

    KMP_DEBUG_ASSERT (!(new_thunk->th_flags & TQF_TASKQ_TASK));

    KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_thunk( tq, new_thunk, global_tid ));

    KE_TRACE( 10, ("__kmpc_task_buffer return (%d)\n", global_tid));

    return new_thunk;
}