#include "kmp_error.h"

#define MAX_MESSAGE 512

#define KMP_DEBUG_REF_CTS(x)    KF_TRACE(1, x);

#define THREAD_ALLOC_FOR_TASKQ
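
/*
 * Returns nonzero when the enclosing team is executing a real parallel
 * region (not serialized).  The taskq code below branches on this to decide
 * whether the locking and barrier paths are needed.
 */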
static int
in_parallel_context( kmp_team_t *team )
    return ! team -> t.t_serialized;
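
/*
 * Ordered-entry hook installed in th_deo_fcn while a taskq is active:
 * an ordered task may not start until the queue's tq_tasknum_serving
 * counter reaches this thunk's th_tasknum.
 */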
static void
__kmp_taskq_eo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
    int tid = __kmp_tid_from_gtid( gtid );

    kmpc_task_queue_t *taskq;
    kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq;

    if ( __kmp_env_consistency_check )
        __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL );

    if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) {
        my_token = tq->tq_curr_thunk[ tid ]-> th_tasknum;

        taskq = tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue;

        KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
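
/*
 * Ordered-exit hook installed in th_dxo_fcn: releases the next ordered
 * task by advancing the queue's tq_tasknum_serving counter.
 */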
static void
__kmp_taskq_xo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
    int tid = __kmp_tid_from_gtid( gtid );

    kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq;

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( gtid, ct_ordered_in_taskq, loc_ref );

    if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) {
        my_token = tq->tq_curr_thunk[ tid ]->th_tasknum;

        tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue -> tq_tasknum_serving = my_token + 1;
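
/*
 * Enforces ordered completion for a thunk executed in a parallel taskq:
 * wait until tq_tasknum_serving catches up with this thunk's th_tasknum,
 * then advance it past this task.
 */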
static void
__kmp_taskq_check_ordered( kmp_int32 gtid, kmpc_thunk_t *thunk )
    kmpc_task_queue_t *taskq;

    my_token = thunk -> th_tasknum;

    taskq = thunk -> th.th_shareds -> sv_queue;

    if (taskq->tq_tasknum_serving <= my_token) {
        KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);

        taskq->tq_tasknum_serving = my_token + 1;
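
/*
 * Debug helpers: print the TQF_* flag bits, a single thunk, a thread's
 * thunk stack, one task queue, and the whole queue tree.  These are only
 * reached through the KF_DUMP tracing macros.
 */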
static void
__kmp_dump_TQF(kmp_int32 flags)
    if (flags & TQF_IS_ORDERED)
        __kmp_printf("ORDERED ");
    if (flags & TQF_IS_LASTPRIVATE)
        __kmp_printf("LAST_PRIV ");
    if (flags & TQF_IS_NOWAIT)
        __kmp_printf("NOWAIT ");
    if (flags & TQF_HEURISTICS)
        __kmp_printf("HEURIST ");
    if (flags & TQF_INTERFACE_RESERVED1)
        __kmp_printf("RESERV1 ");
    if (flags & TQF_INTERFACE_RESERVED2)
        __kmp_printf("RESERV2 ");
    if (flags & TQF_INTERFACE_RESERVED3)
        __kmp_printf("RESERV3 ");
    if (flags & TQF_INTERFACE_RESERVED4)
        __kmp_printf("RESERV4 ");
    if (flags & TQF_IS_LAST_TASK)
        __kmp_printf("LAST_TASK ");
    if (flags & TQF_TASKQ_TASK)
        __kmp_printf("TASKQ_TASK ");
    if (flags & TQF_RELEASE_WORKERS)
        __kmp_printf("RELEASE ");
    if (flags & TQF_ALL_TASKS_QUEUED)
        __kmp_printf("ALL_QUEUED ");
    if (flags & TQF_PARALLEL_CONTEXT)
        __kmp_printf("PARALLEL ");
    if (flags & TQF_DEALLOCATED)
        __kmp_printf("DEALLOC ");
    if (!(flags & (TQF_INTERNAL_FLAGS|TQF_INTERFACE_FLAGS)))
        __kmp_printf("(NONE)");
static void
__kmp_dump_thunk( kmp_taskq_t *tq, kmpc_thunk_t *thunk, kmp_int32 global_tid )
    int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;

    __kmp_printf("\tThunk at %p on (%d): ", thunk, global_tid);

    for (i = 0; i < nproc; i++) {
        if( tq->tq_curr_thunk[i] == thunk ) {
            __kmp_printf("[%i] ", i);

    __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds);
    __kmp_printf("th_task=%p, ", thunk->th_task);
    __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk);
    __kmp_printf("th_status=%d, ", thunk->th_status);
    __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum);
    __kmp_printf("th_flags="); __kmp_dump_TQF(thunk->th_flags);
static void
__kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num)
    __kmp_printf(" Thunk stack for T#%d: ", thread_num);

    for (th = thunk; th != NULL; th = th->th_encl_thunk )
        __kmp_printf("%p ", th);
static void
__kmp_dump_task_queue( kmp_taskq_t *tq, kmpc_task_queue_t *queue, kmp_int32 global_tid )
    kmpc_task_queue_t *taskq;

    __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid);

    int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT;

    if ( __kmp_env_consistency_check ) {
        __kmp_printf(" tq_loc : ");

    __kmp_printf(" tq_parent : %p\n", queue->tq.tq_parent);
    __kmp_printf(" tq_first_child : %p\n", queue->tq_first_child);
    __kmp_printf(" tq_next_child : %p\n", queue->tq_next_child);
    __kmp_printf(" tq_prev_child : %p\n", queue->tq_prev_child);
    __kmp_printf(" tq_ref_count : %d\n", queue->tq_ref_count);

    __kmp_printf(" tq_shareds : ");
    for (i=0; i<((queue == tq->tq_root) ? queue->tq_nproc : 1); i++)
        __kmp_printf("%p ", queue->tq_shareds[i].ai_data);

    __kmp_printf(" tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing);
    __kmp_printf(" tq_tasknum_serving : %u\n", queue->tq_tasknum_serving);

    __kmp_printf(" tq_queue : %p\n", queue->tq_queue);
    __kmp_printf(" tq_thunk_space : %p\n", queue->tq_thunk_space);
    __kmp_printf(" tq_taskq_slot : %p\n", queue->tq_taskq_slot);

    __kmp_printf(" tq_free_thunks : ");
    for (thunk = queue->tq_free_thunks; thunk != NULL; thunk = thunk->th.th_next_free )
        __kmp_printf("%p ", thunk);

    __kmp_printf(" tq_nslots : %d\n", queue->tq_nslots);
    __kmp_printf(" tq_head : %d\n", queue->tq_head);
    __kmp_printf(" tq_tail : %d\n", queue->tq_tail);
    __kmp_printf(" tq_nfull : %d\n", queue->tq_nfull);
    __kmp_printf(" tq_hiwat : %d\n", queue->tq_hiwat);
    __kmp_printf(" tq_flags : "); __kmp_dump_TQF(queue->tq_flags);

    __kmp_printf(" tq_th_thunks : ");
    for (i = 0; i < queue->tq_nproc; i++) {
        __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data);

    __kmp_printf(" Queue slots:\n");

    for ( count = 0; count < queue->tq_nfull; ++count ) {
        __kmp_printf("(%d)", qs);
        __kmp_dump_thunk( tq, queue->tq_queue[qs].qs_thunk, global_tid );
        qs = (qs+1) % queue->tq_nslots;

    if (queue->tq_taskq_slot != NULL) {
        __kmp_printf(" TaskQ slot:\n");
        __kmp_dump_thunk( tq, (kmpc_thunk_t *) queue->tq_taskq_slot, global_tid );

    __kmp_printf(" Taskq freelist: ");

    for( taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free )
        __kmp_printf("%p ", taskq);

    __kmp_printf("\n\n");
static void
__kmp_aux_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *curr_queue, kmp_int32 level, kmp_int32 global_tid )
    int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;
    kmpc_task_queue_t *queue = curr_queue;

    if (curr_queue == NULL)
        return;

    for (i=0; i<level; i++)
        __kmp_printf("  ");

    __kmp_printf("%p", curr_queue);

    for (i = 0; i < nproc; i++) {
        if( tq->tq_curr_thunk[i] && tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue ) {
            __kmp_printf(" [%i]", i);

    qs = curr_queue->tq_tail;

    for ( count = 0; count < curr_queue->tq_nfull; ++count ) {
        __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk);
        qs = (qs+1) % curr_queue->tq_nslots;

    if (curr_queue->tq_first_child) {

    if (curr_queue->tq_first_child) {
        for(queue = (kmpc_task_queue_t *)curr_queue->tq_first_child;
            queue != NULL;
            queue = queue->tq_next_child) {
            __kmp_aux_dump_task_queue_tree( tq, queue, level+1, global_tid );

static void
__kmp_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *tqroot, kmp_int32 global_tid)
    __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid);

    __kmp_aux_dump_task_queue_tree( tq, tqroot, 0, global_tid );
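
/*
 * Cache-line-aligned allocator used for all taskq storage.  One extra word
 * in front of the aligned block remembers the original malloc pointer so
 * that __kmpc_taskq_free can hand it back later.
 */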
static void *
__kmp_taskq_allocate( size_t size, kmp_int32 global_tid)
    void *addr, *orig_addr;

    KB_TRACE( 5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int) size, global_tid ) );

    bytes = sizeof(void *) + CACHE_LINE + size;

#ifdef THREAD_ALLOC_FOR_TASKQ
    orig_addr = (void *) __kmp_thread_malloc( __kmp_thread_from_gtid(global_tid), bytes );
#else
    KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", bytes ) );
    orig_addr = (void *) KMP_INTERNAL_MALLOC( bytes );
#endif /* THREAD_ALLOC_FOR_TASKQ */

    if (orig_addr == NULL)
        KMP_FATAL( OutOfHeapMemory );

    if (((kmp_uintptr_t) addr & ( CACHE_LINE - 1 )) != 0) {
        KB_TRACE( 50, ("__kmp_taskq_allocate: adjust for cache alignment\n" ) );
        addr = (void *) (((kmp_uintptr_t) addr + CACHE_LINE) & ~( CACHE_LINE - 1 ));
    }

    (* (void **) addr) = orig_addr;

    KB_TRACE( 10, ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, gtid: %d\n",
                   orig_addr, ((void **) addr) + 1, ((char *)(((void **) addr) + 1)) + size-1,
                   (int) size, global_tid ));

    return ( ((void **) addr) + 1 );
static void
__kmpc_taskq_free( void *p, kmp_int32 global_tid)
    KB_TRACE( 5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid ) );

    KB_TRACE(10, ("__kmpc_taskq_free: freeing: %p, gtid: %d\n", (*( ((void **) p)-1)), global_tid ));

#ifdef THREAD_ALLOC_FOR_TASKQ
    __kmp_thread_free( __kmp_thread_from_gtid(global_tid), *( ((void **) p)-1) );
#else
    KMP_INTERNAL_FREE( *( ((void **) p)-1) );
#endif /* THREAD_ALLOC_FOR_TASKQ */
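
/*
 * Builds a complete kmpc_task_queue_t: the descriptor (reused from
 * tq->tq_freelist when possible), the thunk space and its free list, the
 * queue slots, the per-thread shared-variable copies, and the per-thread
 * outstanding-thunk counters.  The last thunk carved from the thunk space
 * is returned through new_taskq_thunk and becomes the taskq task itself.
 */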
static kmpc_task_queue_t *
__kmp_alloc_taskq ( kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots, kmp_int32 nthunks,
                    kmp_int32 nshareds, kmp_int32 nproc, size_t sizeof_thunk,
                    size_t sizeof_shareds, kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid )
    kmpc_task_queue_t *new_queue;
    kmpc_aligned_shared_vars_t *shared_var_array;
    char *shared_var_storage;

    __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );

    if( tq->tq_freelist ) {
        new_queue = tq -> tq_freelist;
        tq -> tq_freelist = tq -> tq_freelist -> tq.tq_next_free;

        KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED);

        new_queue->tq_flags = 0;

        __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
    }
    else {
        __kmp_release_lock( & tq->tq_freelist_lck, global_tid );

        new_queue = (kmpc_task_queue_t *) __kmp_taskq_allocate (sizeof (kmpc_task_queue_t), global_tid);
        new_queue->tq_flags = 0;
    }

    /*  thunk space; the last thunk carved out becomes the taskq task  */
    sizeof_thunk += (CACHE_LINE - (sizeof_thunk % CACHE_LINE));
    pt = (char *) __kmp_taskq_allocate (nthunks * sizeof_thunk, global_tid);
    new_queue->tq_thunk_space = (kmpc_thunk_t *)pt;
    *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk);

    /*  chain the remaining thunks into the free list  */
    new_queue->tq_free_thunks = (kmpc_thunk_t *)pt;

    for (i = 0; i < (nthunks - 2); i++) {
        ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th.th_next_free = (kmpc_thunk_t *)(pt + (i+1)*sizeof_thunk);

        ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th_flags = TQF_DEALLOCATED;
    }

    ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th.th_next_free = NULL;

    ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th_flags = TQF_DEALLOCATED;

    __kmp_init_lock( & new_queue->tq_link_lck );
    __kmp_init_lock( & new_queue->tq_free_thunks_lck );
    __kmp_init_lock( & new_queue->tq_queue_lck );

    /*  queue slots  */
    bytes = nslots * sizeof (kmpc_aligned_queue_slot_t);
    new_queue->tq_queue = (kmpc_aligned_queue_slot_t *) __kmp_taskq_allocate( bytes, global_tid );

    /*  per-thread copies of the shared variables  */
    sizeof_shareds += sizeof(kmpc_task_queue_t *);
    sizeof_shareds += (CACHE_LINE - (sizeof_shareds % CACHE_LINE));

    bytes = nshareds * sizeof (kmpc_aligned_shared_vars_t);
    shared_var_array = (kmpc_aligned_shared_vars_t *) __kmp_taskq_allocate ( bytes, global_tid);

    bytes = nshareds * sizeof_shareds;
    shared_var_storage = (char *) __kmp_taskq_allocate ( bytes, global_tid);

    for (i=0; i<nshareds; i++) {
        shared_var_array[i].ai_data = (kmpc_shared_vars_t *) (shared_var_storage + i*sizeof_shareds);
        shared_var_array[i].ai_data->sv_queue = new_queue;
    }

    new_queue->tq_shareds = shared_var_array;

    /*  per-thread count of outstanding thunks  */
    bytes = nproc * sizeof(kmpc_aligned_int32_t);
    new_queue->tq_th_thunks = (kmpc_aligned_int32_t *) __kmp_taskq_allocate ( bytes, global_tid);
    new_queue->tq_nproc = nproc;

    for (i=0; i<nproc; i++)
        new_queue->tq_th_thunks[i].ai_data = 0;
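
/*
 * Returns a queue's storage, resets its fields, and pushes the descriptor
 * onto tq->tq_freelist (under tq_freelist_lck) for reuse by a later taskq.
 */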
static void
__kmp_free_taskq (kmp_taskq_t *tq, kmpc_task_queue_t *p, int in_parallel, kmp_int32 global_tid)
    __kmpc_taskq_free(p->tq_thunk_space, global_tid);
    __kmpc_taskq_free(p->tq_queue, global_tid);

    __kmpc_taskq_free((void *) p->tq_shareds[0].ai_data, global_tid);

    __kmpc_taskq_free(p->tq_shareds, global_tid);

    p->tq_first_child = NULL;
    p->tq_next_child = NULL;
    p->tq_prev_child = NULL;
    p->tq_ref_count = -10;
    p->tq_shareds = NULL;
    p->tq_tasknum_queuing = 0;
    p->tq_tasknum_serving = 0;

    p->tq_thunk_space = NULL;
    p->tq_taskq_slot = NULL;
    p->tq_free_thunks = NULL;

    for (i=0; i<p->tq_nproc; i++)
        p->tq_th_thunks[i].ai_data = 0;

    if ( __kmp_env_consistency_check )
        p->tq_loc = NULL;

    KMP_DEBUG_ASSERT( p->tq_flags & TQF_DEALLOCATED );
    p->tq_flags = TQF_DEALLOCATED;

    __kmpc_taskq_free(p->tq_th_thunks, global_tid);
    __kmp_destroy_lock(& p->tq_link_lck);
    __kmp_destroy_lock(& p->tq_queue_lck);
    __kmp_destroy_lock(& p->tq_free_thunks_lck);

    p->tq_th_thunks = NULL;

    /*  put the queue descriptor back on the taskq free list  */
    __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
    p->tq.tq_next_free = tq->tq_freelist;
    tq->tq_freelist = p;
    __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
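
/*
 * Thunks are recycled through the per-queue free list tq_free_thunks,
 * protected by tq_free_thunks_lck.
 */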
static kmpc_thunk_t *
__kmp_alloc_thunk (kmpc_task_queue_t *queue, int in_parallel, kmp_int32 global_tid)
    __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);

    fl = queue->tq_free_thunks;

    KMP_DEBUG_ASSERT (fl != NULL);

    queue->tq_free_thunks = fl->th.th_next_free;

    __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);

static void
__kmp_free_thunk (kmpc_task_queue_t *queue, kmpc_thunk_t *p, int in_parallel, kmp_int32 global_tid)
    p->th_encl_thunk = 0;

    __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);

    p->th.th_next_free = queue->tq_free_thunks;
    queue->tq_free_thunks = p;

    p->th_flags = TQF_DEALLOCATED;

    __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
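
/*
 * Appends a thunk at tq_head (wrapping modulo tq_nslots) under tq_queue_lck.
 * In the parallel case the return value reports whether the queue just
 * became full; the first enqueue also clears TQF_RELEASE_WORKERS so the
 * waiting worker threads can be released.
 */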
static kmp_int32
__kmp_enqueue_task ( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, kmpc_thunk_t *thunk, int in_parallel )
    __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);

    KMP_DEBUG_ASSERT (queue->tq_nfull < queue->tq_nslots);

    queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk;

    if (queue->tq_head >= queue->tq_nslots)
        queue->tq_head = 0;

    ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE;

    __kmp_release_lock(& queue->tq_queue_lck, global_tid);

    if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) {
        tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;
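
/*
 * Removes the thunk at tq_tail.  For a non-root queue in a parallel region
 * the queue's reference count is bumped first (under the parent's
 * tq_link_lck) so the queue cannot be reclaimed while the task is still in
 * flight; the count is dropped again in __kmp_execute_task_from_queue.
 * The per-thread outstanding-thunk counter tq_th_thunks[tid] is also
 * maintained here.
 */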
static kmpc_thunk_t *
__kmp_dequeue_task (kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel)
    int tid = __kmp_tid_from_gtid( global_tid );

    KMP_DEBUG_ASSERT (queue->tq_nfull > 0);

    if (queue->tq.tq_parent != NULL && in_parallel) {
        __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
        ct = ++(queue->tq_ref_count);
        __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));
    }

    pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk;

    if (queue->tq_tail >= queue->tq_nslots)
        queue->tq_tail = 0;

    queue->tq_th_thunks[tid].ai_data++;

    KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding thunks from queue %p\n",
                   global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue));

    KMP_DEBUG_ASSERT(queue->tq_nfull >= 0);

    KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <= __KMP_TASKQ_THUNKS_PER_TH);
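
/*
 * Picks the next piece of work from one queue, in priority order: the
 * parked taskq task in tq_taskq_slot (when the queue is not above its high
 * watermark), then a queued task, subject to the per-thread limit
 * __KMP_TASKQ_THUNKS_PER_TH and the lastprivate rule that the final task
 * may only run once it is known to be last.
 */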
static kmpc_thunk_t *
__kmp_find_task_in_queue (kmp_int32 global_tid, kmpc_task_queue_t *queue)
    kmpc_thunk_t *pt = NULL;
    int tid = __kmp_tid_from_gtid( global_tid );

    if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {

        __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);

        if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {

            if ((queue->tq_taskq_slot != NULL) && (queue->tq_nfull <= queue->tq_hiwat)) {
                pt = (kmpc_thunk_t *) queue->tq_taskq_slot;
                queue->tq_taskq_slot = NULL;
            }
            else if (queue->tq_nfull == 0 ||
                     queue->tq_th_thunks[tid].ai_data >= __KMP_TASKQ_THUNKS_PER_TH) {
                /* nothing to dequeue: queue empty or per-thread limit reached */
            }
            else if (queue->tq_nfull > 1) {
                pt = __kmp_dequeue_task (global_tid, queue, TRUE);
            }
            else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) {
                pt = __kmp_dequeue_task (global_tid, queue, TRUE);
            }
            else if (queue->tq_flags & TQF_IS_LAST_TASK) {
                pt = __kmp_dequeue_task (global_tid, queue, TRUE);
                pt->th_flags |= TQF_IS_LAST_TASK;
            }
        }

        __kmp_release_lock(& queue->tq_queue_lck, global_tid);
    }
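
/*
 * Walks the child queues of curr_queue looking for work to steal, recursing
 * into their descendants when a child has nothing ready.  Reference counts
 * are raised around each visit so a child queue cannot disappear while it
 * is being examined.
 */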
static kmpc_thunk_t *
__kmp_find_task_in_descendant_queue (kmp_int32 global_tid, kmpc_task_queue_t *curr_queue)
    kmpc_thunk_t *pt = NULL;
    kmpc_task_queue_t *queue = curr_queue;

    if (curr_queue->tq_first_child != NULL) {
        __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

        queue = (kmpc_task_queue_t *) curr_queue->tq_first_child;

        __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);

        while (queue != NULL) {
            kmpc_task_queue_t *next;

            ct = ++(queue->tq_ref_count);
            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));

            pt = __kmp_find_task_in_queue (global_tid, queue);

            __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

            ct = --(queue->tq_ref_count);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
            KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 );

            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);

            pt = __kmp_find_task_in_descendant_queue (global_tid, queue);

            __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

            ct = --(queue->tq_ref_count);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
            KMP_DEBUG_ASSERT( ct >= 0 );

            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);

            __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

            next = queue->tq_next_child;

            ct = --(queue->tq_ref_count);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
            KMP_DEBUG_ASSERT( ct >= 0 );

        __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
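
/*
 * When a thread has no work left in its own subtree, it climbs toward the
 * root trying each ancestor queue in turn, and finally falls back to
 * scanning the whole tree from tq_root.
 */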
static kmpc_thunk_t *
__kmp_find_task_in_ancestor_queue (kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue)
    kmpc_task_queue_t *queue;

    if (curr_queue->tq.tq_parent != NULL) {
        queue = curr_queue->tq.tq_parent;

        while (queue != NULL) {
            if (queue->tq.tq_parent != NULL) {
                __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

                ct = ++(queue->tq_ref_count);
                __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));
            }

            pt = __kmp_find_task_in_queue (global_tid, queue);

            if (queue->tq.tq_parent != NULL) {
                __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

                ct = --(queue->tq_ref_count);
                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
                KMP_DEBUG_ASSERT( ct >= 0 );

                __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
            }

            if (queue->tq.tq_parent != NULL) {
                __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

                ct = --(queue->tq_ref_count);
                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
                KMP_DEBUG_ASSERT( ct >= 0 );

                queue = queue->tq.tq_parent;

                __kmp_release_lock(& queue->tq_link_lck, global_tid);

    pt = __kmp_find_task_in_descendant_queue( global_tid, tq->tq_root );
static int
__kmp_taskq_tasks_finished (kmpc_task_queue_t *queue)
    for (i=0; i<queue->tq_nproc; i++) {
        if (queue->tq_th_thunks[i].ai_data != 0)
            return FALSE;
    }

    return TRUE;

static int
__kmp_taskq_has_any_children (kmpc_task_queue_t *queue)
    return (queue->tq_first_child != NULL);
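
/*
 * Unlinks an exhausted child queue from its parent, waits for its reference
 * count to drop back to one, checks that every thunk has been returned to
 * the free list, and releases the storage via __kmp_free_taskq.
 */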
static void
__kmp_remove_queue_from_tree( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel )
    kmpc_thunk_t *thunk;

    KF_TRACE(50, ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
    KF_DUMP(50, __kmp_dump_task_queue( tq, queue, global_tid ));

    KMP_DEBUG_ASSERT (queue->tq.tq_parent != NULL);

    __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

    KMP_DEBUG_ASSERT (queue->tq_first_child == NULL);

    /*  unlink this queue from its parent's child list  */
    if (queue->tq_prev_child != NULL)
        queue->tq_prev_child->tq_next_child = queue->tq_next_child;
    if (queue->tq_next_child != NULL)
        queue->tq_next_child->tq_prev_child = queue->tq_prev_child;
    if (queue->tq.tq_parent->tq_first_child == queue)
        queue->tq.tq_parent->tq_first_child = queue->tq_next_child;

    queue->tq_prev_child = NULL;
    queue->tq_next_child = NULL;

    KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n",
                       __LINE__, global_tid, queue, queue->tq_ref_count));

    /*  wait until all other references to this queue have been dropped  */
    while (queue->tq_ref_count > 1) {
        __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

        KMP_WAIT_YIELD((volatile kmp_uint32*)&queue->tq_ref_count, 1, KMP_LE, NULL);

        __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
    }

    __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

    KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p freeing queue\n", __LINE__, global_tid, queue));

    KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED);
    KMP_DEBUG_ASSERT(queue->tq_nfull == 0);

    for (i=0; i<queue->tq_nproc; i++) {
        KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
    }

    i = 0;
    for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free)
        ++i;

    KMP_ASSERT (i == queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH));

    __kmp_free_taskq ( tq, queue, TRUE, global_tid );

    KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
    KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));
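
/*
 * Scans the children of curr_queue for nowait taskqs that have completely
 * drained (all tasks queued, none pending, no grandchildren) and removes
 * them from the tree; tq_queue_lck and the TQF_DEALLOCATED flag decide
 * which thread gets to do the removal.
 */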
static void
__kmp_find_and_remove_finished_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue )
    kmpc_task_queue_t *queue = curr_queue;

    if (curr_queue->tq_first_child != NULL) {
        __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

        queue = (kmpc_task_queue_t *) curr_queue->tq_first_child;
        if (queue != NULL) {
            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);

        while (queue != NULL) {
            kmpc_task_queue_t *next;
            int ct = ++(queue->tq_ref_count);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));

            if (queue->tq_flags & TQF_IS_NOWAIT) {
                __kmp_find_and_remove_finished_child_taskq ( tq, global_tid, queue );

                if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) && (queue->tq_nfull == 0) &&
                    __kmp_taskq_tasks_finished(queue) && ! __kmp_taskq_has_any_children(queue)) {

                    if ( __kmp_test_lock(& queue->tq_queue_lck, global_tid) ) {
                        if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
                            queue->tq_flags |= TQF_DEALLOCATED;
                            __kmp_release_lock(& queue->tq_queue_lck, global_tid);

                            __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE );

                        __kmp_release_lock(& queue->tq_queue_lck, global_tid);

            __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

            next = queue->tq_next_child;

            ct = --(queue->tq_ref_count);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
            KMP_DEBUG_ASSERT( ct >= 0 );

            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
static void
__kmp_remove_all_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue )
    kmpc_task_queue_t *next_child;

    queue = (kmpc_task_queue_t *) queue->tq_first_child;

    while (queue != NULL) {
        __kmp_remove_all_child_taskq ( tq, global_tid, queue );

        next_child = queue->tq_next_child;
        queue->tq_flags |= TQF_DEALLOCATED;
        __kmp_remove_queue_from_tree ( tq, global_tid, queue, FALSE );

        queue = next_child;
    }
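
/*
 * Executes one dequeued thunk: binds the right shared-variable copy,
 * maintains the per-thread thunk stack and the consistency-check state,
 * invokes th_task, then recycles the thunk and drops the reference taken
 * at dequeue time.
 */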
static void
__kmp_execute_task_from_queue( kmp_taskq_t *tq, ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, int in_parallel )
    kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue;
    kmp_int32 tid = __kmp_tid_from_gtid( global_tid );

    KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));
    KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid));
    KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

    if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
        kmp_int32 index = (queue == tq->tq_root) ? tid : 0;
        thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[index].ai_data;

        if ( __kmp_env_consistency_check ) {
            __kmp_push_workshare( global_tid,
                                  (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task,
                                  queue->tq_loc );
        }
    }
    else {
        if ( __kmp_env_consistency_check )
            __kmp_push_workshare( global_tid, ct_taskq, queue->tq_loc );
    }

    thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
    tq->tq_curr_thunk[tid] = thunk;

    KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));

    KF_TRACE( 50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid));
    thunk->th_task (global_tid, thunk);
    KF_TRACE( 50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid));

    if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
        if ( __kmp_env_consistency_check )
            __kmp_pop_workshare( global_tid, (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task,
                                 queue->tq_loc );

        tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
        thunk->th_encl_thunk = NULL;
        KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));

        if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) {
            __kmp_taskq_check_ordered(global_tid, thunk);
        }

        __kmp_free_thunk (queue, thunk, in_parallel, global_tid);

        KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n", global_tid, thunk));
        KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

        KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1);

        KF_TRACE( 200, ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n",
                        global_tid, queue->tq_th_thunks[tid].ai_data-1, queue));

        queue->tq_th_thunks[tid].ai_data--;

        /*  drop the reference taken in __kmp_dequeue_task  */
        if (queue->tq.tq_parent != NULL && in_parallel) {
            int ct;
            __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
            ct = --(queue->tq_ref_count);
            __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
            KMP_DEBUG_ASSERT( ct >= 0 );
        }
    }
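
/*
 * Compiler interface: __kmpc_taskq creates a new task queue, installing the
 * ordered dispatch hooks and the per-thread thunk stacks on first use,
 * links the queue into the tree (parallel case), and returns the taskq
 * thunk that the generated code fills in and passes back through
 * __kmpc_taskq_task / __kmpc_end_taskq.
 */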
kmpc_thunk_t *
__kmpc_taskq( ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task, size_t sizeof_thunk,
              size_t sizeof_shareds, kmp_int32 flags, kmpc_shared_vars_t **shareds )
    kmp_int32 nslots, nthunks, nshareds, nproc;
    kmpc_task_queue_t *new_queue, *curr_queue;
    kmpc_thunk_t *new_taskq_thunk;

    KE_TRACE( 10, ("__kmpc_taskq called (%d)\n", global_tid));

    th = __kmp_threads[ global_tid ];
    team = th -> th.th_team;
    tq = & team -> t.t_taskq;
    nproc = team -> t.t_nproc;
    tid = __kmp_tid_from_gtid( global_tid );

    in_parallel = in_parallel_context( team );

    if( ! tq->tq_root ) {
        /*  no taskq is running yet in this team: install the ordered dispatch hooks  */
        th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;

        th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;

        if ( __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) {

            *shareds = (kmpc_shared_vars_t *) tq->tq_root->tq_shareds[tid].ai_data;

            KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid));

    if( tq->tq_curr_thunk_capacity < nproc ) {
        if(tq->tq_curr_thunk)
            __kmp_free(tq->tq_curr_thunk);
        else
            /*  first allocation: the taskq freelist lock is set up here too  */
            __kmp_init_lock( & tq->tq_freelist_lck );

        tq->tq_curr_thunk = (kmpc_thunk_t **) __kmp_allocate( nproc * sizeof(kmpc_thunk_t *) );
        tq -> tq_curr_thunk_capacity = nproc;
    }

    tq->tq_global_flags = TQF_RELEASE_WORKERS;

    /*  queue sizing  */
    nslots = (in_parallel) ? (2 * nproc) : 1;

    nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1) : nslots + 2;

    nshareds = ( !tq->tq_root && in_parallel) ? nproc : 1;

    /*  create the queue structure and everything it owns  */
    new_queue = __kmp_alloc_taskq ( tq, in_parallel, nslots, nthunks, nshareds, nproc,
                                    sizeof_thunk, sizeof_shareds, &new_taskq_thunk, global_tid );

    /*  rest of the new_queue initialization  */
    new_queue->tq_flags = flags & TQF_INTERFACE_FLAGS;

    new_queue->tq_tasknum_queuing = 0;
    new_queue->tq_tasknum_serving = 0;
    new_queue->tq_flags |= TQF_PARALLEL_CONTEXT;

    new_queue->tq_taskq_slot = NULL;
    new_queue->tq_nslots = nslots;
    new_queue->tq_hiwat = HIGH_WATER_MARK (nslots);
    new_queue->tq_nfull = 0;
    new_queue->tq_head = 0;
    new_queue->tq_tail = 0;
    new_queue->tq_loc = loc;

    if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) {
        new_queue->tq_tasknum_serving = 1;

        th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;

        th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
    }

    /*  set up the taskq task thunk for the new queue  */
    *shareds = (kmpc_shared_vars_t *) new_queue->tq_shareds[0].ai_data;

    new_taskq_thunk->th.th_shareds = *shareds;
    new_taskq_thunk->th_task = taskq_task;
    new_taskq_thunk->th_flags = new_queue->tq_flags | TQF_TASKQ_TASK;
    new_taskq_thunk->th_status = 0;

    KMP_DEBUG_ASSERT (new_taskq_thunk->th_flags & TQF_TASKQ_TASK);

    /*  link the new queue into the queue tree  */
    if( ! tq->tq_root ) {
        new_queue->tq.tq_parent = NULL;
        new_queue->tq_first_child = NULL;
        new_queue->tq_next_child = NULL;
        new_queue->tq_prev_child = NULL;
        new_queue->tq_ref_count = 1;
        tq->tq_root = new_queue;
    }
    else {
        curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
        new_queue->tq.tq_parent = curr_queue;
        new_queue->tq_first_child = NULL;
        new_queue->tq_prev_child = NULL;
        new_queue->tq_ref_count = 1;

        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n",
                           __LINE__, global_tid, new_queue, new_queue->tq_ref_count));

        __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

        new_queue->tq_next_child = (struct kmpc_task_queue_t *) curr_queue->tq_first_child;

        if (curr_queue->tq_first_child != NULL)
            curr_queue->tq_first_child->tq_prev_child = new_queue;

        curr_queue->tq_first_child = new_queue;

        __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
    }

    /*  push the taskq thunk on this thread's thunk stack  */
    new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
    tq->tq_curr_thunk[tid] = new_taskq_thunk;

    KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));

    /*  serialized taskq: no queue tree to link into  */
    new_taskq_thunk->th_encl_thunk = 0;
    new_queue->tq.tq_parent = NULL;
    new_queue->tq_first_child = NULL;
    new_queue->tq_next_child = NULL;
    new_queue->tq_prev_child = NULL;
    new_queue->tq_ref_count = 1;

    KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid));
    KF_DUMP(150, __kmp_dump_thunk( tq, new_taskq_thunk, global_tid ));

    KF_TRACE(25, ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid));

    KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue, global_tid));

    KF_DUMP(25, __kmp_dump_task_queue( tq, new_queue, global_tid ));

    KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));

    if ( __kmp_env_consistency_check )
        __kmp_push_workshare( global_tid, ct_taskq, new_queue->tq_loc );

    KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid));

    return new_taskq_thunk;
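
/*
 * Compiler interface: closes a taskq construct.  In a parallel region the
 * threads drain the queue, steal from descendant and ancestor queues, and
 * the queue is eventually unlinked and freed; the outermost queue is torn
 * down after a final barrier.  In the serialized case the single buffered
 * task, if any, is executed and the queue is freed immediately.
 */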
void
__kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk)
    kmp_int32 is_outermost;
    kmpc_task_queue_t *queue;
    kmpc_thunk_t *thunk;

    KE_TRACE( 10, ("__kmpc_end_taskq called (%d)\n", global_tid));

    tq = & __kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;

    /*  taskq_thunk is NULL only for the outermost taskq, whose queue is the root  */
    queue = (taskq_thunk == NULL) ? tq->tq_root : taskq_thunk->th.th_shareds->sv_queue;

    KE_TRACE( 50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid));
    is_outermost = (queue == tq->tq_root);
    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

    if (is_outermost && (KMP_MASTER_GTID( global_tid ))) {
        if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) {
            /*  no task was ever enqueued, so the workers were never released; do it now  */
            tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;

            __kmp_end_split_barrier( bs_plain_barrier, global_tid );
        }
    }

    KMP_INIT_YIELD(spins);

        /*  wait until there is something to do  */
        while ( (queue->tq_nfull == 0)
                && (queue->tq_taskq_slot == NULL)
                && (! __kmp_taskq_has_any_children(queue) )
                && (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED) )
              ) {
            KMP_YIELD_WHEN( TRUE, spins );
        }

        /*  execute work from this thread's own queue  */
        while ( ( (queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL) )
                && (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL
              ) {
            KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk, queue, global_tid));
            __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
        }

        /*  then steal from descendant queues  */
        if ( (__kmp_taskq_has_any_children(queue))
             && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL
           ) {
            KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
                          thunk, thunk->th.th_shareds->sv_queue, queue, global_tid ));

            __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
        }

    } while ( (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED))
              || (queue->tq_nfull != 0)
            );

    KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue, global_tid));

    /*  help out with any remaining descendant work  */
    while ( (!__kmp_taskq_tasks_finished(queue))
            && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL
          ) {
        KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
                      thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));

        __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
    }

    KF_TRACE(50, ("No work found in descendent queues or all work finished in queue: %p (%d)\n", queue, global_tid));

    if (!is_outermost) {
        /*  nowait taskqs are reclaimed later by __kmp_find_and_remove_finished_child_taskq  */
        if (queue->tq_flags & TQF_IS_NOWAIT) {
            __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
            queue->tq_ref_count--;
            KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 );
            __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

            KE_TRACE( 10, ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid));

            return;
        }

        __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue );

        /*  wait for this queue (and its children) to finish, stealing from ancestors meanwhile  */
        KMP_INIT_YIELD(spins);

        while (!__kmp_taskq_tasks_finished(queue) || __kmp_taskq_has_any_children(queue)) {
            thunk = __kmp_find_task_in_ancestor_queue( tq, global_tid, queue );

            if (thunk != NULL) {
                KF_TRACE(50, ("Stole thunk: %p in ancestor queue: %p while waiting in queue: %p (%d)\n",
                              thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
                __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
            }

            KMP_YIELD_WHEN( thunk == NULL, spins );

            __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue );
        }

        __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
        if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
            queue->tq_flags |= TQF_DEALLOCATED;
        }
        __kmp_release_lock(& queue->tq_queue_lck, global_tid);

        if (taskq_thunk != NULL) {
            __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE );
        }

        KE_TRACE( 10, ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n", global_tid));

        return;
    }

    /*  outermost queue: help until all tasks everywhere are done  */
    KMP_INIT_YIELD(spins);

    while (!__kmp_taskq_tasks_finished(queue)) {
        thunk = __kmp_find_task_in_descendant_queue(global_tid, queue);

        if (thunk != NULL) {
            KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
                          thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));

            __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
        }

        KMP_YIELD_WHEN( thunk == NULL, spins );
    }

    /*  only one thread should perform the teardown of the root queue  */
    if ( !__kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) {
        __kmp_remove_all_child_taskq( tq, global_tid, queue );

        KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n", global_tid, queue ));
        KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

        KMP_DEBUG_ASSERT ((queue->tq.tq_parent == NULL) && (queue->tq_next_child == NULL));

        KMP_DEBUG_ASSERT (queue->tq_first_child == NULL);

        for (i=0; i<nproc; i++) {
            KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
        }

        for (i=0, thunk=queue->tq_free_thunks; thunk != NULL; i++, thunk=thunk->th.th_next_free);

        KMP_DEBUG_ASSERT (i == queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH));

        for (i = 0; i < nproc; i++) {
            KMP_DEBUG_ASSERT( ! tq->tq_curr_thunk[i] );
        }

        tq -> tq_root = NULL;

        KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue, global_tid));

        queue->tq_flags |= TQF_DEALLOCATED;
        __kmp_free_taskq ( tq, queue, in_parallel, global_tid );

        KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));

        __kmp_end_split_barrier( bs_plain_barrier, global_tid );

    /*  uninstall the ordered dispatch hooks  */
    th = __kmp_threads[ global_tid ];

    th->th.th_dispatch->th_deo_fcn = 0;

    th->th.th_dispatch->th_dxo_fcn = 0;

    /*  serialized taskq: execute the single deferred task, if any, then free the queue  */
    if (queue->tq_nfull > 0) {
        KMP_DEBUG_ASSERT(queue->tq_nfull == 1);

        thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);

        if (queue->tq_flags & TQF_IS_LAST_TASK) {
            thunk->th_flags |= TQF_IS_LAST_TASK;
        }

        KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, thunk, queue));

        __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
    }

    KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n", queue, global_tid));
    KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

    i = 0;
    for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free)
        ++i;

    KMP_DEBUG_ASSERT (i == queue->tq_nslots + 1);

    KF_TRACE(50, ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid));

    queue->tq_flags |= TQF_DEALLOCATED;
    __kmp_free_taskq ( tq, queue, in_parallel, global_tid );

    KE_TRACE( 10, ("__kmpc_end_taskq return (%d)\n", global_tid));
kmp_int32
__kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk)
    kmpc_task_queue_t *queue;

    KE_TRACE( 10, ("__kmpc_task called (%d)\n", global_tid));

    KMP_DEBUG_ASSERT (!(thunk->th_flags & TQF_TASKQ_TASK));

    tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    queue = thunk->th.th_shareds->sv_queue;
    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

    if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED))
        thunk->th_tasknum = ++queue->tq_tasknum_queuing;

    /*  serialized case: at most one task is buffered; run the previous one before queuing this one  */
    if (!in_parallel && queue->tq_nfull > 0) {
        kmpc_thunk_t *prev_thunk;

        KMP_DEBUG_ASSERT(queue->tq_nfull == 1);

        prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);

        KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, prev_thunk, queue));

        __kmp_execute_task_from_queue( tq, loc, global_tid, prev_thunk, in_parallel );
    }

    KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));

    ret = __kmp_enqueue_task ( tq, global_tid, queue, thunk, in_parallel );

    KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

    KE_TRACE( 10, ("__kmpc_task return (%d)\n", global_tid));

    return ret;
void
__kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status)
    kmpc_task_queue_t *queue;
    kmp_taskq_t *tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    int tid = __kmp_tid_from_gtid( global_tid );

    KE_TRACE( 10, ("__kmpc_taskq_task called (%d)\n", global_tid));
    KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));

    queue = thunk->th.th_shareds->sv_queue;

    if ( __kmp_env_consistency_check )
        __kmp_pop_workshare( global_tid, ct_taskq, loc );

    /*  only the taskq task itself may be re-buffered, and the slot must be free  */
    KMP_DEBUG_ASSERT (thunk->th_flags & TQF_TASKQ_TASK);

    KMP_DEBUG_ASSERT (queue->tq_taskq_slot == NULL);

    /*  pop the taskq thunk off this thread's thunk stack  */
    tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
    thunk->th_encl_thunk = NULL;

    KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));

    thunk->th_status = status;

    /*  park the thunk so it can be dispatched again via __kmp_find_task_in_queue  */
    queue->tq_taskq_slot = thunk;

    KE_TRACE( 10, ("__kmpc_taskq_task return (%d)\n", global_tid));
void
__kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk)
    kmpc_task_queue_t *queue;

    KE_TRACE( 10, ("__kmpc_end_taskq_task called (%d)\n", global_tid));

    tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    queue = thunk->th.th_shareds->sv_queue;
    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
    tid = __kmp_tid_from_gtid( global_tid );

    if ( __kmp_env_consistency_check )
        __kmp_pop_workshare( global_tid, ct_taskq, loc );

    /*  all tasks of this taskq have now been enqueued  */
#if KMP_ARCH_X86 || \
    KMP_ARCH_X86_64

    KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_ALL_TASKS_QUEUED );
#else
    __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);

    queue->tq_flags |= TQF_ALL_TASKS_QUEUED;

    __kmp_release_lock(& queue->tq_queue_lck, global_tid);
#endif

    if (thunk->th_flags & TQF_IS_LASTPRIVATE) {
        /*  the last task has been generated; record that for the lastprivate copy-out  */
        if (! in_parallel) {
            queue->tq_flags |= TQF_IS_LAST_TASK;
        }
        else {
#if KMP_ARCH_X86 || \
    KMP_ARCH_X86_64

            KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_IS_LAST_TASK );
#else
            __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);

            queue->tq_flags |= TQF_IS_LAST_TASK;

            __kmp_release_lock(& queue->tq_queue_lck, global_tid);
#endif
        }
    }

    /*  pop the taskq thunk off this thread's thunk stack  */
    tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
    thunk->th_encl_thunk = NULL;

    KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));

    KE_TRACE( 10, ("__kmpc_end_taskq_task return (%d)\n", global_tid));
kmpc_thunk_t *
__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task)
    kmpc_task_queue_t *queue;
    kmpc_thunk_t *new_thunk;

    KE_TRACE( 10, ("__kmpc_task_buffer called (%d)\n", global_tid));

    KMP_DEBUG_ASSERT (taskq_thunk->th_flags & TQF_TASKQ_TASK);

    tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    queue = taskq_thunk->th.th_shareds->sv_queue;
    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

    /*  bind to the queue-wide shared variables (index 0); __kmp_execute_task_from_queue
        rebinds per thread for the root queue  */
    new_thunk = __kmp_alloc_thunk (queue, in_parallel, global_tid);
    new_thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[0].ai_data;
    new_thunk->th_encl_thunk = NULL;
    new_thunk->th_task = task;

    /*  inherit the taskq's interface flags  */
    new_thunk->th_flags = queue->tq_flags & TQF_INTERFACE_FLAGS;

    new_thunk->th_status = 0;

    KMP_DEBUG_ASSERT (!(new_thunk->th_flags & TQF_TASKQ_TASK));

    KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_thunk( tq, new_thunk, global_tid ));

    KE_TRACE( 10, ("__kmpc_task_buffer return (%d)\n", global_tid));

    return new_thunk;