#include "kmp_atomic.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_environment.h"
#include "kmp_settings.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"

#define KMP_USE_PRCTL 0
#define KMP_USE_POOLED_ALLOC 0
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "

char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
kmp_info_t __kmp_monitor;

void __kmp_cleanup( void );

static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc );
static void __kmp_partition_places( kmp_team_t *team );
static void __kmp_do_serial_initialize( void );
void __kmp_fork_barrier( int gtid, int tid );
void __kmp_join_barrier( int gtid );
void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc );

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );

static int __kmp_expand_threads( int nWish, int nNeed );
static int __kmp_unregister_root_other_thread( int gtid );
static void __kmp_unregister_library( void );
static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
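/* __kmp_get_global_thread_id() looks up the calling thread's gtid without relying
   on thread-local storage: it takes the address of a local variable and compares
   it against the recorded stack base/size of every slot in __kmp_threads.  When
   __kmp_gtid_mode selects TDATA or keyed TLS those paths are taken first;
   otherwise the stack-walking algorithm is used, with keyed TLS as a last resort
   when the walk fails. */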
__kmp_get_global_thread_id( )

    kmp_info_t **other_threads;

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
                      __kmp_nth, __kmp_all_nth ));

    if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));

    if ( TCR_4(__kmp_gtid_mode) >= 2) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
        return __kmp_gtid_get_specific();

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));

    stack_addr    = (char*) & stack_data;
    other_threads = __kmp_threads;

    for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {

        kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);

        stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
        stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

        if( stack_addr <= stack_base ) {
            size_t stack_diff = stack_base - stack_addr;

            if( stack_diff <= stack_size ) {

                KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
                      "thread, using TLS\n" ));
    i = __kmp_gtid_get_specific();

    if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
        KMP_FATAL( StackOverflow, i );

    stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
    if( stack_addr > stack_base ) {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
                other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);

        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);

    if ( __kmp_storage_map ) {
        char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
        char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
        __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
                                      other_threads[i]->th.th_info.ds.ds_stacksize,
                                      "th_%d stack (refinement)", i );
__kmp_get_global_thread_id_reg( )

    if ( !__kmp_init_serial ) {

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));

    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
        gtid = __kmp_gtid_get_specific();

        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
        gtid = __kmp_get_global_thread_id();

    if( gtid == KMP_GTID_DNE ) {
        KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
                        "Registering a new gtid.\n" ));
        __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
        if( !__kmp_init_serial ) {
            __kmp_do_serial_initialize();
            gtid = __kmp_gtid_get_specific();

            gtid = __kmp_register_root(FALSE);

        __kmp_release_bootstrap_lock( &__kmp_initz_lock );

    KMP_DEBUG_ASSERT( gtid >=0 );
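/* __kmp_check_stack_overlap() optionally prints the thread's stack extent through
   the storage map and, when extended environment checking is enabled, compares the
   stack range of `th` against every other registered thread, issuing a fatal
   StackOverlap message if the ranges intersect. */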
__kmp_check_stack_overlap( kmp_info_t *th )

    char *stack_beg = NULL;
    char *stack_end = NULL;

    KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
    if ( __kmp_storage_map ) {
        stack_end = (char *) th->th.th_info.ds.ds_stackbase;
        stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        gtid = __kmp_gtid_from_thread( th );

        if (gtid == KMP_GTID_MONITOR) {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%s stack (%s)", "mon",
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );

            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%d stack (%s)", gtid,
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );

    gtid = __kmp_gtid_from_thread( th );
    if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid))

        KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
        if ( stack_beg == NULL ) {
            stack_end = (char *) th->th.th_info.ds.ds_stackbase;
            stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
            kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

            if( f_th && f_th != th ) {
                char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
                char *other_stack_beg = other_stack_end -
                                        (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
                if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
                   (stack_end > other_stack_beg && stack_end < other_stack_end)) {

                    if ( __kmp_storage_map )
                        __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
                                                      (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                                                      "th_%d stack (overlapped)",
                                                      __kmp_gtid_from_thread( f_th ) );

                    __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );

    KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
__kmp_infinite_loop( void )

    static int done = FALSE;

#define MAX_MESSAGE 512
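/* __kmp_print_storage_map_gtid() formats one "OMP storage map:" line under the
   stdio bootstrap lock; with KMP_PRINT_DATA_PLACEMENT it additionally walks the
   [p1, p2] range page by page and reports the memory node each page resides on. */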
__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
    char buffer[MAX_MESSAGE];

    va_start( ap, format);
    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
#if KMP_PRINT_DATA_PLACEMENT

    if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
        if( __kmp_storage_map_verbose ) {
            node = __kmp_get_host_node(p1);

                __kmp_storage_map_verbose = FALSE;

                int localProc = __kmp_get_cpu_from_gtid(gtid);

                p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
                p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) );

                    __kmp_printf_no_lock( " GTID %d localNode %d\n", gtid, localProc>>1);

                    __kmp_printf_no_lock( " GTID %d\n", gtid);

                        (char*)p1 += PAGE_SIZE;
                    } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
                    __kmp_printf_no_lock( " %p-%p memNode %d\n", last,
                                          (char*)p1 - 1, lastNode);

                __kmp_printf_no_lock( " %p-%p memNode %d\n", p1,
                                      (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1));

                    __kmp_printf_no_lock( " %p-%p memNode %d\n", p2,
                                          (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2));

            __kmp_printf_no_lock( " %s\n", KMP_I18N_STR( StorageMapWarning ) );

    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
__kmp_warn( char const * format, ... )

    char buffer[MAX_MESSAGE];

    if ( __kmp_generate_warnings == kmp_warnings_off ) {

    va_start( ap, format );

    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP warning: %s\n", format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
__kmp_abort_process()

    __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );

    if ( __kmp_debug_buf ) {
        __kmp_dump_debug_buffer();

    if ( KMP_OS_WINDOWS ) {

        __kmp_global.g.g_abort = SIGABRT;

    __kmp_infinite_loop();
    __kmp_release_bootstrap_lock( & __kmp_exit_lock );

__kmp_abort_thread( void )

    __kmp_infinite_loop();
__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )

    __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
                                  "th_%d.th_info", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
                                  "th_%d.th_local", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
                                  sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
                                  &thr->th.th_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                                  &thr->th.th_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
                                  &thr->th.th_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )

    int num_disp_buff = team->t.t_max_nproc > 1 ? KMP_MAX_DISP_BUF : 2;
    __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
                                  sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
#endif // KMP_FAST_REDUCTION_BARRIER

    __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
                                  sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
                                  sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
                                  sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",

    __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
                                  sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}
static void __kmp_fini_allocator_thread() {}
#ifdef KMP_DYNAMIC_LIB

__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
    __kmp_init_bootstrap_lock( lck );

__kmp_reset_locks_on_process_detach( int gtid_req ) {

    for( i = 0; i < __kmp_threads_capacity; ++i ) {
        if( !__kmp_threads ) continue;
        kmp_info_t* th = __kmp_threads[ i ];
        if( th == NULL ) continue;
        int gtid = th->th.th_info.ds.ds_gtid;
        if( gtid == gtid_req ) continue;
        if( gtid < 0 ) continue;

        int alive = __kmp_is_thread_alive( th, &exit_val );

        if( thread_count == 0 ) break;

    __kmp_reset_lock( &__kmp_forkjoin_lock );

    __kmp_reset_lock( &__kmp_stdio_lock );

DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {

    switch( fdwReason ) {

        case DLL_PROCESS_ATTACH:
            KA_TRACE( 10, ( "DllMain: PROCESS_ATTACH\n" ));

        case DLL_PROCESS_DETACH:
            KA_TRACE( 10, ( "DllMain: PROCESS_DETACH T#%d\n",
                            __kmp_gtid_get_specific() ));

            if( lpReserved != NULL )

                __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );

            __kmp_internal_end_library( __kmp_gtid_get_specific() );

        case DLL_THREAD_ATTACH:
            KA_TRACE( 10, ( "DllMain: THREAD_ATTACH\n" ));

        case DLL_THREAD_DETACH:
            KA_TRACE( 10, ( "DllMain: THREAD_DETACH T#%d\n",
                            __kmp_gtid_get_specific() ));

            __kmp_internal_end_thread( __kmp_gtid_get_specific() );
__kmp_change_library( int status )

    old_status = __kmp_yield_init & 1;

        __kmp_yield_init |= 1;

        __kmp_yield_init &= ~1;
__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )

    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    kmp_team_t *team = __kmp_team_from_gtid( gtid );

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );

            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );

#ifdef BUILD_PARALLEL_ORDERED
    if( !team->t.t_serialized ) {

        KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )

    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    int tid = __kmp_tid_from_gtid( gtid );
    kmp_team_t *team = __kmp_team_from_gtid( gtid );

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
            __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );

#ifdef BUILD_PARALLEL_ORDERED
    if ( ! team->t.t_serialized ) {

        team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );
__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )

    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    th   = __kmp_threads[ gtid ];
    team = th->th.th_team;

    th->th.th_ident = id_ref;

    if ( team->t.t_serialized ) {

        kmp_int32 old_this = th->th.th_local.this_construct;

        ++th->th.th_local.this_construct;

        status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                             th->th.th_local.this_construct);

        if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
             th->th.th_teams_microtask == NULL &&
             team->t.t_active_level == 1 )

            __kmp_itt_metadata_single( id_ref );

    if( __kmp_env_consistency_check ) {
        if (status && push_ws) {
            __kmp_push_workshare( gtid, ct_psingle, id_ref );

            __kmp_check_workshare( gtid, ct_psingle, id_ref );

        __kmp_itt_single_start( gtid );
__kmp_exit_single( int gtid )

    __kmp_itt_single_end( gtid );

    if( __kmp_env_consistency_check )
        __kmp_pop_workshare( gtid, ct_psingle, NULL );
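/* __kmp_reserve_threads() decides how many threads a new parallel region actually
   gets: it serializes trivial or nested-disabled cases, applies the dynamic
   adjustment modes (load balance, thread limit, random), clips the request against
   KMP_ALL_THREADS / __kmp_max_nth and the current capacity of the __kmp_threads
   array (expanding it when possible), and returns the final count, warning once
   when a non-dynamic request had to be reduced. */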
__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
                       int master_tid, int set_nthreads

    int use_rml_to_adjust_nth;
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    KMP_DEBUG_ASSERT( root && parent_team );

    if ( set_nthreads == 1 ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d reserving 1 thread; requested %d threads\n",
                        __kmp_get_gtid(), set_nthreads ));

    if ( ( !get__nested_2(parent_team,master_tid) && (root->r.r_in_parallel
         ) ) || ( __kmp_library == library_serial ) ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team; requested %d threads\n",
                        __kmp_get_gtid(), set_nthreads ));

    new_nthreads = set_nthreads;
    use_rml_to_adjust_nth = FALSE;
    if ( ! get__dynamic_2( parent_team, master_tid ) ) {

#ifdef USE_LOAD_BALANCE
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
        new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
        if ( new_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",

        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
                            master_tid, new_nthreads ));

    else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
        new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
                       : root->r.r_hot_team->t.t_nproc);
        if ( new_nthreads <= 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",

        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
                            master_tid, new_nthreads ));

            new_nthreads = set_nthreads;

    else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
        if ( set_nthreads > 2 ) {
            new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
            new_nthreads = ( new_nthreads % set_nthreads ) + 1;
            if ( new_nthreads == 1 ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",

            if ( new_nthreads < set_nthreads ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
                                master_tid, new_nthreads ));

    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
         root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
        int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
                          root->r.r_hot_team->t.t_nproc );
        if ( tl_nthreads <= 0 ) {

        if ( ! get__dynamic_2( parent_team, master_tid )
             && ( ! __kmp_reserve_warn ) ) {
            __kmp_reserve_warn = 1;

                KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
                KMP_HNT( Unset_ALL_THREADS ),

        if ( tl_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",

        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
                        master_tid, tl_nthreads ));
        new_nthreads = tl_nthreads;

    capacity = __kmp_threads_capacity;
    if ( TCR_PTR(__kmp_threads[0]) == NULL ) {

    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
         root->r.r_hot_team->t.t_nproc ) > capacity ) {

        int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
                            root->r.r_hot_team->t.t_nproc ) - capacity;
        int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
        if ( slotsAdded < slotsRequired ) {

            new_nthreads -= ( slotsRequired - slotsAdded );
            KMP_ASSERT( new_nthreads >= 1 );

            if ( ! get__dynamic_2( parent_team, master_tid )
                 && ( ! __kmp_reserve_warn ) ) {
                __kmp_reserve_warn = 1;
                if ( __kmp_tp_cached ) {

                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
                        KMP_HNT( PossibleSystemLimitOnThreads ),

                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( SystemLimitOnThreads ),

    if ( new_nthreads == 1 ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
                        __kmp_get_gtid(), set_nthreads ) );

    KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
                    __kmp_get_gtid(), new_nthreads, set_nthreads ));
    return new_nthreads;
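/* __kmp_fork_team_threads() binds the master to slot 0 of the new team, decides
   whether the (possibly nested) hot team can be reused, and otherwise allocates
   worker kmp_info_t structures, copies the teams-construct fields into each
   worker, and seeds the per-thread barrier arrival counters from the team. */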
__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
                         kmp_info_t *master_th, int master_gtid )

    KA_TRACE( 10, ( "__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
    KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );

    master_th->th.th_info.ds.ds_tid  = 0;
    master_th->th.th_team            = team;
    master_th->th.th_team_nproc      = team->t.t_nproc;
    master_th->th.th_team_master     = master_th;
    master_th->th.th_team_serialized = FALSE;
    master_th->th.th_dispatch        = & team->t.t_dispatch[ 0 ];

#if KMP_NESTED_HOT_TEAMS
    kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;

    int level = team->t.t_active_level - 1;
    if( master_th->th.th_teams_microtask ) {
        if( master_th->th.th_teams_size.nteams > 1 ) {

        if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
            master_th->th.th_teams_level == team->t.t_level ) {

    if( level < __kmp_hot_teams_max_level ) {
        if( hot_teams[level].hot_team ) {

            KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);

            hot_teams[level].hot_team = team;
            hot_teams[level].hot_team_nth = team->t.t_nproc;

    use_hot_team = team == root->r.r_hot_team;

    if ( !use_hot_team ) {

        team->t.t_threads[ 0 ] = master_th;
        __kmp_initialize_info( master_th, team, 0, master_gtid );

        for ( i=1 ; i < team->t.t_nproc ; i++ ) {

            kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
            team->t.t_threads[ i ] = thr;
            KMP_DEBUG_ASSERT( thr );
            KMP_DEBUG_ASSERT( thr->th.th_team == team );

            KA_TRACE( 20, ( "__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%u, plain=%u\n",
                            __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
                            __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
                            team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
                            team->t.t_bar[ bs_plain_barrier ].b_arrived ) );

            thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
            thr->th.th_teams_level     = master_th->th.th_teams_level;
            thr->th.th_teams_size      = master_th->th.th_teams_size;

            kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
            for ( b = 0; b < bs_last_barrier; ++ b ) {
                balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
                KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);

                balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    __kmp_partition_places( team );
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

propagateFPControl(kmp_team_t * team)

    if ( __kmp_inherit_fp_control ) {
        kmp_int16 x87_fpu_control_word;

        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            team->t.t_x87_fpu_control_word = x87_fpu_control_word;

        if ( team->t.t_mxcsr != mxcsr ) {
            team->t.t_mxcsr = mxcsr;

        if (!team->t.t_fp_control_saved) {
            team->t.t_fp_control_saved = TRUE;

        if (team->t.t_fp_control_saved)
            team->t.t_fp_control_saved = FALSE;

updateHWFPControl(kmp_team_t * team)

    if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {

        kmp_int16 x87_fpu_control_word;

        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );

        if ( team->t.t_mxcsr != mxcsr ) {
            __kmp_load_mxcsr( &team->t.t_mxcsr );

# define propagateFPControl(x) ((void)0)
# define updateHWFPControl(x) ((void)0)
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc );
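/* __kmp_serialized_parallel() implements a parallel region that runs on a single
   thread: the caller is re-pointed at its (possibly freshly allocated) serial
   team, ICVs for the nested level are installed, and a private dispatch buffer is
   pushed so worksharing constructs inside the serialized region keep working.
   Re-entry of an already serialized team simply bumps t_serialized and t_level. */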
__kmp_serialized_parallel( ident_t *loc, kmp_int32 global_tid)

    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ( "__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr    = __kmp_threads[ global_tid ];
    serial_team = this_thr->th.th_serial_team;

    KMP_DEBUG_ASSERT( serial_team );

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
        KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
        KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
                        global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
        this_thr->th.th_task_team = NULL;

    kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
    if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;
    }
    else if ( proc_bind == proc_bind_default ) {

        proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;

    this_thr->th.th_set_proc_bind = proc_bind_default;

    if( this_thr->th.th_team != serial_team ) {

        int level = this_thr->th.th_team->t.t_level;

        if( serial_team->t.t_serialized ) {

            kmp_team_t *new_team;
            int tid = this_thr->th.th_info.ds.ds_tid;

            __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

            new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                                           & this_thr->th.th_current_task->td_icvs,
                                           0 USE_NESTED_HOT_ARG(NULL) );
            __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
            KMP_ASSERT( new_team );

            new_team->t.t_threads[0] = this_thr;
            new_team->t.t_parent = this_thr->th.th_team;
            serial_team = new_team;
            this_thr->th.th_serial_team = serial_team;

            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
                            global_tid, serial_team ) );

            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
                            global_tid, serial_team ) );

        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
        serial_team->t.t_ident = loc;
        serial_team->t.t_serialized = 1;
        serial_team->t.t_nproc = 1;
        serial_team->t.t_parent = this_thr->th.th_team;
        serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
        this_thr->th.th_team = serial_team;
        serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d curtask=%p\n",
                        global_tid, this_thr->th.th_current_task ) );
        KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
        this_thr->th.th_current_task->td_flags.executing = 0;

        __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );

                   & this_thr->th.th_current_task->td_icvs,
                   & this_thr->th.th_current_task->td_parent->td_icvs );

        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];

        if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
            this_thr->th.th_current_task->td_icvs.proc_bind
                = __kmp_nested_proc_bind.bind_types[ level + 1 ];

        serial_team->t.t_pkfn = (microtask_t)( ~0 );

        this_thr->th.th_info.ds.ds_tid = 0;

        this_thr->th.th_team_nproc = 1;
        this_thr->th.th_team_master = this_thr;
        this_thr->th.th_team_serialized = 1;

        serial_team->t.t_level        = serial_team->t.t_parent->t.t_level + 1;
        serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

        propagateFPControl (serial_team);

        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
            serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );

        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

        KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        ++ serial_team->t.t_serialized;
        this_thr->th.th_team_serialized = serial_team->t.t_serialized;

        int level = this_thr->th.th_team->t.t_level;

        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];

        serial_team->t.t_level++;
        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
                        global_tid, serial_team, serial_team->t.t_level ) );

        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);

        dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
            __kmp_allocate( sizeof( dispatch_private_info_t ) );
        disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
        serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;

        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    if ( __kmp_env_consistency_check )
        __kmp_push_parallel( global_tid, NULL );

    if ( serial_team->t.t_level == 1
         && this_thr->th.th_teams_microtask == NULL

        if ( ( __itt_get_timestamp_ptr || KMP_ITT_DEBUG ) &&
             ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )

            serial_team->t.t_region_time = this_thr->th.th_frame_time_serialized = __itt_get_timestamp();

        if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
             __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode )

            this_thr->th.th_ident = loc;

            __kmp_itt_region_forking( global_tid, this_thr->th.th_team_nproc, 0, 1 );
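/* __kmp_fork_call() is the central entry point for starting a parallel region.
   The excerpt below covers its main paths: forwarding a fork issued from inside a
   teams master back onto the parent team, serializing the region when only one
   thread is reserved, and the full path that reserves threads, allocates a team,
   propagates ICVs and FP control, copies the microtask arguments, forks the
   workers, and finally invokes the microtask on the master. */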
    enum fork_context_e call_context,

    microtask_t microtask,

#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX

    int master_this_cons;

    kmp_team_t *parent_team;
    kmp_info_t *master_th;

    int master_set_numthreads;

#if KMP_NESTED_HOT_TEAMS
    kmp_hot_team_ptr_t **p_hot_teams;

    KA_TRACE( 20, ( "__kmp_fork_call: enter T#%d\n", gtid ));
    if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {

        void *dummy = KMP_ALLOCA(__kmp_stkpadding);

        if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
            __kmp_stkpadding += (short)((kmp_int64)dummy);

    KMP_DEBUG_ASSERT( __kmp_init_serial );
    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    master_th = __kmp_threads[ gtid ];
    parent_team = master_th->th.th_team;
    master_tid = master_th->th.th_info.ds.ds_tid;
    master_this_cons = master_th->th.th_local.this_construct;
    root = master_th->th.th_root;
    master_active = root->r.r_active;
    master_set_numthreads = master_th->th.th_set_nproc;

    level = parent_team->t.t_level;

    active_level = parent_team->t.t_active_level;
    teams_level = master_th->th.th_teams_level;

#if KMP_NESTED_HOT_TEAMS
    p_hot_teams = &master_th->th.th_hot_teams;
    if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
        *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
                           sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
        (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
        (*p_hot_teams)[0].hot_team_nth = 1;
    if ( __kmp_debugging ) {
        int nth = __kmp_omp_num_threads( loc );

            master_set_numthreads = nth;

    master_th->th.th_ident = loc;

    if ( master_th->th.th_teams_microtask &&
         ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {

        parent_team->t.t_ident = loc;
        parent_team->t.t_argc  = argc;
        argv = (void**)parent_team->t.t_argv;
        for( i=argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            *argv++ = va_arg( *ap, void * );

            *argv++ = va_arg( ap, void * );

        if ( parent_team == master_th->th.th_serial_team ) {

            KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
            parent_team->t.t_serialized--;

            __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv );

        parent_team->t.t_pkfn = microtask;
        parent_team->t.t_invoke = invoker;
        KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
        parent_team->t.t_active_level ++;
        parent_team->t.t_level ++;

        if ( master_set_numthreads ) {
            if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {

                kmp_info_t **other_threads = parent_team->t.t_threads;
                parent_team->t.t_nproc = master_set_numthreads;
                for ( i = 0; i < master_set_numthreads; ++i ) {
                    other_threads[i]->th.th_team_nproc = master_set_numthreads;

            master_th->th.th_set_nproc = 0;

        KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
        __kmp_internal_fork( loc, gtid, parent_team );
        KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );

        KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
                        gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );

        if (! parent_team->t.t_invoke( gtid )) {
            KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );

        KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
                        gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );

        KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ));
    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);

    __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

    if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {

        nthreads = master_set_numthreads ?
            master_set_numthreads : get__nproc_2( parent_team, master_tid );
        nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
                       , ((ap==NULL && active_level==0) ||
                          (ap && teams_level>0 && teams_level==level))

    KMP_DEBUG_ASSERT( nthreads > 0 );

    master_th->th.th_set_nproc = 0;

    if ( nthreads == 1 ) {

#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
        void *   args[ argc ];

        void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) );

        __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
        KA_TRACE( 20, ( "__kmp_fork_call: T#%d serializing parallel region\n", gtid ));

        if ( call_context == fork_context_intel ) {

            master_th->th.th_serial_team->t.t_ident = loc;

                master_th->th.th_serial_team->t.t_level--;

                __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv );

            }
            else if ( microtask == (microtask_t)__kmp_teams_master ) {
                KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
                team = master_th->th.th_team;

                team->t.t_invoke = invoker;
                __kmp_alloc_argv_entries( argc, team, TRUE );
                team->t.t_argc = argc;
                argv = (void**) team->t.t_argv;

                for( i=argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    *argv++ = va_arg( *ap, void * );

                    *argv++ = va_arg( ap, void * );

                for( i=0; i < argc; ++i )

                    argv[i] = parent_team->t.t_argv[i];

                for( i=argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    *argv++ = va_arg( *ap, void * );

                    *argv++ = va_arg( ap, void * );

                __kmp_invoke_microtask( microtask, gtid, 0, argc, args );

        else if ( call_context == fork_context_gnu ) {

            KA_TRACE( 20, ( "__kmp_fork_call: T#%d serial exit\n", gtid ));

            KMP_ASSERT2( call_context < fork_context_last,
                         "__kmp_fork_call: unknown fork_context parameter" );

        KA_TRACE( 20, ( "__kmp_fork_call: T#%d serial exit\n", gtid ));
    KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
                    parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
                    master_th->th.th_current_task->td_icvs.max_active_levels ) );

    master_th->th.th_current_task->td_flags.executing = 0;

    if ( !master_th->th.th_teams_microtask || level > teams_level )

        KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );

    int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
    if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
        nthreads_icv = __kmp_nested_nth.nth[level+1];

    kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
    kmp_proc_bind_t proc_bind_icv = proc_bind_default;
    if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;

        if (proc_bind == proc_bind_default) {

            proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;

        if ((level+1 < __kmp_nested_proc_bind.used)
            && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
            proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];

    master_th->th.th_set_proc_bind = proc_bind_default;

    if ((nthreads_icv > 0)
        || (proc_bind_icv != proc_bind_default)

        kmp_internal_control_t new_icvs;
        copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
        new_icvs.next = NULL;
        if (nthreads_icv > 0) {
            new_icvs.nproc = nthreads_icv;

        if (proc_bind_icv != proc_bind_default) {
            new_icvs.proc_bind = proc_bind_icv;

        KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
        team = __kmp_allocate_team(root, nthreads, nthreads,
                                   &new_icvs, argc USE_NESTED_HOT_ARG(master_th) );

        KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
        team = __kmp_allocate_team(root, nthreads, nthreads,
                                   &master_th->th.th_current_task->td_icvs, argc
                                   USE_NESTED_HOT_ARG(master_th) );

    KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );

    team->t.t_master_tid = master_tid;
    team->t.t_master_this_cons = master_this_cons;
    team->t.t_ident      = loc;
    team->t.t_parent     = parent_team;
    TCW_SYNC_PTR(team->t.t_pkfn, microtask);
    team->t.t_invoke     = invoker;

    if ( !master_th->th.th_teams_microtask || level > teams_level ) {

        team->t.t_level        = parent_team->t.t_level + 1;
        team->t.t_active_level = parent_team->t.t_active_level + 1;

        team->t.t_level        = parent_team->t.t_level;
        team->t.t_active_level = parent_team->t.t_active_level;

    team->t.t_sched = get__sched_2(parent_team, master_tid);
    propagateFPControl(team);

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {

        KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
        KA_TRACE(20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
                       __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
                       parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );

        KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
        if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) {
            kmp_uint8 *old_stack, *new_stack = (kmp_uint8 *) __kmp_allocate( 2*master_th->th.th_task_state_stack_sz );

            for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
                new_stack[i] = master_th->th.th_task_state_memo_stack[i];

            old_stack = master_th->th.th_task_state_memo_stack;
            master_th->th.th_task_state_memo_stack = new_stack;
            master_th->th.th_task_state_stack_sz *= 2;
            __kmp_free(old_stack);

        master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
        master_th->th.th_task_state_top++;
        master_th->th.th_task_state = 0;

        master_th->th.th_task_team = team->t.t_task_team[master_th->th.th_task_state];

#if !KMP_NESTED_HOT_TEAMS
        KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));

    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
                    gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
    KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
                      ( team->t.t_master_tid == 0 &&
                        ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));

    argv = (void**)team->t.t_argv;

        for ( i=argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            *argv++ = va_arg( *ap, void * );

            *argv++ = va_arg( ap, void * );

        for ( i=0; i < argc; ++i )

            argv[i] = team->t.t_parent->t.t_argv[i];
    team->t.t_master_active = master_active;
    if (!root->r.r_active)
        root->r.r_active = TRUE;

    __kmp_fork_team_threads( root, team, master_th, gtid );
    __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );

    __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

    if ( team->t.t_active_level == 1
         && !master_th->th.th_teams_microtask

        if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
             ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )

            kmp_uint64 tmp_time = 0;
            if ( __itt_get_timestamp_ptr )
                tmp_time = __itt_get_timestamp();

            master_th->th.th_frame_time = tmp_time;
            if ( __kmp_forkjoin_frames_mode == 3 )
                team->t.t_region_time = tmp_time;

        if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
             __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )

            __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);

    KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );

    KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
                  root, team, master_th, gtid));

    if ( __itt_stack_caller_create_ptr ) {
        team->t.t_stack_id = __kmp_itt_stack_caller_create();

    __kmp_internal_fork( loc, gtid, team );
    KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
                  root, team, master_th, gtid));

    if (call_context == fork_context_gnu) {
        KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ));

    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
                    gtid, team->t.t_id, team->t.t_pkfn ) );

    if (! team->t.t_invoke( gtid )) {
        KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );

    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
                    gtid, team->t.t_id, team->t.t_pkfn ) );

    KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ));
__kmp_join_call( ident_t *loc, int gtid

    kmp_team_t *parent_team;
    kmp_info_t *master_th;

    KA_TRACE( 20, ( "__kmp_join_call: enter T#%d\n", gtid ));

    master_th = __kmp_threads[ gtid ];
    root = master_th->th.th_root;
    team = master_th->th.th_team;
    parent_team = team->t.t_parent;

    master_th->th.th_ident = loc;

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
                        __kmp_gtid_from_thread( master_th ), team,
                        team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
        KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );

    if( team->t.t_serialized ) {

        if ( master_th->th.th_teams_microtask ) {

            int level = team->t.t_level;
            int tlevel = master_th->th.th_teams_level;
            if ( level == tlevel ) {

            }
            else if ( level == tlevel + 1 ) {

                team->t.t_serialized++;

    master_active = team->t.t_master_active;

    __kmp_internal_join( loc, gtid, team );

    master_th->th.th_task_state = 0;

    if ( __itt_stack_caller_create_ptr ) {
        __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id );

    if ( team->t.t_active_level == 1
         && !master_th->th.th_teams_microtask

        master_th->th.th_ident = loc;

        if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
            __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
                                    0, loc, master_th->th.th_team_nproc, 1 );
        else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
                  ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
            __kmp_itt_region_joined( gtid );

    if ( master_th->th.th_teams_microtask &&
         team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
         team->t.t_level == master_th->th.th_teams_level + 1 ) {

        team->t.t_active_level --;
        KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );

        if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
            int old_num = master_th->th.th_team_nproc;
            int new_num = master_th->th.th_teams_size.nth;
            kmp_info_t **other_threads = team->t.t_threads;
            kmp_task_team_t * task_team = master_th->th.th_task_team;
            team->t.t_nproc = new_num;

            task_team->tt.tt_ref_ct = new_num - 1;
            task_team->tt.tt_unfinished_threads = new_num;

            for ( i = 0; i < old_num; ++i ) {
                other_threads[i]->th.th_team_nproc = new_num;

            for ( i = old_num; i < new_num; ++i ) {

                kmp_balign_t * balign = other_threads[i]->th.th_bar;
                for ( b = 0; b < bs_last_barrier; ++ b ) {
                    balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
                    KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);

                    balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;

                if ( __kmp_tasking_mode != tskm_immediate_exec ) {

                    other_threads[i]->th.th_task_state = master_th->th.th_task_state;

    master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
    master_th->th.th_local.this_construct = team->t.t_master_this_cons;

    master_th->th.th_dispatch =
        & parent_team->t.t_dispatch[ team->t.t_master_tid ];

    __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

    if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )

        KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );

    KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );

    KF_TRACE( 10, ( "__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
                    0, master_th, team ) );
    __kmp_pop_current_task_from_thread( master_th );

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED

    master_th->th.th_first_place = team->t.t_first_place;
    master_th->th.th_last_place = team->t.t_last_place;

    updateHWFPControl (team);

    if ( root->r.r_active != master_active )
        root->r.r_active = master_active;

    __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) );

    master_th->th.th_team = parent_team;
    master_th->th.th_team_nproc = parent_team->t.t_nproc;
    master_th->th.th_team_master = parent_team->t.t_threads[0];
    master_th->th.th_team_serialized = parent_team->t.t_serialized;

    if( parent_team->t.t_serialized &&
        parent_team != master_th->th.th_serial_team &&
        parent_team != root->r.r_root_team ) {
        __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
        master_th->th.th_serial_team = parent_team;

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {

        KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
        if (master_th->th.th_task_state_top > 0) {
            --master_th->th.th_task_state_top;
            master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];

        master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];

        KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
                        __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,

    master_th->th.th_current_task->td_flags.executing = 1;

    __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

    KA_TRACE( 20, ( "__kmp_join_call: exit T#%d\n", gtid ));
__kmp_save_internal_controls ( kmp_info_t * thread )

    if ( thread->th.th_team != thread->th.th_serial_team ) {

    if (thread->th.th_team->t.t_serialized > 1) {

        if (thread->th.th_team->t.t_control_stack_top == NULL) {

            if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
                 thread->th.th_team->t.t_serialized ) {

        kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate( sizeof(kmp_internal_control_t));

        copy_icvs( control, & thread->th.th_current_task->td_icvs );

        control->serial_nesting_level = thread->th.th_team->t.t_serialized;

        control->next = thread->th.th_team->t.t_control_stack_top;
        thread->th.th_team->t.t_control_stack_top = control;
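/* __kmp_set_num_threads() backs omp_set_num_threads(): it clips the request to
   __kmp_max_nth, saves the current internal controls, updates the nproc ICV, and
   may shrink the hot team right away (releasing surplus workers and their task
   teams) when the root is not inside an active parallel region. */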
__kmp_set_num_threads( int new_nth, int gtid )

    KF_TRACE( 10, ( "__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    else if (new_nth > __kmp_max_nth)
        new_nth = __kmp_max_nth;

    thread = __kmp_threads[gtid];

    __kmp_save_internal_controls( thread );

    set__nproc( thread, new_nth );

    root = thread->th.th_root;
    if ( __kmp_init_parallel && ( ! root->r.r_active )
         && ( root->r.r_hot_team->t.t_nproc > new_nth )
#if KMP_NESTED_HOT_TEAMS
         && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode

        kmp_team_t *hot_team = root->r.r_hot_team;

        __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

        if ( __kmp_tasking_mode != tskm_immediate_exec ) {

            for (tt_idx=0; tt_idx<2; ++tt_idx) {
                kmp_task_team_t *task_team = hot_team->t.t_task_team[tt_idx];
                if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {

                    KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 );
                    TCW_SYNC_4( task_team->tt.tt_active, FALSE );

                    KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n",
                                    &hot_team->t.t_task_team[tt_idx] ) );
                    hot_team->t.t_task_team[tt_idx] = NULL;

                    KMP_DEBUG_ASSERT( task_team == NULL );

        for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
            KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
            __kmp_free_thread( hot_team->t.t_threads[f] );
            hot_team->t.t_threads[f] = NULL;

        hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
        if( thread->th.th_hot_teams ) {
            KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
            thread->th.th_hot_teams[0].hot_team_nth = new_nth;

        __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

        for( f=0 ; f < new_nth; f++ ) {
            KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
            hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;

        hot_team->t.t_size_changed = -1;
__kmp_set_max_active_levels( int gtid, int max_active_levels )

    KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if( max_active_levels < 0 ) {
        KMP_WARNING( ActiveLevelsNegative, max_active_levels );

        KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );

    if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {

        KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
        max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;

    KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );

    thread = __kmp_threads[ gtid ];

    __kmp_save_internal_controls( thread );

    set__max_active_levels( thread, max_active_levels );
__kmp_get_max_active_levels( int gtid )

    KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    thread = __kmp_threads[ gtid ];
    KMP_DEBUG_ASSERT( thread->th.th_current_task );
    KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
                    gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
    return thread->th.th_current_task->td_icvs.max_active_levels;
__kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )

    KF_TRACE( 10, ( "__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk ));
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
         ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )

            KMP_MSG( ScheduleKindOutOfRange, kind ),
            KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),

        kind = kmp_sched_default;

    thread = __kmp_threads[ gtid ];

    __kmp_save_internal_controls( thread );

    if ( kind < kmp_sched_upper_std ) {
        if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {

            thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;

            thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];

        thread->th.th_current_task->td_icvs.sched.r_sched_type =
            __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];

    if ( kind == kmp_sched_auto ) {

        thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;

        thread->th.th_current_task->td_icvs.sched.chunk = chunk;
__kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk )

    KF_TRACE( 10, ( "__kmp_get_schedule: thread %d\n", gtid ));
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    thread = __kmp_threads[ gtid ];

    th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;

    switch ( th_type ) {

    case kmp_sch_static_greedy:
    case kmp_sch_static_balanced:
        *kind = kmp_sched_static;

    case kmp_sch_static_chunked:
        *kind = kmp_sched_static;

    case kmp_sch_dynamic_chunked:
        *kind = kmp_sched_dynamic;

    case kmp_sch_guided_iterative_chunked:
    case kmp_sch_guided_analytical_chunked:
        *kind = kmp_sched_guided;

        *kind = kmp_sched_auto;

    case kmp_sch_trapezoidal:
        *kind = kmp_sched_trapezoidal;

        KMP_FATAL( UnknownSchedulingType, th_type );

    *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
__kmp_get_ancestor_thread_num( int gtid, int level ) {

    KF_TRACE( 10, ( "__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if( level == 0 ) return 0;
    if( level < 0 ) return -1;
    thr = __kmp_threads[ gtid ];
    team = thr->th.th_team;
    ii = team->t.t_level;
    if( level > ii ) return -1;

    if( thr->th.th_teams_microtask ) {

        int tlevel = thr->th.th_teams_level;
        if( level <= tlevel ) {
            KMP_DEBUG_ASSERT( ii >= tlevel );

            if ( ii == tlevel ) {

    if( ii == level ) return __kmp_tid_from_gtid( gtid );

    dd = team->t.t_serialized;

        for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )

        if( ( team->t.t_serialized ) && ( !dd ) ) {
            team = team->t.t_parent;

            team = team->t.t_parent;
            dd = team->t.t_serialized;

    return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );
__kmp_get_team_size( int gtid, int level ) {

    KF_TRACE( 10, ( "__kmp_get_team_size: thread %d %d\n", gtid, level ));
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if( level == 0 ) return 1;
    if( level < 0 ) return -1;
    thr = __kmp_threads[ gtid ];
    team = thr->th.th_team;
    ii = team->t.t_level;
    if( level > ii ) return -1;

    if( thr->th.th_teams_microtask ) {

        int tlevel = thr->th.th_teams_level;
        if( level <= tlevel ) {
            KMP_DEBUG_ASSERT( ii >= tlevel );

            if ( ii == tlevel ) {

        for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )

        if( team->t.t_serialized && ( !dd ) ) {
            team = team->t.t_parent;

            team = team->t.t_parent;

    return team->t.t_nproc;
__kmp_get_schedule_global() {

    kmp_r_sched_t r_sched;

        r_sched.r_sched_type = __kmp_static;

        r_sched.r_sched_type = __kmp_guided;

        r_sched.r_sched_type = __kmp_sched;

    if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) {
        r_sched.chunk = KMP_DEFAULT_CHUNK;

        r_sched.chunk = __kmp_chunk;
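/* The helpers below manage per-team storage: __kmp_alloc_argv_entries() keeps the
   microtask argument vector inline while it fits in KMP_INLINE_ARGV_ENTRIES and
   switches to a page-allocated buffer otherwise, while the *_team_arrays()
   routines allocate, free, and reallocate the dispatch buffers, thread pointers,
   and implicit task data that scale with the team's maximum size. */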
2651 __kmp_alloc_argv_entries(
int argc, kmp_team_t *team,
int realloc )
2654 KMP_DEBUG_ASSERT( team );
2655 if( !realloc || argc > team->t.t_max_argc ) {
2657 KA_TRACE( 100, (
"__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
2658 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
2660 if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
2661 __kmp_free( (
void *) team->t.t_argv );
2663 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
2665 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
2666 KA_TRACE( 100, (
"__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
2667 team->t.t_id, team->t.t_max_argc ));
2668 team->t.t_argv = &team->t.t_inline_argv[0];
2669 if ( __kmp_storage_map ) {
2670 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
2671 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
2672 ( sizeof(void *) * KMP_INLINE_ARGV_ENTRIES ),
2673 "team_%d.t_inline_argv",
2678 team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
2679 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
2680 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
2681 team->t.t_id, team->t.t_max_argc ));
2682 team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
2683 if ( __kmp_storage_map ) {
2684 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
2685 sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
2693 __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
2696 int num_disp_buff = max_nth > 1 ? KMP_MAX_DISP_BUF : 2;
2697 #if KMP_USE_POOLED_ALLOC
2699 char *ptr = __kmp_allocate(max_nth *
2700 ( sizeof(kmp_info_t*) + sizeof(dispatch_shared_info_t)*num_disp_buff
2701 + sizeof(kmp_disp_t) + sizeof(int)*6
2703 + sizeof(kmp_r_sched_t)
2704 + sizeof(kmp_taskdata_t) ) );
2706 team->t.t_threads = (kmp_info_t**) ptr; ptr += sizeof(kmp_info_t*) * max_nth;
2707 team->t.t_disp_buffer = (dispatch_shared_info_t*) ptr;
2708 ptr += sizeof(dispatch_shared_info_t) * num_disp_buff;
2709 team->t.t_dispatch = (kmp_disp_t*) ptr; ptr += sizeof(kmp_disp_t) * max_nth;
2710 team->t.t_set_nproc = (int*) ptr; ptr += sizeof(int) * max_nth;
2711 team->t.t_set_dynamic = (int*) ptr; ptr += sizeof(int) * max_nth;
2712 team->t.t_set_nested = (int*) ptr; ptr += sizeof(int) * max_nth;
2713 team->t.t_set_blocktime = (int*) ptr; ptr += sizeof(int) * max_nth;
2714 team->t.t_set_bt_intervals = (int*) ptr; ptr += sizeof(int) * max_nth;
2715 team->t.t_set_bt_set = (int*) ptr;
2716 ptr += sizeof(int) * max_nth;
2718 team->t.t_set_sched = (kmp_r_sched_t*) ptr;
2719 ptr += sizeof(kmp_r_sched_t) * max_nth;
2720 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) ptr;
2721 ptr += sizeof(kmp_taskdata_t) * max_nth;
2724 team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
2725 team->t.t_disp_buffer = (dispatch_shared_info_t*)
2726 __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
2727 team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
2730 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
2732 team->t.t_max_nproc = max_nth;
2735 for(i = 0 ; i < num_disp_buff; ++i)
2736 team->t.t_disp_buffer[i].buffer_index = i;
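/*
 * Layout note (illustrative): each team gets max_nth slots for thread pointers,
 * per-thread dispatch structures and implicit task data, plus num_disp_buff shared
 * dispatch buffers (KMP_MAX_DISP_BUF for a real team, 2 for a serialized one).
 * Presetting buffer_index = i appears intended to let consecutive dynamically
 * scheduled loops rotate through distinct shared buffers.
 */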
2740 __kmp_free_team_arrays(kmp_team_t *team) {
2743 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
2744 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
2745 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
2746 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
2749 __kmp_free(team->t.t_threads);
2750 #if !KMP_USE_POOLED_ALLOC
2751 __kmp_free(team->t.t_disp_buffer);
2752 __kmp_free(team->t.t_dispatch);
2755 __kmp_free(team->t.t_implicit_task_taskdata);
2757 team->t.t_threads = NULL;
2758 team->t.t_disp_buffer = NULL;
2759 team->t.t_dispatch = NULL;
2762 team->t.t_implicit_task_taskdata = 0;
2766 __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
2767 kmp_info_t **oldThreads = team->t.t_threads;
2769 #if !KMP_USE_POOLED_ALLOC
2770 __kmp_free(team->t.t_disp_buffer);
2771 __kmp_free(team->t.t_dispatch);
2774 __kmp_free(team->t.t_implicit_task_taskdata);
2776 __kmp_allocate_team_arrays(team, max_nth);
2778 KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
2780 __kmp_free(oldThreads);
2783 static kmp_internal_control_t
2784 __kmp_get_global_icvs( void ) {
2786 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
2789 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
2792 kmp_internal_control_t g_icvs = {
2794 (kmp_int8)__kmp_dflt_nested,
2795 (kmp_int8)__kmp_global.g.g_dynamic,
2796 (kmp_int8)__kmp_env_blocktime,
2797 __kmp_dflt_blocktime,
2799 __kmp_dflt_team_nth,
2801 __kmp_dflt_max_active_levels,
2804 __kmp_nested_proc_bind.bind_types[0],
2812 static kmp_internal_control_t
2813 __kmp_get_x_global_icvs( const kmp_team_t *team ) {
2815 kmp_internal_control_t gx_icvs;
2816 gx_icvs.serial_nesting_level = 0;
2817 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
2818 gx_icvs.next = NULL;
2824 __kmp_initialize_root( kmp_root_t *root )
2827 kmp_team_t *root_team;
2828 kmp_team_t *hot_team;
2829 size_t disp_size, dispatch_size, bar_size;
2830 int hot_team_max_nth;
2831 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
2832 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
2833 KMP_DEBUG_ASSERT( root );
2834 KMP_ASSERT( ! root->r.r_begin );
2837 __kmp_init_lock( &root->r.r_begin_lock );
2838 root->r.r_begin = FALSE;
2839 root->r.r_active = FALSE;
2840 root->r.r_in_parallel = 0;
2841 root->r.r_blocktime = __kmp_dflt_blocktime;
2842 root->r.r_nested = __kmp_dflt_nested;
2846 KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
2848 __kmp_allocate_team(
2853 __kmp_nested_proc_bind.bind_types[0],
2857 USE_NESTED_HOT_ARG(NULL)
2861 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 ));
2864 KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
2866 root->r.r_root_team = root_team;
2867 root_team->t.t_control_stack_top = NULL;
2870 root_team->t.t_threads[0] = NULL;
2871 root_team->t.t_nproc = 1;
2872 root_team->t.t_serialized = 1;
2874 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
2875 root_team->t.t_sched.chunk = r_sched.chunk;
2876 KA_TRACE( 20, ( "__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
2877 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
2881 KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
2883 __kmp_allocate_team(
2886 __kmp_dflt_team_nth_ub * 2,
2888 __kmp_nested_proc_bind.bind_types[0],
2892 USE_NESTED_HOT_ARG(NULL)
2894 KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
2896 root->r.r_hot_team = hot_team;
2897 root_team->t.t_control_stack_top = NULL;
2900 hot_team->t.t_parent = root_team;
2903 hot_team_max_nth = hot_team->t.t_max_nproc;
2904 for ( f = 0; f < hot_team_max_nth; ++ f ) {
2905 hot_team->t.t_threads[ f ] = NULL;
2907 hot_team->t.t_nproc = 1;
2909 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
2910 hot_team->t.t_sched.chunk = r_sched.chunk;
2911 hot_team->t.t_size_changed = 0;
2918 typedef struct kmp_team_list_item {
2919 kmp_team_p const * entry;
2920 struct kmp_team_list_item * next;
2921 } kmp_team_list_item_t;
2922 typedef kmp_team_list_item_t * kmp_team_list_t;
2926 __kmp_print_structure_team_accum(
2927 kmp_team_list_t list,
2928 kmp_team_p const * team
2938 KMP_DEBUG_ASSERT( list != NULL );
2939 if ( team == NULL ) {
2943 __kmp_print_structure_team_accum( list, team->t.t_parent );
2944 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
2948 while ( l->next != NULL && l->entry != team ) {
2951 if ( l->next != NULL ) {
2957 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
2963 kmp_team_list_item_t * item =
2964 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
2973 __kmp_print_structure_team(
2975 kmp_team_p const * team
2978 __kmp_printf( "%s", title );
2979 if ( team != NULL ) {
2980 __kmp_printf( "%2x %p\n", team->t.t_id, team );
2982 __kmp_printf( " - (nil)\n" );
2987 __kmp_print_structure_thread(
2989 kmp_info_p const * thread
2992 __kmp_printf( "%s", title );
2993 if ( thread != NULL ) {
2994 __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
2996 __kmp_printf( " - (nil)\n" );
3001 __kmp_print_structure(
3005 kmp_team_list_t list;
3008 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
3012 __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
3015 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3016 __kmp_printf( "%2d", gtid );
3017 if ( __kmp_threads != NULL ) {
3018 __kmp_printf( " %p", __kmp_threads[ gtid ] );
3020 if ( __kmp_root != NULL ) {
3021 __kmp_printf( " %p", __kmp_root[ gtid ] );
3023 __kmp_printf( "\n" );
3028 __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
3029 if ( __kmp_threads != NULL ) {
3031 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3032 kmp_info_t const * thread = __kmp_threads[ gtid ];
3033 if ( thread != NULL ) {
3034 __kmp_printf( "GTID %2d %p:\n", gtid, thread );
3035 __kmp_printf( " Our Root: %p\n", thread->th.th_root );
3036 __kmp_print_structure_team( " Our Team: ", thread->th.th_team );
3037 __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team );
3038 __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc );
3039 __kmp_print_structure_thread( " Master: ", thread->th.th_team_master );
3040 __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized );
3041 __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc );
3043 __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
3045 __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool );
3046 __kmp_printf( "\n" );
3047 __kmp_print_structure_team_accum( list, thread->th.th_team );
3048 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
3052 __kmp_printf( "Threads array is not allocated.\n" );
3056 __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
3057 if ( __kmp_root != NULL ) {
3059 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3060 kmp_root_t const * root = __kmp_root[ gtid ];
3061 if ( root != NULL ) {
3062 __kmp_printf( "GTID %2d %p:\n", gtid, root );
3063 __kmp_print_structure_team( " Root Team: ", root->r.r_root_team );
3064 __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team );
3065 __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread );
3066 __kmp_printf( " Active?: %2d\n", root->r.r_active );
3067 __kmp_printf( " Nested?: %2d\n", root->r.r_nested );
3068 __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel );
3069 __kmp_printf( "\n" );
3070 __kmp_print_structure_team_accum( list, root->r.r_root_team );
3071 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
3075 __kmp_printf( "Ubers array is not allocated.\n" );
3078 __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
3079 while ( list->next != NULL ) {
3080 kmp_team_p const * team = list->entry;
3082 __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
3083 __kmp_print_structure_team( " Parent Team: ", team->t.t_parent );
3084 __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid );
3085 __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc );
3086 __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized );
3087 __kmp_printf( " Number threads: %2d\n", team->t.t_nproc );
3088 for ( i = 0; i < team->t.t_nproc; ++ i ) {
3089 __kmp_printf( " Thread %2d: ", i );
3090 __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
3092 __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool );
3093 __kmp_printf( "\n" );
3098 __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
3099 __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
3100 __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool );
3101 __kmp_printf( "\n" );
3104 while ( list != NULL ) {
3105 kmp_team_list_item_t * item = list;
3107 KMP_INTERNAL_FREE( item );
3119 static const unsigned __kmp_primes[] = {
3120 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
3121 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
3122 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3123 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
3124 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
3125 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3126 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
3127 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
3128 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3129 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
3130 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
3131 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3132 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
3133 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
3134 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3135 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
3142 __kmp_get_random( kmp_info_t * thread )
3144 unsigned x = thread->th.th_x;
3145 unsigned short r = x>>16;
3147 thread->th.th_x = x*thread->th.th_a+1;
3149 KA_TRACE(30, ( "__kmp_get_random: THREAD: %d, RETURN: %u\n",
3150 thread->th.th_info.ds.ds_tid, r) );
3158 __kmp_init_random( kmp_info_t * thread )
3160 unsigned seed = thread->th.th_info.ds.ds_tid;
3162 thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
3163 thread->th.th_x = (seed+1)*thread->th.th_a+1;
3164 KA_TRACE(30, ( "__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
3171 __kmp_reclaim_dead_roots( void ) {
3174 for(i = 0; i < __kmp_threads_capacity; ++i) {
3175 if( KMP_UBER_GTID( i ) &&
3176 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3177 !__kmp_root[i]->r.r_active ) {
3178 r += __kmp_unregister_root_other_thread(i);
3207 __kmp_expand_threads( int nWish, int nNeed) {
3210 int __kmp_actual_max_nth;
3214 #if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
3217 added = __kmp_reclaim_dead_roots();
3235 int minimumRequiredCapacity;
3237 kmp_info_t **newThreads;
3238 kmp_root_t **newRoot;
3260 old_tp_cached = __kmp_tp_cached;
3261 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3262 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3266 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3270 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3276 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3283 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3285 newCapacity = __kmp_threads_capacity;
3288 do { newCapacity = newCapacity <= (__kmp_actual_max_nth >> 1) ?
3289 (newCapacity << 1) :
3290 __kmp_actual_max_nth;
3291 } while(newCapacity < minimumRequiredCapacity);
3292 newThreads = (kmp_info_t**) __kmp_allocate(( sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
3293 newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
3294 KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
3295 KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
3296 memset(newThreads + __kmp_threads_capacity, 0,
3297 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
3298 memset(newRoot + __kmp_threads_capacity, 0,
3299 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
3301 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3307 __kmp_free(newThreads);
3310 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3311 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3313 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3314 __kmp_free(newThreads);
3320 *(kmp_info_t** volatile*)&__kmp_threads = newThreads;
3321 *(kmp_root_t** volatile*)&__kmp_root = newRoot;
3322 added += newCapacity - __kmp_threads_capacity;
3323 *(volatile int*)&__kmp_threads_capacity = newCapacity;
3324 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
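/*
 * Growth note (illustrative): capacity is doubled until it covers the request, but
 * never past __kmp_actual_max_nth (which drops to __kmp_tp_capacity once threadprivate
 * caches exist). The threads and root arrays share a single allocation: e.g. a
 * capacity of 64 asked to reach 100 steps to 128 in one pass, both old arrays are
 * copied, and the new tail is zero-filled before the pointers are published.
 */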
3335 __kmp_register_root( int initial_thread )
3337 kmp_info_t *root_thread;
3341 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3342 KA_TRACE( 20, ( "__kmp_register_root: entered\n"));
3360 capacity = __kmp_threads_capacity;
3361 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
3366 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
3367 if ( __kmp_tp_cached ) {
3370 KMP_MSG( CantRegisterNewThread ),
3371 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
3372 KMP_HNT( PossibleSystemLimitOnThreads ),
3379 KMP_MSG( CantRegisterNewThread ),
3380 KMP_HNT( SystemLimitOnThreads ),
3389 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
3391 KA_TRACE( 1, ( "__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
3392 KMP_ASSERT( gtid < __kmp_threads_capacity );
3396 TCW_4(__kmp_nth, __kmp_nth + 1);
3403 if ( __kmp_adjust_gtid_mode ) {
3404 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
3405 if ( TCR_4(__kmp_gtid_mode) != 2) {
3406 TCW_4(__kmp_gtid_mode, 2);
3410 if (TCR_4(__kmp_gtid_mode) != 1 ) {
3411 TCW_4(__kmp_gtid_mode, 1);
3416 #ifdef KMP_ADJUST_BLOCKTIME
3419 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3420 if ( __kmp_nth > __kmp_avail_proc ) {
3421 __kmp_zero_bt = TRUE;
3427 if( ! ( root = __kmp_root[gtid] )) {
3428 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
3429 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
3432 __kmp_initialize_root( root );
3435 if( root->r.r_uber_thread ) {
3436 root_thread = root->r.r_uber_thread;
3438 root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
3439 if ( __kmp_storage_map ) {
3440 __kmp_print_thread_storage_map( root_thread, gtid );
3442 root_thread->th.th_info .ds.ds_gtid = gtid;
3443 root_thread->th.th_root = root;
3444 if( __kmp_env_consistency_check ) {
3445 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
3448 __kmp_initialize_fast_memory( root_thread );
3452 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
3453 __kmp_initialize_bget( root_thread );
3455 __kmp_init_random( root_thread );
3459 if( ! root_thread->th.th_serial_team ) {
3460 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3461 KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
3462 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
3467 0 USE_NESTED_HOT_ARG(NULL) );
3469 KMP_ASSERT( root_thread->th.th_serial_team );
3470 KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
3471 root_thread->th.th_serial_team ) );
3474 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3476 root->r.r_root_team->t.t_threads[0] = root_thread;
3477 root->r.r_hot_team ->t.t_threads[0] = root_thread;
3478 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3479 root_thread->th.th_serial_team->t.t_serialized = 0;
3480 root->r.r_uber_thread = root_thread;
3483 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
3486 __kmp_gtid_set_specific( gtid );
3488 __kmp_itt_thread_name( gtid );
3490 #ifdef KMP_TDATA_GTID
3493 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
3494 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
3495 TCW_4(__kmp_init_gtid, TRUE);
3497 KA_TRACE( 20, ( "__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
3498 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
3499 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3500 KMP_INIT_BARRIER_STATE ) );
3503 for ( b = 0; b < bs_last_barrier; ++ b ) {
3504 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3506 root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0;
3510 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
3513 #if KMP_AFFINITY_SUPPORTED
3514 if ( TCR_4(__kmp_init_middle) ) {
3515 __kmp_affinity_set_init_mask( gtid, TRUE );
3519 __kmp_root_counter ++;
3522 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
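/*
 * Summary (illustrative): registering a root claims the lowest free gtid slot,
 * allocates the kmp_root_t / kmp_info_t structures on first use, wires the root, hot
 * and serial teams to the same uber thread, publishes the gtid through TLS, and
 * resets the fork/join barrier state to KMP_INIT_BARRIER_STATE before the fork/join
 * bootstrap lock is released.
 */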
3527 #if KMP_NESTED_HOT_TEAMS
3529 __kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level )
3532 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3533 if( !hot_teams || !hot_teams[level].hot_team ) {
3536 KMP_DEBUG_ASSERT( level < max_level );
3537 kmp_team_t *team = hot_teams[level].hot_team;
3538 nth = hot_teams[level].hot_team_nth;
3540 if( level < max_level - 1 ) {
3541 for( i = 0; i < nth; ++i ) {
3542 kmp_info_t *th = team->t.t_threads[i];
3543 n += __kmp_free_hot_teams( root, th, level + 1, max_level );
3544 if( i > 0 && th->th.th_hot_teams ) {
3545 __kmp_free( th->th.th_hot_teams );
3546 th->th.th_hot_teams = NULL;
3550 __kmp_free_team( root, team, NULL );
3559 __kmp_reset_root( int gtid, kmp_root_t *root)
3561 kmp_team_t * root_team = root->r.r_root_team;
3562 kmp_team_t * hot_team = root->r.r_hot_team;
3563 int n = hot_team->t.t_nproc;
3566 KMP_DEBUG_ASSERT( ! root->r.r_active );
3568 root->r.r_root_team = NULL;
3569 root->r.r_hot_team = NULL;
3572 __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
3573 #if KMP_NESTED_HOT_TEAMS
3574 if( __kmp_hot_teams_max_level > 1 ) {
3575 for( i = 0; i < hot_team->t.t_nproc; ++i ) {
3576 kmp_info_t *th = hot_team->t.t_threads[i];
3577 n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
3578 if( th->th.th_hot_teams ) {
3579 __kmp_free( th->th.th_hot_teams );
3580 th->th.th_hot_teams = NULL;
3585 __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );
3591 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3592 __kmp_wait_to_unref_task_teams();
3597 KA_TRACE( 10, ( "__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
3598 (LPVOID)&(root->r.r_uber_thread->th),
3599 root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
3600 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
3603 TCW_4(__kmp_nth, __kmp_nth - 1);
3604 __kmp_reap_thread( root->r.r_uber_thread, 1 );
3607 root->r.r_uber_thread = NULL;
3609 root->r.r_begin = FALSE;
3615 __kmp_unregister_root_current_thread( int gtid )
3617 KA_TRACE( 1, ( "__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
3622 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3623 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
3624 KC_TRACE( 10, ( "__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ));
3625 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3628 kmp_root_t *root = __kmp_root[gtid];
3630 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3631 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3632 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3633 KMP_ASSERT( root->r.r_active == FALSE );
3639 kmp_info_t * thread = __kmp_threads[gtid];
3640 kmp_team_t * team = thread->th.th_team;
3641 kmp_task_team_t * task_team = thread->th.th_task_team;
3644 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks )
3645 __kmp_task_team_wait(thread, team, NULL );
3648 __kmp_reset_root(gtid, root);
3651 __kmp_gtid_set_specific( KMP_GTID_DNE );
3652 #ifdef KMP_TDATA_GTID
3653 __kmp_gtid = KMP_GTID_DNE;
3657 KC_TRACE( 10, ( "__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
3659 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3667 __kmp_unregister_root_other_thread( int gtid )
3669 kmp_root_t *root = __kmp_root[gtid];
3672 KA_TRACE( 1, ( "__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
3673 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3674 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3675 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3676 KMP_ASSERT( root->r.r_active == FALSE );
3678 r = __kmp_reset_root(gtid, root);
3679 KC_TRACE( 10, ( "__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
3684 void __kmp_task_info() {
3686 kmp_int32 gtid = __kmp_entry_gtid();
3687 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
3688 kmp_info_t *this_thr = __kmp_threads[ gtid ];
3689 kmp_team_t *steam = this_thr->th.th_serial_team;
3690 kmp_team_t *team = this_thr->th.th_team;
3692 __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
3693 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
3701 __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
3705 kmp_info_t *master = team->t.t_threads[0];
3706 KMP_DEBUG_ASSERT( this_thr != NULL );
3707 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
3708 KMP_DEBUG_ASSERT( team );
3709 KMP_DEBUG_ASSERT( team->t.t_threads );
3710 KMP_DEBUG_ASSERT( team->t.t_dispatch );
3711 KMP_DEBUG_ASSERT( master );
3712 KMP_DEBUG_ASSERT( master->th.th_root );
3716 TCW_SYNC_PTR(this_thr->th.th_team, team);
3718 this_thr->th.th_info.ds.ds_tid = tid;
3719 this_thr->th.th_set_nproc = 0;
3721 this_thr->th.th_set_proc_bind = proc_bind_default;
3722 # if KMP_AFFINITY_SUPPORTED
3723 this_thr->th.th_new_place = this_thr->th.th_current_place;
3726 this_thr->th.th_root = master->th.th_root;
3729 this_thr->th.th_team_nproc = team->t.t_nproc;
3730 this_thr->th.th_team_master = master;
3731 this_thr->th.th_team_serialized = team->t.t_serialized;
3732 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
3734 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
3735 this_thr->th.th_task_state = 0;
3737 KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
3738 tid, gtid, this_thr, this_thr->th.th_current_task ) );
3740 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
3742 KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
3743 tid, gtid, this_thr, this_thr->th.th_current_task ) );
3747 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
3749 this_thr->th.th_local.this_construct = 0;
3752 this_thr->th.th_local.tv_data = 0;
3755 if ( ! this_thr->th.th_pri_common ) {
3756 this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
3757 if ( __kmp_storage_map ) {
3758 __kmp_print_storage_map_gtid(
3759 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
3760 sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
3763 this_thr->th.th_pri_head = NULL;
3768 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
3772 size_t disp_size = sizeof( dispatch_private_info_t ) *
3773 ( team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF );
3774 KD_TRACE( 10, ( "__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
3775 KMP_ASSERT( dispatch );
3776 KMP_DEBUG_ASSERT( team->t.t_dispatch );
3777 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
3779 dispatch->th_disp_index = 0;
3781 if( ! dispatch->th_disp_buffer ) {
3782 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
3784 if ( __kmp_storage_map ) {
3785 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
3786 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF ],
3787 disp_size,
"th_%d.th_dispatch.th_disp_buffer "
3788 "(team_%d.t_dispatch[%d].th_disp_buffer)",
3789 gtid, team->t.t_id, gtid );
3792 memset( & dispatch->th_disp_buffer[0], '\0', disp_size );
3795 dispatch->th_dispatch_pr_current = 0;
3796 dispatch->th_dispatch_sh_current = 0;
3798 dispatch->th_deo_fcn = 0;
3799 dispatch->th_dxo_fcn = 0;
3802 this_thr->th.th_next_pool = NULL;
3804 if (!this_thr->th.th_task_state_memo_stack) {
3805 this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
3806 this_thr->th.th_task_state_top = 0;
3807 this_thr->th.th_task_state_stack_sz = 4;
3810 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
3811 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
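/*
 * Note (illustrative): every thread owns a private ring of dispatch_private_info_t
 * entries (KMP_MAX_DISP_BUF of them, or a single entry for a serialized team), which
 * appears intended to let back-to-back dynamically scheduled loops overlap without
 * waiting for the previous loop's bookkeeping to drain.
 */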
3824 __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
3826 kmp_team_t *serial_team;
3827 kmp_info_t *new_thr;
3830 KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
3831 KMP_DEBUG_ASSERT( root && team );
3832 #if !KMP_NESTED_HOT_TEAMS
3833 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
3838 if ( __kmp_thread_pool ) {
3840 new_thr = (kmp_info_t*)__kmp_thread_pool;
3841 __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
3842 if ( new_thr == __kmp_thread_pool_insert_pt ) {
3843 __kmp_thread_pool_insert_pt = NULL;
3845 TCW_4(new_thr->th.th_in_pool, FALSE);
3851 __kmp_thread_pool_nth--;
3853 KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d using thread T#%d\n",
3854 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
3855 KMP_ASSERT( ! new_thr->th.th_team );
3856 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
3857 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
3860 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
3861 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
3863 TCW_4(__kmp_nth, __kmp_nth + 1);
3865 new_thr->th.th_task_state_top = 0;
3866 new_thr->th.th_task_state_stack_sz = 4;
3868 #ifdef KMP_ADJUST_BLOCKTIME
3871 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3872 if ( __kmp_nth > __kmp_avail_proc ) {
3873 __kmp_zero_bt = TRUE;
3881 kmp_balign_t * balign = new_thr->th.th_bar;
3882 for( b = 0; b < bs_last_barrier; ++ b )
3883 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
3886 KF_TRACE( 10, ( "__kmp_allocate_thread: T#%d using thread %p T#%d\n",
3887 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
3895 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
3896 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
3902 if ( ! TCR_4( __kmp_init_monitor ) ) {
3903 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
3904 if ( ! TCR_4( __kmp_init_monitor ) ) {
3905 KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
3906 TCW_4( __kmp_init_monitor, 1 );
3907 __kmp_create_monitor( & __kmp_monitor );
3908 KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
3917 while ( TCR_4(__kmp_init_monitor) < 2 ) {
3920 KF_TRACE( 10, ( "after monitor thread has started\n" ) );
3923 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
3927 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
3928 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
3932 new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
3934 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
3936 if ( __kmp_storage_map ) {
3937 __kmp_print_thread_storage_map( new_thr, new_gtid );
3942 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
3943 KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
3944 new_thr->th.th_serial_team = serial_team =
3945 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
3950 0 USE_NESTED_HOT_ARG(NULL) );
3952 KMP_ASSERT ( serial_team );
3953 serial_team->t.t_serialized = 0;
3954 serial_team->t.t_threads[0] = new_thr;
3955 KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
3959 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
3962 __kmp_initialize_fast_memory( new_thr );
3966 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
3967 __kmp_initialize_bget( new_thr );
3970 __kmp_init_random( new_thr );
3973 KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
3974 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
3977 kmp_balign_t * balign = new_thr->th.th_bar;
3978 for(b=0; b<bs_last_barrier; ++b) {
3979 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
3980 balign[b].bb.team = NULL;
3981 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
3982 balign[b].bb.use_oncore_barrier = 0;
3985 new_thr->th.th_spin_here = FALSE;
3986 new_thr->th.th_next_waiting = 0;
3988 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
3989 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
3990 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
3991 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
3992 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
3995 TCW_4(new_thr->th.th_in_pool, FALSE);
3996 new_thr->th.th_active_in_pool = FALSE;
3997 TCW_4(new_thr->th.th_active, TRUE);
4008 if ( __kmp_adjust_gtid_mode ) {
4009 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4010 if ( TCR_4(__kmp_gtid_mode) != 2) {
4011 TCW_4(__kmp_gtid_mode, 2);
4015 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4016 TCW_4(__kmp_gtid_mode, 1);
4021 #ifdef KMP_ADJUST_BLOCKTIME
4024 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4025 if ( __kmp_nth > __kmp_avail_proc ) {
4026 __kmp_zero_bt = TRUE;
4032 KF_TRACE( 10, ( "__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
4033 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
4034 KF_TRACE( 10, ( "__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
4037 KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
4052 __kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) {
4053 KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4054 team->t.t_threads[0], team ) );
4055 KMP_DEBUG_ASSERT( team && new_icvs);
4056 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
4057 team->t.t_ident = loc;
4059 team->t.t_id = KMP_GEN_TEAM_ID();
4062 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
4063 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4065 KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4066 team->t.t_threads[0], team ) );
4074 __kmp_initialize_team(
4077 kmp_internal_control_t * new_icvs,
4080 KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
4083 KMP_DEBUG_ASSERT( team );
4084 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
4085 KMP_DEBUG_ASSERT( team->t.t_threads );
4088 team->t.t_master_tid = 0;
4090 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4091 team->t.t_nproc = new_nproc;
4094 team->t.t_next_pool = NULL;
4097 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4098 team->t.t_invoke = NULL;
4101 team->t.t_sched = new_icvs->sched;
4103 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4104 team->t.t_fp_control_saved = FALSE;
4105 team->t.t_x87_fpu_control_word = 0;
4106 team->t.t_mxcsr = 0;
4109 team->t.t_construct = 0;
4110 __kmp_init_lock( & team->t.t_single_lock );
4112 team->t.t_ordered .dt.t_value = 0;
4113 team->t.t_master_active = FALSE;
4115 memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t ));
4118 team->t.t_copypriv_data = NULL;
4120 team->t.t_copyin_counter = 0;
4122 team->t.t_control_stack_top = NULL;
4124 __kmp_reinitialize_team( team, new_icvs, loc );
4127 KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
4130 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4133 __kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
4135 if ( KMP_AFFINITY_CAPABLE() ) {
4137 if ( old_mask != NULL ) {
4138 status = __kmp_get_system_affinity( old_mask, TRUE );
4140 if ( status != 0 ) {
4143 KMP_MSG( ChangeThreadAffMaskError ),
4149 __kmp_set_system_affinity( __kmp_affinity_get_fullMask(), TRUE );
4154 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4163 __kmp_partition_places( kmp_team_t *team )
4168 kmp_info_t *master_th = team->t.t_threads[0];
4169 KMP_DEBUG_ASSERT( master_th != NULL );
4170 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4171 int first_place = master_th->th.th_first_place;
4172 int last_place = master_th->th.th_last_place;
4173 int masters_place = master_th->th.th_current_place;
4174 team->t.t_first_place = first_place;
4175 team->t.t_last_place = last_place;
4177 KA_TRACE( 20, ( "__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
4178 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
4179 masters_place, first_place, last_place ) );
4181 switch ( proc_bind ) {
4183 case proc_bind_default:
4189 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
4192 case proc_bind_master:
4195 int n_th = team->t.t_nproc;
4196 for ( f = 1; f < n_th; f++ ) {
4197 kmp_info_t *th = team->t.t_threads[f];
4198 KMP_DEBUG_ASSERT( th != NULL );
4199 th->th.th_first_place = first_place;
4200 th->th.th_last_place = last_place;
4201 th->th.th_new_place = masters_place;
4203 KA_TRACE( 100, (
"__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4204 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4205 team->t.t_id, f, masters_place, first_place, last_place ) );
4210 case proc_bind_close:
4213 int n_th = team->t.t_nproc;
4215 if ( first_place <= last_place ) {
4216 n_places = last_place - first_place + 1;
4219 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4221 if ( n_th <= n_places ) {
4222 int place = masters_place;
4223 for ( f = 1; f < n_th; f++ ) {
4224 kmp_info_t *th = team->t.t_threads[f];
4225 KMP_DEBUG_ASSERT( th != NULL );
4227 if ( place == last_place ) {
4228 place = first_place;
4230 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4236 th->th.th_first_place = first_place;
4237 th->th.th_last_place = last_place;
4238 th->th.th_new_place = place;
4240 KA_TRACE( 100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4241 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4242 team->t.t_id, f, place, first_place, last_place ) );
4246 int S, rem, gap, s_count;
4247 S = n_th / n_places;
4249 rem = n_th - ( S * n_places );
4250 gap = rem > 0 ? n_places/rem : n_places;
4251 int place = masters_place;
4253 for ( f = 0; f < n_th; f++ ) {
4254 kmp_info_t *th = team->t.t_threads[f];
4255 KMP_DEBUG_ASSERT( th != NULL );
4257 th->th.th_first_place = first_place;
4258 th->th.th_last_place = last_place;
4259 th->th.th_new_place = place;
4262 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4265 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4267 if ( place == last_place ) {
4268 place = first_place;
4270 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4280 else if (s_count == S) {
4281 if ( place == last_place ) {
4282 place = first_place;
4284 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4294 KA_TRACE( 100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4295 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4296 team->t.t_id, f, th->th.th_new_place, first_place,
4299 KMP_DEBUG_ASSERT( place == masters_place );
4304 case proc_bind_spread:
4307 int n_th = team->t.t_nproc;
4309 if ( first_place <= last_place ) {
4310 n_places = last_place - first_place + 1;
4313 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4315 if ( n_th <= n_places ) {
4316 int place = masters_place;
4317 int S = n_places/n_th;
4318 int s_count, rem, gap, gap_ct;
4319 rem = n_places - n_th*S;
4320 gap = rem ? n_th/rem : 1;
4322 for ( f = 0; f < n_th; f++ ) {
4323 kmp_info_t *th = team->t.t_threads[f];
4324 KMP_DEBUG_ASSERT( th != NULL );
4326 th->th.th_first_place = place;
4327 th->th.th_new_place = place;
4329 while (s_count < S) {
4330 if ( place == last_place ) {
4331 place = first_place;
4333 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4341 if (rem && (gap_ct == gap)) {
4342 if ( place == last_place ) {
4343 place = first_place;
4345 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4354 th->th.th_last_place = place;
4357 if ( place == last_place ) {
4358 place = first_place;
4360 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4367 KA_TRACE( 100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4368 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4369 team->t.t_id, f, th->th.th_new_place,
4370 th->th.th_first_place, th->th.th_last_place ) );
4372 KMP_DEBUG_ASSERT( place == masters_place );
4375 int S, rem, gap, s_count;
4376 S = n_th / n_places;
4378 rem = n_th - ( S * n_places );
4379 gap = rem > 0 ? n_places/rem : n_places;
4380 int place = masters_place;
4382 for ( f = 0; f < n_th; f++ ) {
4383 kmp_info_t *th = team->t.t_threads[f];
4384 KMP_DEBUG_ASSERT( th != NULL );
4386 th->th.th_first_place = place;
4387 th->th.th_last_place = place;
4388 th->th.th_new_place = place;
4391 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4394 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4396 if ( place == last_place ) {
4397 place = first_place;
4399 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4409 else if (s_count == S) {
4410 if ( place == last_place ) {
4411 place = first_place;
4413 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4423 KA_TRACE( 100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4424 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4425 team->t.t_id, f, th->th.th_new_place,
4426 th->th.th_first_place, th->th.th_last_place) );
4428 KMP_DEBUG_ASSERT( place == masters_place );
4437 KA_TRACE( 20, ( "__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
4444 __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
4446 kmp_proc_bind_t new_proc_bind,
4448 kmp_internal_control_t *new_icvs,
4449 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
4456 int use_hot_team = ! root->r.r_active;
4459 KA_TRACE( 20, ( "__kmp_allocate_team: called\n"));
4460 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
4461 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
4464 #if KMP_NESTED_HOT_TEAMS
4465 kmp_hot_team_ptr_t *hot_teams;
4467 team = master->th.th_team;
4468 level = team->t.t_active_level;
4469 if( master->th.th_teams_microtask ) {
4470 if( master->th.th_teams_size.nteams > 1 && (
4471 team->t.t_pkfn == (microtask_t)__kmp_teams_master ||
4472 master->th.th_teams_level < team->t.t_level ) ) {
4476 hot_teams = master->th.th_hot_teams;
4477 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
4486 if( use_hot_team && new_nproc > 1 ) {
4487 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
4488 #if KMP_NESTED_HOT_TEAMS
4489 team = hot_teams[level].hot_team;
4491 team = root->r.r_hot_team;
4494 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4495 KA_TRACE( 20, ( "__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
4496 team->t.t_task_team[0], team->t.t_task_team[1] ));
4503 if (team->t.t_nproc == new_nproc) {
4504 KA_TRACE( 20, ( "__kmp_allocate_team: reusing hot team\n" ));
4507 if ( team->t.t_size_changed == -1 ) {
4508 team->t.t_size_changed = 1;
4510 team->t.t_size_changed = 0;
4514 team->t.t_sched = new_icvs->sched;
4516 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4518 KF_TRACE( 10, ( "__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
4519 0, team->t.t_threads[0], team ) );
4520 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4523 # if KMP_AFFINITY_SUPPORTED
4524 if ( team->t.t_proc_bind == new_proc_bind ) {
4525 KA_TRACE( 200, (
"__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
4526 team->t.t_id, new_proc_bind, team->t.t_first_place,
4527 team->t.t_last_place ) );
4530 team->t.t_proc_bind = new_proc_bind;
4531 __kmp_partition_places( team );
4534 if ( team->t.t_proc_bind != new_proc_bind ) {
4535 team->t.t_proc_bind = new_proc_bind;
4541 for(f = 0; f < new_nproc; ++f) {
4542 team->t.t_threads[f]->th.th_task_state = 0;
4546 else if( team->t.t_nproc > new_nproc ) {
4547 KA_TRACE( 20, ( "__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
4549 team->t.t_size_changed = 1;
4550 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4554 for (tt_idx=0; tt_idx<2; ++tt_idx) {
4556 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
4557 if ( (task_team != NULL) && TCR_SYNC_4(task_team->tt.tt_active) ) {
4558 KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
4559 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
4561 KA_TRACE(20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
4562 &team->t.t_task_team[tt_idx]));
4563 team->t.t_task_team[tt_idx] = NULL;
4566 KMP_DEBUG_ASSERT( task_team == NULL );
4570 #if KMP_NESTED_HOT_TEAMS
4571 if( __kmp_hot_teams_mode == 0 ) {
4574 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4575 hot_teams[level].hot_team_nth = new_nproc;
4576 #endif // KMP_NESTED_HOT_TEAMS
4578 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
4579 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
4580 __kmp_free_thread( team->t.t_threads[ f ] );
4581 team->t.t_threads[ f ] = NULL;
4583 #if KMP_NESTED_HOT_TEAMS
4585 #endif // KMP_NESTED_HOT_TEAMS
4586 team->t.t_nproc = new_nproc;
4588 team->t.t_sched = new_icvs->sched;
4589 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4591 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4594 for (tt_idx=0; tt_idx<2; ++tt_idx) {
4595 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
4596 if ( task_team != NULL ) {
4597 KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
4598 task_team->tt.tt_nproc = new_nproc;
4599 task_team->tt.tt_unfinished_threads = new_nproc;
4600 task_team->tt.tt_ref_ct = new_nproc - 1;
4607 for(f = 0; f < new_nproc; ++f) {
4608 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4609 team->t.t_threads[f]->th.th_task_state = 0;
4613 for(f = 0; f < new_nproc; ++f) {
4614 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4618 KF_TRACE( 10, ( "__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
4619 0, team->t.t_threads[0], team ) );
4621 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4624 for ( f = 0; f < team->t.t_nproc; f++ ) {
4625 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4626 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4631 team->t.t_proc_bind = new_proc_bind;
4632 # if KMP_AFFINITY_SUPPORTED
4633 __kmp_partition_places( team );
4638 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4639 kmp_affin_mask_t *old_mask;
4640 if ( KMP_AFFINITY_CAPABLE() ) {
4641 KMP_CPU_ALLOC(old_mask);
4645 KA_TRACE( 20, ( "__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
4647 team->t.t_size_changed = 1;
4650 #if KMP_NESTED_HOT_TEAMS
4651 int avail_threads = hot_teams[level].hot_team_nth;
4652 if( new_nproc < avail_threads )
4653 avail_threads = new_nproc;
4654 kmp_info_t **other_threads = team->t.t_threads;
4655 for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
4659 kmp_balign_t * balign = other_threads[f]->th.th_bar;
4660 for ( b = 0; b < bs_last_barrier; ++ b ) {
4661 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4662 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4664 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
4668 if( hot_teams[level].hot_team_nth >= new_nproc ) {
4671 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
4672 team->t.t_nproc = new_nproc;
4675 team->t.t_nproc = hot_teams[level].hot_team_nth;
4676 hot_teams[level].hot_team_nth = new_nproc;
4677 #endif // KMP_NESTED_HOT_TEAMS
4678 if(team->t.t_max_nproc < new_nproc) {
4680 __kmp_reallocate_team_arrays(team, new_nproc);
4681 __kmp_reinitialize_team( team, new_icvs, NULL );
4684 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4691 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
4695 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
4696 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
4697 KMP_DEBUG_ASSERT( new_worker );
4698 team->t.t_threads[ f ] = new_worker;
4699 new_worker->th.th_team_nproc = team->t.t_nproc;
4701 KA_TRACE( 20, (
"__kmp_allocate_team: team %d init T#%d arrived: join=%u, plain=%u\n",
4702 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
4703 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
4704 team->t.t_bar[bs_plain_barrier].b_arrived ) );
4708 kmp_balign_t * balign = new_worker->th.th_bar;
4709 for( b = 0; b < bs_last_barrier; ++ b ) {
4710 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
4711 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4713 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
4719 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4720 if ( KMP_AFFINITY_CAPABLE() ) {
4722 __kmp_set_system_affinity( old_mask, TRUE );
4723 KMP_CPU_FREE(old_mask);
4726 #if KMP_NESTED_HOT_TEAMS
4728 #endif // KMP_NESTED_HOT_TEAMS
4730 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
4732 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4734 for (tt_idx=0; tt_idx<2; ++tt_idx) {
4735 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
4736 if ( task_team != NULL ) {
4737 KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
4738 task_team->tt.tt_nproc = new_nproc;
4739 task_team->tt.tt_unfinished_threads = new_nproc;
4740 task_team->tt.tt_ref_ct = new_nproc - 1;
4747 for( f = 0 ; f < team->t.t_nproc ; f++ ) {
4748 __kmp_initialize_info( team->t.t_threads[ f ], team, f,
4749 __kmp_gtid_from_tid( f, team ) );
4753 int old_state = team->t.t_threads[0]->th.th_task_state;
4754 for (f=0; f < team->t.t_nproc; ++f) {
4755 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
4759 for( f = 1 ; f < new_nproc ; f++ ) {
4760 team->t.t_threads[f]->th.th_task_state = old_state;
4761 team->t.t_threads[f]->th.th_task_team = team->t.t_task_team[team->t.t_threads[f]->th.th_task_state];
4766 for ( f = 0; f < team->t.t_nproc; ++ f ) {
4767 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4768 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4773 team->t.t_proc_bind = new_proc_bind;
4774 # if KMP_AFFINITY_SUPPORTED
4775 __kmp_partition_places( team );
4781 kmp_info_t *master = team->t.t_threads[0];
4782 if( master->th.th_teams_microtask ) {
4783 for( f = 1; f < new_nproc; ++f ) {
4785 kmp_info_t *thr = team->t.t_threads[f];
4786 thr->th.th_teams_microtask = master->th.th_teams_microtask;
4787 thr->th.th_teams_level = master->th.th_teams_level;
4788 thr->th.th_teams_size = master->th.th_teams_size;
4792 #if KMP_NESTED_HOT_TEAMS
4795 for( f = 1; f < new_nproc; ++f ) {
4796 kmp_info_t *thr = team->t.t_threads[f];
4797 thr->th.th_task_state = 0;
4799 kmp_balign_t * balign = thr->th.th_bar;
4800 for( b = 0; b < bs_last_barrier; ++ b ) {
4801 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
4802 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4804 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
4809 #endif // KMP_NESTED_HOT_TEAMS
4812 __kmp_alloc_argv_entries( argc, team, TRUE );
4813 team->t.t_argc = argc;
4819 KF_TRACE( 10, ( " hot_team = %p\n", team ) );
4822 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4823 KA_TRACE( 20, ( "__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
4824 team->t.t_task_team[0], team->t.t_task_team[1] ));
4835 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
4838 if ( team->t.t_max_nproc >= max_nproc ) {
4840 __kmp_team_pool = team->t.t_next_pool;
4843 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
4845 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
4846 &team->t.t_task_team[0], &team->t.t_task_team[1]) );
4847 team->t.t_task_team[0] = NULL;
4848 team->t.t_task_team[1] = NULL;
4851 __kmp_alloc_argv_entries( argc, team, TRUE );
4852 team->t.t_argc = argc;
4854 KA_TRACE( 20, ( "__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
4855 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4858 for ( b = 0; b < bs_last_barrier; ++ b) {
4859 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
4861 team->t.t_bar[ b ].b_master_arrived = 0;
4862 team->t.t_bar[ b ].b_team_arrived = 0;
4868 team->t.t_proc_bind = new_proc_bind;
4871 KA_TRACE( 20, ( "__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
4880 team = __kmp_reap_team( team );
4881 __kmp_team_pool = team;
4886 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
4889 team->t.t_max_nproc = max_nproc;
4893 __kmp_allocate_team_arrays( team, max_nproc );
4895 KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
4896 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
4898 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
4899 &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
4900 team->t.t_task_team[0] = NULL;
4901 team->t.t_task_team[1] = NULL;
4903 if ( __kmp_storage_map ) {
4904 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
4908 __kmp_alloc_argv_entries( argc, team, FALSE );
4909 team->t.t_argc = argc;
4911 KA_TRACE( 20, ( "__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
4912 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4915 for ( b = 0; b < bs_last_barrier; ++ b ) {
4916 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
4918 team->t.t_bar[ b ].b_master_arrived = 0;
4919 team->t.t_bar[ b ].b_team_arrived = 0;
4925 team->t.t_proc_bind = new_proc_bind;
4930 KA_TRACE( 20, ( "__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
4941 __kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
4944 KA_TRACE( 20, ( "__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
4947 KMP_DEBUG_ASSERT( root );
4948 KMP_DEBUG_ASSERT( team );
4949 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
4950 KMP_DEBUG_ASSERT( team->t.t_threads );
4952 int use_hot_team = team == root->r.r_hot_team;
4953 #if KMP_NESTED_HOT_TEAMS
4955 kmp_hot_team_ptr_t *hot_teams;
4957 level = team->t.t_active_level - 1;
4958 if( master->th.th_teams_microtask ) {
4959 if( master->th.th_teams_size.nteams > 1 ) {
4962 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
4963 master->th.th_teams_level == team->t.t_level ) {
4967 hot_teams = master->th.th_hot_teams;
4968 if( level < __kmp_hot_teams_max_level ) {
4969 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
4973 #endif // KMP_NESTED_HOT_TEAMS
4976 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4977 team->t.t_copyin_counter = 0;
4981 if( ! use_hot_team ) {
4982 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4984 for (tt_idx=0; tt_idx<2; ++tt_idx) {
4986 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
4987 if ( task_team != NULL ) {
4991 KA_TRACE( 20, ( "__kmp_free_team: deactivating task_team %p\n", task_team ) );
4992 KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
4993 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
4995 team->t.t_task_team[tt_idx] = NULL;
5001 team->t.t_parent = NULL;
5005 for ( f = 1; f < team->t.t_nproc; ++ f ) {
5006 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5007 __kmp_free_thread( team->t.t_threads[ f ] );
5008 team->t.t_threads[ f ] = NULL;
5014 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
5015 __kmp_team_pool = (volatile kmp_team_t*) team;
5024 __kmp_reap_team( kmp_team_t *team )
5026 kmp_team_t *next_pool = team->t.t_next_pool;
5028 KMP_DEBUG_ASSERT( team );
5029 KMP_DEBUG_ASSERT( team->t.t_dispatch );
5030 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
5031 KMP_DEBUG_ASSERT( team->t.t_threads );
5032 KMP_DEBUG_ASSERT( team->t.t_argv );
5038 __kmp_free_team_arrays( team );
5039 if ( team->t.t_argv != &team->t.t_inline_argv[0] )
5040 __kmp_free( (void*) team->t.t_argv );
5075 __kmp_free_thread( kmp_info_t *this_th )
5080 KA_TRACE( 20, ( "__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5081 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
5083 KMP_DEBUG_ASSERT( this_th );
5087 kmp_balign_t *balign = this_th->th.th_bar;
5088 for (b=0; b<bs_last_barrier; ++b) {
5089 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5090 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5091 balign[b].bb.team = NULL;
5096 TCW_PTR(this_th->th.th_team, NULL);
5097 TCW_PTR(this_th->th.th_root, NULL);
5098 TCW_PTR(this_th->th.th_dispatch, NULL);
5104 gtid = this_th->th.th_info.ds.ds_gtid;
5105 if ( __kmp_thread_pool_insert_pt != NULL ) {
5106 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
5107 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
5108 __kmp_thread_pool_insert_pt = NULL;
5119 if ( __kmp_thread_pool_insert_pt != NULL ) {
5120 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
5123 scan = (kmp_info_t **)&__kmp_thread_pool;
5125 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
5126 scan = &( (*scan)->th.th_next_pool ) );
5132 TCW_PTR(this_th->th.th_next_pool, *scan);
5133 __kmp_thread_pool_insert_pt = *scan = this_th;
5134 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
5135 || ( this_th->th.th_info.ds.ds_gtid
5136 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
5137 TCW_4(this_th->th.th_in_pool, TRUE);
5138 __kmp_thread_pool_nth++;
5140 TCW_4(__kmp_nth, __kmp_nth - 1);
5142 #ifdef KMP_ADJUST_BLOCKTIME
5145 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5146 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5147 if ( __kmp_nth <= __kmp_avail_proc ) {
5148 __kmp_zero_bt = FALSE;
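/*
 * Note (illustrative): the thread pool is kept sorted by gtid so the lowest-numbered
 * threads are reused first; __kmp_thread_pool_insert_pt caches the last insertion
 * point so releases in roughly ascending gtid order avoid rescanning the list from
 * its head.
 */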
5160 __kmp_launch_thread( kmp_info_t *this_thr )
5162 int gtid = this_thr->th.th_info.ds.ds_gtid;
5164 kmp_team_t *(* volatile pteam);
5167 KA_TRACE( 10, ( "__kmp_launch_thread: T#%d start\n", gtid ) );
5169 if( __kmp_env_consistency_check ) {
5170 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid );
5174 while( ! TCR_4(__kmp_global.g.g_done) ) {
5175 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
5179 KA_TRACE( 20, ( "__kmp_launch_thread: T#%d waiting for work\n", gtid ));
5182 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
5184 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
5187 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
5189 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
5191 KA_TRACE(20, ( "__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5192 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
5194 updateHWFPControl (*pteam);
5199 rc = (*pteam)->t.t_invoke( gtid );
5205 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5206 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
5209 __kmp_join_barrier( gtid );
5212 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
5214 if ( TCR_PTR( this_thr->th.th_task_team ) != NULL ) {
5215 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
5218 __kmp_common_destroy_gtid( gtid );
5220 KA_TRACE( 10, ( "__kmp_launch_thread: T#%d done\n", gtid ) );
5229 __kmp_internal_end_dest( void *specific_gtid )
5231 #if KMP_COMPILER_ICC
5232 #pragma warning( push )
5233 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
5236 int gtid = (kmp_intptr_t)specific_gtid - 1;
5237 #if KMP_COMPILER_ICC
5238 #pragma warning( pop )
5241 KA_TRACE( 30, ( "__kmp_internal_end_dest: T#%d\n", gtid));
5255 if(gtid >= 0 && KMP_UBER_GTID(gtid))
5256 __kmp_gtid_set_specific( gtid );
5257 #ifdef KMP_TDATA_GTID
5260 __kmp_internal_end_thread( gtid );
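/*
 * Illustrative sketch (assumed helper names): the "- 1" above exists because
 * the thread-specific slot stores gtid+1, so a stored gtid of 0 cannot be
 * confused with an empty (NULL) slot when the key destructor fires.  A
 * minimal version of that encoding with a pthread key:
 */
#include <pthread.h>
#include <stdint.h>

static pthread_key_t gtid_key;   /* created once, with a destructor attached */

static void gtid_set(int gtid) {
    /* store gtid+1 so NULL (0) keeps meaning "not set" */
    pthread_setspecific(gtid_key, (void *)(intptr_t)(gtid + 1));
}

static int gtid_get(void) {
    void *v = pthread_getspecific(gtid_key);
    return (v == NULL) ? -1 : (int)(intptr_t)v - 1;   /* -1: no gtid registered */
}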
5263 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
5269 __attribute__(( destructor ))
5271 __kmp_internal_end_dtor( void )
5273 __kmp_internal_end_atexit();
5277 __kmp_internal_end_fini( void )
5279 __kmp_internal_end_atexit();
5286 __kmp_internal_end_atexit( void )
5288 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
5310 __kmp_internal_end_library( -1 );
5312 __kmp_close_console();
5318 kmp_info_t * thread,
5326 KMP_DEBUG_ASSERT( thread != NULL );
5328 gtid = thread->th.th_info.ds.ds_gtid;
5332 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
5334 KA_TRACE( 20, ( "__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
5336 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
5337 __kmp_release_64(&flag);
5342 __kmp_reap_worker( thread );
5357 if ( thread->th.th_active_in_pool ) {
5358 thread->th.th_active_in_pool = FALSE;
5359 KMP_TEST_THEN_DEC32(
5360 (kmp_int32 *) &__kmp_thread_pool_active_nth );
5361 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
5365 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
5366 --__kmp_thread_pool_nth;
5371 __kmp_free_fast_memory( thread );
5374 __kmp_suspend_uninitialize_thread( thread );
5376 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
5377 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5382 #ifdef KMP_ADJUST_BLOCKTIME
5385 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5386 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5387 if ( __kmp_nth <= __kmp_avail_proc ) {
5388 __kmp_zero_bt = FALSE;
5394 if( __kmp_env_consistency_check ) {
5395 if ( thread->th.th_cons ) {
5396 __kmp_free_cons_stack( thread->th.th_cons );
5397 thread->th.th_cons = NULL;
5401 if ( thread->th.th_pri_common != NULL ) {
5402 __kmp_free( thread->th.th_pri_common );
5403 thread->th.th_pri_common = NULL;
5406 if (thread->th.th_task_state_memo_stack != NULL) {
5407 __kmp_free(thread->th.th_task_state_memo_stack);
5408 thread->th.th_task_state_memo_stack = NULL;
5412 if ( thread->th.th_local.bget_data != NULL ) {
5413 __kmp_finalize_bget( thread );
5417 #if KMP_AFFINITY_SUPPORTED
5418 if ( thread->th.th_affin_mask != NULL ) {
5419 KMP_CPU_FREE( thread->th.th_affin_mask );
5420 thread->th.th_affin_mask = NULL;
5424 __kmp_reap_team( thread->th.th_serial_team );
5425 thread->th.th_serial_team = NULL;
5426 __kmp_free( thread );
5433 __kmp_internal_end( void )
5438 __kmp_unregister_library();
5446 __kmp_reclaim_dead_roots();
5449 for( i=0 ; i<__kmp_threads_capacity ; i++ )
5451 if( __kmp_root[i]->r.r_active )
5454 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5456 if ( i < __kmp_threads_capacity ) {
5474 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5475 if ( TCR_4( __kmp_init_monitor ) ) {
5476 __kmp_reap_monitor( & __kmp_monitor );
5477 TCW_4( __kmp_init_monitor, 0 );
5479 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5480 KA_TRACE( 10, ( "__kmp_internal_end: monitor reaped\n" ) );
5485 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
5486 if( __kmp_root[i] ) {
5488 KMP_ASSERT( ! __kmp_root[i]->r.r_active );
5497 while ( __kmp_thread_pool != NULL ) {
5499 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
5500 __kmp_thread_pool = thread->th.th_next_pool;
5502 thread->th.th_next_pool = NULL;
5503 thread->th.th_in_pool = FALSE;
5504 __kmp_reap_thread( thread, 0 );
5506 __kmp_thread_pool_insert_pt = NULL;
5509 while ( __kmp_team_pool != NULL ) {
5511 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
5512 __kmp_team_pool = team->t.t_next_pool;
5514 team->t.t_next_pool = NULL;
5515 __kmp_reap_team( team );
5518 __kmp_reap_task_teams( );
5520 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5527 TCW_SYNC_4(__kmp_init_common, FALSE);
5529 KA_TRACE( 10, ( "__kmp_internal_end: all workers reaped\n" ) );
5538 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5539 if ( TCR_4( __kmp_init_monitor ) ) {
5540 __kmp_reap_monitor( & __kmp_monitor );
5541 TCW_4( __kmp_init_monitor, 0 );
5543 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5544 KA_TRACE( 10, ( "__kmp_internal_end: monitor reaped\n" ) );
5547 TCW_4(__kmp_init_gtid, FALSE);
5555 __kmp_internal_end_library( int gtid_req )
5565 if( __kmp_global.g.g_abort ) {
5566 KA_TRACE( 11, ( "__kmp_internal_end_library: abort, exiting\n" ));
5570 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5571 KA_TRACE( 10, ( "__kmp_internal_end_library: already finished\n" ));
5580 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5581 KA_TRACE( 10, ( "__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
5582 if( gtid == KMP_GTID_SHUTDOWN ) {
5583 KA_TRACE( 10, ( "__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
5585 } else if( gtid == KMP_GTID_MONITOR ) {
5586 KA_TRACE( 10, ( "__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
5588 } else if( gtid == KMP_GTID_DNE ) {
5589 KA_TRACE( 10, ( "__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
5591 } else if( KMP_UBER_GTID( gtid )) {
5593 if( __kmp_root[gtid]->r.r_active ) {
5594 __kmp_global.g.g_abort = -1;
5595 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5596 KA_TRACE( 10, ( "__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
5599 KA_TRACE( 10, ( "__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
5600 __kmp_unregister_root_current_thread( gtid );
5607 #ifdef DUMP_DEBUG_ON_EXIT
5608 if ( __kmp_debug_buf )
5609 __kmp_dump_debug_buffer( );
5615 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
5618 if( __kmp_global.g.g_abort ) {
5619 KA_TRACE( 10, ( "__kmp_internal_end_library: abort, exiting\n" ));
5621 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5624 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5625 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5635 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
5638 __kmp_internal_end();
5640 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
5641 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5643 KA_TRACE( 10, ( "__kmp_internal_end_library: exit\n" ) );
5645 #ifdef DUMP_DEBUG_ON_EXIT
5646 if ( __kmp_debug_buf )
5647 __kmp_dump_debug_buffer();
5651 __kmp_close_console();
5654 __kmp_fini_allocator();
5659 __kmp_internal_end_thread( int gtid_req )
5669 if( __kmp_global.g.g_abort ) {
5670 KA_TRACE( 11, ( "__kmp_internal_end_thread: abort, exiting\n" ));
5674 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5675 KA_TRACE( 10, ( "__kmp_internal_end_thread: already finished\n" ));
5683 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5684 KA_TRACE( 10, ( "__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
5685 if( gtid == KMP_GTID_SHUTDOWN ) {
5686 KA_TRACE( 10, ( "__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
5688 } else if( gtid == KMP_GTID_MONITOR ) {
5689 KA_TRACE( 10, ( "__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
5691 } else if( gtid == KMP_GTID_DNE ) {
5692 KA_TRACE( 10, ( "__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
5695 } else if( KMP_UBER_GTID( gtid )) {
5697 if( __kmp_root[gtid]->r.r_active ) {
5698 __kmp_global.g.g_abort = -1;
5699 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5700 KA_TRACE( 10, ( "__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
5703 KA_TRACE( 10, ( "__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
5704 __kmp_unregister_root_current_thread( gtid );
5708 KA_TRACE( 10, ( "__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
5711 kmp_info_t *this_thr = __kmp_threads[ gtid ];
5712 if (TCR_PTR(this_thr->th.th_task_team) != NULL) {
5713 __kmp_unref_task_team(this_thr->th.th_task_team, this_thr);
5717 KA_TRACE( 10, ( "__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
5721 #if defined KMP_DYNAMIC_LIB
5729 KA_TRACE( 10, ( "__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
5733 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
5736 if( __kmp_global.g.g_abort ) {
5737 KA_TRACE( 10, ( "__kmp_internal_end_thread: abort, exiting\n" ));
5739 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5742 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5743 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5755 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
5757 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5758 if ( KMP_UBER_GTID( i ) ) {
5759 KA_TRACE( 10, ( "__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
5760 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
5761 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5768 __kmp_internal_end();
5770 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
5771 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5773 KA_TRACE( 10, ( "__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
5775 #ifdef DUMP_DEBUG_ON_EXIT
5776 if ( __kmp_debug_buf )
5777 __kmp_dump_debug_buffer();
5784 static long __kmp_registration_flag = 0;
5786 static char * __kmp_registration_str = NULL;
5792 __kmp_reg_status_name() {
5798 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
5803 __kmp_register_library_startup(
5807 char * name = __kmp_reg_status_name();
5814 __kmp_initialize_system_tick();
5816 __kmp_read_system_time( & time.dtime );
5817 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
5818 __kmp_registration_str =
5821 & __kmp_registration_flag,
5822 __kmp_registration_flag,
5826 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
5830 char * value = NULL;
5833 __kmp_env_set( name, __kmp_registration_str, 0 );
5835 value = __kmp_env_get( name );
5836 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
5845 char * tail = value;
5846 char * flag_addr_str = NULL;
5847 char * flag_val_str = NULL;
5848 char const * file_name = NULL;
5849 __kmp_str_split( tail, '-', & flag_addr_str, & tail );
5850 __kmp_str_split( tail, '-', & flag_val_str, & tail );
5852 if ( tail != NULL ) {
5853 long * flag_addr = 0;
5855 KMP_SSCANF( flag_addr_str, "%p", & flag_addr );
5856 KMP_SSCANF( flag_val_str, "%lx", & flag_val );
5857 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
5861 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
5869 switch ( neighbor ) {
5874 file_name = "unknown library";
5878 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
5879 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
5883 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
5884 KMP_HNT( DuplicateLibrary ),
5888 KMP_INTERNAL_FREE( duplicate_ok );
5889 __kmp_duplicate_library_ok = 1;
5894 __kmp_env_unset( name );
5897 KMP_DEBUG_ASSERT( 0 );
5902 KMP_INTERNAL_FREE( (void *) value );
5905 KMP_INTERNAL_FREE( (void *) name );
5911 __kmp_unregister_library( void ) {
5913 char * name = __kmp_reg_status_name();
5914 char * value = __kmp_env_get( name );
5916 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
5917 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
5918 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
5920 __kmp_env_unset( name );
5923 KMP_INTERNAL_FREE( __kmp_registration_str );
5924 KMP_INTERNAL_FREE( value );
5925 KMP_INTERNAL_FREE( name );
5927 __kmp_registration_flag = 0;
5928 __kmp_registration_str = NULL;
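/*
 * Illustrative sketch (standalone, assumed names; not the kmp helpers): the
 * register/unregister pair above publishes a process-unique value in an
 * environment variable and removes it at shutdown only if the variable still
 * holds this runtime's own value, so a second copy of the library loaded into
 * the same process can detect the first.  The same hand-shake with plain
 * POSIX calls:
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static char reg_name[64];
static char reg_value[64];

static void toy_register_library(void) {
    snprintf(reg_name, sizeof(reg_name), "__TOY_REGISTERED_LIB_%d", (int)getpid());
    snprintf(reg_value, sizeof(reg_value), "%p", (void *)&reg_value);  /* unique per loaded copy */
    if (getenv(reg_name) == NULL)
        setenv(reg_name, reg_value, 0);                    /* first copy wins */
    else
        fprintf(stderr, "another copy of the library is already registered\n");
}

static void toy_unregister_library(void) {
    const char *v = getenv(reg_name);
    if (v != NULL && strcmp(v, reg_value) == 0)            /* only remove our own mark */
        unsetenv(reg_name);
}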
5936 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
5938 static void __kmp_check_mic_type()
5940 kmp_cpuid_t cpuid_state = {0};
5941 kmp_cpuid_t * cs_p = &cpuid_state;
5944 __asm__ __volatile__("cpuid"
5945 : "+a" (cs_p->eax), "=b" (cs_p->ebx), "+c" (cs_p->ecx), "=d" (cs_p->edx));
5947 if( (cs_p->eax & 0xff0) == 0xB10 ) {
5948 __kmp_mic_type = mic2;
5949 } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
5950 __kmp_mic_type = mic3;
5952 __kmp_mic_type = non_mic;
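/*
 * Illustrative sketch (GCC/Clang only; assumes the signature leaf, CPUID
 * leaf 1, is what the function above decodes): the family/model bits of the
 * CPUID signature can also be read through the compiler's <cpuid.h> helper.
 * The mask and value below simply echo the "mic2" check above.
 */
#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
#include <cpuid.h>

static int toy_is_knc(void) {
    unsigned eax, ebx, ecx, edx;
    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
        return 0;                         /* leaf 1 not supported */
    return (eax & 0xff0) == 0xB10;        /* family/model bits matched above */
}
#endif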
5959 __kmp_do_serial_initialize( void )
5964 KA_TRACE( 10, ( "__kmp_do_serial_initialize: enter\n" ) );
5966 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
5967 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
5968 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
5969 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
5970 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
5972 __kmp_validate_locks();
5975 __kmp_init_allocator();
5981 __kmp_register_library_startup( );
5984 if( TCR_4(__kmp_global.g.g_done) ) {
5985 KA_TRACE( 10, ( "__kmp_do_serial_initialize: reinitialization of library\n" ) );
5988 __kmp_global.g.g_abort = 0;
5989 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
5992 #if KMP_USE_ADAPTIVE_LOCKS
5993 #if KMP_DEBUG_ADAPTIVE_LOCKS
5994 __kmp_init_speculative_stats();
5997 __kmp_init_lock( & __kmp_global_lock );
5998 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
5999 __kmp_init_lock( & __kmp_debug_lock );
6000 __kmp_init_atomic_lock( & __kmp_atomic_lock );
6001 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
6002 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
6003 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
6004 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
6005 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
6006 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
6007 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
6008 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
6009 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
6010 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
6011 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
6012 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
6013 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
6014 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
6015 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
6016 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
6020 __kmp_runtime_initialize();
6022 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6023 __kmp_check_mic_type();
6030 __kmp_abort_delay = 0;
6034 __kmp_dflt_team_nth_ub = __kmp_xproc;
6035 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
6036 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6038 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
6039 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6041 __kmp_max_nth = __kmp_sys_max_nth;
6044 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6045 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6046 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6048 __kmp_library = library_throughput;
6050 __kmp_static = kmp_sch_static_balanced;
6056 #if KMP_FAST_REDUCTION_BARRIER
6057 #define kmp_reduction_barrier_gather_bb ((int)1)
6058 #define kmp_reduction_barrier_release_bb ((int)1)
6059 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6060 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6061 #endif // KMP_FAST_REDUCTION_BARRIER
6062 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
6063 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
6064 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
6065 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
6066 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
6067 #if KMP_FAST_REDUCTION_BARRIER
6068 if( i == bs_reduction_barrier ) {
6069 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
6070 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
6071 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
6072 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
6074 #endif // KMP_FAST_REDUCTION_BARRIER
6076 #if KMP_FAST_REDUCTION_BARRIER
6077 #undef kmp_reduction_barrier_release_pat
6078 #undef kmp_reduction_barrier_gather_pat
6079 #undef kmp_reduction_barrier_release_bb
6080 #undef kmp_reduction_barrier_gather_bb
6081 #endif // KMP_FAST_REDUCTION_BARRIER
6082 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6083 if( __kmp_mic_type != non_mic ) {
6085 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3;
6086 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1;
6087 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6088 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6090 #if KMP_FAST_REDUCTION_BARRIER
6091 if( __kmp_mic_type != non_mic ) {
6092 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
6093 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
6100 __kmp_env_checks = TRUE;
6102 __kmp_env_checks = FALSE;
6106 __kmp_foreign_tp = TRUE;
6108 __kmp_global.g.g_dynamic = FALSE;
6109 __kmp_global.g.g_dynamic_mode = dynamic_default;
6111 __kmp_env_initialize( NULL );
6115 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
6116 if ( __kmp_str_match_true( val ) ) {
6117 kmp_str_buf_t buffer;
6118 __kmp_str_buf_init( & buffer );
6119 __kmp_i18n_dump_catalog( & buffer );
6120 __kmp_printf( "%s", buffer.str );
6121 __kmp_str_buf_free( & buffer );
6123 __kmp_env_free( & val );
6126 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
6128 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6133 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
6134 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
6135 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
6136 __kmp_thread_pool = NULL;
6137 __kmp_thread_pool_insert_pt = NULL;
6138 __kmp_team_pool = NULL;
6143 size = ( sizeof(kmp_info_t*) + sizeof(kmp_root_t*) ) * __kmp_threads_capacity + CACHE_LINE;
6144 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
6145 __kmp_root = (kmp_root_t**) ( (char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
6148 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 );
6149 KMP_DEBUG_ASSERT( __kmp_nth == 0 );
6154 gtid = __kmp_register_root( TRUE );
6155 KA_TRACE( 10, ( "__kmp_do_serial_initialize T#%d\n", gtid ));
6156 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6157 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
6161 __kmp_common_initialize();
6165 __kmp_register_atfork();
6168 #if ! defined KMP_DYNAMIC_LIB
6173 int rc = atexit( __kmp_internal_end_atexit );
6175 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
6180 #if KMP_HANDLE_SIGNALS
6187 __kmp_install_signals( FALSE );
6190 __kmp_install_signals( TRUE );
6195 __kmp_init_counter ++;
6197 __kmp_init_serial = TRUE;
6199 if (__kmp_settings) {
6204 if (__kmp_display_env || __kmp_display_env_verbose) {
6205 __kmp_env_print_2();
6207 #endif // OMP_40_ENABLED
6211 KA_TRACE( 10, ( "__kmp_do_serial_initialize: exit\n" ) );
6215 __kmp_serial_initialize( void )
6217 if ( __kmp_init_serial ) {
6220 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6221 if ( __kmp_init_serial ) {
6222 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6225 __kmp_do_serial_initialize();
6226 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
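/*
 * Illustrative sketch (generic, not the kmp bootstrap-lock API):
 * __kmp_serial_initialize above uses the classic "check, take the lock,
 * check again" pattern so that concurrent first callers initialize exactly
 * once while later callers return on the fast path.  The same shape with a
 * pthread mutex and a C11 atomic flag:
 */
#include <pthread.h>
#include <stdatomic.h>

static atomic_int      toy_init_done = 0;
static pthread_mutex_t toy_init_lock = PTHREAD_MUTEX_INITIALIZER;

static void toy_do_initialize(void) { /* expensive one-time setup goes here */ }

static void toy_initialize(void) {
    if (atomic_load(&toy_init_done))        /* fast path: already initialized */
        return;
    pthread_mutex_lock(&toy_init_lock);
    if (!atomic_load(&toy_init_done)) {     /* re-check under the lock */
        toy_do_initialize();
        atomic_store(&toy_init_done, 1);
    }
    pthread_mutex_unlock(&toy_init_lock);
}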
6230 __kmp_do_middle_initialize( void )
6233 int prev_dflt_team_nth;
6235 if( !__kmp_init_serial ) {
6236 __kmp_do_serial_initialize();
6239 KA_TRACE( 10, ( "__kmp_middle_initialize: enter\n" ) );
6245 prev_dflt_team_nth = __kmp_dflt_team_nth;
6247 #if KMP_AFFINITY_SUPPORTED
6252 __kmp_affinity_initialize();
6258 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6259 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
6260 __kmp_affinity_set_init_mask( i, TRUE );
6265 KMP_ASSERT( __kmp_xproc > 0 );
6266 if ( __kmp_avail_proc == 0 ) {
6267 __kmp_avail_proc = __kmp_xproc;
6272 while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) {
6273 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
6277 if ( __kmp_dflt_team_nth == 0 ) {
6278 #ifdef KMP_DFLT_NTH_CORES
6282 __kmp_dflt_team_nth = __kmp_ncores;
6283 KA_TRACE( 20, ( "__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
6284 __kmp_dflt_team_nth ) );
6289 __kmp_dflt_team_nth = __kmp_avail_proc;
6290 KA_TRACE( 20, ( "__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
6291 __kmp_dflt_team_nth ) );
6295 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
6296 __kmp_dflt_team_nth = KMP_MIN_NTH;
6298 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
6299 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6306 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
6308 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
6315 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6316 kmp_info_t *thread = __kmp_threads[ i ];
6317 if ( thread == NULL ) continue;
6318 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
6320 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
6323 KA_TRACE( 20, ( "__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6324 __kmp_dflt_team_nth) );
6326 #ifdef KMP_ADJUST_BLOCKTIME
6329 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6330 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6331 if ( __kmp_nth > __kmp_avail_proc ) {
6332 __kmp_zero_bt = TRUE;
6338 TCW_SYNC_4(__kmp_init_middle, TRUE);
6340 KA_TRACE( 10, ( "__kmp_do_middle_initialize: exit\n" ) );
6344 __kmp_middle_initialize( void )
6346 if ( __kmp_init_middle ) {
6349 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6350 if ( __kmp_init_middle ) {
6351 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6354 __kmp_do_middle_initialize();
6355 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6359 __kmp_parallel_initialize( void )
6361 int gtid = __kmp_entry_gtid();
6364 if( TCR_4(__kmp_init_parallel) ) return;
6365 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6366 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
6369 if( TCR_4(__kmp_global.g.g_done) ) {
6370 KA_TRACE( 10, ( "__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
6371 __kmp_infinite_loop();
6377 if( !__kmp_init_middle ) {
6378 __kmp_do_middle_initialize();
6382 KA_TRACE( 10, ( "__kmp_parallel_initialize: enter\n" ) );
6383 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6385 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6390 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
6391 __kmp_store_mxcsr( &__kmp_init_mxcsr );
6392 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6396 # if KMP_HANDLE_SIGNALS
6398 __kmp_install_signals( TRUE );
6402 __kmp_suspend_initialize();
6404 # if defined(USE_LOAD_BALANCE)
6405 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6406 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6409 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6410 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6414 if ( __kmp_version ) {
6415 __kmp_print_version_2();
6419 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6422 KA_TRACE( 10, ( "__kmp_parallel_initialize: exit\n" ) );
6424 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6431 __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6434 kmp_disp_t *dispatch;
6439 this_thr->th.th_local.this_construct = 0;
6440 #if KMP_CACHE_MANAGE
6441 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
6443 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6444 KMP_DEBUG_ASSERT( dispatch );
6445 KMP_DEBUG_ASSERT( team->t.t_dispatch );
6448 dispatch->th_disp_index = 0;
6450 if( __kmp_env_consistency_check )
6451 __kmp_push_parallel( gtid, team->t.t_ident );
6457 __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6460 if( __kmp_env_consistency_check )
6461 __kmp_pop_parallel( gtid, team->t.t_ident );
6465 __kmp_invoke_task_func( int gtid )
6468 int tid = __kmp_tid_from_gtid( gtid );
6469 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6470 kmp_team_t *team = this_thr->th.th_team;
6472 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
6474 if ( __itt_stack_caller_create_ptr ) {
6475 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id );
6478 #if INCLUDE_SSC_MARKS
6479 SSC_MARK_INVOKING();
6481 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
6482 gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv );
6485 if ( __itt_stack_caller_create_ptr ) {
6486 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id );
6489 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
6496 __kmp_teams_master( int gtid )
6499 kmp_info_t *thr = __kmp_threads[ gtid ];
6500 kmp_team_t *team = thr->th.th_team;
6501 ident_t *loc = team->t.t_ident;
6502 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6503 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
6504 KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
6505 KA_TRACE( 20, ( "__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
6506 gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
6509 #if INCLUDE_SSC_MARKS
6512 __kmp_fork_call( loc, gtid, fork_context_intel,
6514 (microtask_t)thr->th.th_teams_microtask,
6515 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
6517 #if INCLUDE_SSC_MARKS
6520 __kmp_join_call( loc, gtid, 1 );
6525 __kmp_invoke_teams_master( int gtid )
6527 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6528 kmp_team_t *team = this_thr->th.th_team;
6530 if ( !__kmp_threads[gtid]->th.th_team->t.t_serialized )
6531 KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]->th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
6533 __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
6534 __kmp_teams_master( gtid );
6535 __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
6546 __kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
6548 kmp_info_t *thr = __kmp_threads[gtid];
6550 if( num_threads > 0 )
6551 thr->th.th_set_nproc = num_threads;
6559 __kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
6561 kmp_info_t *thr = __kmp_threads[gtid];
6562 KMP_DEBUG_ASSERT(num_teams >= 0);
6563 KMP_DEBUG_ASSERT(num_threads >= 0);
6564 if( num_teams == 0 ) {
6568 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
6571 if( num_threads > 0 ) {
6572 thr->th.th_teams_size.nth = num_threads;
6574 if( !TCR_4(__kmp_init_middle) )
6575 __kmp_middle_initialize();
6576 thr->th.th_teams_size.nth = __kmp_avail_proc / num_teams;
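/*
 * Worked example (values assumed for illustration): when num_threads is not
 * given, the line above divides the available processors evenly among the
 * requested teams, e.g. with __kmp_avail_proc == 16 and num_teams == 4 each
 * team gets 16 / 4 == 4 threads; integer division means 16 / 5 would give 3.
 */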
6585 __kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
6587 kmp_info_t *thr = __kmp_threads[gtid];
6588 thr->th.th_set_proc_bind = proc_bind;
6596 __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
6598 kmp_info_t *this_thr = __kmp_threads[gtid];
6604 KMP_DEBUG_ASSERT( team );
6605 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
6606 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
6609 team->t.t_construct = 0;
6610 team->t.t_ordered.dt.t_value = 0;
6613 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
6614 if ( team->t.t_max_nproc > 1 ) {
6616 for (i = 0; i < KMP_MAX_DISP_BUF; ++i)
6617 team->t.t_disp_buffer[ i ].buffer_index = i;
6619 team->t.t_disp_buffer[ 0 ].buffer_index = 0;
6623 KMP_ASSERT( this_thr->th.th_team == team );
6626 for( f=0 ; f<team->t.t_nproc ; f++ ) {
6627 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
6628 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
6633 __kmp_fork_barrier( gtid, 0 );
6638 __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
6640 kmp_info_t *this_thr = __kmp_threads[gtid];
6642 KMP_DEBUG_ASSERT( team );
6643 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
6644 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
6650 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
6651 __kmp_printf( "GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid, __kmp_threads[gtid]);
6652 __kmp_printf( "__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
6653 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
6654 __kmp_print_structure();
6656 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
6657 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
6660 __kmp_join_barrier( gtid );
6663 KMP_ASSERT( this_thr->th.th_team == team );
6670 #ifdef USE_LOAD_BALANCE
6677 __kmp_active_hot_team_nproc( kmp_root_t *root )
6681 kmp_team_t *hot_team;
6683 if ( root->r.r_active ) {
6686 hot_team = root->r.r_hot_team;
6687 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
6688 return hot_team->t.t_nproc - 1;
6695 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
6696 if ( hot_team->t.t_threads[i]->th.th_active ) {
6708 __kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
6712 int hot_team_active;
6713 int team_curr_active;
6716 KB_TRACE( 20, ( "__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
6717 root, set_nproc ) );
6718 KMP_DEBUG_ASSERT( root );
6719 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
6720 KMP_DEBUG_ASSERT( set_nproc > 1 );
6722 if ( set_nproc == 1) {
6723 KB_TRACE( 20, ( "__kmp_load_balance_nproc: serial execution.\n" ) );
6734 pool_active = TCR_4(__kmp_thread_pool_active_nth);
6735 hot_team_active = __kmp_active_hot_team_nproc( root );
6736 team_curr_active = pool_active + hot_team_active + 1;
6741 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
6742 KB_TRACE( 30, ( "__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
6743 system_active, pool_active, hot_team_active ) );
6745 if ( system_active < 0 ) {
6752 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6753 KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
6758 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
6759 : root->r.r_hot_team->t.t_nproc);
6760 if ( retval > set_nproc ) {
6763 if ( retval < KMP_MIN_NTH ) {
6764 retval = KMP_MIN_NTH;
6767 KB_TRACE( 20, ( "__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
6777 if ( system_active < team_curr_active ) {
6778 system_active = team_curr_active;
6780 retval = __kmp_avail_proc - system_active + team_curr_active;
6781 if ( retval > set_nproc ) {
6784 if ( retval < KMP_MIN_NTH ) {
6785 retval = KMP_MIN_NTH;
6788 KB_TRACE( 20, ( "__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
6800 __kmp_cleanup( void )
6804 KA_TRACE( 10, ( "__kmp_cleanup: enter\n" ) );
6806 if (TCR_4(__kmp_init_parallel)) {
6807 #if KMP_HANDLE_SIGNALS
6808 __kmp_remove_signals();
6810 TCW_4(__kmp_init_parallel, FALSE);
6813 if (TCR_4(__kmp_init_middle)) {
6814 #if KMP_AFFINITY_SUPPORTED
6815 __kmp_affinity_uninitialize();
6817 TCW_4(__kmp_init_middle, FALSE);
6820 KA_TRACE( 10, ( "__kmp_cleanup: go serial cleanup\n" ) );
6822 if (__kmp_init_serial) {
6824 __kmp_runtime_destroy();
6826 __kmp_init_serial = FALSE;
6829 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
6830 if ( __kmp_root[ f ] != NULL ) {
6831 __kmp_free( __kmp_root[ f ] );
6832 __kmp_root[ f ] = NULL;
6835 __kmp_free( __kmp_threads );
6838 __kmp_threads = NULL;
6840 __kmp_threads_capacity = 0;
6842 #if KMP_USE_DYNAMIC_LOCK
6843 __kmp_cleanup_indirect_user_locks();
6845 __kmp_cleanup_user_locks();
6848 #if KMP_AFFINITY_SUPPORTED
6849 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
6850 __kmp_cpuinfo_file = NULL;
6853 #if KMP_USE_ADAPTIVE_LOCKS
6854 #if KMP_DEBUG_ADAPTIVE_LOCKS
6855 __kmp_print_speculative_stats();
6858 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
6859 __kmp_nested_nth.nth = NULL;
6860 __kmp_nested_nth.size = 0;
6861 __kmp_nested_nth.used = 0;
6863 __kmp_i18n_catclose();
6865 #if KMP_STATS_ENABLED
6866 __kmp_accumulate_stats_at_exit();
6867 __kmp_stats_list.deallocate();
6870 KA_TRACE( 10, ( "__kmp_cleanup: exit\n" ) );
6877 __kmp_ignore_mppbeg( void )
6881 if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
6882 if (__kmp_str_match_false( env ))
6890 __kmp_ignore_mppend( void )
6894 if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
6895 if (__kmp_str_match_false( env ))
6903 __kmp_internal_begin( void )
6910 gtid = __kmp_entry_gtid();
6911 root = __kmp_threads[ gtid ]->th.th_root;
6912 KMP_ASSERT( KMP_UBER_GTID( gtid ));
6914 if( root->r.r_begin ) return;
6915 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
6916 if( root->r.r_begin ) {
6917 __kmp_release_lock( & root->r.r_begin_lock, gtid );
6921 root->r.r_begin = TRUE;
6923 __kmp_release_lock( & root->r.r_begin_lock, gtid );
6931 __kmp_user_set_library ( enum library_type arg)
6939 gtid = __kmp_entry_gtid();
6940 thread = __kmp_threads[ gtid ];
6942 root = thread->th.th_root;
6944 KA_TRACE( 20, ( "__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
6945 if (root->r.r_in_parallel) {
6946 KMP_WARNING( SetLibraryIncorrectCall );
6951 case library_serial :
6952 thread->th.th_set_nproc = 0;
6953 set__nproc( thread, 1 );
6955 case library_turnaround :
6956 thread->th.th_set_nproc = 0;
6957 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
6959 case library_throughput :
6960 thread->th.th_set_nproc = 0;
6961 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
6964 KMP_FATAL( UnknownLibraryType, arg );
6967 __kmp_aux_set_library ( arg );
6971 __kmp_aux_set_stacksize( size_t arg )
6973 if (! __kmp_init_serial)
6974 __kmp_serial_initialize();
6977 if (arg & (0x1000 - 1)) {
6978 arg &= ~(0x1000 - 1);
6983 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6986 if (! TCR_4(__kmp_init_parallel)) {
6989 if (value < __kmp_sys_min_stksize )
6990 value = __kmp_sys_min_stksize ;
6991 else if (value > KMP_MAX_STKSIZE)
6992 value = KMP_MAX_STKSIZE;
6994 __kmp_stksize = value;
6996 __kmp_env_stksize = TRUE;
6999 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7005 __kmp_aux_set_library ( enum library_type arg)
7007 __kmp_library = arg;
7009 switch ( __kmp_library ) {
7010 case library_serial :
7012 KMP_INFORM( LibraryIsSerial );
7013 (void) __kmp_change_library( TRUE );
7016 case library_turnaround :
7017 (void) __kmp_change_library( TRUE );
7019 case library_throughput :
7020 (void) __kmp_change_library( FALSE );
7023 KMP_FATAL( UnknownLibraryType, arg );
7031 __kmp_aux_set_blocktime ( int arg, kmp_info_t *thread, int tid)
7033 int blocktime = arg;
7037 __kmp_save_internal_controls( thread );
7040 if (blocktime < KMP_MIN_BLOCKTIME)
7041 blocktime = KMP_MIN_BLOCKTIME;
7042 else if (blocktime > KMP_MAX_BLOCKTIME)
7043 blocktime = KMP_MAX_BLOCKTIME;
7045 set__blocktime_team( thread->th.th_team, tid, blocktime );
7046 set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
7049 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
7051 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
7052 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
7057 set__bt_set_team( thread->th.th_team, tid, bt_set );
7058 set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
7059 KF_TRACE(10, ( "kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n",
7060 __kmp_gtid_from_tid(tid, thread->th.th_team),
7061 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) );
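/*
 * Illustrative sketch (the exact KMP_INTERVALS_FROM_BLOCKTIME definition is
 * an assumption): the bt_intervals value above converts a blocktime given in
 * milliseconds into a count of monitor wakeup intervals, rounding up so a
 * thread never gives up earlier than requested.
 */
static int toy_intervals_from_blocktime(int blocktime_ms, int monitor_wakeups_per_sec) {
    int interval_ms = 1000 / monitor_wakeups_per_sec;        /* length of one monitor tick */
    if (interval_ms < 1)
        interval_ms = 1;                                     /* guard against very high wakeup rates */
    return (blocktime_ms + interval_ms - 1) / interval_ms;   /* ceiling division */
}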
7065 __kmp_aux_set_defaults(
7069 if ( ! __kmp_init_serial ) {
7070 __kmp_serial_initialize();
7072 __kmp_env_initialize( str );
7076 || __kmp_display_env || __kmp_display_env_verbose
7089 PACKED_REDUCTION_METHOD_T
7090 __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
7091 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7092 kmp_critical_name *lck )
7100 PACKED_REDUCTION_METHOD_T retval;
7104 int teamsize_cutoff = 4;
7106 KMP_DEBUG_ASSERT( loc );
7107 KMP_DEBUG_ASSERT( lck );
7109 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
7110 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
7112 retval = critical_reduce_block;
7114 team_size = __kmp_get_team_num_threads( global_tid );
7116 if( team_size == 1 ) {
7118 retval = empty_reduce_block;
7122 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7123 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7125 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
7127 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
7128 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
7129 if( __kmp_mic_type != non_mic ) {
7130 teamsize_cutoff = 8;
7133 if( tree_available ) {
7134 if( team_size <= teamsize_cutoff ) {
7135 if ( atomic_available ) {
7136 retval = atomic_reduce_block;
7139 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7141 } else if ( atomic_available ) {
7142 retval = atomic_reduce_block;
7145 #error "Unknown or unsupported OS"
7146 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
7148 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH
7150 #if KMP_OS_LINUX || KMP_OS_WINDOWS
7154 if( atomic_available ) {
7155 if( num_vars <= 2 ) {
7156 retval = atomic_reduce_block;
7162 if( atomic_available && ( num_vars <= 3 ) ) {
7163 retval = atomic_reduce_block;
7164 } else if( tree_available ) {
7165 if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
7166 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7171 #error "Unknown or unsupported OS"
7175 #error "Unknown or unsupported architecture"
7182 if( __kmp_force_reduction_method != reduction_method_not_defined ) {
7184 PACKED_REDUCTION_METHOD_T forced_retval;
7186 int atomic_available, tree_available;
7188 switch( ( forced_retval = __kmp_force_reduction_method ) )
7190 case critical_reduce_block:
7192 if( team_size <= 1 ) {
7193 forced_retval = empty_reduce_block;
7197 case atomic_reduce_block:
7198 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7199 KMP_ASSERT( atomic_available );
7202 case tree_reduce_block:
7203 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7204 KMP_ASSERT( tree_available );
7205 #if KMP_FAST_REDUCTION_BARRIER
7206 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7214 retval = forced_retval;
7217 KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
7219 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7220 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7227 __kmp_get_reduce_method( void ) {
7228 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );
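/*
 * Illustrative sketch (the exact bit layout is an assumption): the ">> 8"
 * above implies the packed reduction method keeps a barrier kind in the low
 * byte and the reduction method proper in the higher bits.  A toy pack/unpack
 * pair for such an encoding:
 */
typedef int toy_packed_reduction_t;

static toy_packed_reduction_t toy_pack_reduction(int method, int barrier_kind) {
    return (method << 8) | (barrier_kind & 0xff);   /* method in the high bits */
}

static int toy_reduction_method(toy_packed_reduction_t packed) {
    return packed >> 8;                             /* what __kmp_get_reduce_method extracts */
}

static int toy_reduction_barrier(toy_packed_reduction_t packed) {
    return packed & 0xff;                           /* barrier kind in the low byte */
}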