#include "kmp_atomic.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_environment.h"
#include "kmp_settings.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"

#define KMP_USE_PRCTL 0
#define KMP_USE_POOLED_ALLOC 0
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "

char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
kmp_info_t __kmp_monitor;

void __kmp_cleanup( void );

static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
static void __kmp_initialize_team( kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc );
static void __kmp_partition_places( kmp_team_t *team );
static void __kmp_do_serial_initialize( void );
void __kmp_fork_barrier( int gtid, int tid );
void __kmp_join_barrier( int gtid );
void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc );

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc( kmp_root_t *root, int set_nproc );

static int __kmp_expand_threads( int nWish, int nNeed );
static int __kmp_unregister_root_other_thread( int gtid );
static void __kmp_unregister_library( void );
static void __kmp_reap_thread( kmp_info_t *thread, int is_root );
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
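
/* __kmp_get_global_thread_id: determine the gtid of the calling thread.
   Depending on __kmp_gtid_mode this uses thread-local data (mode >= 3),
   the keyed thread-specific value (mode >= 2), or an internal search that
   matches the caller's stack address against the registered stack of each
   known thread. */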
__kmp_get_global_thread_id( )
    kmp_info_t **other_threads;

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
                      __kmp_nth, __kmp_all_nth ));

    if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
        return __kmp_gtid_get_specific();
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));

    stack_addr    = (char*) & stack_data;
    other_threads = __kmp_threads;

    for ( i = 0; i < __kmp_threads_capacity; i++ ) {
        kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);

        stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
        stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

        if ( stack_addr <= stack_base ) {
            size_t stack_diff = stack_base - stack_addr;

            if ( stack_diff <= stack_size ) {
                KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
                      "thread, using TLS\n" ));
    i = __kmp_gtid_get_specific();

    if ( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
        KMP_FATAL( StackOverflow, i );

    stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
    if ( stack_addr > stack_base ) {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
                other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);

    if ( __kmp_storage_map ) {
        char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
        char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
        __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
                                      other_threads[i]->th.th_info.ds.ds_stacksize,
                                      "th_%d stack (refinement)", i );
__kmp_get_global_thread_id_reg( )
    if ( !__kmp_init_serial ) {

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
        gtid = __kmp_gtid_get_specific();
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
    gtid = __kmp_get_global_thread_id();

    if ( gtid == KMP_GTID_DNE ) {
        KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
                        "Registering a new gtid.\n" ));
        __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
        if ( !__kmp_init_serial ) {
            __kmp_do_serial_initialize();
            gtid = __kmp_gtid_get_specific();
            gtid = __kmp_register_root(FALSE);
        __kmp_release_bootstrap_lock( &__kmp_initz_lock );

    KMP_DEBUG_ASSERT( gtid >= 0 );
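
/* __kmp_check_stack_overlap: verify that the stack of thread 'th' does not
   overlap the registered stack of any other thread.  When storage mapping is
   enabled the stack extent is printed; on an actual overlap a fatal
   StackOverlap message is issued with a ChangeStackLimit hint. */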
__kmp_check_stack_overlap( kmp_info_t *th )
    char *stack_beg = NULL;
    char *stack_end = NULL;

    KA_TRACE( 10, ( "__kmp_check_stack_overlap: called\n" ) );
    if ( __kmp_storage_map ) {
        stack_end = (char *) th->th.th_info.ds.ds_stackbase;
        stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        gtid = __kmp_gtid_from_thread( th );

        if ( gtid == KMP_GTID_MONITOR ) {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%s stack (%s)", "mon",
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%d stack (%s)", gtid,
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );

    if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid = __kmp_gtid_from_thread( th )))
        KA_TRACE( 10, ( "__kmp_check_stack_overlap: performing extensive checking\n" ) );
        if ( stack_beg == NULL ) {
            stack_end = (char *) th->th.th_info.ds.ds_stackbase;
            stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        for ( f = 0; f < __kmp_threads_capacity; f++ ) {
            kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

            if ( f_th && f_th != th ) {
                char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
                char *other_stack_beg = other_stack_end -
                                        (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
                if ( (stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
                     (stack_end > other_stack_beg && stack_end < other_stack_end) ) {

                    if ( __kmp_storage_map )
                        __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
                                                      (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                                                      "th_%d stack (overlapped)",
                                                      __kmp_gtid_from_thread( f_th ) );

                    __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );

    KA_TRACE( 10, ( "__kmp_check_stack_overlap: returning\n" ) );
__kmp_infinite_loop( void )
    static int done = FALSE;

#define MAX_MESSAGE 512
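
/* __kmp_print_storage_map_gtid: print one "OMP storage map" line for the
   address range [p1, p2) of the given size and, when data-placement printing
   is compiled in, report which memory node backs each page of the range. */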
__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ... ) {
    char buffer[MAX_MESSAGE];

    va_start( ap, format );
    sprintf( buffer, "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
    __kmp_acquire_bootstrap_lock( &__kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
#if KMP_PRINT_DATA_PLACEMENT

    if ( p1 <= p2 && (char*)p2 - (char*)p1 == size ) {
        if ( __kmp_storage_map_verbose ) {
            node = __kmp_get_host_node(p1);
                __kmp_storage_map_verbose = FALSE;

                int localProc = __kmp_get_cpu_from_gtid(gtid);

                p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
                p2 = (void *)( ((size_t)p2 - 1) & ~((size_t)PAGE_SIZE - 1) );

                    __kmp_printf_no_lock( " GTID %d localNode %d\n", gtid, localProc>>1 );
                    __kmp_printf_no_lock( " GTID %d\n", gtid );

                        (char*)p1 += PAGE_SIZE;
                    } while ( p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode );
                    __kmp_printf_no_lock( " %p-%p memNode %d\n", last, (char*)p1 - 1, lastNode );

                __kmp_printf_no_lock( " %p-%p memNode %d\n", p1,
                                      (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1) );
                __kmp_printf_no_lock( " %p-%p memNode %d\n", p2,
                                      (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2) );

            __kmp_printf_no_lock( " %s\n", KMP_I18N_STR( StorageMapWarning ) );

    __kmp_release_bootstrap_lock( &__kmp_stdio_lock );
__kmp_warn( char const *format, ... )
    char buffer[MAX_MESSAGE];

    if ( __kmp_generate_warnings == kmp_warnings_off ) {

    va_start( ap, format );

    snprintf( buffer, sizeof(buffer), "OMP warning: %s\n", format );
    __kmp_acquire_bootstrap_lock( &__kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
    __kmp_release_bootstrap_lock( &__kmp_stdio_lock );
__kmp_abort_process()
    __kmp_acquire_bootstrap_lock( &__kmp_exit_lock );

    if ( __kmp_debug_buf ) {
        __kmp_dump_debug_buffer();

    if ( KMP_OS_WINDOWS ) {
        __kmp_global.g.g_abort = SIGABRT;

    __kmp_infinite_loop();
    __kmp_release_bootstrap_lock( &__kmp_exit_lock );

__kmp_abort_thread( void )
    __kmp_infinite_loop();
__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
    __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
                                  "th_%d.th_info", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
                                  "th_%d.th_local", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
                                  sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
                                  &thr->th.th_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                                  &thr->th.th_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid );

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
                                  &thr->th.th_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid );
#endif // KMP_FAST_REDUCTION_BARRIER
__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
    int num_disp_buff = team->t.t_max_nproc > 1 ? KMP_MAX_DISP_BUF : 2;
    __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
                                  sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
#endif // KMP_FAST_REDUCTION_BARRIER

    __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
                                  sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
                                  sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
                                  sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",

    __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
                                  sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}
static void __kmp_fini_allocator_thread() {}
#ifdef GUIDEDLL_EXPORTS

__kmp_reset_lock( kmp_bootstrap_lock_t *lck ) {
    __kmp_init_bootstrap_lock( lck );

__kmp_reset_locks_on_process_detach( int gtid_req ) {

    for ( i = 0; i < __kmp_threads_capacity; ++i ) {
        if ( !__kmp_threads ) continue;
        kmp_info_t *th = __kmp_threads[ i ];
        if ( th == NULL ) continue;
        int gtid = th->th.th_info.ds.ds_gtid;
        if ( gtid == gtid_req ) continue;
        if ( gtid < 0 ) continue;

        int alive = __kmp_is_thread_alive( th, &exit_val );

        if ( thread_count == 0 ) break;

    __kmp_reset_lock( &__kmp_forkjoin_lock );
    __kmp_reset_lock( &__kmp_stdio_lock );
DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {

    switch ( fdwReason ) {

        case DLL_PROCESS_ATTACH:
            KA_TRACE( 10, ( "DllMain: PROCESS_ATTACH\n" ) );

        case DLL_PROCESS_DETACH:
            KA_TRACE( 10, ( "DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific() ) );

            if ( lpReserved != NULL )

                __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );

            __kmp_internal_end_library( __kmp_gtid_get_specific() );

        case DLL_THREAD_ATTACH:
            KA_TRACE( 10, ( "DllMain: THREAD_ATTACH\n" ) );

        case DLL_THREAD_DETACH:
            KA_TRACE( 10, ( "DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific() ) );

            __kmp_internal_end_thread( __kmp_gtid_get_specific() );
__kmp_change_library( int status )
    old_status = __kmp_yield_init & 1;

        __kmp_yield_init |= 1;

        __kmp_yield_init &= ~1;
__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    kmp_team_t *team = __kmp_team_from_gtid( gtid );

    if ( __kmp_env_consistency_check ) {
        if ( __kmp_threads[gtid]->th.th_root->r.r_active )
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
#ifdef BUILD_PARALLEL_ORDERED
    if ( !team->t.t_serialized ) {

        KMP_WAIT_YIELD( &team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL );
__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    int tid = __kmp_tid_from_gtid( gtid );
    kmp_team_t *team = __kmp_team_from_gtid( gtid );

    if ( __kmp_env_consistency_check ) {
        if ( __kmp_threads[gtid]->th.th_root->r.r_active )
            __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
#ifdef BUILD_PARALLEL_ORDERED
    if ( ! team->t.t_serialized ) {

        team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );
__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
    if ( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    th   = __kmp_threads[ gtid ];
    team = th->th.th_team;

    th->th.th_ident = id_ref;

    if ( team->t.t_serialized ) {

        kmp_int32 old_this = th->th.th_local.this_construct;

        ++th->th.th_local.this_construct;

        status = KMP_COMPARE_AND_STORE_ACQ32( &team->t.t_construct, old_this,
                                              th->th.th_local.this_construct );

    if ( __kmp_env_consistency_check ) {
        if ( status && push_ws ) {
            __kmp_push_workshare( gtid, ct_psingle, id_ref );
            __kmp_check_workshare( gtid, ct_psingle, id_ref );

        __kmp_itt_single_start( gtid );

    if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 ) {
        __kmp_itt_metadata_single();
__kmp_exit_single( int gtid )
    __kmp_itt_single_end( gtid );

    if ( __kmp_env_consistency_check )
        __kmp_pop_workshare( gtid, ct_psingle, NULL );
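
/* __kmp_reserve_threads: decide how many threads a new parallel region will
   actually get.  Starts from the requested set_nthreads, then applies the
   nesting/serialization rules, the dynamic adjustment mode (load balance,
   thread limit, or random), the KMP_ALL_THREADS limit (__kmp_max_nth), and
   the capacity of the __kmp_threads array (expanding it when possible).
   Returns 1 if the region must be serialized, otherwise the reserved count. */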
__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
                       int master_tid, int set_nthreads
    int use_rml_to_adjust_nth;
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    KMP_DEBUG_ASSERT( root && parent_team );

    if ( set_nthreads == 1 ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d reserving 1 thread; requested %d threads\n",
                        __kmp_get_gtid(), set_nthreads ) );

    if ( ( !get__nested_2(parent_team,master_tid) && (root->r.r_in_parallel
         ) ) || ( __kmp_library == library_serial ) ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team; requested %d threads\n",
                        __kmp_get_gtid(), set_nthreads ) );

    new_nthreads = set_nthreads;
    use_rml_to_adjust_nth = FALSE;
    if ( ! get__dynamic_2( parent_team, master_tid ) ) {

#ifdef USE_LOAD_BALANCE
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
        new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
        if ( new_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
                            master_tid, new_nthreads ) );

    else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
        new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
                       : root->r.r_hot_team->t.t_nproc);
        if ( new_nthreads <= 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
                            master_tid, new_nthreads ) );
            new_nthreads = set_nthreads;

    else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
        if ( set_nthreads > 2 ) {
            new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
            new_nthreads = ( new_nthreads % set_nthreads ) + 1;
            if ( new_nthreads == 1 ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
            if ( new_nthreads < set_nthreads ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
                                master_tid, new_nthreads ) );

    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
         root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
        int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
                          root->r.r_hot_team->t.t_nproc );
        if ( tl_nthreads <= 0 ) {

        if ( ! get__dynamic_2( parent_team, master_tid )
             && ( ! __kmp_reserve_warn ) ) {
            __kmp_reserve_warn = 1;
                KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
                KMP_HNT( Unset_ALL_THREADS ),

        if ( tl_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
                        master_tid, tl_nthreads ) );
        new_nthreads = tl_nthreads;

    capacity = __kmp_threads_capacity;
    if ( TCR_PTR(__kmp_threads[0]) == NULL ) {

    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
         root->r.r_hot_team->t.t_nproc ) > capacity ) {

        int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
                            root->r.r_hot_team->t.t_nproc ) - capacity;
        int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
        if ( slotsAdded < slotsRequired ) {

            new_nthreads -= ( slotsRequired - slotsAdded );
            KMP_ASSERT( new_nthreads >= 1 );

            if ( ! get__dynamic_2( parent_team, master_tid )
                 && ( ! __kmp_reserve_warn ) ) {
                __kmp_reserve_warn = 1;
                if ( __kmp_tp_cached ) {
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
                        KMP_HNT( PossibleSystemLimitOnThreads ),
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( SystemLimitOnThreads ),

    if ( new_nthreads == 1 ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
                        __kmp_get_gtid(), set_nthreads ) );

    KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
                    __kmp_get_gtid(), new_nthreads, set_nthreads ) );
    return new_nthreads;
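
/* __kmp_fork_team_threads: bind the master thread into slot 0 of the new team
   and, unless the root's hot team is being reused, allocate and initialize the
   worker threads, propagate the teams-construct state and barrier arrival
   counts, and (when affinity is supported) partition the places. */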
__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
                         kmp_info_t *master_th, int master_gtid )
    KA_TRACE( 10, ( "__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
    KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );

    master_th->th.th_info.ds.ds_tid  = 0;
    master_th->th.th_team            = team;
    master_th->th.th_team_nproc      = team->t.t_nproc;
    master_th->th.th_team_master     = master_th;
    master_th->th.th_team_serialized = FALSE;
    master_th->th.th_dispatch        = & team->t.t_dispatch[ 0 ];

#if KMP_NESTED_HOT_TEAMS
    kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
    int level = team->t.t_active_level - 1;
    if ( master_th->th.th_teams_microtask ) {
        if ( master_th->th.th_teams_size.nteams > 1 ) {

        if ( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
             master_th->th.th_teams_level == team->t.t_level ) {

    if ( level < __kmp_hot_teams_max_level ) {
        if ( hot_teams[level].hot_team ) {

            KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);

            hot_teams[level].hot_team = team;
            hot_teams[level].hot_team_nth = team->t.t_nproc;

    use_hot_team = team == root->r.r_hot_team;

    if ( !use_hot_team ) {

        team->t.t_threads[ 0 ] = master_th;
        __kmp_initialize_info( master_th, team, 0, master_gtid );

        for ( i = 1; i < team->t.t_nproc; i++ ) {

            kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
            team->t.t_threads[ i ] = thr;
            KMP_DEBUG_ASSERT( thr );
            KMP_DEBUG_ASSERT( thr->th.th_team == team );

            KA_TRACE( 20, ( "__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%u, plain=%u\n",
                            __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
                            __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
                            team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
                            team->t.t_bar[ bs_plain_barrier ].b_arrived ) );

            thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
            thr->th.th_teams_level     = master_th->th.th_teams_level;
            thr->th.th_teams_size      = master_th->th.th_teams_size;

            kmp_balign_t *balign = team->t.t_threads[ i ]->th.th_bar;
            for ( b = 0; b < bs_last_barrier; ++b ) {
                balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
                KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
        __kmp_partition_places( team );
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

propagateFPControl(kmp_team_t *team)
    if ( __kmp_inherit_fp_control ) {
        kmp_int16 x87_fpu_control_word;

        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            team->t.t_x87_fpu_control_word = x87_fpu_control_word;
        if ( team->t.t_mxcsr != mxcsr ) {
            team->t.t_mxcsr = mxcsr;

        if (!team->t.t_fp_control_saved) {
            team->t.t_fp_control_saved = TRUE;

        if (team->t.t_fp_control_saved)
            team->t.t_fp_control_saved = FALSE;

updateHWFPControl(kmp_team_t *team)
    if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {

        kmp_int16 x87_fpu_control_word;

        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );

        if ( team->t.t_mxcsr != mxcsr ) {
            __kmp_load_mxcsr( &team->t.t_mxcsr );

# define propagateFPControl(x) ((void)0)
# define updateHWFPControl(x)  ((void)0)
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc );
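
/* __kmp_serialized_parallel: execute a parallel region serialized on the
   encountering thread.  Reuses or allocates the thread's serial team, pushes
   the current task and per-level ICVs, and sets up a dispatch buffer so that
   worksharing constructs inside the serialized region still work. */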
__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ( "__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );

    if ( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr    = __kmp_threads[ global_tid ];
    serial_team = this_thr->th.th_serial_team;

    KMP_DEBUG_ASSERT( serial_team );

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT( this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team );
        KMP_DEBUG_ASSERT( serial_team->t.t_task_team == NULL );
        KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
                        global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
        this_thr->th.th_task_team = NULL;

    kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
    if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;
    else if ( proc_bind == proc_bind_default ) {

        proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;

    this_thr->th.th_set_proc_bind = proc_bind_default;

    if ( this_thr->th.th_team != serial_team ) {

        int level = this_thr->th.th_team->t.t_level;

        if ( serial_team->t.t_serialized ) {

            kmp_team_t *new_team;
            int tid = this_thr->th.th_info.ds.ds_tid;

            __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

            new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                                           & this_thr->th.th_current_task->td_icvs,
                                           0 USE_NESTED_HOT_ARG(NULL) );
            __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
            KMP_ASSERT( new_team );

            new_team->t.t_threads[0] = this_thr;
            new_team->t.t_parent = this_thr->th.th_team;
            serial_team = new_team;
            this_thr->th.th_serial_team = serial_team;

            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
                            global_tid, serial_team ) );

            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
                            global_tid, serial_team ) );

        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
        serial_team->t.t_ident = loc;
        serial_team->t.t_serialized = 1;
        serial_team->t.t_nproc = 1;
        serial_team->t.t_parent = this_thr->th.th_team;
        serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
        this_thr->th.th_team = serial_team;
        serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d curtask=%p\n",
                        global_tid, this_thr->th.th_current_task ) );
        KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
        this_thr->th.th_current_task->td_flags.executing = 0;

        __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );

                   & this_thr->th.th_current_task->td_icvs,
                   & this_thr->th.th_current_task->td_parent->td_icvs );

        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];

        if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
            this_thr->th.th_current_task->td_icvs.proc_bind
                = __kmp_nested_proc_bind.bind_types[ level + 1 ];

        this_thr->th.th_info.ds.ds_tid = 0;

        this_thr->th.th_team_nproc  = 1;
        this_thr->th.th_team_master = this_thr;
        this_thr->th.th_team_serialized = 1;

        serial_team->t.t_level        = serial_team->t.t_parent->t.t_level + 1;
        serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

        propagateFPControl( serial_team );

        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
            serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );
        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

        KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        ++ serial_team->t.t_serialized;
        this_thr->th.th_team_serialized = serial_team->t.t_serialized;

        int level = this_thr->th.th_team->t.t_level;

        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
        serial_team->t.t_level++;
        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
                        global_tid, serial_team, serial_team->t.t_level ) );

        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);

        dispatch_private_info_t *disp_buffer = (dispatch_private_info_t *)
            __kmp_allocate( sizeof( dispatch_private_info_t ) );
        disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
        serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;

        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    if ( __kmp_env_consistency_check )
        __kmp_push_parallel( global_tid, NULL );

    if ( ( __itt_frame_begin_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG )
        this_thr->th.th_ident = loc;
        __kmp_itt_region_forking( global_tid, this_thr->th.th_team_nproc, 0, 1 );

    if ( ( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) &&
           __itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr ) || KMP_ITT_DEBUG )
        this_thr->th.th_ident = loc;

        if ( this_thr->th.th_team->t.t_level == 1 ) {
            serial_team->t.t_region_time = this_thr->th.th_frame_time_serialized = __itt_get_timestamp();
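
/* __kmp_fork_call: entry point for starting a parallel region.  Reserves
   threads, allocates (or reuses) a team, copies the outlined function's
   arguments, releases the workers through __kmp_internal_fork, and then
   either invokes the microtask on the master (Intel entry point) or returns
   so the caller can do so (GNU entry point).  The teams construct is handled
   as a special case on the parent team. */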
    enum fork_context_e call_context,
    microtask_t microtask,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX

    int master_this_cons;
    kmp_team_t *parent_team;
    kmp_info_t *master_th;
    int master_set_numthreads;
#if KMP_NESTED_HOT_TEAMS
    kmp_hot_team_ptr_t **p_hot_teams;

    KA_TRACE( 20, ( "__kmp_fork_call: enter T#%d\n", gtid ) );
    if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {

        void *dummy = alloca(__kmp_stkpadding);

        if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
            __kmp_stkpadding += (short)((kmp_int64)dummy);

    KMP_DEBUG_ASSERT( __kmp_init_serial );
    if ( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    master_th        = __kmp_threads[ gtid ];
    parent_team      = master_th->th.th_team;
    master_tid       = master_th->th.th_info.ds.ds_tid;
    master_this_cons = master_th->th.th_local.this_construct;
    root             = master_th->th.th_root;
    master_active    = root->r.r_active;
    master_set_numthreads = master_th->th.th_set_nproc;

    level        = parent_team->t.t_level;
    active_level = parent_team->t.t_active_level;
    teams_level  = master_th->th.th_teams_level;

#if KMP_NESTED_HOT_TEAMS
    p_hot_teams = &master_th->th.th_hot_teams;
    if ( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
        *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
                           sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
        (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
        (*p_hot_teams)[0].hot_team_nth = 1;
    master_th->th.th_ident = loc;

    if ( master_th->th.th_teams_microtask &&
         ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {

        parent_team->t.t_ident = loc;
        parent_team->t.t_argc  = argc;
        argv = (void**)parent_team->t.t_argv;
        for ( i = argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
            *argv++ = va_arg( *ap, void * );
            *argv++ = va_arg( ap, void * );

        if ( parent_team == master_th->th.th_serial_team ) {

            KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
            parent_team->t.t_serialized--;

            __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv );

        parent_team->t.t_pkfn   = microtask;
        parent_team->t.t_invoke = invoker;
        KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
        parent_team->t.t_active_level ++;
        parent_team->t.t_level ++;

        if ( master_set_numthreads ) {
            if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {

                kmp_info_t **other_threads = parent_team->t.t_threads;
                parent_team->t.t_nproc = master_set_numthreads;
                for ( i = 0; i < master_set_numthreads; ++i ) {
                    other_threads[i]->th.th_team_nproc = master_set_numthreads;

            master_th->th.th_set_nproc = 0;

        KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n",
                        root, parent_team, master_th, gtid ) );
        __kmp_internal_fork( loc, gtid, parent_team );
        KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n",
                        root, parent_team, master_th, gtid ) );

        KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
                        gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );

        if (! parent_team->t.t_invoke( gtid )) {
            KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
        KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
                        gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );

        KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ) );
    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team );

    __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

    if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {

        nthreads = master_set_numthreads ?
            master_set_numthreads : get__nproc_2( parent_team, master_tid );
        nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
            , ((ap==NULL && active_level==0) ||
               (ap && teams_level>0 && teams_level==level))

    KMP_DEBUG_ASSERT( nthreads > 0 );

    master_th->th.th_set_nproc = 0;

    if ( nthreads == 1 ) {

#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM )
        void *  args[ argc ];
        void ** args = (void**) alloca( argc * sizeof( void * ) );

        __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
        KA_TRACE( 20, ( "__kmp_fork_call: T#%d serializing parallel region\n", gtid ) );
        if ( call_context == fork_context_intel ) {

            master_th->th.th_serial_team->t.t_ident = loc;

                master_th->th.th_serial_team->t.t_level--;

                __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv );

            } else if ( microtask == (microtask_t)__kmp_teams_master ) {
                KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
                team = master_th->th.th_team;

                team->t.t_invoke = invoker;
                __kmp_alloc_argv_entries( argc, team, TRUE );
                team->t.t_argc = argc;
                argv = (void**) team->t.t_argv;

                for ( i = argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
                    *argv++ = va_arg( *ap, void * );
                    *argv++ = va_arg( ap, void * );

                for ( i = 0; i < argc; ++i )
                    argv[i] = parent_team->t.t_argv[i];

                for ( i = argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
                    *argv++ = va_arg( *ap, void * );
                    *argv++ = va_arg( ap, void * );

                __kmp_invoke_microtask( microtask, gtid, 0, argc, args );

        } else if ( call_context == fork_context_gnu ) {

            KA_TRACE( 20, ( "__kmp_fork_call: T#%d serial exit\n", gtid ) );

            KMP_ASSERT2( call_context < fork_context_last,
                         "__kmp_fork_call: unknown fork_context parameter" );

        KA_TRACE( 20, ( "__kmp_fork_call: T#%d serial exit\n", gtid ) );
    KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
                    parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
                    master_th->th.th_current_task->td_icvs.max_active_levels ) );

    master_th->th.th_current_task->td_flags.executing = 0;

    if ( !master_th->th.th_teams_microtask || level > teams_level )

        KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );

    int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
    if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
        nthreads_icv = __kmp_nested_nth.nth[level+1];

    kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
    kmp_proc_bind_t proc_bind_icv = proc_bind_default;
    if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;

        if (proc_bind == proc_bind_default) {

            proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;

        if ((level+1 < __kmp_nested_proc_bind.used)
            && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
            proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];

    master_th->th.th_set_proc_bind = proc_bind_default;

    if ((nthreads_icv > 0)
        || (proc_bind_icv != proc_bind_default)

        kmp_internal_control_t new_icvs;
        copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
        new_icvs.next = NULL;
        if (nthreads_icv > 0) {
            new_icvs.nproc = nthreads_icv;

        if (proc_bind_icv != proc_bind_default) {
            new_icvs.proc_bind = proc_bind_icv;

        KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
        team = __kmp_allocate_team(root, nthreads, nthreads,
                                   &new_icvs, argc USE_NESTED_HOT_ARG(master_th) );

        KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
        team = __kmp_allocate_team(root, nthreads, nthreads,
                                   &master_th->th.th_current_task->td_icvs, argc
                                   USE_NESTED_HOT_ARG(master_th) );

    KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );

    team->t.t_master_tid = master_tid;
    team->t.t_master_this_cons = master_this_cons;
    team->t.t_ident  = loc;
    team->t.t_parent = parent_team;
    TCW_SYNC_PTR(team->t.t_pkfn, microtask);
    team->t.t_invoke = invoker;

    if ( !master_th->th.th_teams_microtask || level > teams_level ) {

        team->t.t_level        = parent_team->t.t_level + 1;
        team->t.t_active_level = parent_team->t.t_active_level + 1;

        team->t.t_level        = parent_team->t.t_level;
        team->t.t_active_level = parent_team->t.t_active_level;

    team->t.t_sched = get__sched_2(parent_team, master_tid);

    propagateFPControl(team);

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {

        KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team );
        KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
                        __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
                        parent_team, team->t.t_task_team, team ) );
        master_th->th.th_task_team = team->t.t_task_team;
#if !KMP_NESTED_HOT_TEAMS
        KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));

    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
                    gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ) );
    KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
                      ( team->t.t_master_tid == 0 &&
                        ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
    argv = (void**)team->t.t_argv;

        for ( i = argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
            *argv++ = va_arg( *ap, void * );
            *argv++ = va_arg( ap, void * );

        for ( i = 0; i < argc; ++i )
            argv[i] = team->t.t_parent->t.t_argv[i];

    team->t.t_master_active = master_active;
    if (!root->r.r_active)
        root->r.r_active = TRUE;

    __kmp_fork_team_threads( root, team, master_th, gtid );
    __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );

    __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

    if ((__itt_frame_begin_v3_ptr && __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) || KMP_ITT_DEBUG)

        if (!master_th->th.th_teams_microtask || microtask == (microtask_t)__kmp_teams_master)

            __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);

    kmp_uint64 tmp_time = 0;

    if ( __itt_get_timestamp_ptr )
        tmp_time = __itt_get_timestamp();

    if ((__itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode==3) || KMP_ITT_DEBUG)

        if (!master_th->th.th_teams_microtask || microtask == (microtask_t)__kmp_teams_master)

            team->t.t_region_time = tmp_time;

    if ((__kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3) && __itt_frame_submit_v3_ptr ) {
        if (!(team->t.t_active_level > 1)) {
            master_th->th.th_frame_time = tmp_time;

    KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );

    KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
                  root, team, master_th, gtid));

    if ( __itt_stack_caller_create_ptr ) {
        team->t.t_stack_id = __kmp_itt_stack_caller_create();

    __kmp_internal_fork( loc, gtid, team );
    KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
                  root, team, master_th, gtid));

    if (call_context == fork_context_gnu) {
        KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ) );

    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
                    gtid, team->t.t_id, team->t.t_pkfn ) );

    if (! team->t.t_invoke( gtid )) {
        KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
                    gtid, team->t.t_id, team->t.t_pkfn ) );

    KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ) );
__kmp_join_call( ident_t *loc, int gtid
    kmp_team_t *parent_team;
    kmp_info_t *master_th;

    KA_TRACE( 20, ( "__kmp_join_call: enter T#%d\n", gtid ) );

    master_th   = __kmp_threads[ gtid ];
    root        = master_th->th.th_root;
    team        = master_th->th.th_team;
    parent_team = team->t.t_parent;

    master_th->th.th_ident = loc;

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
                        __kmp_gtid_from_thread( master_th ), team,
                        team->t.t_task_team, master_th->th.th_task_team) );
        KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team );

    if ( team->t.t_serialized ) {

        if ( master_th->th.th_teams_microtask ) {

            int level  = team->t.t_level;
            int tlevel = master_th->th.th_teams_level;
            if ( level == tlevel ) {

            } else if ( level == tlevel + 1 ) {

                team->t.t_serialized++;

    master_active = team->t.t_master_active;

    __kmp_internal_join( loc, gtid, team );

    if ( __itt_stack_caller_create_ptr ) {
        __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id );

    if ( ( __itt_frame_end_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG )

        if ( !master_th->th.th_teams_microtask ||
             ( !exit_teams && team->t.t_level == master_th->th.th_teams_level ) )

            master_th->th.th_ident = loc;
            __kmp_itt_region_joined( gtid );

    if ( ( __itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode == 3 ) || KMP_ITT_DEBUG )

        if ( !master_th->th.th_teams_microtask ||
             ( !exit_teams && team->t.t_level == master_th->th.th_teams_level ) )

            master_th->th.th_ident = loc;
            __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
                                    0, loc, master_th->th.th_team_nproc, 1 );

    if ( master_th->th.th_teams_microtask &&
         team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
         team->t.t_level == master_th->th.th_teams_level + 1 ) {

        team->t.t_active_level --;
        KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );

        if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
            int old_num = master_th->th.th_team_nproc;
            int new_num = master_th->th.th_teams_size.nth;
            kmp_info_t **other_threads = team->t.t_threads;
            team->t.t_nproc = new_num;
            for ( i = 0; i < old_num; ++i ) {
                other_threads[i]->th.th_team_nproc = new_num;

            for ( i = old_num; i < new_num; ++i ) {

                kmp_balign_t *balign = other_threads[i]->th.th_bar;
                for ( b = 0; b < bs_last_barrier; ++b ) {
                    balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
                    KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);

                other_threads[i]->th.th_task_state = master_th->th.th_task_state;

    master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
    master_th->th.th_local.this_construct = team->t.t_master_this_cons;

    master_th->th.th_dispatch =
        & parent_team->t.t_dispatch[ team->t.t_master_tid ];

    __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

    if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )

        KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );

    KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );

    KF_TRACE( 10, ( "__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
                    0, master_th, team ) );
    __kmp_pop_current_task_from_thread( master_th );

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED

    master_th->th.th_first_place = team->t.t_first_place;
    master_th->th.th_last_place  = team->t.t_last_place;

    updateHWFPControl (team);

    if ( root->r.r_active != master_active )
        root->r.r_active = master_active;

    __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) );

    master_th->th.th_team            = parent_team;
    master_th->th.th_team_nproc      = parent_team->t.t_nproc;
    master_th->th.th_team_master     = parent_team->t.t_threads[0];
    master_th->th.th_team_serialized = parent_team->t.t_serialized;

    if ( parent_team->t.t_serialized &&
         parent_team != master_th->th.th_serial_team &&
         parent_team != root->r.r_root_team ) {
        __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
        master_th->th.th_serial_team = parent_team;

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {

        if ( ( master_th->th.th_task_team = parent_team->t.t_task_team ) != NULL ) {
            master_th->th.th_task_state = master_th->th.th_task_team->tt.tt_state;

        KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
                        __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,

    master_th->th.th_current_task->td_flags.executing = 1;

    __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

    KA_TRACE( 20, ( "__kmp_join_call: exit T#%d\n", gtid ) );
__kmp_save_internal_controls( kmp_info_t *thread )
    if ( thread->th.th_team != thread->th.th_serial_team ) {

    if (thread->th.th_team->t.t_serialized > 1) {

        if (thread->th.th_team->t.t_control_stack_top == NULL) {

            if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
                 thread->th.th_team->t.t_serialized ) {

        kmp_internal_control_t *control = (kmp_internal_control_t *)
            __kmp_allocate( sizeof(kmp_internal_control_t) );

        copy_icvs( control, & thread->th.th_current_task->td_icvs );

        control->serial_nesting_level = thread->th.th_team->t.t_serialized;

        control->next = thread->th.th_team->t.t_control_stack_top;
        thread->th.th_team->t.t_control_stack_top = control;
__kmp_set_num_threads( int new_nth, int gtid )
    KF_TRACE( 10, ( "__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    else if (new_nth > __kmp_max_nth)
        new_nth = __kmp_max_nth;

    thread = __kmp_threads[gtid];

    __kmp_save_internal_controls( thread );

    set__nproc( thread, new_nth );

    root = thread->th.th_root;
    if ( __kmp_init_parallel && ( ! root->r.r_active )
         && ( root->r.r_hot_team->t.t_nproc > new_nth )
#if KMP_NESTED_HOT_TEAMS
         && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode

        kmp_team_t *hot_team = root->r.r_hot_team;

        __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
            kmp_task_team_t *task_team = hot_team->t.t_task_team;
            if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {

                KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 );
                TCW_SYNC_4( task_team->tt.tt_active, FALSE );

                KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n",
                                &hot_team->t.t_task_team ) );
                hot_team->t.t_task_team = NULL;

                KMP_DEBUG_ASSERT( task_team == NULL );

        for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
            KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
            __kmp_free_thread( hot_team->t.t_threads[f] );
            hot_team->t.t_threads[f] = NULL;
        hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
        if ( thread->th.th_hot_teams ) {
            KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
            thread->th.th_hot_teams[0].hot_team_nth = new_nth;

        __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

        for ( f = 0; f < new_nth; f++ ) {
            KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
            hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;

        hot_team->t.t_size_changed = -1;
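
/* __kmp_set_max_active_levels / __kmp_get_max_active_levels: validate and
   store, or read back, the max-active-levels ICV for the calling thread. */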
__kmp_set_max_active_levels( int gtid, int max_active_levels )
    KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n",
                    gtid, max_active_levels ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( max_active_levels < 0 ) {
        KMP_WARNING( ActiveLevelsNegative, max_active_levels );

        KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n",
                        gtid, max_active_levels ) );
    if ( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {

        KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
        max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;

    KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n",
                    gtid, max_active_levels ) );

    thread = __kmp_threads[ gtid ];

    __kmp_save_internal_controls( thread );

    set__max_active_levels( thread, max_active_levels );

__kmp_get_max_active_levels( int gtid )
    KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    thread = __kmp_threads[ gtid ];
    KMP_DEBUG_ASSERT( thread->th.th_current_task );
    KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
                    gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
    return thread->th.th_current_task->td_icvs.max_active_levels;
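
/* __kmp_set_schedule / __kmp_get_schedule: translate between the public
   kmp_sched_t kinds and the internal sched_type stored in the thread's ICVs,
   substituting the default chunk size when none is given. */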
__kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )
    KF_TRACE( 10, ( "__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                    gtid, (int)kind, chunk ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
         ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )

            KMP_MSG( ScheduleKindOutOfRange, kind ),
            KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),

        kind = kmp_sched_default;

    thread = __kmp_threads[ gtid ];

    __kmp_save_internal_controls( thread );

    if ( kind < kmp_sched_upper_std ) {
        if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {

            thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;

            thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];

        thread->th.th_current_task->td_icvs.sched.r_sched_type =
            __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];

    if ( kind == kmp_sched_auto ) {

        thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;

        thread->th.th_current_task->td_icvs.sched.chunk = chunk;

__kmp_get_schedule( int gtid, kmp_sched_t *kind, int *chunk )
    KF_TRACE( 10, ( "__kmp_get_schedule: thread %d\n", gtid ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    thread = __kmp_threads[ gtid ];

    th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;

    switch ( th_type ) {

    case kmp_sch_static_greedy:
    case kmp_sch_static_balanced:
        *kind = kmp_sched_static;

    case kmp_sch_static_chunked:
        *kind = kmp_sched_static;
    case kmp_sch_dynamic_chunked:
        *kind = kmp_sched_dynamic;

    case kmp_sch_guided_iterative_chunked:
    case kmp_sch_guided_analytical_chunked:
        *kind = kmp_sched_guided;

        *kind = kmp_sched_auto;
    case kmp_sch_trapezoidal:
        *kind = kmp_sched_trapezoidal;

        KMP_FATAL( UnknownSchedulingType, th_type );

    *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
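
/* __kmp_get_ancestor_thread_num / __kmp_get_team_size: walk up the team tree,
   skipping serialized levels, to answer omp_get_ancestor_thread_num() and
   omp_get_team_size() for an arbitrary nesting level. */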
__kmp_get_ancestor_thread_num( int gtid, int level ) {
    KF_TRACE( 10, ( "__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( level == 0 ) return 0;
    if ( level < 0 ) return -1;
    thr  = __kmp_threads[ gtid ];
    team = thr->th.th_team;
    ii   = team->t.t_level;
    if ( level > ii ) return -1;

    if ( thr->th.th_teams_microtask ) {

        int tlevel = thr->th.th_teams_level;
        if ( level <= tlevel ) {
            KMP_DEBUG_ASSERT( ii >= tlevel );

            if ( ii == tlevel ) {

    if ( ii == level ) return __kmp_tid_from_gtid( gtid );

    dd = team->t.t_serialized;

        for ( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )

        if ( ( team->t.t_serialized ) && ( !dd ) ) {
            team = team->t.t_parent;

            team = team->t.t_parent;
            dd = team->t.t_serialized;

    return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );

__kmp_get_team_size( int gtid, int level ) {
    KF_TRACE( 10, ( "__kmp_get_team_size: thread %d %d\n", gtid, level ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( level == 0 ) return 1;
    if ( level < 0 ) return -1;
    thr  = __kmp_threads[ gtid ];
    team = thr->th.th_team;
    ii   = team->t.t_level;
    if ( level > ii ) return -1;

    if ( thr->th.th_teams_microtask ) {

        int tlevel = thr->th.th_teams_level;
        if ( level <= tlevel ) {
            KMP_DEBUG_ASSERT( ii >= tlevel );

            if ( ii == tlevel ) {

        for ( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )

        if ( team->t.t_serialized && ( !dd ) ) {
            team = team->t.t_parent;

            team = team->t.t_parent;

    return team->t.t_nproc;
__kmp_get_schedule_global() {

    kmp_r_sched_t r_sched;

        r_sched.r_sched_type = __kmp_static;
        r_sched.r_sched_type = __kmp_guided;
        r_sched.r_sched_type = __kmp_sched;

    if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) {
        r_sched.chunk = KMP_DEFAULT_CHUNK;
        r_sched.chunk = __kmp_chunk;
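
/* __kmp_alloc_argv_entries: make sure team->t.t_argv can hold argc pointers.
   Small argument lists use the inline array inside kmp_team_t; larger ones
   get a page-allocated buffer of at least KMP_MIN_MALLOC_ARGV_ENTRIES slots. */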
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc )
    KMP_DEBUG_ASSERT( team );
    if ( !realloc || argc > team->t.t_max_argc ) {

        KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
                         team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ) );

        if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
            __kmp_free( (void *) team->t.t_argv );

        if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {

            team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
            KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
                             team->t.t_id, team->t.t_max_argc ) );
            team->t.t_argv = &team->t.t_inline_argv[0];
            if ( __kmp_storage_map ) {
                __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
                                              &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
                                              ( sizeof( void * ) * KMP_INLINE_ARGV_ENTRIES ),
                                              "team_%d.t_inline_argv",

            team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
                                 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
            KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
                             team->t.t_id, team->t.t_max_argc ) );
            team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
            if ( __kmp_storage_map ) {
                __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
                                              sizeof(void *) * team->t.t_max_argc,
                                              "team_%d.t_argv",
__kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
    int num_disp_buff = max_nth > 1 ? KMP_MAX_DISP_BUF : 2;
#if KMP_USE_POOLED_ALLOC

    char *ptr = __kmp_allocate(max_nth *
                               ( sizeof(kmp_info_t*) + sizeof(dispatch_shared_info_t)*num_disp_buff
                                 + sizeof(kmp_disp_t) + sizeof(int)*6
                                 + sizeof(kmp_r_sched_t)
                                 + sizeof(kmp_taskdata_t) ) );
    team->t.t_threads          = (kmp_info_t**) ptr;             ptr += sizeof(kmp_info_t*) * max_nth;
    team->t.t_disp_buffer      = (dispatch_shared_info_t*) ptr;
    ptr += sizeof(dispatch_shared_info_t) * num_disp_buff;
    team->t.t_dispatch         = (kmp_disp_t*) ptr;              ptr += sizeof(kmp_disp_t) * max_nth;
    team->t.t_set_nproc        = (int*) ptr;                     ptr += sizeof(int) * max_nth;
    team->t.t_set_dynamic      = (int*) ptr;                     ptr += sizeof(int) * max_nth;
    team->t.t_set_nested       = (int*) ptr;                     ptr += sizeof(int) * max_nth;
    team->t.t_set_blocktime    = (int*) ptr;                     ptr += sizeof(int) * max_nth;
    team->t.t_set_bt_intervals = (int*) ptr;                     ptr += sizeof(int) * max_nth;
    team->t.t_set_bt_set       = (int*) ptr;
    ptr += sizeof(int) * max_nth;

    team->t.t_set_sched        = (kmp_r_sched_t*) ptr;
    ptr += sizeof(kmp_r_sched_t) * max_nth;
    team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) ptr;
    ptr += sizeof(kmp_taskdata_t) * max_nth;
2676 team->t.t_threads = (kmp_info_t**) __kmp_allocate(
sizeof(kmp_info_t*) * max_nth );
2677 team->t.t_disp_buffer = (dispatch_shared_info_t*)
2678 __kmp_allocate(
sizeof(dispatch_shared_info_t) * num_disp_buff );
2679 team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate(
sizeof(kmp_disp_t) * max_nth );
2682 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate(
sizeof(kmp_taskdata_t) * max_nth );
2684 team->t.t_max_nproc = max_nth;
2687 for(i = 0 ; i < num_disp_buff; ++i)
2688 team->t.t_disp_buffer[i].buffer_index = i;
2692 __kmp_free_team_arrays(kmp_team_t *team) {
2695 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
2696 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
2697 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
2698 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
2701 __kmp_free(team->t.t_threads);
2702 #if !KMP_USE_POOLED_ALLOC
2703 __kmp_free(team->t.t_disp_buffer);
2704 __kmp_free(team->t.t_dispatch);
2707 __kmp_free(team->t.t_implicit_task_taskdata);
2709 team->t.t_threads = NULL;
2710 team->t.t_disp_buffer = NULL;
2711 team->t.t_dispatch = NULL;
2714 team->t.t_implicit_task_taskdata = 0;
2718 __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
2719 kmp_info_t **oldThreads = team->t.t_threads;
2721 #if !KMP_USE_POOLED_ALLOC
2722 __kmp_free(team->t.t_disp_buffer);
2723 __kmp_free(team->t.t_dispatch);
2726 __kmp_free(team->t.t_implicit_task_taskdata);
2728 __kmp_allocate_team_arrays(team, max_nth);
2730 memcpy(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
2732 __kmp_free(oldThreads);
2735 static kmp_internal_control_t
2736 __kmp_get_global_icvs( void ) {
2738 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
2741 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
2744 kmp_internal_control_t g_icvs = {
2746 (kmp_int8)__kmp_dflt_nested,
2747 (kmp_int8)__kmp_global.g.g_dynamic,
2748 (kmp_int8)__kmp_env_blocktime,
2749 __kmp_dflt_blocktime,
2751 __kmp_dflt_team_nth,
2753 __kmp_dflt_max_active_levels,
2756 __kmp_nested_proc_bind.bind_types[0],
2764 static kmp_internal_control_t
2765 __kmp_get_x_global_icvs( const kmp_team_t *team ) {
2767 kmp_internal_control_t gx_icvs;
2768 gx_icvs.serial_nesting_level = 0;
2769 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
2770 gx_icvs.next = NULL;
2776 __kmp_initialize_root( kmp_root_t *root )
2779 kmp_team_t *root_team;
2780 kmp_team_t *hot_team;
2781 size_t disp_size, dispatch_size, bar_size;
2782 int hot_team_max_nth;
2783 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
2784 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
2785 KMP_DEBUG_ASSERT( root );
2786 KMP_ASSERT( ! root->r.r_begin );
2789 __kmp_init_lock( &root->r.r_begin_lock );
2790 root->r.r_begin = FALSE;
2791 root->r.r_active = FALSE;
2792 root->r.r_in_parallel = 0;
2793 root->r.r_blocktime = __kmp_dflt_blocktime;
2794 root->r.r_nested = __kmp_dflt_nested;
2798 KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
2800 __kmp_allocate_team(
2805 __kmp_nested_proc_bind.bind_types[0],
2809 USE_NESTED_HOT_ARG(NULL)
2812 KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
2814 root->r.r_root_team = root_team;
2815 root_team->t.t_control_stack_top = NULL;
2818 root_team->t.t_threads[0] = NULL;
2819 root_team->t.t_nproc = 1;
2820 root_team->t.t_serialized = 1;
2822 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
2823 root_team->t.t_sched.chunk = r_sched.chunk;
2824 KA_TRACE( 20, ( "__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
2825 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
2829 KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
2831 __kmp_allocate_team(
2834 __kmp_dflt_team_nth_ub * 2,
2836 __kmp_nested_proc_bind.bind_types[0],
2840 USE_NESTED_HOT_ARG(NULL)
2842 KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
2844 root->r.r_hot_team = hot_team;
2845 root_team->t.t_control_stack_top = NULL;
2848 hot_team->t.t_parent = root_team;
2851 hot_team_max_nth = hot_team->t.t_max_nproc;
2852 for ( f = 0; f < hot_team_max_nth; ++ f ) {
2853 hot_team->t.t_threads[ f ] = NULL;
2855 hot_team->t.t_nproc = 1;
2857 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
2858 hot_team->t.t_sched.chunk = r_sched.chunk;
2860 hot_team->t.t_size_changed = 0;
2868 typedef struct kmp_team_list_item {
2869 kmp_team_p const * entry;
2870 struct kmp_team_list_item * next;
2871 } kmp_team_list_item_t;
2872 typedef kmp_team_list_item_t * kmp_team_list_t;
2876 __kmp_print_structure_team_accum(
2877 kmp_team_list_t list,
2878 kmp_team_p const * team
2888 KMP_DEBUG_ASSERT( list != NULL );
2889 if ( team == NULL ) {
2893 __kmp_print_structure_team_accum( list, team->t.t_parent );
2894 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
2898 while ( l->next != NULL && l->entry != team ) {
2901 if ( l->next != NULL ) {
2907 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
2913 kmp_team_list_item_t * item =
2914 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
2923 __kmp_print_structure_team(
2925 kmp_team_p const * team
2928 __kmp_printf( "%s", title );
2929 if ( team != NULL ) {
2930 __kmp_printf( "%2x %p\n", team->t.t_id, team );
2932 __kmp_printf( " - (nil)\n" );
2937 __kmp_print_structure_thread(
2939 kmp_info_p const * thread
2942 __kmp_printf( "%s", title );
2943 if ( thread != NULL ) {
2944 __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
2946 __kmp_printf( " - (nil)\n" );
2951 __kmp_print_structure(
2955 kmp_team_list_t list;
2958 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
2962 __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
2965 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
2966 __kmp_printf( "%2d", gtid );
2967 if ( __kmp_threads != NULL ) {
2968 __kmp_printf( " %p", __kmp_threads[ gtid ] );
2970 if ( __kmp_root != NULL ) {
2971 __kmp_printf( " %p", __kmp_root[ gtid ] );
2973 __kmp_printf( "\n" );
2978 __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
2979 if ( __kmp_threads != NULL ) {
2981 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
2982 kmp_info_t const * thread = __kmp_threads[ gtid ];
2983 if ( thread != NULL ) {
2984 __kmp_printf( "GTID %2d %p:\n", gtid, thread );
2985 __kmp_printf( "    Our Root:        %p\n", thread->th.th_root );
2986 __kmp_print_structure_team( "    Our Team:     ", thread->th.th_team );
2987 __kmp_print_structure_team( "    Serial Team:  ", thread->th.th_serial_team );
2988 __kmp_printf( "    Threads:      %2d\n", thread->th.th_team_nproc );
2989 __kmp_print_structure_thread( "    Master:       ", thread->th.th_team_master );
2990 __kmp_printf( "    Serialized?:  %2d\n", thread->th.th_team_serialized );
2991 __kmp_printf( "    Set NProc:    %2d\n", thread->th.th_set_nproc );
2993 __kmp_printf( "    Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
2995 __kmp_print_structure_thread( "    Next in pool: ", thread->th.th_next_pool );
2996 __kmp_printf( "\n" );
2997 __kmp_print_structure_team_accum( list, thread->th.th_team );
2998 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
3002 __kmp_printf( "Threads array is not allocated.\n" );
3006 __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
3007 if ( __kmp_root != NULL ) {
3009 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3010 kmp_root_t const * root = __kmp_root[ gtid ];
3011 if ( root != NULL ) {
3012 __kmp_printf( "GTID %2d %p:\n", gtid, root );
3013 __kmp_print_structure_team( "    Root Team:    ", root->r.r_root_team );
3014 __kmp_print_structure_team( "    Hot Team:     ", root->r.r_hot_team );
3015 __kmp_print_structure_thread( "    Uber Thread:  ", root->r.r_uber_thread );
3016 __kmp_printf( "    Active?:      %2d\n", root->r.r_active );
3017 __kmp_printf( "    Nested?:      %2d\n", root->r.r_nested );
3018 __kmp_printf( "    In Parallel:  %2d\n", root->r.r_in_parallel );
3019 __kmp_printf( "\n" );
3020 __kmp_print_structure_team_accum( list, root->r.r_root_team );
3021 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
3025 __kmp_printf( "Ubers array is not allocated.\n" );
3028 __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
3029 while ( list->next != NULL ) {
3030 kmp_team_p const * team = list->entry;
3032 __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
3033 __kmp_print_structure_team( "    Parent Team:      ", team->t.t_parent );
3034 __kmp_printf( "    Master TID:       %2d\n", team->t.t_master_tid );
3035 __kmp_printf( "    Max threads:      %2d\n", team->t.t_max_nproc );
3036 __kmp_printf( "    Levels of serial: %2d\n", team->t.t_serialized );
3037 __kmp_printf( "    Number threads:   %2d\n", team->t.t_nproc );
3038 for ( i = 0; i < team->t.t_nproc; ++ i ) {
3039 __kmp_printf( "    Thread %2d:      ", i );
3040 __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
3042 __kmp_print_structure_team( "    Next in pool:     ", team->t.t_next_pool );
3043 __kmp_printf( "\n" );
3048 __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
3049 __kmp_print_structure_thread( "Thread pool:          ", (kmp_info_t *)__kmp_thread_pool );
3050 __kmp_print_structure_team( "Team pool:            ", (kmp_team_t *)__kmp_team_pool );
3051 __kmp_printf( "\n" );
3054 while ( list != NULL ) {
3055 kmp_team_list_item_t * item = list;
3057 KMP_INTERNAL_FREE( item );
3069 static const unsigned __kmp_primes[] = {
3070 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
3071 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
3072 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3073 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
3074 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
3075 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3076 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
3077 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
3078 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3079 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
3080 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
3081 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3082 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
3083 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
3084 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3085 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
3092 __kmp_get_random( kmp_info_t * thread )
3094 unsigned x = thread->th.th_x;
3095 unsigned short r = x>>16;
3097 thread->th.th_x = x*thread->th.th_a+1;
3099 KA_TRACE(30, ( "__kmp_get_random: THREAD: %d, RETURN: %u\n",
3100 thread->th.th_info.ds.ds_tid, r) );
3108 __kmp_init_random( kmp_info_t * thread )
3110 unsigned seed = thread->th.th_info.ds.ds_tid;
3112 thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
3113 thread->th.th_x = (seed+1)*thread->th.th_a+1;
3114 KA_TRACE(30, ( "__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
3121 __kmp_reclaim_dead_roots(void) {
3124 for(i = 0; i < __kmp_threads_capacity; ++i) {
3125 if( KMP_UBER_GTID( i ) &&
3126 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3127 !__kmp_root[i]->r.r_active ) {
3128 r += __kmp_unregister_root_other_thread(i);
3157 __kmp_expand_threads(int nWish, int nNeed) {
3160 int __kmp_actual_max_nth;
3164 #if KMP_OS_WINDOWS && !defined GUIDEDLL_EXPORTS
3167 added = __kmp_reclaim_dead_roots();
3185 int minimumRequiredCapacity;
3187 kmp_info_t **newThreads;
3188 kmp_root_t **newRoot;
3210 old_tp_cached = __kmp_tp_cached;
3211 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3212 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3216 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3220 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3226 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3233 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3235 newCapacity = __kmp_threads_capacity;
3238 newCapacity <= (__kmp_actual_max_nth >> 1) ?
3239 (newCapacity << 1) :
3240 __kmp_actual_max_nth;
3241 } while(newCapacity < minimumRequiredCapacity);
3242 newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
3243 newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
3244 memcpy(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
3245 memcpy(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
3246 memset(newThreads + __kmp_threads_capacity, 0,
3247 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
3248 memset(newRoot + __kmp_threads_capacity, 0,
3249 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
3251 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3257 __kmp_free(newThreads);
3260 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3261 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3263 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3264 __kmp_free(newThreads);
3270 *(kmp_info_t** volatile*)&__kmp_threads = newThreads;
3271 *(kmp_root_t** volatile*)&__kmp_root = newRoot;
3272 added += newCapacity - __kmp_threads_capacity;
3273 *(volatile int*)&__kmp_threads_capacity = newCapacity;
3274 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
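/*
 * Editor's sketch: the capacity-growth step inside __kmp_expand_threads above. The thread/root
 * array capacity is doubled until it covers the required target, but never exceeds the effective
 * maximum (__kmp_tp_capacity when threadprivate caches exist, otherwise __kmp_sys_max_nth). The
 * real routine additionally reallocates __kmp_threads / __kmp_root and re-checks the
 * threadprivate-cache state under __kmp_tp_cached_lock; this helper is illustrative only.
 */
static int sketch_grow_capacity( int current_capacity, int required_capacity, int actual_max )
{
    int new_capacity = current_capacity;
    do {
        /* double while that cannot overshoot the maximum, otherwise clamp to it */
        new_capacity = ( new_capacity <= ( actual_max >> 1 ) )
                           ? ( new_capacity << 1 )
                           : actual_max;
    } while ( new_capacity < required_capacity );
    return new_capacity;
}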
3285 __kmp_register_root( int initial_thread )
3287 kmp_info_t *root_thread;
3291 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3292 KA_TRACE( 20, ( "__kmp_register_root: entered\n"));
3310 capacity = __kmp_threads_capacity;
3311 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
3316 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
3317 if ( __kmp_tp_cached ) {
3320 KMP_MSG( CantRegisterNewThread ),
3321 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
3322 KMP_HNT( PossibleSystemLimitOnThreads ),
3329 KMP_MSG( CantRegisterNewThread ),
3330 KMP_HNT( SystemLimitOnThreads ),
3339 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
3341 KA_TRACE( 1, ( "__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
3342 KMP_ASSERT( gtid < __kmp_threads_capacity );
3346 TCW_4(__kmp_nth, __kmp_nth + 1);
3353 if ( __kmp_adjust_gtid_mode ) {
3354 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
3355 if ( TCR_4(__kmp_gtid_mode) != 2) {
3356 TCW_4(__kmp_gtid_mode, 2);
3360 if (TCR_4(__kmp_gtid_mode) != 1 ) {
3361 TCW_4(__kmp_gtid_mode, 1);
3366 #ifdef KMP_ADJUST_BLOCKTIME
3369 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3370 if ( __kmp_nth > __kmp_avail_proc ) {
3371 __kmp_zero_bt = TRUE;
3377 if( ! ( root = __kmp_root[gtid] )) {
3378 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
3379 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
3382 __kmp_initialize_root( root );
3385 if( root->r.r_uber_thread ) {
3386 root_thread = root->r.r_uber_thread;
3388 root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
3389 if ( __kmp_storage_map ) {
3390 __kmp_print_thread_storage_map( root_thread, gtid );
3392 root_thread->th.th_info .ds.ds_gtid = gtid;
3393 root_thread->th.th_root = root;
3394 if( __kmp_env_consistency_check ) {
3395 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
3398 __kmp_initialize_fast_memory( root_thread );
3402 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
3403 __kmp_initialize_bget( root_thread );
3405 __kmp_init_random( root_thread );
3409 if( ! root_thread->th.th_serial_team ) {
3410 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3411 KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
3412 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
3417 0 USE_NESTED_HOT_ARG(NULL) );
3419 KMP_ASSERT( root_thread->th.th_serial_team );
3420 KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
3421 root_thread->th.th_serial_team ) );
3424 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3426 root->r.r_root_team->t.t_threads[0] = root_thread;
3427 root->r.r_hot_team ->t.t_threads[0] = root_thread;
3428 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3429 root_thread->th.th_serial_team->t.t_serialized = 0;
3430 root->r.r_uber_thread = root_thread;
3433 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
3436 __kmp_gtid_set_specific( gtid );
3438 __kmp_itt_thread_name( gtid );
3440 #ifdef KMP_TDATA_GTID
3443 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
3444 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
3445 TCW_4(__kmp_init_gtid, TRUE);
3447 KA_TRACE( 20, ( "__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
3448 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
3449 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3450 KMP_INIT_BARRIER_STATE ) );
3453 for ( b = 0; b < bs_last_barrier; ++ b ) {
3454 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3457 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
3460 #if KMP_AFFINITY_SUPPORTED
3461 if ( TCR_4(__kmp_init_middle) ) {
3462 __kmp_affinity_set_init_mask( gtid, TRUE );
3466 __kmp_root_counter ++;
3469 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3474 #if KMP_NESTED_HOT_TEAMS
3476 __kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level )
3479 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3480 if( !hot_teams || !hot_teams[level].hot_team ) {
3483 KMP_DEBUG_ASSERT( level < max_level );
3484 kmp_team_t *team = hot_teams[level].hot_team;
3485 nth = hot_teams[level].hot_team_nth;
3487 if( level < max_level - 1 ) {
3488 for( i = 0; i < nth; ++i ) {
3489 kmp_info_t *th = team->t.t_threads[i];
3490 n += __kmp_free_hot_teams( root, th, level + 1, max_level );
3491 if( i > 0 && th->th.th_hot_teams ) {
3492 __kmp_free( th->th.th_hot_teams );
3493 th->th.th_hot_teams = NULL;
3497 __kmp_free_team( root, team, NULL );
3506 __kmp_reset_root( int gtid, kmp_root_t *root )
3508 kmp_team_t * root_team = root->r.r_root_team;
3509 kmp_team_t * hot_team = root->r.r_hot_team;
3510 int n = hot_team->t.t_nproc;
3513 KMP_DEBUG_ASSERT( ! root->r.r_active );
3515 root->r.r_root_team = NULL;
3516 root->r.r_hot_team = NULL;
3519 __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
3520 #if KMP_NESTED_HOT_TEAMS
3521 if( __kmp_hot_teams_max_level > 1 ) {
3522 for( i = 0; i < hot_team->t.t_nproc; ++i ) {
3523 kmp_info_t *th = hot_team->t.t_threads[i];
3524 n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
3525 if( th->th.th_hot_teams ) {
3526 __kmp_free( th->th.th_hot_teams );
3527 th->th.th_hot_teams = NULL;
3532 __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );
3538 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3539 __kmp_wait_to_unref_task_teams();
3544 KA_TRACE( 10, ( "__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
3545 (LPVOID)&(root->r.r_uber_thread->th),
3546 root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
3547 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
3550 TCW_4(__kmp_nth, __kmp_nth - 1);
3551 __kmp_reap_thread( root->r.r_uber_thread, 1 );
3554 root->r.r_uber_thread = NULL;
3556 root->r.r_begin = FALSE;
3562 __kmp_unregister_root_current_thread( int gtid )
3564 KA_TRACE( 1, ( "__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
3569 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3570 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
3571 KC_TRACE( 10, ( "__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ));
3572 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3575 kmp_root_t *root = __kmp_root[gtid];
3577 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3578 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3579 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3580 KMP_ASSERT( root->r.r_active == FALSE );
3585 __kmp_reset_root(gtid, root);
3588 __kmp_gtid_set_specific( KMP_GTID_DNE );
3589 #ifdef KMP_TDATA_GTID
3590 __kmp_gtid = KMP_GTID_DNE;
3594 KC_TRACE( 10, ( "__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
3596 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3604 __kmp_unregister_root_other_thread( int gtid )
3606 kmp_root_t *root = __kmp_root[gtid];
3609 KA_TRACE( 1, ( "__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
3610 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3611 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3612 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3613 KMP_ASSERT( root->r.r_active == FALSE );
3615 r = __kmp_reset_root(gtid, root);
3616 KC_TRACE( 10, ( "__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
3621 void __kmp_task_info() {
3623 kmp_int32 gtid = __kmp_entry_gtid();
3624 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
3625 kmp_info_t *this_thr = __kmp_threads[ gtid ];
3626 kmp_team_t *steam = this_thr->th.th_serial_team;
3627 kmp_team_t *team = this_thr->th.th_team;
3629 __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
3630 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
3638 __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
3642 kmp_info_t *master = team->t.t_threads[0];
3643 KMP_DEBUG_ASSERT( this_thr != NULL );
3644 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
3645 KMP_DEBUG_ASSERT( team );
3646 KMP_DEBUG_ASSERT( team->t.t_threads );
3647 KMP_DEBUG_ASSERT( team->t.t_dispatch );
3648 KMP_DEBUG_ASSERT( master );
3649 KMP_DEBUG_ASSERT( master->th.th_root );
3653 TCW_SYNC_PTR(this_thr->th.th_team, team);
3655 this_thr->th.th_info.ds.ds_tid = tid;
3656 this_thr->th.th_set_nproc = 0;
3658 this_thr->th.th_set_proc_bind = proc_bind_default;
3659 # if KMP_AFFINITY_SUPPORTED
3660 this_thr->th.th_new_place = this_thr->th.th_current_place;
3663 this_thr->th.th_root = master->th.th_root;
3666 this_thr->th.th_team_nproc = team->t.t_nproc;
3667 this_thr->th.th_team_master = master;
3668 this_thr->th.th_team_serialized = team->t.t_serialized;
3669 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
3671 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
3672 this_thr->th.th_task_state = 0;
3674 KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
3675 tid, gtid, this_thr, this_thr->th.th_current_task ) );
3677 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
3679 KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
3680 tid, gtid, this_thr, this_thr->th.th_current_task ) );
3684 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
3686 this_thr->th.th_local.this_construct = 0;
3689 this_thr->th.th_local.tv_data = 0;
3692 if ( ! this_thr->th.th_pri_common ) {
3693 this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
3694 if ( __kmp_storage_map ) {
3695 __kmp_print_storage_map_gtid(
3696 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
3697 sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
3700 this_thr->th.th_pri_head = NULL;
3705 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
3709 size_t disp_size = sizeof( dispatch_private_info_t ) *
3710 ( team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF );
3711 KD_TRACE( 10, ( "__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
3712 KMP_ASSERT( dispatch );
3713 KMP_DEBUG_ASSERT( team->t.t_dispatch );
3714 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
3716 dispatch->th_disp_index = 0;
3718 if( ! dispatch->th_disp_buffer ) {
3719 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
3721 if ( __kmp_storage_map ) {
3722 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
3723 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF ],
3724 disp_size, "th_%d.th_dispatch.th_disp_buffer "
3725 "(team_%d.t_dispatch[%d].th_disp_buffer)",
3726 gtid, team->t.t_id, gtid );
3729 memset( & dispatch->th_disp_buffer[0], '\0', disp_size );
3732 dispatch->th_dispatch_pr_current = 0;
3733 dispatch->th_dispatch_sh_current = 0;
3735 dispatch->th_deo_fcn = 0;
3736 dispatch->th_dxo_fcn = 0;
3739 this_thr->th.th_next_pool = NULL;
3741 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
3742 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
3755 __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
3757 kmp_team_t *serial_team;
3758 kmp_info_t *new_thr;
3761 KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
3762 KMP_DEBUG_ASSERT( root && team );
3763 #if !KMP_NESTED_HOT_TEAMS
3764 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
3769 if ( __kmp_thread_pool ) {
3771 new_thr = (kmp_info_t*)__kmp_thread_pool;
3772 __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
3773 if ( new_thr == __kmp_thread_pool_insert_pt ) {
3774 __kmp_thread_pool_insert_pt = NULL;
3776 TCW_4(new_thr->th.th_in_pool, FALSE);
3782 __kmp_thread_pool_nth--;
3784 KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d using thread T#%d\n",
3785 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
3786 KMP_ASSERT( ! new_thr->th.th_team );
3787 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
3788 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
3791 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
3792 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
3794 TCW_4(__kmp_nth, __kmp_nth + 1);
3796 #ifdef KMP_ADJUST_BLOCKTIME
3799 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3800 if ( __kmp_nth > __kmp_avail_proc ) {
3801 __kmp_zero_bt = TRUE;
3809 kmp_balign_t * balign = new_thr->th.th_bar;
3810 for( b = 0; b < bs_last_barrier; ++ b )
3811 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
3814 KF_TRACE( 10, ( "__kmp_allocate_thread: T#%d using thread %p T#%d\n",
3815 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
3823 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
3824 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
3830 if ( ! TCR_4( __kmp_init_monitor ) ) {
3831 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
3832 if ( ! TCR_4( __kmp_init_monitor ) ) {
3833 KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
3834 TCW_4( __kmp_init_monitor, 1 );
3835 __kmp_create_monitor( & __kmp_monitor );
3836 KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
3845 while ( TCR_4(__kmp_init_monitor) < 2 ) {
3848 KF_TRACE( 10, ( "after monitor thread has started\n" ) );
3851 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
3855 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
3856 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
3860 new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
3862 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
3864 if ( __kmp_storage_map ) {
3865 __kmp_print_thread_storage_map( new_thr, new_gtid );
3870 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
3871 KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
3872 new_thr->th.th_serial_team = serial_team =
3873 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
3878 0 USE_NESTED_HOT_ARG(NULL) );
3880 KMP_ASSERT ( serial_team );
3881 serial_team->t.t_serialized = 0;
3882 serial_team->t.t_threads[0] = new_thr;
3883 KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
3887 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
3890 __kmp_initialize_fast_memory( new_thr );
3894 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
3895 __kmp_initialize_bget( new_thr );
3898 __kmp_init_random( new_thr );
3901 KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
3902 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
3905 kmp_balign_t * balign = new_thr->th.th_bar;
3906 for(b=0; b<bs_last_barrier; ++b) {
3907 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
3908 balign[b].bb.team = NULL;
3909 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
3910 balign[b].bb.use_oncore_barrier = 0;
3913 new_thr->th.th_spin_here = FALSE;
3914 new_thr->th.th_next_waiting = 0;
3916 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
3917 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
3918 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
3919 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
3920 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
3923 TCW_4(new_thr->th.th_in_pool, FALSE);
3924 new_thr->th.th_active_in_pool = FALSE;
3925 TCW_4(new_thr->th.th_active, TRUE);
3936 if ( __kmp_adjust_gtid_mode ) {
3937 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
3938 if ( TCR_4(__kmp_gtid_mode) != 2) {
3939 TCW_4(__kmp_gtid_mode, 2);
3943 if (TCR_4(__kmp_gtid_mode) != 1 ) {
3944 TCW_4(__kmp_gtid_mode, 1);
3949 #ifdef KMP_ADJUST_BLOCKTIME
3952 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3953 if ( __kmp_nth > __kmp_avail_proc ) {
3954 __kmp_zero_bt = TRUE;
3960 KF_TRACE( 10, ( "__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
3961 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
3962 KF_TRACE( 10, ( "__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
3965 KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
3980 __kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) {
3981 KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
3982 team->t.t_threads[0], team ) );
3983 KMP_DEBUG_ASSERT( team && new_icvs);
3984 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
3985 team->t.t_ident = loc;
3987 team->t.t_id = KMP_GEN_TEAM_ID();
3990 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
3991 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
3993 KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
3994 team->t.t_threads[0], team ) );
4002 __kmp_initialize_team(
4005 kmp_internal_control_t * new_icvs,
4008 KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
4011 KMP_DEBUG_ASSERT( team );
4012 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
4013 KMP_DEBUG_ASSERT( team->t.t_threads );
4016 team->t.t_master_tid = 0;
4018 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4019 team->t.t_nproc = new_nproc;
4022 team->t.t_next_pool = NULL;
4025 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4026 team->t.t_invoke = NULL;
4029 team->t.t_sched = new_icvs->sched;
4031 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4032 team->t.t_fp_control_saved = FALSE;
4033 team->t.t_x87_fpu_control_word = 0;
4034 team->t.t_mxcsr = 0;
4037 team->t.t_construct = 0;
4038 __kmp_init_lock( & team->t.t_single_lock );
4040 team->t.t_ordered .dt.t_value = 0;
4041 team->t.t_master_active = FALSE;
4043 memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t ));
4046 team->t.t_copypriv_data = NULL;
4048 team->t.t_copyin_counter = 0;
4050 team->t.t_control_stack_top = NULL;
4052 __kmp_reinitialize_team( team, new_icvs, loc );
4055 KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
4058 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4061 __kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
4063 if ( KMP_AFFINITY_CAPABLE() ) {
4065 if ( old_mask != NULL ) {
4066 status = __kmp_get_system_affinity( old_mask, TRUE );
4068 if ( status != 0 ) {
4071 KMP_MSG( ChangeThreadAffMaskError ),
4077 __kmp_set_system_affinity( __kmp_affinity_get_fullMask(), TRUE );
4082 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4091 __kmp_partition_places( kmp_team_t *team )
4096 kmp_info_t *master_th = team->t.t_threads[0];
4097 KMP_DEBUG_ASSERT( master_th != NULL );
4098 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4099 int first_place = master_th->th.th_first_place;
4100 int last_place = master_th->th.th_last_place;
4101 int masters_place = master_th->th.th_current_place;
4102 team->t.t_first_place = first_place;
4103 team->t.t_last_place = last_place;
4105 KA_TRACE( 20, ( "__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
4106 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
4107 masters_place, first_place, last_place ) );
4109 switch ( proc_bind ) {
4111 case proc_bind_default:
4117 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
4120 case proc_bind_master:
4123 int n_th = team->t.t_nproc;
4124 for ( f = 1; f < n_th; f++ ) {
4125 kmp_info_t *th = team->t.t_threads[f];
4126 KMP_DEBUG_ASSERT( th != NULL );
4127 th->th.th_first_place = first_place;
4128 th->th.th_last_place = last_place;
4129 th->th.th_new_place = masters_place;
4131 KA_TRACE( 100, ( "__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4132 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4133 team->t.t_id, f, masters_place, first_place, last_place ) );
4138 case proc_bind_close:
4141 int n_th = team->t.t_nproc;
4143 if ( first_place <= last_place ) {
4144 n_places = last_place - first_place + 1;
4147 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4149 if ( n_th <= n_places ) {
4150 int place = masters_place;
4151 for ( f = 1; f < n_th; f++ ) {
4152 kmp_info_t *th = team->t.t_threads[f];
4153 KMP_DEBUG_ASSERT( th != NULL );
4155 if ( place == last_place ) {
4156 place = first_place;
4158 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4164 th->th.th_first_place = first_place;
4165 th->th.th_last_place = last_place;
4166 th->th.th_new_place = place;
4168 KA_TRACE( 100, ( "__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4169 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4170 team->t.t_id, f, place, first_place, last_place ) );
4174 int S, rem, gap, s_count;
4175 S = n_th / n_places;
4177 rem = n_th - ( S * n_places );
4178 gap = rem > 0 ? n_places/rem : n_places;
4179 int place = masters_place;
4181 for ( f = 0; f < n_th; f++ ) {
4182 kmp_info_t *th = team->t.t_threads[f];
4183 KMP_DEBUG_ASSERT( th != NULL );
4185 th->th.th_first_place = first_place;
4186 th->th.th_last_place = last_place;
4187 th->th.th_new_place = place;
4190 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4193 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4195 if ( place == last_place ) {
4196 place = first_place;
4198 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4208 else if (s_count == S) {
4209 if ( place == last_place ) {
4210 place = first_place;
4212 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4222 KA_TRACE( 100, ( "__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4223 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4224 team->t.t_id, f, th->th.th_new_place, first_place,
4227 KMP_DEBUG_ASSERT( place == masters_place );
4232 case proc_bind_spread:
4235 int n_th = team->t.t_nproc;
4237 if ( first_place <= last_place ) {
4238 n_places = last_place - first_place + 1;
4241 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4243 if ( n_th <= n_places ) {
4244 int place = masters_place;
4245 int S = n_places/n_th;
4246 int s_count, rem, gap, gap_ct;
4247 rem = n_places - n_th*S;
4248 gap = rem ? n_th/rem : 1;
4250 for ( f = 0; f < n_th; f++ ) {
4251 kmp_info_t *th = team->t.t_threads[f];
4252 KMP_DEBUG_ASSERT( th != NULL );
4254 th->th.th_first_place = place;
4255 th->th.th_new_place = place;
4257 while (s_count < S) {
4258 if ( place == last_place ) {
4259 place = first_place;
4261 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4269 if (rem && (gap_ct == gap)) {
4270 if ( place == last_place ) {
4271 place = first_place;
4273 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4282 th->th.th_last_place = place;
4285 if ( place == last_place ) {
4286 place = first_place;
4288 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4295 KA_TRACE( 100, ( "__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4296 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4297 team->t.t_id, f, th->th.th_new_place,
4298 th->th.th_first_place, th->th.th_last_place ) );
4300 KMP_DEBUG_ASSERT( place == masters_place );
4303 int S, rem, gap, s_count;
4304 S = n_th / n_places;
4306 rem = n_th - ( S * n_places );
4307 gap = rem > 0 ? n_places/rem : n_places;
4308 int place = masters_place;
4310 for ( f = 0; f < n_th; f++ ) {
4311 kmp_info_t *th = team->t.t_threads[f];
4312 KMP_DEBUG_ASSERT( th != NULL );
4314 th->th.th_first_place = place;
4315 th->th.th_last_place = place;
4316 th->th.th_new_place = place;
4319 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4322 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4324 if ( place == last_place ) {
4325 place = first_place;
4327 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4337 else if (s_count == S) {
4338 if ( place == last_place ) {
4339 place = first_place;
4341 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) {
4351 KA_TRACE( 100, ( "__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4352 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4353 team->t.t_id, f, th->th.th_new_place,
4354 th->th.th_first_place, th->th.th_last_place) );
4356 KMP_DEBUG_ASSERT( place == masters_place );
4365 KA_TRACE( 20, ( "__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
4372 __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
4374 kmp_proc_bind_t new_proc_bind,
4376 kmp_internal_control_t *new_icvs,
4377 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
4384 int use_hot_team = ! root->r.r_active;
4386 KA_TRACE( 20, ( "__kmp_allocate_team: called\n"));
4387 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
4388 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
4391 #if KMP_NESTED_HOT_TEAMS
4393 kmp_hot_team_ptr_t *hot_teams;
4395 team = master->th.th_team;
4396 level = team->t.t_active_level;
4397 if( master->th.th_teams_microtask ) {
4398 if( master->th.th_teams_size.nteams > 1 && (
4399 team->t.t_pkfn == (microtask_t)__kmp_teams_master ||
4400 master->th.th_teams_level < team->t.t_level ) ) {
4404 hot_teams = master->th.th_hot_teams;
4405 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
4414 if( use_hot_team && new_nproc > 1 ) {
4415 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
4416 #if KMP_NESTED_HOT_TEAMS
4417 team = hot_teams[level].hot_team;
4419 team = root->r.r_hot_team;
4422 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4423 KA_TRACE( 20, ( "__kmp_allocate_team: hot team task_team = %p before reinit\n",
4424 team->t.t_task_team ));
4431 if (team->t.t_nproc == new_nproc) {
4432 KA_TRACE( 20, ( "__kmp_allocate_team: reusing hot team\n" ));
4436 if ( team->t.t_size_changed == -1 ) {
4437 team->t.t_size_changed = 1;
4439 team->t.t_size_changed = 0;
4444 team->t.t_sched = new_icvs->sched;
4446 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4448 KF_TRACE( 10, ( "__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
4449 0, team->t.t_threads[0], team ) );
4450 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4453 # if KMP_AFFINITY_SUPPORTED
4454 if ( team->t.t_proc_bind == new_proc_bind ) {
4455 KA_TRACE( 200, ( "__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
4456 team->t.t_id, new_proc_bind, team->t.t_first_place,
4457 team->t.t_last_place ) );
4460 team->t.t_proc_bind = new_proc_bind;
4461 __kmp_partition_places( team );
4464 if ( team->t.t_proc_bind != new_proc_bind ) {
4465 team->t.t_proc_bind = new_proc_bind;
4470 else if( team->t.t_nproc > new_nproc ) {
4471 KA_TRACE( 20, ( "__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
4474 team->t.t_size_changed = 1;
4476 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4477 kmp_task_team_t *task_team = team->t.t_task_team;
4478 if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
4485 KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
4486 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
4489 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
4490 &team->t.t_task_team ) );
4491 team->t.t_task_team = NULL;
4494 KMP_DEBUG_ASSERT( task_team == NULL );
4497 #if KMP_NESTED_HOT_TEAMS
4498 if( __kmp_hot_teams_mode == 0 ) {
4501 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4502 hot_teams[level].hot_team_nth = new_nproc;
4503 #endif // KMP_NESTED_HOT_TEAMS
4505 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
4506 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
4507 __kmp_free_thread( team->t.t_threads[ f ] );
4508 team->t.t_threads[ f ] = NULL;
4510 #if KMP_NESTED_HOT_TEAMS
4512 #endif // KMP_NESTED_HOT_TEAMS
4513 team->t.t_nproc = new_nproc;
4515 team->t.t_sched = new_icvs->sched;
4516 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4518 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4519 kmp_task_team_t *task_team = team->t.t_task_team;
4520 if ( task_team != NULL ) {
4521 KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
4522 task_team->tt.tt_nproc = new_nproc;
4523 task_team->tt.tt_unfinished_threads = new_nproc;
4524 task_team->tt.tt_ref_ct = new_nproc - 1;
4529 for(f = 0; f < new_nproc; ++f) {
4530 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4534 KF_TRACE( 10, ( "__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
4535 0, team->t.t_threads[0], team ) );
4537 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4540 for ( f = 0; f < team->t.t_nproc; f++ ) {
4541 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4542 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4547 team->t.t_proc_bind = new_proc_bind;
4548 # if KMP_AFFINITY_SUPPORTED
4549 __kmp_partition_places( team );
4554 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4555 kmp_affin_mask_t *old_mask;
4556 if ( KMP_AFFINITY_CAPABLE() ) {
4557 KMP_CPU_ALLOC(old_mask);
4561 KA_TRACE( 20, ( "__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
4564 team->t.t_size_changed = 1;
4568 #if KMP_NESTED_HOT_TEAMS
4569 int avail_threads = hot_teams[level].hot_team_nth;
4570 if( new_nproc < avail_threads )
4571 avail_threads = new_nproc;
4572 kmp_info_t **other_threads = team->t.t_threads;
4573 for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
4577 kmp_balign_t * balign = other_threads[f]->th.th_bar;
4578 for ( b = 0; b < bs_last_barrier; ++ b ) {
4579 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4580 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4583 if( hot_teams[level].hot_team_nth >= new_nproc ) {
4586 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
4587 team->t.t_nproc = new_nproc;
4590 team->t.t_nproc = hot_teams[level].hot_team_nth;
4591 hot_teams[level].hot_team_nth = new_nproc;
4592 #endif // KMP_NESTED_HOT_TEAMS
4593 if(team->t.t_max_nproc < new_nproc) {
4595 __kmp_reallocate_team_arrays(team, new_nproc);
4596 __kmp_reinitialize_team( team, new_icvs, NULL );
4599 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4606 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
4610 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
4611 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
4612 KMP_DEBUG_ASSERT( new_worker );
4613 team->t.t_threads[ f ] = new_worker;
4614 new_worker->th.th_team_nproc = team->t.t_nproc;
4616 KA_TRACE( 20, ( "__kmp_allocate_team: team %d init T#%d arrived: join=%u, plain=%u\n",
4617 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
4618 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
4619 team->t.t_bar[bs_plain_barrier].b_arrived ) );
4623 kmp_balign_t * balign = new_worker->th.th_bar;
4624 for( b = 0; b < bs_last_barrier; ++ b ) {
4625 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
4626 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4631 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4632 if ( KMP_AFFINITY_CAPABLE() ) {
4634 __kmp_set_system_affinity( old_mask, TRUE );
4635 KMP_CPU_FREE(old_mask);
4638 #if KMP_NESTED_HOT_TEAMS
4640 #endif // KMP_NESTED_HOT_TEAMS
4642 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
4644 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4645 kmp_task_team_t *task_team = team->t.t_task_team;
4646 if ( task_team != NULL ) {
4647 KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
4648 task_team->tt.tt_nproc = new_nproc;
4649 task_team->tt.tt_unfinished_threads = new_nproc;
4650 task_team->tt.tt_ref_ct = new_nproc - 1;
4655 for( f = 0 ; f < team->t.t_nproc ; f++ )
4656 __kmp_initialize_info( team->t.t_threads[ f ], team, f,
4657 __kmp_gtid_from_tid( f, team ) );
4659 for ( f = 0; f < team->t.t_nproc; ++ f ) {
4660 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4661 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4666 team->t.t_proc_bind = new_proc_bind;
4667 # if KMP_AFFINITY_SUPPORTED
4668 __kmp_partition_places( team );
4674 kmp_info_t *master = team->t.t_threads[0];
4675 if( master->th.th_teams_microtask ) {
4676 for( f = 1; f < new_nproc; ++f ) {
4678 kmp_info_t *thr = team->t.t_threads[f];
4679 thr->th.th_teams_microtask = master->th.th_teams_microtask;
4680 thr->th.th_teams_level = master->th.th_teams_level;
4681 thr->th.th_teams_size = master->th.th_teams_size;
4685 #if KMP_NESTED_HOT_TEAMS
4688 for( f = 1; f < new_nproc; ++f ) {
4689 kmp_info_t *thr = team->t.t_threads[f];
4690 thr->th.th_task_state = master->th.th_task_state;
4692 kmp_balign_t * balign = thr->th.th_bar;
4693 for( b = 0; b < bs_last_barrier; ++ b ) {
4694 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
4695 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4699 #endif // KMP_NESTED_HOT_TEAMS
4702 __kmp_alloc_argv_entries( argc, team, TRUE );
4703 team->t.t_argc = argc;
4709 KF_TRACE( 10, ( " hot_team = %p\n", team ) );
4712 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4713 KA_TRACE( 20, ( "__kmp_allocate_team: hot team task_team = %p after reinit\n",
4714 team->t.t_task_team ));
4725 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
4728 if ( team->t.t_max_nproc >= max_nproc ) {
4730 __kmp_team_pool = team->t.t_next_pool;
4733 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
4735 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
4736 &team->t.t_task_team ) );
4737 team->t.t_task_team = NULL;
4740 __kmp_alloc_argv_entries( argc, team, TRUE );
4741 team->t.t_argc = argc;
4743 KA_TRACE( 20, ( "__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
4744 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4747 for ( b = 0; b < bs_last_barrier; ++ b) {
4748 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
4753 team->t.t_proc_bind = new_proc_bind;
4756 KA_TRACE( 20, ( "__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
4765 team = __kmp_reap_team( team );
4766 __kmp_team_pool = team;
4771 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
4774 team->t.t_max_nproc = max_nproc;
4778 __kmp_allocate_team_arrays( team, max_nproc );
4780 KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
4781 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
4783 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
4784 &team->t.t_task_team ) );
4785 team->t.t_task_team = NULL;
4787 if ( __kmp_storage_map ) {
4788 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
4792 __kmp_alloc_argv_entries( argc, team, FALSE );
4793 team->t.t_argc = argc;
4795 KA_TRACE( 20, ( "__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
4796 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4799 for ( b = 0; b < bs_last_barrier; ++ b ) {
4800 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
4805 team->t.t_proc_bind = new_proc_bind;
4810 KA_TRACE( 20, ( "__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
4821 __kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
4824 KA_TRACE( 20, ( "__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
4827 KMP_DEBUG_ASSERT( root );
4828 KMP_DEBUG_ASSERT( team );
4829 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
4830 KMP_DEBUG_ASSERT( team->t.t_threads );
4832 int use_hot_team = team == root->r.r_hot_team;
4833 #if KMP_NESTED_HOT_TEAMS
4835 kmp_hot_team_ptr_t *hot_teams;
4837 level = team->t.t_active_level - 1;
4838 if( master->th.th_teams_microtask ) {
4839 if( master->th.th_teams_size.nteams > 1 ) {
4842 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
4843 master->th.th_teams_level == team->t.t_level ) {
4847 hot_teams = master->th.th_hot_teams;
4848 if( level < __kmp_hot_teams_max_level ) {
4849 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
4853 #endif // KMP_NESTED_HOT_TEAMS
4856 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4857 team->t.t_copyin_counter = 0;
4861 if( ! use_hot_team ) {
4863 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4864 kmp_task_team_t *task_team = team->t.t_task_team;
4865 if ( task_team != NULL ) {
4872 KA_TRACE( 20, ( "__kmp_free_team: deactivating task_team %p\n",
4874 KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
4875 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
4877 team->t.t_task_team = NULL;
4882 team->t.t_parent = NULL;
4886 for ( f = 1; f < team->t.t_nproc; ++ f ) {
4887 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
4888 __kmp_free_thread( team->t.t_threads[ f ] );
4889 team->t.t_threads[ f ] = NULL;
4895 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
4896 __kmp_team_pool = (volatile kmp_team_t*) team;
4905 __kmp_reap_team( kmp_team_t *team )
4907 kmp_team_t *next_pool = team->t.t_next_pool;
4909 KMP_DEBUG_ASSERT( team );
4910 KMP_DEBUG_ASSERT( team->t.t_dispatch );
4911 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
4912 KMP_DEBUG_ASSERT( team->t.t_threads );
4913 KMP_DEBUG_ASSERT( team->t.t_argv );
4919 __kmp_free_team_arrays( team );
4920 if ( team->t.t_argv != &team->t.t_inline_argv[0] )
4921 __kmp_free( (void*) team->t.t_argv );
4956 __kmp_free_thread( kmp_info_t *this_th )
4961 KA_TRACE( 20, ( "__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
4962 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
4964 KMP_DEBUG_ASSERT( this_th );
4968 kmp_balign_t *balign = this_th->th.th_bar;
4969 for (b=0; b<bs_last_barrier; ++b) {
4970 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
4971 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
4972 balign[b].bb.team = NULL;
4977 TCW_PTR(this_th->th.th_team, NULL);
4978 TCW_PTR(this_th->th.th_root, NULL);
4979 TCW_PTR(this_th->th.th_dispatch, NULL);
4985 gtid = this_th->th.th_info.ds.ds_gtid;
4986 if ( __kmp_thread_pool_insert_pt != NULL ) {
4987 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
4988 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
4989 __kmp_thread_pool_insert_pt = NULL;
5000 if ( __kmp_thread_pool_insert_pt != NULL ) {
5001 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
5004 scan = (kmp_info_t **)&__kmp_thread_pool;
5006 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
5007 scan = &( (*scan)->th.th_next_pool ) );
5013 TCW_PTR(this_th->th.th_next_pool, *scan);
5014 __kmp_thread_pool_insert_pt = *scan = this_th;
5015 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
5016 || ( this_th->th.th_info.ds.ds_gtid
5017 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
5018 TCW_4(this_th->th.th_in_pool, TRUE);
5019 __kmp_thread_pool_nth++;
5021 TCW_4(__kmp_nth, __kmp_nth - 1);
5023 #ifdef KMP_ADJUST_BLOCKTIME
5026 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5027 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5028 if ( __kmp_nth <= __kmp_avail_proc ) {
5029 __kmp_zero_bt = FALSE;
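/*
 * Editor's sketch: the pool insertion performed by __kmp_free_thread above. The free pool is a
 * singly linked list kept sorted by gtid so the lowest gtids are reused first, and
 * __kmp_thread_pool_insert_pt caches the last insertion point so a run of frees with increasing
 * gtids does not rescan the list from the head. The sketch_ types stand in for kmp_info_t and
 * the runtime globals.
 */
typedef struct sketch_pool_item {
    int gtid;
    struct sketch_pool_item *next;
} sketch_pool_item_t;

static void sketch_pool_insert( sketch_pool_item_t **pool,
                                sketch_pool_item_t **insert_hint,
                                sketch_pool_item_t *item )
{
    sketch_pool_item_t **scan;

    /* the cached hint is only usable when it precedes the new item in gtid order */
    if ( *insert_hint != NULL && ( *insert_hint )->gtid > item->gtid )
        *insert_hint = NULL;

    scan = ( *insert_hint != NULL ) ? &( ( *insert_hint )->next ) : pool;
    while ( *scan != NULL && ( *scan )->gtid < item->gtid )
        scan = &( ( *scan )->next );

    item->next = *scan;       /* splice in front of the first larger gtid     */
    *scan = item;
    *insert_hint = item;      /* remember where we inserted for the next free */
}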
5041 __kmp_launch_thread( kmp_info_t *this_thr )
5043 int gtid = this_thr->th.th_info.ds.ds_gtid;
5045 kmp_team_t *(*volatile pteam);
5048 KA_TRACE( 10, ( "__kmp_launch_thread: T#%d start\n", gtid ) );
5050 if( __kmp_env_consistency_check ) {
5051 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid );
5055 while( ! TCR_4(__kmp_global.g.g_done) ) {
5056 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
5060 KA_TRACE( 20, ( "__kmp_launch_thread: T#%d waiting for work\n", gtid ));
5063 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
5065 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
5068 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
5070 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
5072 KA_TRACE(20, ( "__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5073 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
5075 updateHWFPControl (*pteam);
5080 rc = (*pteam)->t.t_invoke( gtid );
5086 KA_TRACE(20, ( "__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5087 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
5090 __kmp_join_barrier( gtid );
5093 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
5095 if ( TCR_PTR( this_thr->th.th_task_team ) != NULL ) {
5096 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
5099 __kmp_common_destroy_gtid( gtid );
5101 KA_TRACE( 10, ( "__kmp_launch_thread: T#%d done\n", gtid ) );
5110 __kmp_internal_end_dest( void *specific_gtid )
5112 #if KMP_COMPILER_ICC
5113 #pragma warning( push )
5114 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
5117 int gtid = (kmp_intptr_t)specific_gtid - 1;
5118 #if KMP_COMPILER_ICC
5119 #pragma warning( pop )
5122 KA_TRACE( 30, ( "__kmp_internal_end_dest: T#%d\n", gtid));
5136 if(gtid >= 0 && KMP_UBER_GTID(gtid))
5137 __kmp_gtid_set_specific( gtid );
5138 #ifdef KMP_TDATA_GTID
5141 __kmp_internal_end_thread( gtid );
5144 #if KMP_OS_UNIX && GUIDEDLL_EXPORTS
5150 __attribute__(( destructor ))
5152 __kmp_internal_end_dtor( void )
5154 __kmp_internal_end_atexit();
5158 __kmp_internal_end_fini( void )
5160 __kmp_internal_end_atexit();
5167 __kmp_internal_end_atexit( void )
5169 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
5191 __kmp_internal_end_library( -1 );
5193 __kmp_close_console();
5199 kmp_info_t * thread,
5207 KMP_DEBUG_ASSERT( thread != NULL );
5209 gtid = thread->th.th_info.ds.ds_gtid;
5213 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
5215 KA_TRACE( 20, ( "__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
5217 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
5218 __kmp_release_64(&flag);
5223 __kmp_reap_worker( thread );
5238 if ( thread->th.th_active_in_pool ) {
5239 thread->th.th_active_in_pool = FALSE;
5240 KMP_TEST_THEN_DEC32(
5241 (kmp_int32 *) &__kmp_thread_pool_active_nth );
5242 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
5246 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
5247 --__kmp_thread_pool_nth;
5252 __kmp_free_fast_memory( thread );
5255 __kmp_suspend_uninitialize_thread( thread );
5257 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
5258 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5263 #ifdef KMP_ADJUST_BLOCKTIME
5266 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5267 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5268 if ( __kmp_nth <= __kmp_avail_proc ) {
5269 __kmp_zero_bt = FALSE;
5275 if( __kmp_env_consistency_check ) {
5276 if ( thread->th.th_cons ) {
5277 __kmp_free_cons_stack( thread->th.th_cons );
5278 thread->th.th_cons = NULL;
5282 if ( thread->th.th_pri_common != NULL ) {
5283 __kmp_free( thread->th.th_pri_common );
5284 thread->th.th_pri_common = NULL;
5288 if ( thread->th.th_local.bget_data != NULL ) {
5289 __kmp_finalize_bget( thread );
5293 #if KMP_AFFINITY_SUPPORTED
5294 if ( thread->th.th_affin_mask != NULL ) {
5295 KMP_CPU_FREE( thread->th.th_affin_mask );
5296 thread->th.th_affin_mask = NULL;
5300 __kmp_reap_team( thread->th.th_serial_team );
5301 thread->th.th_serial_team = NULL;
5302 __kmp_free( thread );
5309 __kmp_internal_end( void )
5314 __kmp_unregister_library();
5322 __kmp_reclaim_dead_roots();
5325 for( i=0 ; i<__kmp_threads_capacity ; i++ )
5327 if( __kmp_root[i]->r.r_active )
5330 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5332 if ( i < __kmp_threads_capacity ) {
5350 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5351 if ( TCR_4( __kmp_init_monitor ) ) {
5352 __kmp_reap_monitor( & __kmp_monitor );
5353 TCW_4( __kmp_init_monitor, 0 );
5355 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5356 KA_TRACE( 10, ( "__kmp_internal_end: monitor reaped\n" ) );
5361 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
5362 if( __kmp_root[i] ) {
5364 KMP_ASSERT( ! __kmp_root[i]->r.r_active );
5373 while ( __kmp_thread_pool != NULL ) {
5375 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
5376 __kmp_thread_pool = thread->th.th_next_pool;
5378 thread->th.th_next_pool = NULL;
5379 thread->th.th_in_pool = FALSE;
5380 __kmp_reap_thread( thread, 0 );
5382 __kmp_thread_pool_insert_pt = NULL;
5385 while ( __kmp_team_pool != NULL ) {
5387 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
5388 __kmp_team_pool = team->t.t_next_pool;
5390 team->t.t_next_pool = NULL;
5391 __kmp_reap_team( team );
5394 __kmp_reap_task_teams( );
5396 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5403 TCW_SYNC_4(__kmp_init_common, FALSE);
5405 KA_TRACE( 10, ( "__kmp_internal_end: all workers reaped\n" ) );
5414 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5415 if ( TCR_4( __kmp_init_monitor ) ) {
5416 __kmp_reap_monitor( & __kmp_monitor );
5417 TCW_4( __kmp_init_monitor, 0 );
5419 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5420 KA_TRACE( 10, ( "__kmp_internal_end: monitor reaped\n" ) );
5423 TCW_4(__kmp_init_gtid, FALSE);
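/*
 * __kmp_internal_end_library: called when the library (or the whole process) is shutting
 * down; gtid_req == -1 means "whichever thread is current". Uber (root) threads either flag
 * an abort if their root is still active or unregister themselves; the teardown proper then
 * runs under the initz and forkjoin bootstrap locks.
 */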
5431 __kmp_internal_end_library( int gtid_req )
5441 if( __kmp_global.g.g_abort ) {
5442 KA_TRACE( 11, ( "__kmp_internal_end_library: abort, exiting\n" ));
5446 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5447 KA_TRACE( 10, ( "__kmp_internal_end_library: already finished\n" ));
5456 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5457 KA_TRACE( 10, ( "__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
5458 if( gtid == KMP_GTID_SHUTDOWN ) {
5459 KA_TRACE( 10, ( "__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
5461 } else if( gtid == KMP_GTID_MONITOR ) {
5462 KA_TRACE( 10, ( "__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
5464 } else if( gtid == KMP_GTID_DNE ) {
5465 KA_TRACE( 10, ( "__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
5467 } else if( KMP_UBER_GTID( gtid )) {
5469 if( __kmp_root[gtid]->r.r_active ) {
5470 __kmp_global.g.g_abort = -1;
5471 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5472 KA_TRACE( 10, ( "__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
5475 KA_TRACE( 10, ( "__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
5476 __kmp_unregister_root_current_thread( gtid );
5476 __kmp_unregister_root_current_thread( gtid );
5483 #ifdef DUMP_DEBUG_ON_EXIT
5484 if ( __kmp_debug_buf )
5485 __kmp_dump_debug_buffer( );
5491 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
5494 if( __kmp_global.g.g_abort ) {
5495 KA_TRACE( 10, ( "__kmp_internal_end_library: abort, exiting\n" ));
5497 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5500 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5501 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5511 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
5514 __kmp_internal_end();
5516 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
5517 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5519 KA_TRACE( 10, ( "__kmp_internal_end_library: exit\n" ) );
5521 #ifdef DUMP_DEBUG_ON_EXIT
5522 if ( __kmp_debug_buf )
5523 __kmp_dump_debug_buffer();
5527 __kmp_close_console();
5530 __kmp_fini_allocator();
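/*
 * __kmp_internal_end_thread: like __kmp_internal_end_library, but driven by a single exiting
 * thread. Worker threads just drop their task-team reference and return; the dynamic-library
 * build appears to bail out here, and the full teardown runs only when no uber (root) threads
 * remain registered.
 */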
5535 __kmp_internal_end_thread( int gtid_req )
5545 if( __kmp_global.g.g_abort ) {
5546 KA_TRACE( 11, ( "__kmp_internal_end_thread: abort, exiting\n" ));
5550 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5551 KA_TRACE( 10, ( "__kmp_internal_end_thread: already finished\n" ));
5559 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5560 KA_TRACE( 10, ( "__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
5561 if( gtid == KMP_GTID_SHUTDOWN ) {
5562 KA_TRACE( 10, ( "__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
5564 } else if( gtid == KMP_GTID_MONITOR ) {
5565 KA_TRACE( 10, ( "__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
5567 } else if( gtid == KMP_GTID_DNE ) {
5568 KA_TRACE( 10, ( "__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
5571 } else if( KMP_UBER_GTID( gtid )) {
5573 if( __kmp_root[gtid]->r.r_active ) {
5574 __kmp_global.g.g_abort = -1;
5575 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5576 KA_TRACE( 10, ( "__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
5579 KA_TRACE( 10, ( "__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
5580 __kmp_unregister_root_current_thread( gtid );
5584 KA_TRACE( 10, ( "__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
5587 kmp_info_t *this_thr = __kmp_threads[ gtid ];
5588 if (TCR_PTR(this_thr->th.th_task_team) != NULL) {
5589 __kmp_unref_task_team(this_thr->th.th_task_team, this_thr);
5593 KA_TRACE( 10, ( "__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
5597 #if defined GUIDEDLL_EXPORTS
5605 KA_TRACE( 10, ( "__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
5609 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
5612 if( __kmp_global.g.g_abort ) {
5613 KA_TRACE( 10, ( "__kmp_internal_end_thread: abort, exiting\n" ));
5615 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5618 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5619 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5631 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
5633 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5634 if ( KMP_UBER_GTID( i ) ) {
5635 KA_TRACE( 10, ( "__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
5636 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
5637 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5644 __kmp_internal_end();
5646 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
5647 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5649 KA_TRACE( 10, ( "__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
5651 #ifdef DUMP_DEBUG_ON_EXIT
5652 if ( __kmp_debug_buf )
5653 __kmp_dump_debug_buffer();
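/*
 * Library registration: at startup the runtime publishes a flag address/value pair in a
 * per-process environment variable (see __kmp_reg_status_name) so that a second OpenMP
 * runtime loaded into the same process can be detected. A live duplicate is a fatal error
 * unless the user sets KMP_DUPLICATE_LIB_OK.
 */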
5660 static long __kmp_registration_flag = 0;
5662 static char * __kmp_registration_str = NULL;
5668 __kmp_reg_status_name() {
5674 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
5679 __kmp_register_library_startup(
5683 char * name = __kmp_reg_status_name();
5690 __kmp_initialize_system_tick();
5692 __kmp_read_system_time( & time.dtime );
5693 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
5694 __kmp_registration_str =
5697 & __kmp_registration_flag,
5698 __kmp_registration_flag,
5702 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
5706 char * value = NULL;
5709 __kmp_env_set( name, __kmp_registration_str, 0 );
5711 value = __kmp_env_get( name );
5712 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
5721 char * tail = value;
5722 char * flag_addr_str = NULL;
5723 char * flag_val_str = NULL;
5724 char const * file_name = NULL;
5725 __kmp_str_split( tail, '-', & flag_addr_str, & tail );
5726 __kmp_str_split( tail, '-', & flag_val_str, & tail );
5728 if ( tail != NULL ) {
5729 long * flag_addr = 0;
5731 sscanf( flag_addr_str, "%p", & flag_addr );
5732 sscanf( flag_val_str, "%lx", & flag_val );
5733 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
5737 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
5745 switch ( neighbor ) {
5750 file_name = "unknown library";
5754 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
5755 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
5759 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
5760 KMP_HNT( DuplicateLibrary ),
5764 KMP_INTERNAL_FREE( duplicate_ok );
5765 __kmp_duplicate_library_ok = 1;
5770 __kmp_env_unset( name );
5773 KMP_DEBUG_ASSERT( 0 );
5778 KMP_INTERNAL_FREE( (void *) value );
5781 KMP_INTERNAL_FREE( (void *) name );
5787 __kmp_unregister_library( void ) {
5789 char * name = __kmp_reg_status_name();
5790 char * value = __kmp_env_get( name );
5792 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
5793 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
5794 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
5796 __kmp_env_unset( name );
5799 KMP_INTERNAL_FREE( __kmp_registration_str );
5800 KMP_INTERNAL_FREE( value );
5801 KMP_INTERNAL_FREE( name );
5803 __kmp_registration_flag = 0;
5804 __kmp_registration_str = NULL;
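/*
 * __kmp_do_serial_initialize: one-time process-level initialization. Sanity-checks the
 * fixed-width types, sets up the allocator and the global/atomic/bootstrap locks, derives the
 * default team size and blocktime, configures barrier branch bits and patterns, reads the
 * environment, allocates __kmp_threads/__kmp_root, and registers the initial (uber) root.
 */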
5813 __kmp_do_serial_initialize( void )
5818 KA_TRACE( 10, ( "__kmp_do_serial_initialize: enter\n" ) );
5820 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
5821 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
5822 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
5823 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
5824 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
5826 __kmp_validate_locks();
5829 __kmp_init_allocator();
5835 __kmp_register_library_startup( );
5838 if( TCR_4(__kmp_global.g.g_done) ) {
5839 KA_TRACE( 10, ( "__kmp_do_serial_initialize: reinitialization of library\n" ) );
5842 __kmp_global.g.g_abort = 0;
5843 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
5846 #if KMP_USE_ADAPTIVE_LOCKS
5847 #if KMP_DEBUG_ADAPTIVE_LOCKS
5848 __kmp_init_speculative_stats();
5851 __kmp_init_lock( & __kmp_global_lock );
5852 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
5853 __kmp_init_lock( & __kmp_debug_lock );
5854 __kmp_init_atomic_lock( & __kmp_atomic_lock );
5855 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
5856 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
5857 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
5858 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
5859 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
5860 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
5861 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
5862 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
5863 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
5864 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
5865 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
5866 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
5867 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
5868 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
5869 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
5870 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
5874 __kmp_runtime_initialize();
5880 __kmp_abort_delay = 0;
5884 __kmp_dflt_team_nth_ub = __kmp_xproc;
5885 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
5886 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
5888 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
5889 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
5891 __kmp_max_nth = __kmp_sys_max_nth;
5894 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
5895 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
5896 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
5898 __kmp_library = library_throughput;
5900 __kmp_static = kmp_sch_static_balanced;
5906 #if KMP_FAST_REDUCTION_BARRIER
5907 #define kmp_reduction_barrier_gather_bb ((int)1)
5908 #define kmp_reduction_barrier_release_bb ((int)1)
5909 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
5910 #define kmp_reduction_barrier_release_pat bp_hyper_bar
5911 #endif // KMP_FAST_REDUCTION_BARRIER
5912 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
5913 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
5914 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
5915 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
5916 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
5917 #if KMP_FAST_REDUCTION_BARRIER
5918 if( i == bs_reduction_barrier ) {
5919 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
5920 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
5921 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
5922 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
5924 #endif // KMP_FAST_REDUCTION_BARRIER
5926 #if KMP_FAST_REDUCTION_BARRIER
5927 #undef kmp_reduction_barrier_release_pat
5928 #undef kmp_reduction_barrier_gather_pat
5929 #undef kmp_reduction_barrier_release_bb
5930 #undef kmp_reduction_barrier_gather_bb
5931 #endif // KMP_FAST_REDUCTION_BARRIER
5934 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3;
5935 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1;
5936 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
5937 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
5938 #if KMP_FAST_REDUCTION_BARRIER
5939 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
5940 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
5946 __kmp_env_checks = TRUE;
5948 __kmp_env_checks = FALSE;
5952 __kmp_foreign_tp = TRUE;
5954 __kmp_global.g.g_dynamic = FALSE;
5955 __kmp_global.g.g_dynamic_mode = dynamic_default;
5957 __kmp_env_initialize( NULL );
5961 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
5962 if ( __kmp_str_match_true( val ) ) {
5963 kmp_str_buf_t buffer;
5964 __kmp_str_buf_init( & buffer );
5965 __kmp_i18n_dump_catalog( & buffer );
5966 __kmp_printf( "%s", buffer.str );
5967 __kmp_str_buf_free( & buffer );
5969 __kmp_env_free( & val );
5972 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
5974 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
5979 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
5980 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
5981 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
5982 __kmp_thread_pool = NULL;
5983 __kmp_thread_pool_insert_pt = NULL;
5984 __kmp_team_pool = NULL;
5989 size = ( sizeof(kmp_info_t*) + sizeof(kmp_root_t*) ) * __kmp_threads_capacity + CACHE_LINE;
5990 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
5991 __kmp_root = (kmp_root_t**) ( (char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
5994 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 );
5995 KMP_DEBUG_ASSERT( __kmp_nth == 0 );
6000 gtid = __kmp_register_root( TRUE );
6001 KA_TRACE( 10, ( "__kmp_do_serial_initialize T#%d\n", gtid ));
6002 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6003 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
6007 __kmp_common_initialize();
6011 __kmp_register_atfork();
6014 #if ! defined GUIDEDLL_EXPORTS
6019 int rc = atexit( __kmp_internal_end_atexit );
6021 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
6026 #if KMP_HANDLE_SIGNALS
6033 __kmp_install_signals( FALSE );
6036 __kmp_install_signals( TRUE );
6041 __kmp_init_counter ++;
6043 __kmp_init_serial = TRUE;
6045 if (__kmp_settings) {
6050 if (__kmp_display_env || __kmp_display_env_verbose) {
6051 __kmp_env_print_2();
6053 #endif // OMP_40_ENABLED
6057 KA_TRACE( 10, ( "__kmp_do_serial_initialize: exit\n" ) );
6061 __kmp_serial_initialize( void )
6063 if ( __kmp_init_serial ) {
6066 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6067 if ( __kmp_init_serial ) {
6068 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6071 __kmp_do_serial_initialize();
6072 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
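/*
 * __kmp_do_middle_initialize: second initialization phase. Initializes affinity (when
 * supported), fixes up __kmp_avail_proc, and settles the default team size
 * (__kmp_dflt_team_nth), propagating a changed default into the ICVs of already-created
 * threads that have not set their own nproc.
 */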
6076 __kmp_do_middle_initialize( void )
6079 int prev_dflt_team_nth;
6081 if( !__kmp_init_serial ) {
6082 __kmp_do_serial_initialize();
6085 KA_TRACE( 10, ( "__kmp_middle_initialize: enter\n" ) );
6091 prev_dflt_team_nth = __kmp_dflt_team_nth;
6093 #if KMP_AFFINITY_SUPPORTED
6098 __kmp_affinity_initialize();
6104 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6105 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
6106 __kmp_affinity_set_init_mask( i, TRUE );
6111 KMP_ASSERT( __kmp_xproc > 0 );
6112 if ( __kmp_avail_proc == 0 ) {
6113 __kmp_avail_proc = __kmp_xproc;
6118 while ( __kmp_nested_nth.used && ! __kmp_nested_nth.nth[ j ] ) {
6119 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
6123 if ( __kmp_dflt_team_nth == 0 ) {
6124 #ifdef KMP_DFLT_NTH_CORES
6128 __kmp_dflt_team_nth = __kmp_ncores;
6129 KA_TRACE( 20, ( "__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n", __kmp_dflt_team_nth ) );
6135 __kmp_dflt_team_nth = __kmp_avail_proc;
6136 KA_TRACE( 20, ( "__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n", __kmp_dflt_team_nth ) );
6141 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
6142 __kmp_dflt_team_nth = KMP_MIN_NTH;
6144 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
6145 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6152 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
6154 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
6161 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6162 kmp_info_t *thread = __kmp_threads[ i ];
6163 if ( thread == NULL ) continue;
6164 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
6166 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
6169 KA_TRACE( 20, ( "__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n", __kmp_dflt_team_nth ) );
6172 #ifdef KMP_ADJUST_BLOCKTIME
6175 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6176 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6177 if ( __kmp_nth > __kmp_avail_proc ) {
6178 __kmp_zero_bt = TRUE;
6184 TCW_SYNC_4(__kmp_init_middle, TRUE);
6186 KA_TRACE( 10, ( "__kmp_do_middle_initialize: exit\n" ) );
6190 __kmp_middle_initialize( void )
6192 if ( __kmp_init_middle ) {
6195 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6196 if ( __kmp_init_middle ) {
6197 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6200 __kmp_do_middle_initialize();
6201 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
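/*
 * __kmp_parallel_initialize: last initialization step, performed on first use of a parallel
 * region. Captures the initial x87/MXCSR state on x86, installs signal handlers, and picks
 * the default dynamic-adjustment mode (load balance where available, thread limit otherwise).
 */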
6205 __kmp_parallel_initialize( void )
6207 int gtid = __kmp_entry_gtid();
6210 if( TCR_4(__kmp_init_parallel) ) return;
6211 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6212 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
6215 if( TCR_4(__kmp_global.g.g_done) ) {
6216 KA_TRACE( 10, ( "__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
6217 __kmp_infinite_loop();
6223 if( !__kmp_init_middle ) {
6224 __kmp_do_middle_initialize();
6228 KA_TRACE( 10, ( "__kmp_parallel_initialize: enter\n" ) );
6229 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6231 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6236 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
6237 __kmp_store_mxcsr( &__kmp_init_mxcsr );
6238 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6242 # if KMP_HANDLE_SIGNALS
6244 __kmp_install_signals( TRUE );
6248 __kmp_suspend_initialize();
6250 # if defined(USE_LOAD_BALANCE)
6251 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6252 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6255 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6256 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6260 if ( __kmp_version ) {
6261 __kmp_print_version_2();
6265 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6268 KA_TRACE( 10, ( "__kmp_parallel_initialize: exit\n" ) );
6270 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
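/*
 * __kmp_run_before_invoked_task / __kmp_run_after_invoked_task bracket the execution of the
 * outlined microtask on each team member: they reset the per-thread dispatch state and
 * push/pop the parallel frame for the consistency checker.
 */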
6277 __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team )
6280 kmp_disp_t *dispatch;
6285 this_thr->th.th_local.this_construct = 0;
6286 #if KMP_CACHE_MANAGE
6287 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
6289 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6290 KMP_DEBUG_ASSERT( dispatch );
6291 KMP_DEBUG_ASSERT( team->t.t_dispatch );
6294 dispatch->th_disp_index = 0;
6296 if( __kmp_env_consistency_check )
6297 __kmp_push_parallel( gtid, team->t.t_ident );
6303 __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team )
6306 if( __kmp_env_consistency_check )
6307 __kmp_pop_parallel( gtid, team->t.t_ident );
6311 __kmp_invoke_task_func( int gtid )
6314 int tid = __kmp_tid_from_gtid( gtid );
6315 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6316 kmp_team_t *team = this_thr->th.th_team;
6318 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
6320 if ( __itt_stack_caller_create_ptr ) {
6321 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id );
6324 #if INCLUDE_SSC_MARKS
6325 SSC_MARK_INVOKING();
6327 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn), gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv );
6331 if ( __itt_stack_caller_create_ptr ) {
6332 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id );
6335 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
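/*
 * __kmp_teams_master runs on the master thread of each team in a "teams" construct: it forks
 * the enclosed parallel region (thr->th.th_teams_microtask) with the thread count recorded in
 * th_teams_size and immediately joins it; __kmp_invoke_teams_master wraps it with the usual
 * before/after bookkeeping.
 */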
6342 __kmp_teams_master( int gtid )
6345 kmp_info_t *thr = __kmp_threads[ gtid ];
6346 kmp_team_t *team = thr->th.th_team;
6347 ident_t *loc = team->t.t_ident;
6348 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6349 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
6350 KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
6351 KA_TRACE( 20, ( "__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
6355 #if INCLUDE_SSC_MARKS
6358 __kmp_fork_call( loc, gtid, fork_context_intel,
6360 (microtask_t)thr->th.th_teams_microtask,
6361 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
6363 #if INCLUDE_SSC_MARKS
6366 __kmp_join_call( loc, gtid, 1 );
6371 __kmp_invoke_teams_master( int gtid )
6373 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6374 kmp_team_t *team = this_thr->th.th_team;
6376 if ( !__kmp_threads[gtid]->th.th_team->t.t_serialized )
6377 KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]->th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
6379 __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
6380 __kmp_teams_master( gtid );
6381 __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
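/*
 * The __kmp_push_* helpers stash clauses of an upcoming construct (num_threads, num_teams and
 * its per-team thread count, proc_bind) in the current thread before the fork; when no
 * per-team thread count is given, one is derived as __kmp_avail_proc / num_teams.
 */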
6392 __kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
6394 kmp_info_t *thr = __kmp_threads[gtid];
6396 if( num_threads > 0 )
6397 thr->th.th_set_nproc = num_threads;
6405 __kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
6407 kmp_info_t *thr = __kmp_threads[gtid];
6408 KMP_DEBUG_ASSERT(num_teams >= 0);
6409 KMP_DEBUG_ASSERT(num_threads >= 0);
6410 if( num_teams == 0 ) {
6414 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
6417 if( num_threads > 0 ) {
6418 thr->th.th_teams_size.nth = num_threads;
6420 if( !TCR_4(__kmp_init_middle) )
6421 __kmp_middle_initialize();
6422 thr->th.th_teams_size.nth = __kmp_avail_proc / num_teams;
6431 __kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
6433 kmp_info_t *thr = __kmp_threads[gtid];
6434 thr->th.th_set_proc_bind = proc_bind;
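/*
 * __kmp_internal_fork / __kmp_internal_join: master-side helpers around the fork and join
 * barriers. The fork side resets the team's construct counters and dispatch buffers before
 * releasing the workers; the join side waits for all team members and sanity-checks team sizes.
 */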
6442 __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
6444 kmp_info_t *this_thr = __kmp_threads[gtid];
6450 KMP_DEBUG_ASSERT( team );
6451 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
6452 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
6455 team->t.t_construct = 0;
6456 team->t.t_ordered.dt.t_value = 0;
6459 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
6460 if ( team->t.t_max_nproc > 1 ) {
6462 for (i = 0; i < KMP_MAX_DISP_BUF; ++i)
6463 team->t.t_disp_buffer[ i ].buffer_index = i;
6465 team->t.t_disp_buffer[ 0 ].buffer_index = 0;
6469 KMP_ASSERT( this_thr->th.th_team == team );
6472 for( f=0 ; f<team->t.t_nproc ; f++ ) {
6473 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
6474 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
6479 __kmp_fork_barrier( gtid, 0 );
6484 __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
6486 kmp_info_t *this_thr = __kmp_threads[gtid];
6488 KMP_DEBUG_ASSERT( team );
6489 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
6490 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
6496 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
6497 __kmp_printf( "GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid, __kmp_threads[gtid] );
6498 __kmp_printf( "__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n", gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc );
6500 __kmp_print_structure();
6502 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
6503 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
6506 __kmp_join_barrier( gtid );
6509 KMP_ASSERT( this_thr->th.th_team == team );
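/*
 * USE_LOAD_BALANCE: dynamic adjustment of team size based on system load.
 * __kmp_active_hot_team_nproc counts hot-team threads that are actually running, and
 * __kmp_load_balance_nproc caps the requested team size by the number of idle processors,
 * falling back to the thread-limit heuristic when the load query fails.
 */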
6516 #ifdef USE_LOAD_BALANCE
6523 __kmp_active_hot_team_nproc( kmp_root_t *root )
6527 kmp_team_t *hot_team;
6529 if ( root->r.r_active ) {
6532 hot_team = root->r.r_hot_team;
6533 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
6534 return hot_team->t.t_nproc - 1;
6541 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
6542 if ( hot_team->t.t_threads[i]->th.th_active ) {
6554 __kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
6558 int hot_team_active;
6559 int team_curr_active;
6562 KB_TRACE( 20, ( "__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root, set_nproc ) );
6564 KMP_DEBUG_ASSERT( root );
6565 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
6566 KMP_DEBUG_ASSERT( set_nproc > 1 );
6568 if ( set_nproc == 1) {
6569 KB_TRACE( 20, ( "__kmp_load_balance_nproc: serial execution.\n" ) );
6580 pool_active = TCR_4(__kmp_thread_pool_active_nth);
6581 hot_team_active = __kmp_active_hot_team_nproc( root );
6582 team_curr_active = pool_active + hot_team_active + 1;
6587 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
6588 KB_TRACE( 30, ( "__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n", system_active, pool_active, hot_team_active ) );
6591 if ( system_active < 0 ) {
6598 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6599 KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
6604 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
6605 : root->r.r_hot_team->t.t_nproc);
6606 if ( retval > set_nproc ) {
6609 if ( retval < KMP_MIN_NTH ) {
6610 retval = KMP_MIN_NTH;
6613 KB_TRACE( 20, ( "__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
6623 if ( system_active < team_curr_active ) {
6624 system_active = team_curr_active;
6626 retval = __kmp_avail_proc - system_active + team_curr_active;
6627 if ( retval > set_nproc ) {
6630 if ( retval < KMP_MIN_NTH ) {
6631 retval = KMP_MIN_NTH;
6634 KB_TRACE( 20, ( "__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
6646 __kmp_cleanup( void )
6650 KA_TRACE( 10, ( "__kmp_cleanup: enter\n" ) );
6652 if (TCR_4(__kmp_init_parallel)) {
6653 #if KMP_HANDLE_SIGNALS
6654 __kmp_remove_signals();
6656 TCW_4(__kmp_init_parallel, FALSE);
6659 if (TCR_4(__kmp_init_middle)) {
6660 #if KMP_AFFINITY_SUPPORTED
6661 __kmp_affinity_uninitialize();
6663 TCW_4(__kmp_init_middle, FALSE);
6666 KA_TRACE( 10, ( "__kmp_cleanup: go serial cleanup\n" ) );
6668 if (__kmp_init_serial) {
6670 __kmp_runtime_destroy();
6672 __kmp_init_serial = FALSE;
6675 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
6676 if ( __kmp_root[ f ] != NULL ) {
6677 __kmp_free( __kmp_root[ f ] );
6678 __kmp_root[ f ] = NULL;
6681 __kmp_free( __kmp_threads );
6684 __kmp_threads = NULL;
6686 __kmp_threads_capacity = 0;
6688 __kmp_cleanup_user_locks();
6690 #if KMP_AFFINITY_SUPPORTED
6691 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
6692 __kmp_cpuinfo_file = NULL;
6695 #if KMP_USE_ADAPTIVE_LOCKS
6696 #if KMP_DEBUG_ADAPTIVE_LOCKS
6697 __kmp_print_speculative_stats();
6700 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
6701 __kmp_nested_nth.nth = NULL;
6702 __kmp_nested_nth.size = 0;
6703 __kmp_nested_nth.used = 0;
6705 __kmp_i18n_catclose();
6707 #if KMP_STATS_ENABLED
6708 __kmp_accumulate_stats_at_exit();
6709 __kmp_stats_list.deallocate();
6712 KA_TRACE( 10, ( "__kmp_cleanup: exit\n" ) );
6719 __kmp_ignore_mppbeg( void )
6723 if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
6724 if (__kmp_str_match_false( env ))
6732 __kmp_ignore_mppend( void )
6736 if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
6737 if (__kmp_str_match_false( env ))
6745 __kmp_internal_begin( void )
6752 gtid = __kmp_entry_gtid();
6753 root = __kmp_threads[ gtid ]->th.th_root;
6754 KMP_ASSERT( KMP_UBER_GTID( gtid ));
6756 if( root->r.r_begin ) return;
6757 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
6758 if( root->r.r_begin ) {
6759 __kmp_release_lock( & root->r.r_begin_lock, gtid );
6763 root->r.r_begin = TRUE;
6765 __kmp_release_lock( & root->r.r_begin_lock, gtid );
6773 __kmp_user_set_library ( enum library_type arg)
6781 gtid = __kmp_entry_gtid();
6782 thread = __kmp_threads[ gtid ];
6784 root = thread->th.th_root;
6786 KA_TRACE( 20, ( "__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
6787 if (root->r.r_in_parallel) {
6788 KMP_WARNING( SetLibraryIncorrectCall );
6793 case library_serial :
6794 thread->th.th_set_nproc = 0;
6795 set__nproc( thread, 1 );
6797 case library_turnaround :
6798 thread->th.th_set_nproc = 0;
6799 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
6801 case library_throughput :
6802 thread->th.th_set_nproc = 0;
6803 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
6806 KMP_FATAL( UnknownLibraryType, arg );
6809 __kmp_aux_set_library ( arg );
6813 __kmp_aux_set_stacksize( size_t arg )
6815 if (! __kmp_init_serial)
6816 __kmp_serial_initialize();
6819 if (arg & (0x1000 - 1)) {
6820 arg &= ~(0x1000 - 1);
6825 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6828 if (! TCR_4(__kmp_init_parallel)) {
6831 if (value < __kmp_sys_min_stksize )
6832 value = __kmp_sys_min_stksize ;
6833 else if (value > KMP_MAX_STKSIZE)
6834 value = KMP_MAX_STKSIZE;
6836 __kmp_stksize = value;
6838 __kmp_env_stksize = TRUE;
6841 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6847 __kmp_aux_set_library ( enum library_type arg)
6849 __kmp_library = arg;
6851 switch ( __kmp_library ) {
6852 case library_serial :
6854 KMP_INFORM( LibraryIsSerial );
6855 (void) __kmp_change_library( TRUE );
6858 case library_turnaround :
6859 (void) __kmp_change_library( TRUE );
6861 case library_throughput :
6862 (void) __kmp_change_library( FALSE );
6865 KMP_FATAL( UnknownLibraryType, arg );
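/*
 * __kmp_aux_set_blocktime: backend for the kmp_set_blocktime() entry point (note the trace tag
 * below). The value is clamped to [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME] and written into both
 * the current team's and the serial team's ICVs, together with the derived bt_intervals.
 */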
6873 __kmp_aux_set_blocktime ( int arg, kmp_info_t *thread, int tid )
6875 int blocktime = arg;
6879 __kmp_save_internal_controls( thread );
6882 if (blocktime < KMP_MIN_BLOCKTIME)
6883 blocktime = KMP_MIN_BLOCKTIME;
6884 else if (blocktime > KMP_MAX_BLOCKTIME)
6885 blocktime = KMP_MAX_BLOCKTIME;
6887 set__blocktime_team( thread->th.th_team, tid, blocktime );
6888 set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
6891 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
6893 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
6894 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
6899 set__bt_set_team( thread->th.th_team, tid, bt_set );
6900 set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
6901 KF_TRACE(10, ( "kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n",
6902 __kmp_gtid_from_tid(tid, thread->th.th_team),
6903 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) );
6907 __kmp_aux_set_defaults(
6911 if ( ! __kmp_init_serial ) {
6912 __kmp_serial_initialize();
6914 __kmp_env_initialize( str );
6918 || __kmp_display_env || __kmp_display_env_verbose
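/*
 * __kmp_determine_reduction_method picks how a reduction is finalized: an empty block for a
 * team of one, atomics for small variable counts, a tree reduction for larger payloads, or the
 * critical-section fallback. The choice is packed into one word; __kmp_get_reduce_method below
 * unpacks it (>> 8) for the current thread.
 */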
6931 PACKED_REDUCTION_METHOD_T
6932 __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
6933 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
6934 kmp_critical_name *lck )
6942 PACKED_REDUCTION_METHOD_T retval;
6946 KMP_DEBUG_ASSERT( loc );
6947 KMP_DEBUG_ASSERT( lck );
6949 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
6950 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
6952 retval = critical_reduce_block;
6954 team_size = __kmp_get_team_num_threads( global_tid );
6956 if( team_size == 1 ) {
6958 retval = empty_reduce_block;
6962 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
6963 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
6965 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64
6967 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
6969 #define REDUCTION_TEAMSIZE_CUTOFF 8
6971 #define REDUCTION_TEAMSIZE_CUTOFF 4
6973 if( tree_available ) {
6974 if( team_size <= REDUCTION_TEAMSIZE_CUTOFF ) {
6975 if ( atomic_available ) {
6976 retval = atomic_reduce_block;
6979 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
6981 } else if ( atomic_available ) {
6982 retval = atomic_reduce_block;
6985 #error "Unknown or unsupported OS"
6986 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
6988 #elif KMP_ARCH_X86 || KMP_ARCH_ARM
6990 #if KMP_OS_LINUX || KMP_OS_WINDOWS
6994 if( atomic_available ) {
6995 if( num_vars <= 2 ) {
6996 retval = atomic_reduce_block;
7002 if( atomic_available && ( num_vars <= 3 ) ) {
7003 retval = atomic_reduce_block;
7004 } else if( tree_available ) {
7005 if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
7006 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7011 #error "Unknown or unsupported OS"
7015 #error "Unknown or unsupported architecture"
7022 if( __kmp_force_reduction_method != reduction_method_not_defined ) {
7024 PACKED_REDUCTION_METHOD_T forced_retval;
7026 int atomic_available, tree_available;
7028 switch( ( forced_retval = __kmp_force_reduction_method ) )
7030 case critical_reduce_block:
7032 if( team_size <= 1 ) {
7033 forced_retval = empty_reduce_block;
7037 case atomic_reduce_block:
7038 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7039 KMP_ASSERT( atomic_available );
7042 case tree_reduce_block:
7043 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7044 KMP_ASSERT( tree_available );
7045 #if KMP_FAST_REDUCTION_BARRIER
7046 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7054 retval = forced_retval;
7057 KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
7059 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7060 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7067 __kmp_get_reduce_method( void ) {
7068 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );