38 #include "kmp_atomic.h"
39 #include "kmp_wrapper_getpid.h"
40 #include "kmp_environment.h"
43 #include "kmp_settings.h"
46 #include "kmp_error.h"
49 #define KMP_USE_PRCTL 0
50 #define KMP_USE_POOLED_ALLOC 0
53 #include <immintrin.h>
54 #define USE_NGO_STORES 1
57 #if KMP_MIC && USE_NGO_STORES
58 #define load_icvs(src) __m512d Vt_icvs = _mm512_load_pd((void *)(src))
59 #define store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt_icvs)
60 #define sync_icvs() __asm__ volatile ("lock; addl $0,0(%%rsp)" ::: "memory")
62 #define load_icvs(src) ((void)0)
63 #define store_icvs(dst, src) copy_icvs((dst), (src))
64 #define sync_icvs() ((void)0)
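/*
 * Illustrative sketch (not part of the runtime) of how the ICV copy macros
 * above are meant to be used together: load the master's ICV block once,
 * broadcast it to each worker's slot with store_icvs() (a non-globally-
 * ordered store on KMP_MIC builds, a plain copy otherwise), then issue one
 * sync_icvs() fence so the stores are visible before workers are released.
 * The destination array and count below are hypothetical.
 */
#if 0
static void __kmp_broadcast_icvs_sketch( kmp_internal_control_t *dst,
                                         int nproc,
                                         kmp_internal_control_t *src )
{
    int i;
    load_icvs( src );                  /* pull the source ICV block once     */
    for ( i = 1; i < nproc; ++i ) {
        store_icvs( &dst[ i ], src );  /* streaming store per worker on MIC  */
    }
    sync_icvs();                       /* order the stores before release    */
}
#endif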
72 #if defined(KMP_GOMP_COMPAT)
73 char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
76 char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
87 char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";
89 char const __kmp_version_perf_v19[] = KMP_VERSION_PREFIX "perf v19: "
90 #if KMP_PERF_V19 == KMP_ON
92 #elif KMP_PERF_V19 == KMP_OFF
95 #error "Must specify KMP_PERF_V19 option"
98 char const __kmp_version_perf_v106[] = KMP_VERSION_PREFIX "perf v106: "
99 #if KMP_PERF_V106 == KMP_ON
101 #elif KMP_PERF_V106 == KMP_OFF
104 #error "Must specify KMP_PERF_V106 option"
113 kmp_info_t __kmp_monitor;
120 void __kmp_cleanup( void );
122 static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
123 static void __kmp_initialize_team(
127     kmp_internal_control_t * new_icvs,
130     int new_set_nproc, int new_set_dynamic, int new_set_nested,
131     int new_set_blocktime, int new_bt_intervals, int new_bt_set
134 static void __kmp_partition_places( kmp_team_t *team );
135 static void __kmp_do_serial_initialize( void );
138 #ifdef USE_LOAD_BALANCE
139 static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
142 static int __kmp_expand_threads( int nWish, int nNeed );
143 static int __kmp_unregister_root_other_thread( int gtid );
144 static void __kmp_unregister_library( void );
145 static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
146 static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
157 __kmp_get_global_thread_id( )
160 kmp_info_t **other_threads;
166 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
167     __kmp_nth, __kmp_all_nth ));
174 if ( !TCR_4(__kmp_init_gtid) )
return KMP_GTID_DNE;
176 #ifdef KMP_TDATA_GTID
177 if ( TCR_4(__kmp_gtid_mode) >= 3) {
178 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
182 if ( TCR_4(__kmp_gtid_mode) >= 2) {
183 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
184 return __kmp_gtid_get_specific();
186 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));
188 stack_addr = (char*) & stack_data;
189 other_threads = __kmp_threads;
204 for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {
206 kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
209 stack_size = (size_t)TCR_PTR(thr -> th.th_info.ds.ds_stacksize);
210 stack_base = (char *)TCR_PTR(thr -> th.th_info.ds.ds_stackbase);
214 if( stack_addr <= stack_base ) {
215 size_t stack_diff = stack_base - stack_addr;
217 if( stack_diff <= stack_size ) {
220 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
227 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
228     "thread, using TLS\n" ));
229 i = __kmp_gtid_get_specific();
237 if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
238 KMP_FATAL( StackOverflow, i );
241 stack_base = (char *) other_threads[i] -> th.th_info.ds.ds_stackbase;
242 if( stack_addr > stack_base ) {
243 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
244 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
245 other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
247 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
251 if ( __kmp_storage_map ) {
252 char *stack_end = (char *) other_threads[i] -> th.th_info.ds.ds_stackbase;
253 char *stack_beg = stack_end - other_threads[i] -> th.th_info.ds.ds_stacksize;
254 __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
255 other_threads[i] -> th.th_info.ds.ds_stacksize,
256 "th_%d stack (refinement)", i );
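/*
 * Illustrative sketch (hypothetical names, not part of the runtime) of the
 * lookup __kmp_get_global_thread_id() performs above: given the address of a
 * local variable, scan the registered stack ranges and return the slot whose
 * [base - size, base] interval contains it.  Stacks grow downward, so the
 * containment test is "addr <= base && base - addr <= size".
 */
#if 0
static int __kmp_find_gtid_by_stack_sketch( char *stack_addr,
                                            char **stack_base,
                                            size_t *stack_size,
                                            int capacity )
{
    int i;
    for ( i = 0; i < capacity; ++i ) {
        if ( stack_base[ i ] == NULL )
            continue;                           /* no thread registered here */
        if ( stack_addr <= stack_base[ i ] &&
             (size_t)( stack_base[ i ] - stack_addr ) <= stack_size[ i ] )
            return i;                           /* address inside this stack */
    }
    return -1;                                  /* analogue of KMP_GTID_DNE  */
}
#endif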
262 __kmp_get_global_thread_id_reg( )
266 if ( !__kmp_init_serial ) {
269 #ifdef KMP_TDATA_GTID
270 if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
271 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
275 if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
276 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
277 gtid = __kmp_gtid_get_specific();
279 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
280 gtid = __kmp_get_global_thread_id();
284 if( gtid == KMP_GTID_DNE ) {
285 KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
286     "Registering a new gtid.\n" ));
287 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
288 if( !__kmp_init_serial ) {
289 __kmp_do_serial_initialize();
290 gtid = __kmp_gtid_get_specific();
292 gtid = __kmp_register_root(FALSE);
294 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
298 KMP_DEBUG_ASSERT( gtid >=0 );
305 __kmp_check_stack_overlap( kmp_info_t *th )
308 char *stack_beg = NULL;
309 char *stack_end = NULL;
312 KA_TRACE(10,( "__kmp_check_stack_overlap: called\n"));
313 if ( __kmp_storage_map ) {
314 stack_end = (char *) th -> th.th_info.ds.ds_stackbase;
315 stack_beg = stack_end - th -> th.th_info.ds.ds_stacksize;
317 gtid = __kmp_gtid_from_thread( th );
319 if (gtid == KMP_GTID_MONITOR) {
320 __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
321     "th_%s stack (%s)", "mon",
322     ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
324 __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
325     "th_%d stack (%s)", gtid,
326     ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
331 if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid = __kmp_gtid_from_thread( th )))
333 KA_TRACE(10,( "__kmp_check_stack_overlap: performing extensive checking\n"));
334 if ( stack_beg == NULL ) {
335 stack_end = (char *) th -> th.th_info.ds.ds_stackbase;
336 stack_beg = stack_end - th -> th.th_info.ds.ds_stacksize;
339 for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
340 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);
342 if( f_th && f_th != th ) {
343 char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
344 char *other_stack_beg = other_stack_end -
345 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
346 if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
347 (stack_end > other_stack_beg && stack_end < other_stack_end)) {
350 if ( __kmp_storage_map )
351 __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
352     (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
353 "th_%d stack (overlapped)",
354 __kmp_gtid_from_thread( f_th ) );
356 __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
361 KA_TRACE(10,( "__kmp_check_stack_overlap: returning\n"));
368 # define __kmp_static_delay( arg )
372 __kmp_static_delay( int arg )
375 # if KMP_ARCH_X86_64 && KMP_OS_LINUX
376 KMP_ASSERT( arg != 0 );
378 KMP_ASSERT( arg >= 0 );
384 __kmp_static_yield( int arg )
398 __kmp_wait_sleep( kmp_info_t *this_thr,
399 volatile kmp_uint *spinner,
402     USE_ITT_BUILD_ARG ( void * itt_sync_obj)
406 register volatile kmp_uint *spin = spinner;
407 register kmp_uint check = checker;
408 register kmp_uint32 spins;
409 register kmp_uint32 hibernate;
415 KMP_FSYNC_SPIN_INIT( spin, NULL );
416 if( TCR_4(*spin) == check ) {
417 KMP_FSYNC_SPIN_ACQUIRED( spin );
421 th_gtid = this_thr->th.th_info.ds.ds_gtid;
423 KA_TRACE( 20, ( "__kmp_wait_sleep: T#%d waiting for spin(%p) == %d\n",
428 KMP_INIT_YIELD( spins );
430 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
435 #ifdef KMP_ADJUST_BLOCKTIME
436 if ( __kmp_zero_bt && ! this_thr->th.th_team_bt_set ) {
440 hibernate = this_thr->th.th_team_bt_intervals;
443 hibernate = this_thr->th.th_team_bt_intervals;
452 if ( hibernate != 0 ) {
459 hibernate += TCR_4( __kmp_global.g.g_time.dt.t_value );
461 KF_TRACE( 20, ( "__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
462 th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
463 hibernate - __kmp_global.g.g_time.dt.t_value ));
469 while( TCR_4(*spin) != check ) {
485 kmp_task_team_t * task_team = NULL;
486 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
487 task_team = this_thr->th.th_task_team;
488 if ( task_team != NULL ) {
489 if ( ! TCR_SYNC_4( task_team->tt.tt_active ) ) {
490 KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( this_thr->th.th_info.ds.ds_tid ) );
491 __kmp_unref_task_team( task_team, this_thr );
492 } else if ( KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) {
493 __kmp_execute_tasks( this_thr, th_gtid, spin, check, final_spin, &flag
494 USE_ITT_BUILD_ARG( itt_sync_obj )
501 KMP_FSYNC_SPIN_PREPARE( spin );
502 if( TCR_4(__kmp_global.g.g_done) ) {
503 if( __kmp_global.g.g_abort )
504 __kmp_abort_thread( );
508 __kmp_static_delay( 1 );
512 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
516 KMP_YIELD_SPIN( spins );
522 in_pool = !!TCR_4(this_thr->th.th_in_pool);
523 if ( in_pool != !!this_thr->th.th_active_in_pool ) {
529 (kmp_int32 *) &__kmp_thread_pool_active_nth );
530 this_thr->th.th_active_in_pool = TRUE;
550 (kmp_int32 *) &__kmp_thread_pool_active_nth );
551 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
552 this_thr->th.th_active_in_pool = FALSE;
558 if ( ( task_team != NULL ) && TCR_4(task_team->tt.tt_found_tasks) ) {
564 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
569 if ( TCR_4( __kmp_global.g.g_time.dt.t_value ) < hibernate ) {
573 KF_TRACE( 50, ( "__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid ) );
575 __kmp_suspend( th_gtid, spin, check );
577 if( TCR_4( __kmp_global.g.g_done ) && __kmp_global.g.g_abort ) {
578 __kmp_abort_thread( );
585 KMP_FSYNC_SPIN_ACQUIRED( spin );
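/*
 * Illustrative sketch (hypothetical helpers, not part of the runtime) of the
 * spin-then-sleep policy implemented by __kmp_wait_sleep() above: spin and
 * yield until the flag reaches the expected value, and once the blocktime
 * interval has expired, suspend instead of burning cycles.  The yield and
 * suspend callbacks stand in for KMP_YIELD_SPIN()/__kmp_suspend().
 */
#if 0
static void __kmp_wait_sleep_sketch( volatile kmp_uint *spin, kmp_uint check,
                                     kmp_uint32 hibernate_tick,
                                     void (*yield_fn)( void ),
                                     void (*suspend_fn)( void ) )
{
    while ( TCR_4( *spin ) != check ) {
        yield_fn();                                /* stay polite while spinning */
        if ( TCR_4( __kmp_global.g.g_time.dt.t_value ) >= hibernate_tick )
            suspend_fn();                          /* blocktime expired: block   */
    }
}
#endif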
598 __kmp_release( kmp_info_t *target_thr, volatile kmp_uint *spin,
599 enum kmp_mem_fence_type fetchadd_fence )
603 int target_gtid = target_thr->th.th_info.ds.ds_gtid;
604 int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
607 KF_TRACE( 20, ( "__kmp_release: T#%d releasing T#%d spin(%p) fence_type(%d)\n",
608 gtid, target_gtid, spin, fetchadd_fence ));
610 KMP_DEBUG_ASSERT( spin );
612 KMP_DEBUG_ASSERT( fetchadd_fence == kmp_acquire_fence ||
613 fetchadd_fence == kmp_release_fence );
615 KMP_FSYNC_RELEASING( spin );
617 old_spin = ( fetchadd_fence == kmp_acquire_fence )
618     ? KMP_TEST_THEN_ADD4_ACQ32( (volatile kmp_int32 *) spin )
619     : KMP_TEST_THEN_ADD4_32( (volatile kmp_int32 *) spin );
621 KF_TRACE( 100, ( "__kmp_release: T#%d old spin(%p)=%d, set new spin=%d\n",
622     gtid, spin, old_spin, *spin ) );
624 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
626 if ( old_spin & KMP_BARRIER_SLEEP_STATE ) {
628 int target_gtid = target_thr->th.th_info.ds.ds_gtid;
631 KF_TRACE( 50, ( "__kmp_release: T#%d waking up thread T#%d since sleep spin(%p) set\n",
632     gtid, target_gtid, spin ));
633 __kmp_resume( target_gtid, spin );
635 KF_TRACE( 50, ( "__kmp_release: T#%d don't wake up thread T#%d since sleep spin(%p) not set\n",
636     gtid, target_gtid, spin ));
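/*
 * Illustrative sketch (not part of the runtime) of the release protocol used
 * by __kmp_release() above: the flag is bumped with an atomic fetch-and-add,
 * and if the *old* value had the sleep bit set the waiter is currently
 * suspended and must be woken explicitly; otherwise it is spinning and will
 * observe the new value on its own.
 */
#if 0
static void __kmp_release_sketch( int target_gtid, volatile kmp_uint *spin )
{
    kmp_uint32 old_spin = KMP_TEST_THEN_ADD4_32( (volatile kmp_int32 *) spin );
    if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
         ( old_spin & KMP_BARRIER_SLEEP_STATE ) ) {
        __kmp_resume( target_gtid, spin );    /* waiter is asleep: wake it up */
    }
}
#endif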
644 __kmp_infinite_loop( void )
646 static int done = FALSE;
653 #define MAX_MESSAGE 512
656 __kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
657 char buffer[MAX_MESSAGE];
661 va_start( ap, format);
662 sprintf( buffer, "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
663 __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
664 __kmp_vprintf( kmp_err, buffer, ap );
665 #if KMP_PRINT_DATA_PLACEMENT
667 if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
668 if( __kmp_storage_map_verbose ) {
669 node = __kmp_get_host_node(p1);
671 __kmp_storage_map_verbose = FALSE;
675 int localProc = __kmp_get_cpu_from_gtid(gtid);
677 p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
678 p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) );
680 __kmp_printf_no_lock( " GTID %d localNode %d\n", gtid, localProc>>1);
682 __kmp_printf_no_lock( " GTID %d\n", gtid);
690 (char*)p1 += PAGE_SIZE;
691 } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
692 __kmp_printf_no_lock( " %p-%p memNode %d\n", last,
693     (char*)p1 - 1, lastNode);
696 __kmp_printf_no_lock( " %p-%p memNode %d\n", p1,
697     (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1));
699 __kmp_printf_no_lock( " %p-%p memNode %d\n", p2,
700     (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2));
706 __kmp_printf_no_lock( " %s\n", KMP_I18N_STR( StorageMapWarning ) );
709 __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
713 __kmp_warn( char const * format, ... )
715 char buffer[MAX_MESSAGE];
718 if ( __kmp_generate_warnings == kmp_warnings_off ) {
722 va_start( ap, format );
724 snprintf( buffer, sizeof(buffer), "OMP warning: %s\n", format );
725 __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
726 __kmp_vprintf( kmp_err, buffer, ap );
727 __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
733 __kmp_abort_process()
737 __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );
739 if ( __kmp_debug_buf ) {
740 __kmp_dump_debug_buffer();
743 if ( KMP_OS_WINDOWS ) {
746 __kmp_global.g.g_abort = SIGABRT;
764 __kmp_infinite_loop();
765 __kmp_release_bootstrap_lock( & __kmp_exit_lock );
770 __kmp_abort_thread( void )
774 __kmp_infinite_loop();
785 __kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
787 __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );
789 __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
790     "th_%d.th_info", gtid );
792 __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
793     "th_%d.th_local", gtid );
795 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
796     sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );
798 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
799     &thr->th.th_bar[bs_plain_barrier+1],
800     sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);
802 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
803     &thr->th.th_bar[bs_forkjoin_barrier+1],
804     sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);
806 #if KMP_FAST_REDUCTION_BARRIER
807 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
808     &thr->th.th_bar[bs_reduction_barrier+1],
809     sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
810 #endif // KMP_FAST_REDUCTION_BARRIER
819 __kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
821 int num_disp_buff = team->t.t_max_nproc > 1 ? KMP_MAX_DISP_BUF : 2;
822 __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
825 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
826     sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );
829 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
830     sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );
832 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
833     sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );
835 #if KMP_FAST_REDUCTION_BARRIER
836 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
837     sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
838 #endif // KMP_FAST_REDUCTION_BARRIER
840 __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
841     sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );
843 __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
844     sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );
846 __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
847     sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
882 __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
883     sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
886 static void __kmp_init_allocator() {}
887 static void __kmp_fini_allocator() {}
888 static void __kmp_fini_allocator_thread() {}
892 #ifdef GUIDEDLL_EXPORTS
897 __kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
899 __kmp_init_bootstrap_lock( lck );
903 __kmp_reset_locks_on_process_detach( int gtid_req ) {
920 for( i = 0; i < __kmp_threads_capacity; ++i ) {
921 if( !__kmp_threads ) continue;
922 kmp_info_t* th = __kmp_threads[ i ];
923 if( th == NULL ) continue;
924 int gtid = th->th.th_info.ds.ds_gtid;
925 if( gtid == gtid_req ) continue;
926 if( gtid < 0 ) continue;
928 int alive = __kmp_is_thread_alive( th, &exit_val );
933 if( thread_count == 0 ) break;
940 __kmp_reset_lock( &__kmp_forkjoin_lock );
942 __kmp_reset_lock( &__kmp_stdio_lock );
949 DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {
952 switch( fdwReason ) {
954 case DLL_PROCESS_ATTACH:
955 KA_TRACE( 10, ( "DllMain: PROCESS_ATTACH\n" ));
959 case DLL_PROCESS_DETACH:
960 KA_TRACE( 10, ( "DllMain: PROCESS_DETACH T#%d\n",
961     __kmp_gtid_get_specific() ));
963 if( lpReserved != NULL )
990 __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
993 __kmp_internal_end_library( __kmp_gtid_get_specific() );
997 case DLL_THREAD_ATTACH:
998 KA_TRACE( 10, ( "DllMain: THREAD_ATTACH\n" ));
1004 case DLL_THREAD_DETACH:
1005 KA_TRACE( 10, ( "DllMain: THREAD_DETACH T#%d\n",
1006     __kmp_gtid_get_specific() ));
1008 __kmp_internal_end_thread( __kmp_gtid_get_specific() );
1024 __kmp_change_library( int status )
1028 old_status = __kmp_yield_init & 1;
1031 __kmp_yield_init |= 1;
1034 __kmp_yield_init &= ~1;
1047 __kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
1049 int gtid = *gtid_ref;
1050 #ifdef BUILD_PARALLEL_ORDERED
1051 kmp_team_t *team = __kmp_team_from_gtid( gtid );
1054 if( __kmp_env_consistency_check ) {
1055 if( __kmp_threads[gtid] -> th.th_root -> r.r_active )
1056 __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
1058 #ifdef BUILD_PARALLEL_ORDERED
1059 if( !team -> t.t_serialized ) {
1063 KMP_WAIT_YIELD(&team -> t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
1074 __kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
1076 int gtid = *gtid_ref;
1077 #ifdef BUILD_PARALLEL_ORDERED
1078 int tid = __kmp_tid_from_gtid( gtid );
1079 kmp_team_t *team = __kmp_team_from_gtid( gtid );
1082 if( __kmp_env_consistency_check ) {
1083 if( __kmp_threads[gtid] -> th.th_root -> r.r_active )
1084 __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
1086 #ifdef BUILD_PARALLEL_ORDERED
1087 if ( ! team -> t.t_serialized ) {
1092 team -> t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );
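/*
 * Illustrative sketch (hypothetical shape, not part of the runtime) of the
 * "ordered" hand-off implemented by __kmp_parallel_deo()/__kmp_parallel_dxo()
 * above: t_ordered.dt.t_value acts as a ticket that cycles through the team
 * in thread-id order; each thread waits for its own tid, runs the ordered
 * body, then passes the ticket to (tid + 1) % nproc.
 */
#if 0
static void __kmp_ordered_ticket_sketch( volatile kmp_uint32 *ticket,
                                         int tid, int nproc,
                                         void (*ordered_body)( void ) )
{
    while ( TCR_4( *ticket ) != (kmp_uint32) tid ) {
        KMP_YIELD( TRUE );                     /* wait for our turn          */
    }
    ordered_body();                            /* the ordered region         */
    *ticket = ( tid + 1 ) % nproc;             /* hand off to the next tid   */
}
#endif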
1108 __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
1114 if( ! TCR_4(__kmp_init_parallel) )
1115 __kmp_parallel_initialize();
1117 th = __kmp_threads[ gtid ];
1118 team = th -> th.th_team;
1121 th->th.th_ident = id_ref;
1123 if ( team -> t.t_serialized ) {
1126 kmp_int32 old_this = th->th.th_local.this_construct;
1128 ++th->th.th_local.this_construct;
1133 status = KMP_COMPARE_AND_STORE_ACQ32(&team -> t.t_construct, old_this,
1134 th->th.th_local.this_construct);
1137 if( __kmp_env_consistency_check ) {
1138 if (status && push_ws) {
1139 __kmp_push_workshare( gtid, ct_psingle, id_ref );
1141 __kmp_check_workshare( gtid, ct_psingle, id_ref );
1146 __kmp_itt_single_start( gtid );
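/*
 * Illustrative sketch (not part of the runtime) of the election performed by
 * __kmp_enter_single() above: every thread of the team tries to advance the
 * shared t_construct counter from the previous construct number to the new
 * one with a single compare-and-swap; exactly one CAS succeeds, and that
 * thread executes the single region.
 */
#if 0
static int __kmp_single_election_sketch( volatile kmp_int32 *construct,
                                         kmp_int32 old_this,
                                         kmp_int32 new_this )
{
    /* nonzero only for the one thread that wins the construct */
    return KMP_COMPARE_AND_STORE_ACQ32( construct, old_this, new_this );
}
#endif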
1153 __kmp_exit_single( int gtid )
1156 __kmp_itt_single_end( gtid );
1158 if( __kmp_env_consistency_check )
1159 __kmp_pop_workshare( gtid, ct_psingle, NULL );
1167 __kmp_linear_barrier_gather( enum barrier_type bt,
1168     kmp_info_t *this_thr,
1171     void (*reduce)( void *, void * )
1172     USE_ITT_BUILD_ARG( void * itt_sync_obj)
1175 register kmp_team_t *team = this_thr -> th.th_team;
1176 register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
1177 register kmp_info_t **other_threads = team -> t.t_threads;
1179 KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
1180 gtid, team->t.t_id, tid, bt ) );
1182 KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
1190 if ( ! KMP_MASTER_TID( tid )) {
1192 KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
1193 "arrived(%p): %u => %u\n",
1194 gtid, team->t.t_id, tid,
1195 __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
1196 &thr_bar -> b_arrived, thr_bar -> b_arrived,
1197 thr_bar -> b_arrived + KMP_BARRIER_STATE_BUMP
1206 __kmp_release( other_threads[0], &thr_bar -> b_arrived, kmp_release_fence );
1209 register kmp_balign_team_t *team_bar = & team -> t.t_bar[ bt ];
1210 register int nproc = this_thr -> th.th_team_nproc;
1212 register kmp_uint new_state;
1215 new_state = team_bar -> b_arrived + KMP_BARRIER_STATE_BUMP;
1218 for (i = 1; i < nproc; i++) {
1219 #if KMP_CACHE_MANAGE
1222 KMP_CACHE_PREFETCH( &other_threads[ i+1 ] -> th.th_bar[ bt ].bb.b_arrived );
1224 KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
1225 "arrived(%p) == %u\n",
1226 gtid, team->t.t_id, tid,
1227 __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
1228 &other_threads[i] -> th.th_bar[ bt ].bb.b_arrived,
1232 __kmp_wait_sleep( this_thr,
1233 & other_threads[ i ] -> th.th_bar[ bt ].bb.b_arrived,
1235 USE_ITT_BUILD_ARG( itt_sync_obj )
1240 KA_TRACE( 100, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
1241 gtid, team->t.t_id, tid,
1242 __kmp_gtid_from_tid( i, team ), team->t.t_id, i ) );
1244 (*reduce)( this_thr -> th.th_local.reduce_data,
1245 other_threads[ i ] -> th.th_local.reduce_data );
1252 team_bar -> b_arrived = new_state;
1253 KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "
1254 "arrived(%p) = %u\n",
1255 gtid, team->t.t_id, tid, team->t.t_id,
1256 &team_bar -> b_arrived, new_state ) );
1259 KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
1260 gtid, team->t.t_id, tid, bt ) );
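/*
 * Illustrative sketch (hypothetical names, not part of the runtime) of what
 * the master does in __kmp_linear_barrier_gather() above when a reduction is
 * attached: wait for each worker's b_arrived flag to reach the new epoch,
 * then fold that worker's reduce_data into its own before publishing the
 * team-wide arrived state.
 */
#if 0
static void __kmp_linear_gather_sketch( int nproc, kmp_uint new_state,
                                        volatile kmp_uint **arrived,
                                        void **reduce_data,
                                        void (*reduce)( void *, void * ) )
{
    int i;
    for ( i = 1; i < nproc; ++i ) {
        while ( TCR_4( *arrived[ i ] ) != new_state ) {
            KMP_YIELD( TRUE );                        /* wait for worker i   */
        }
        if ( reduce != NULL )
            (*reduce)( reduce_data[ 0 ], reduce_data[ i ] );  /* combine     */
    }
}
#endif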
1265 __kmp_tree_barrier_gather( enum barrier_type bt,
1266     kmp_info_t *this_thr,
1269     void (*reduce) ( void *, void * )
1270     USE_ITT_BUILD_ARG( void * itt_sync_obj )
1273 register kmp_team_t *team = this_thr -> th.th_team;
1274 register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
1275 register kmp_info_t **other_threads = team -> t.t_threads;
1276 register kmp_uint32 nproc = this_thr -> th.th_team_nproc;
1277 register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[ bt ];
1278 register kmp_uint32 branch_factor = 1 << branch_bits ;
1279 register kmp_uint32 child;
1280 register kmp_uint32 child_tid;
1281 register kmp_uint new_state;
1283 KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
1284 gtid, team->t.t_id, tid, bt ) );
1286 KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
1294 child_tid = (tid << branch_bits) + 1;
1296 if ( child_tid < nproc ) {
1299 new_state = team -> t.t_bar[ bt ].b_arrived + KMP_BARRIER_STATE_BUMP;
1303 register kmp_info_t *child_thr = other_threads[ child_tid ];
1304 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
1305 #if KMP_CACHE_MANAGE
1307 if ( child+1 <= branch_factor && child_tid+1 < nproc )
1308 KMP_CACHE_PREFETCH( &other_threads[ child_tid+1 ] -> th.th_bar[ bt ].bb.b_arrived );
1310 KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
1311 "arrived(%p) == %u\n",
1312 gtid, team->t.t_id, tid,
1313 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id, child_tid,
1314 &child_bar -> b_arrived, new_state ) );
1317 __kmp_wait_sleep( this_thr, &child_bar -> b_arrived, new_state, FALSE
1318 USE_ITT_BUILD_ARG( itt_sync_obj)
1323 KA_TRACE( 100, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
1324 gtid, team->t.t_id, tid,
1325 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
1328 (*reduce)( this_thr -> th.th_local.reduce_data,
1329 child_thr -> th.th_local.reduce_data );
1336 while ( child <= branch_factor && child_tid < nproc );
1339 if ( !KMP_MASTER_TID(tid) ) {
1341 register kmp_int32 parent_tid = (tid - 1) >> branch_bits;
1343 KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
1344 "arrived(%p): %u => %u\n",
1345 gtid, team->t.t_id, tid,
1346 __kmp_gtid_from_tid( parent_tid, team ), team->t.t_id, parent_tid,
1347 &thr_bar -> b_arrived, thr_bar -> b_arrived,
1348 thr_bar -> b_arrived + KMP_BARRIER_STATE_BUMP
1357 __kmp_release( other_threads[parent_tid], &thr_bar -> b_arrived, kmp_release_fence );
1364 team -> t.t_bar[ bt ].b_arrived = new_state;
1366 team -> t.t_bar[ bt ].b_arrived += KMP_BARRIER_STATE_BUMP;
1368 KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %u\n",
1369 gtid, team->t.t_id, tid, team->t.t_id,
1370 &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived ) );
1373 KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
1374 gtid, team->t.t_id, tid, bt ) );
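/*
 * Illustrative sketch (not part of the runtime) of the index arithmetic the
 * tree barrier above relies on.  With branch_bits B (branch factor 2^B),
 * thread tid's children are (tid << B) + 1 .. (tid << B) + 2^B, and its
 * parent is (tid - 1) >> B; tid 0 is the root and has no parent.  These are
 * the relations used by __kmp_tree_barrier_gather()/_release().
 */
#if 0
static kmp_uint32 __kmp_tree_first_child_sketch( kmp_uint32 tid,
                                                 kmp_uint32 branch_bits )
{
    return ( tid << branch_bits ) + 1;        /* first child in the tree    */
}

static kmp_int32 __kmp_tree_parent_sketch( kmp_uint32 tid,
                                           kmp_uint32 branch_bits )
{
    /* negative result for tid 0 means "no parent" (root) */
    return ( (kmp_int32) tid - 1 ) >> branch_bits;
}
#endif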
1379 __kmp_hyper_barrier_gather( enum barrier_type bt,
1380     kmp_info_t *this_thr,
1383     void (*reduce) ( void *, void * )
1384     USE_ITT_BUILD_ARG ( void * itt_sync_obj)
1387 register kmp_team_t *team = this_thr -> th.th_team;
1388 register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
1389 register kmp_info_t **other_threads = team -> t.t_threads;
1390 register kmp_uint new_state = KMP_BARRIER_UNUSED_STATE;
1391 register kmp_uint32 num_threads = this_thr -> th.th_team_nproc;
1392 register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[ bt ];
1393 register kmp_uint32 branch_factor = 1 << branch_bits ;
1394 register kmp_uint32 offset;
1395 register kmp_uint32 level;
1397 KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
1398 gtid, team->t.t_id, tid, bt ) );
1400 KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
1408 for ( level=0, offset =1;
1409 offset < num_threads;
1410 level += branch_bits, offset <<= branch_bits )
1412 register kmp_uint32 child;
1413 register kmp_uint32 child_tid;
1415 if ( ((tid >> level) & (branch_factor - 1)) != 0 ) {
1416 register kmp_int32 parent_tid = tid & ~( (1 << (level + branch_bits)) -1 );
1418 KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
1419 "arrived(%p): %u => %u\n",
1420 gtid, team->t.t_id, tid,
1421 __kmp_gtid_from_tid( parent_tid, team ), team->t.t_id, parent_tid,
1422 &thr_bar -> b_arrived, thr_bar -> b_arrived,
1423 thr_bar -> b_arrived + KMP_BARRIER_STATE_BUMP
1433 __kmp_release( other_threads[parent_tid], &thr_bar -> b_arrived, kmp_release_fence );
1439 for ( child = 1, child_tid = tid + (1 << level);
1440 child < branch_factor && child_tid < num_threads;
1441 child++, child_tid += (1 << level) )
1443 register kmp_info_t *child_thr = other_threads[ child_tid ];
1444 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
1445 #if KMP_CACHE_MANAGE
1446 register kmp_uint32 next_child_tid = child_tid + (1 << level);
1448 if ( child+1 < branch_factor && next_child_tid < num_threads )
1449 KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ] -> th.th_bar[ bt ].bb.b_arrived );
1452 if (new_state == KMP_BARRIER_UNUSED_STATE)
1453 new_state = team -> t.t_bar[ bt ].b_arrived + KMP_BARRIER_STATE_BUMP;
1455 KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
1456 "arrived(%p) == %u\n",
1457 gtid, team->t.t_id, tid,
1458 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id, child_tid,
1459 &child_bar -> b_arrived, new_state ) );
1462 __kmp_wait_sleep( this_thr, &child_bar -> b_arrived, new_state, FALSE
1463 USE_ITT_BUILD_ARG (itt_sync_obj)
1468 KA_TRACE( 100, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
1469 gtid, team->t.t_id, tid,
1470 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
1473 (*reduce)( this_thr -> th.th_local.reduce_data,
1474 child_thr -> th.th_local.reduce_data );
1481 if ( KMP_MASTER_TID(tid) ) {
1484 if (new_state == KMP_BARRIER_UNUSED_STATE)
1485 team -> t.t_bar[ bt ].b_arrived += KMP_BARRIER_STATE_BUMP;
1487 team -> t.t_bar[ bt ].b_arrived = new_state;
1489 KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %u\n",
1490 gtid, team->t.t_id, tid, team->t.t_id,
1491 &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived ) );
1494 KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
1495 gtid, team->t.t_id, tid, bt ) );
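/*
 * Illustrative sketch (not part of the runtime) of the hypercube pairing used
 * by __kmp_hyper_barrier_gather() above.  At each round the stride grows by
 * the branch factor (level += branch_bits, offset = 1 << level).  A thread
 * whose digit ((tid >> level) & (branch_factor - 1)) is nonzero reports to
 * the parent obtained by clearing all bits below level + branch_bits;
 * otherwise it collects from children at tid + (1 << level),
 * tid + 2*(1 << level), and so on.
 */
#if 0
static kmp_int32 __kmp_hyper_parent_sketch( kmp_uint32 tid,
                                            kmp_uint32 level,
                                            kmp_uint32 branch_bits )
{
    /* clear the digits at and below this level to find the parent tid */
    return (kmp_int32)( tid & ~( ( 1 << ( level + branch_bits ) ) - 1 ) );
}
#endif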
1500 __kmp_linear_barrier_release( enum barrier_type bt,
1501     kmp_info_t *this_thr,
1505     USE_ITT_BUILD_ARG( void * itt_sync_obj)
1508 register kmp_bstate_t *thr_bar = &this_thr -> th.th_bar[ bt ].bb;
1509 register kmp_team_t *team;
1511 if (KMP_MASTER_TID( tid )) {
1512 register unsigned int i;
1513 register kmp_uint32 nproc = this_thr -> th.th_team_nproc;
1514 register kmp_info_t **other_threads;
1516 team = __kmp_threads[ gtid ]-> th.th_team;
1517 KMP_DEBUG_ASSERT( team != NULL );
1518 other_threads = team -> t.t_threads;
1520 KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
1521 gtid, team->t.t_id, tid, bt ) );
1524 #if KMP_BARRIER_ICV_PUSH
1525 if ( propagate_icvs ) {
1526 load_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs);
1527 for (i = 1; i < nproc; i++) {
1528 __kmp_init_implicit_task( team->t.t_ident,
1529 team->t.t_threads[i], team, i, FALSE );
1530 store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs, &team->t.t_implicit_task_taskdata[0].td_icvs);
1534 #endif // KMP_BARRIER_ICV_PUSH
1537 for (i = 1; i < nproc; i++) {
1538 #if KMP_CACHE_MANAGE
1541 KMP_CACHE_PREFETCH( &other_threads[ i+1 ]-> th.th_bar[ bt ].bb.b_go );
1543 KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
1544 "go(%p): %u => %u\n",
1545 gtid, team->t.t_id, tid,
1546 other_threads[i]->th.th_info.ds.ds_gtid, team->t.t_id, i,
1547 &other_threads[i]->th.th_bar[bt].bb.b_go,
1548 other_threads[i]->th.th_bar[bt].bb.b_go,
1549 other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP
1552 __kmp_release( other_threads[ i ],
1553 &other_threads[ i ]-> th.th_bar[ bt ].bb.b_go, kmp_acquire_fence );
1559 KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
1560 gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
1562 __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE
1563 USE_ITT_BUILD_ARG(itt_sync_obj)
1566 #if USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY
1567 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1569 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 0, -1 );
1571 __kmp_itt_task_starting( itt_sync_obj );
1573 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1576 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1577 if ( itt_sync_obj != NULL )
1578 __kmp_itt_task_finished( itt_sync_obj );
1585 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1591 #if USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY
1593 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1595 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1596 __kmp_itt_barrier_starting( gtid, itt_sync_obj );
1600 tid = __kmp_tid_from_gtid( gtid );
1601 team = __kmp_threads[ gtid ]-> th.th_team;
1603 KMP_DEBUG_ASSERT( team != NULL );
1605 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
1606 KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1607 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
1612 KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
1613 gtid, team->t.t_id, tid, bt ) );
1618 __kmp_tree_barrier_release( enum barrier_type bt,
1619     kmp_info_t *this_thr,
1623     USE_ITT_BUILD_ARG( void * itt_sync_obj)
1627 register kmp_team_t *team;
1628 register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
1629 register kmp_uint32 nproc;
1630 register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[ bt ];
1631 register kmp_uint32 branch_factor = 1 << branch_bits ;
1632 register kmp_uint32 child;
1633 register kmp_uint32 child_tid;
1640 if ( ! KMP_MASTER_TID( tid )) {
1643 KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n",
1644 gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
1647 __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE
1648 USE_ITT_BUILD_ARG(itt_sync_obj)
1651 #if USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY
1652 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1654 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 0, -1 );
1656 __kmp_itt_task_starting( itt_sync_obj );
1658 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1661 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1662 if ( itt_sync_obj != NULL )
1663 __kmp_itt_task_finished( itt_sync_obj );
1670 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1676 #if USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY
1678 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1680 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1681 __kmp_itt_barrier_starting( gtid, itt_sync_obj );
1684 team = __kmp_threads[ gtid ]-> th.th_team;
1685 KMP_DEBUG_ASSERT( team != NULL );
1686 tid = __kmp_tid_from_gtid( gtid );
1688 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
1689 KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1690 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
1695 team = __kmp_threads[ gtid ]-> th.th_team;
1696 KMP_DEBUG_ASSERT( team != NULL );
1698 KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
1699 gtid, team->t.t_id, tid, bt ) );
1702 nproc = this_thr -> th.th_team_nproc;
1703 child_tid = ( tid << branch_bits ) + 1;
1705 if ( child_tid < nproc ) {
1706 register kmp_info_t **other_threads = team -> t.t_threads;
1711 register kmp_info_t *child_thr = other_threads[ child_tid ];
1712 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
1713 #if KMP_CACHE_MANAGE
1715 if ( child+1 <= branch_factor && child_tid+1 < nproc )
1716 KMP_CACHE_PREFETCH( &other_threads[ child_tid+1 ] -> th.th_bar[ bt ].bb.b_go );
1719 #if KMP_BARRIER_ICV_PUSH
1720 if ( propagate_icvs ) {
1721 __kmp_init_implicit_task( team->t.t_ident,
1722 team->t.t_threads[child_tid], team, child_tid, FALSE );
1723 load_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs);
1724 store_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs, &team->t.t_implicit_task_taskdata[0].td_icvs);
1727 #endif // KMP_BARRIER_ICV_PUSH
1729 KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
1730 "go(%p): %u => %u\n",
1731 gtid, team->t.t_id, tid,
1732 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
1733 child_tid, &child_bar -> b_go, child_bar -> b_go,
1734 child_bar -> b_go + KMP_BARRIER_STATE_BUMP ) );
1737 __kmp_release( child_thr, &child_bar -> b_go, kmp_acquire_fence );
1742 while ( child <= branch_factor && child_tid < nproc );
1745 KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
1746 gtid, team->t.t_id, tid, bt ) );
1750 #define KMP_REVERSE_HYPER_BAR
1751 #ifdef KMP_REVERSE_HYPER_BAR
1753 __kmp_hyper_barrier_release( enum barrier_type bt,
1754     kmp_info_t *this_thr,
1758     USE_ITT_BUILD_ARG( void * itt_sync_obj)
1762 register kmp_team_t *team;
1763 register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
1764 register kmp_info_t **other_threads;
1765 register kmp_uint32 num_threads;
1766 register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[ bt ];
1767 register kmp_uint32 branch_factor = 1 << branch_bits;
1768 register kmp_uint32 child;
1769 register kmp_uint32 child_tid;
1770 register kmp_uint32 offset;
1771 register kmp_uint32 level;
1779 if ( ! KMP_MASTER_TID( tid )) {
1782 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
1783 gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
1786 __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE
1787 USE_ITT_BUILD_ARG( itt_sync_obj )
1790 #if USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY
1791 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1793 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 0, -1 );
1795 __kmp_itt_task_starting( itt_sync_obj );
1797 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1800 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1801 if ( itt_sync_obj != NULL )
1802 __kmp_itt_task_finished( itt_sync_obj );
1809 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1815 #if USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY
1817 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1819 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1820 __kmp_itt_barrier_starting( gtid, itt_sync_obj );
1823 team = __kmp_threads[ gtid ]-> th.th_team;
1824 KMP_DEBUG_ASSERT( team != NULL );
1825 tid = __kmp_tid_from_gtid( gtid );
1827 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
1828 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1829 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
1834 team = __kmp_threads[ gtid ]-> th.th_team;
1835 KMP_DEBUG_ASSERT( team != NULL );
1837 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
1838 gtid, team->t.t_id, tid, bt ) );
1841 num_threads = this_thr -> th.th_team_nproc;
1842 other_threads = team -> t.t_threads;
1845 for ( level = 0, offset = 1;
1846 offset < num_threads && (((tid >> level) & (branch_factor-1)) == 0);
1847 level += branch_bits, offset <<= branch_bits );
1850 for ( level -= branch_bits, offset >>= branch_bits;
1852 level -= branch_bits, offset >>= branch_bits )
1856 child = num_threads >> ((level==0)?level:level-1);
1857 for ( child = (child < branch_factor-1) ? child : branch_factor-1,
1858 child_tid = tid + (child << level);
1860 child--, child_tid -= (1 << level) )
1863 if ( child_tid >= num_threads ) continue;
1865 register kmp_info_t *child_thr = other_threads[ child_tid ];
1866 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
1867 #if KMP_CACHE_MANAGE
1868 register kmp_uint32 next_child_tid = child_tid - (1 << level);
1870 if ( child-1 >= 1 && next_child_tid < num_threads )
1871 KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ]->th.th_bar[ bt ].bb.b_go );
1874 #if KMP_BARRIER_ICV_PUSH
1875 if ( propagate_icvs ) {
1876 KMP_DEBUG_ASSERT( team != NULL );
1877 __kmp_init_implicit_task( team->t.t_ident,
1878 team->t.t_threads[child_tid], team, child_tid, FALSE );
1879 load_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs);
1880 store_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs, &team->t.t_implicit_task_taskdata[0].td_icvs);
1883 #endif // KMP_BARRIER_ICV_PUSH
1885 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
1886 "go(%p): %u => %u\n",
1887 gtid, team->t.t_id, tid,
1888 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
1889 child_tid, &child_bar -> b_go, child_bar -> b_go,
1890 child_bar -> b_go + KMP_BARRIER_STATE_BUMP ) );
1893 __kmp_release( child_thr, &child_bar -> b_go, kmp_acquire_fence );
1898 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
1899 gtid, team->t.t_id, tid, bt ) );
1905 __kmp_hyper_barrier_release( enum barrier_type bt, kmp_info_t *this_thr,
    int gtid, int tid, int propagate_icvs )
1908 register kmp_team_t *team;
1909 register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
1910 register kmp_info_t **other_threads;
1911 register kmp_uint32 num_threads;
1912 register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[ bt ];
1913 register kmp_uint32 branch_factor = 1 << branch_bits;
1914 register kmp_uint32 child;
1915 register kmp_uint32 child_tid;
1916 register kmp_uint32 offset;
1917 register kmp_uint32 level;
1925 if ( ! KMP_MASTER_TID( tid )) {
1928 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
1929 gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
1932 __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE, NULL );
1934 #if USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY
1935 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1937 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 0, -1 );
1939 __kmp_itt_task_starting( itt_sync_obj );
1941 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1944 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1945 if ( itt_sync_obj != NULL )
1946 __kmp_itt_task_finished( itt_sync_obj );
1953 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1959 #if USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY
1961 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1963 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1964 __kmp_itt_barrier_starting( gtid, itt_sync_obj );
1967 team = __kmp_threads[ gtid ]-> th.th_team;
1968 KMP_DEBUG_ASSERT( team != NULL );
1969 tid = __kmp_tid_from_gtid( gtid );
1971 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
1972 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1973 gtid, ( team != NULL ) ? team->t.t_id : -1, tid,
1974 &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
1979 team = __kmp_threads[ gtid ]-> th.th_team;
1980 KMP_DEBUG_ASSERT( team != NULL );
1982 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) enter for barrier type %d\n",
1983 gtid, team->t.t_id, tid, bt ) );
1987 if ( team == NULL ) {
1989 tid = __kmp_tid_from_gtid( gtid );
1990 team = __kmp_threads[ gtid ]-> th.th_team;
1992 num_threads = this_thr -> th.th_team_nproc;
1993 other_threads = team -> t.t_threads;
1996 for ( level = 0, offset = 1;
1997 offset < num_threads;
1998 level += branch_bits, offset <<= branch_bits )
2000 if (((tid >> level) & (branch_factor - 1)) != 0)
2006 for ( child = 1, child_tid = tid + (1 << level);
2007 child < branch_factor && child_tid < num_threads;
2008 child++, child_tid += (1 << level) )
2010 register kmp_info_t *child_thr = other_threads[ child_tid ];
2011 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
2012 #if KMP_CACHE_MANAGE
2014 register kmp_uint32 next_child_tid = child_tid + (1 << level);
2016 if ( child+1 < branch_factor && next_child_tid < num_threads )
2017 KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ]->th.th_bar[ bt ].bb.b_go );
2021 #if KMP_BARRIER_ICV_PUSH
2022 if ( propagate_icvs ) {
2023 KMP_DEBUG_ASSERT( team != NULL );
2024 __kmp_init_implicit_task( team->t.t_ident,
2025 team->t.t_threads[child_tid], team, child_tid, FALSE );
2026 load_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs);
2027 store_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs, &team->t.t_implicit_task_taskdata[0].td_icvs);
2030 #endif // KMP_BARRIER_ICV_PUSH
2032 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) releasing "
2033 "T#%d(%d:%u) go(%p): %u => %u\n",
2034 gtid, team->t.t_id, tid,
2035 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
2036 child_tid, &child_bar -> b_go, child_bar -> b_go,
2037 child_bar -> b_go + KMP_BARRIER_STATE_BUMP ) );
2040 __kmp_release( child_thr, &child_bar -> b_go, kmp_acquire_fence );
2044 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
2045 gtid, team->t.t_id, tid, bt ) );
2057 __kmp_barrier( enum barrier_type bt, int gtid, int is_split,
2058     size_t reduce_size, void *reduce_data, void (*reduce)( void *, void * ) )
2060 register int tid = __kmp_tid_from_gtid( gtid );
2061 register kmp_info_t *this_thr = __kmp_threads[ gtid ];
2062 register kmp_team_t *team = this_thr -> th.th_team;
2063 register int status = 0;
2065 KA_TRACE( 15, ( "__kmp_barrier: T#%d(%d:%d) has arrived\n",
2066 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid) ) );
2068 if ( ! team->t.t_serialized ) {
2071 void * itt_sync_obj = NULL;
2073 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
2074 itt_sync_obj = __kmp_itt_barrier_object( gtid, bt, 1 );
2078 if ( __kmp_tasking_mode == tskm_extra_barrier ) {
2079 __kmp_tasking_barrier( team, this_thr, gtid );
2080 KA_TRACE( 15, ( "__kmp_barrier: T#%d(%d:%d) past tasking barrier\n",
2081 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid) ) );
2092 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
2094 this_thr -> th.th_team_bt_intervals = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
2095 this_thr -> th.th_team_bt_set = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
2097 this_thr -> th.th_team_bt_intervals = team -> t.t_set_bt_intervals[tid];
2098 this_thr -> th.th_team_bt_set= team -> t.t_set_bt_set[tid];
2099 #endif // OMP_30_ENABLED
2103 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
2104 __kmp_itt_barrier_starting( gtid, itt_sync_obj );
2107 if ( reduce != NULL ) {
2109 this_thr -> th.th_local.reduce_data = reduce_data;
2111 if ( __kmp_barrier_gather_pattern[ bt ] == bp_linear_bar || __kmp_barrier_gather_branch_bits[ bt ] == 0 ) {
2112 __kmp_linear_barrier_gather( bt, this_thr, gtid, tid, reduce
2113 USE_ITT_BUILD_ARG( itt_sync_obj )
2115 } else if ( __kmp_barrier_gather_pattern[ bt ] == bp_tree_bar ) {
2116 __kmp_tree_barrier_gather( bt, this_thr, gtid, tid, reduce
2117 USE_ITT_BUILD_ARG( itt_sync_obj )
2120 __kmp_hyper_barrier_gather( bt, this_thr, gtid, tid, reduce
2121 USE_ITT_BUILD_ARG( itt_sync_obj )
2129 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
2130 __kmp_itt_barrier_middle( gtid, itt_sync_obj );
2135 if ( KMP_MASTER_TID( tid ) ) {
2139 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2140 __kmp_task_team_wait( this_thr, team
2141 USE_ITT_BUILD_ARG( itt_sync_obj )
2143 __kmp_task_team_setup( this_thr, team );
2151 if( __kmp_forkjoin_frames_mode == 1 && __kmp_itt_csv_file )
2153 ident_t * loc = this_thr->th.th_ident;
2157 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 );
2160 #if defined( __GNUC__ )
2161 # if !defined( __INTEL_COMPILER )
2162 fr_end = __kmp_hardware_timestamp();
2169 K_DIAG( 3, ( "__kmp_barrier: T#%d(%d:%d) frame_begin = %llu, frame_end = %llu\n",
2170 gtid, ( team != NULL ) ? team->t.t_id : -1, tid, this_thr->th.th_frame_time, fr_end ) );
2172 __kmp_str_buf_print( &__kmp_itt_frame_buffer, "%s$omp$frame@%s:%d:%d,%llu,%llu,,\n",
2173 str_loc.func, str_loc.file, str_loc.line, str_loc.col, this_thr->th.th_frame_time, fr_end );
2174 __kmp_str_loc_free( &str_loc );
2175 this_thr->th.th_frame_time = fr_end;
2182 if ( status == 1 || ! is_split ) {
2183 if ( __kmp_barrier_release_pattern[ bt ] == bp_linear_bar || __kmp_barrier_release_branch_bits[ bt ] == 0 ) {
2184 __kmp_linear_barrier_release( bt, this_thr, gtid, tid, FALSE
2185 USE_ITT_BUILD_ARG( itt_sync_obj )
2187 } else if ( __kmp_barrier_release_pattern[ bt ] == bp_tree_bar ) {
2188 __kmp_tree_barrier_release( bt, this_thr, gtid, tid, FALSE
2189 USE_ITT_BUILD_ARG( itt_sync_obj )
2192 __kmp_hyper_barrier_release( bt, this_thr, gtid, tid, FALSE
2193 USE_ITT_BUILD_ARG( itt_sync_obj )
2197 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2198 __kmp_task_team_sync( this_thr, team );
2206 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
2207 __kmp_itt_barrier_finished( gtid, itt_sync_obj );
2215 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2220 KMP_DEBUG_ASSERT( team->t.t_task_team == NULL );
2221 KMP_DEBUG_ASSERT( this_thr->th.th_task_team == NULL );
2226 KA_TRACE( 15, ( "__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
2227 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid),
2234 __kmp_end_split_barrier( enum barrier_type bt, int gtid )
2236 int tid = __kmp_tid_from_gtid( gtid );
2237 kmp_info_t *this_thr = __kmp_threads[ gtid ];
2238 kmp_team_t *team = this_thr -> th.th_team;
2240 if( ! team -> t.t_serialized ) {
2241 if( KMP_MASTER_GTID( gtid ) ) {
2242 if ( __kmp_barrier_release_pattern[ bt ] == bp_linear_bar || __kmp_barrier_release_branch_bits[ bt ] == 0 ) {
2243 __kmp_linear_barrier_release( bt, this_thr, gtid, tid, FALSE
2248 } else if ( __kmp_barrier_release_pattern[ bt ] == bp_tree_bar ) {
2249 __kmp_tree_barrier_release( bt, this_thr, gtid, tid, FALSE
2255 __kmp_hyper_barrier_release( bt, this_thr, gtid, tid, FALSE
2262 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2263 __kmp_task_team_sync( this_thr, team );
2282 __kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
2283     int master_tid, int set_nthreads
2291 int use_rml_to_adjust_nth;
2292 KMP_DEBUG_ASSERT( __kmp_init_serial );
2293 KMP_DEBUG_ASSERT( root && parent_team );
2298 if ( set_nthreads == 1 ) {
2299 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d reserving 1 thread; requested %d threads\n",
2300 __kmp_get_gtid(), set_nthreads ));
2303 if ( ( !get__nested_2(parent_team,master_tid) && (root->r.r_in_parallel
2307 ) ) || ( __kmp_library == library_serial ) ) {
2308 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team; requested %d threads\n",
2309 __kmp_get_gtid(), set_nthreads ));
2317 new_nthreads = set_nthreads;
2318 use_rml_to_adjust_nth = FALSE;
2319 if ( ! get__dynamic_2( parent_team, master_tid ) ) {
2322 #ifdef USE_LOAD_BALANCE
2323 else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
2324 new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
2325 if ( new_nthreads == 1 ) {
2326 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
2330 if ( new_nthreads < set_nthreads ) {
2331 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
2332 master_tid, new_nthreads ));
2336 else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
2337 new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
2338 : root->r.r_hot_team->t.t_nproc);
2339 if ( new_nthreads <= 1 ) {
2340 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
2344 if ( new_nthreads < set_nthreads ) {
2345 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
2346 master_tid, new_nthreads ));
2349 new_nthreads = set_nthreads;
2352 else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
2353 if ( set_nthreads > 2 ) {
2354 new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
2355 new_nthreads = ( new_nthreads % set_nthreads ) + 1;
2356 if ( new_nthreads == 1 ) {
2357 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
2361 if ( new_nthreads < set_nthreads ) {
2362 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
2363 master_tid, new_nthreads ));
2374 if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
2375 root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
2376 int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
2377 root->r.r_hot_team->t.t_nproc );
2378 if ( tl_nthreads <= 0 ) {
2385 if ( ! get__dynamic_2( parent_team, master_tid )
2386 && ( ! __kmp_reserve_warn ) ) {
2387 __kmp_reserve_warn = 1;
2390 KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
2391 KMP_HNT( Unset_ALL_THREADS ),
2395 if ( tl_nthreads == 1 ) {
2396 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
2400 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
2401 master_tid, tl_nthreads ));
2402 new_nthreads = tl_nthreads;
2412 capacity = __kmp_threads_capacity;
2413 if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
2416 if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
2417 root->r.r_hot_team->t.t_nproc ) > capacity ) {
2421 int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
2422 root->r.r_hot_team->t.t_nproc ) - capacity;
2423 int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
2424 if ( slotsAdded < slotsRequired ) {
2428 new_nthreads -= ( slotsRequired - slotsAdded );
2429 KMP_ASSERT( new_nthreads >= 1 );
2434 if ( ! get__dynamic_2( parent_team, master_tid )
2435 && ( ! __kmp_reserve_warn ) ) {
2436 __kmp_reserve_warn = 1;
2437 if ( __kmp_tp_cached ) {
2440 KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
2441 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
2442 KMP_HNT( PossibleSystemLimitOnThreads ),
2449 KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
2450 KMP_HNT( SystemLimitOnThreads ),
2458 if ( new_nthreads == 1 ) {
2459 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
2460 __kmp_get_gtid(), set_nthreads ) );
2464 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
2465 __kmp_get_gtid(), new_nthreads, set_nthreads ));
2466 return new_nthreads;
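/*
 * Illustrative sketch (hypothetical helper, not part of the runtime) of the
 * clamping order __kmp_reserve_threads() above applies to a request: start
 * from the requested width, let the active dynamic mode (load balance,
 * thread limit, or random) shrink it, then cap it against KMP_ALL_THREADS
 * (__kmp_max_nth) and the remaining __kmp_threads[] capacity.
 */
#if 0
static int __kmp_clamp_nthreads_sketch( int requested, int dyn_limit,
                                        int max_nth_room, int capacity_room )
{
    int n = requested;
    if ( dyn_limit < n )      n = dyn_limit;       /* dynamic adjustment     */
    if ( max_nth_room < n )   n = max_nth_room;    /* KMP_ALL_THREADS cap    */
    if ( capacity_room < n )  n = capacity_room;   /* thread array capacity  */
    return ( n < 1 ) ? 1 : n;                      /* always at least master */
}
#endif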
2477 __kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
2478     kmp_info_t *master_th, int master_gtid )
2482 KA_TRACE( 10, ( "__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
2483 KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
2487 master_th -> th.th_info .ds.ds_tid = 0;
2488 master_th -> th.th_team = team;
2489 master_th -> th.th_team_nproc = team -> t.t_nproc;
2490 master_th -> th.th_team_master = master_th;
2491 master_th -> th.th_team_serialized = FALSE;
2492 master_th -> th.th_dispatch = & team -> t.t_dispatch[ 0 ];
2495 if ( team != root->r.r_hot_team ) {
2498 team -> t.t_threads[ 0 ] = master_th;
2499 __kmp_initialize_info( master_th, team, 0, master_gtid );
2502 for ( i=1 ; i < team->t.t_nproc ; i++ ) {
2505 team -> t.t_threads[ i ] = __kmp_allocate_thread( root, team, i );
2506 KMP_DEBUG_ASSERT( team->t.t_threads[i] );
2507 KMP_DEBUG_ASSERT( team->t.t_threads[i]->th.th_team == team );
2509 KA_TRACE( 20, ( "__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%u, plain=%u\n",
2510 __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
2511 __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
2512 team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
2513 team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
2517 kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
2518 for ( b = 0; b < bs_last_barrier; ++ b ) {
2519 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
2524 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
2525 __kmp_partition_places( team );
2534 __kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc );
2546 microtask_t microtask,
2549 #if KMP_ARCH_X86_64 && KMP_OS_LINUX
2559 int master_this_cons;
2560 int master_last_cons;
2562 kmp_team_t *parent_team;
2563 kmp_info_t *master_th;
2567 int master_set_numthreads;
2573 KA_TRACE( 20, ( "__kmp_fork_call: enter T#%d\n", gtid ));
2576 KMP_DEBUG_ASSERT( __kmp_init_serial );
2577 if( ! TCR_4(__kmp_init_parallel) )
2578 __kmp_parallel_initialize();
2581 master_th = __kmp_threads[ gtid ];
2582 parent_team = master_th -> th.th_team;
2583 master_tid = master_th -> th.th_info.ds.ds_tid;
2584 master_this_cons = master_th -> th.th_local.this_construct;
2585 master_last_cons = master_th -> th.th_local.last_construct;
2586 root = master_th -> th.th_root;
2587 master_active = root -> r.r_active;
2588 master_set_numthreads = master_th -> th.th_set_nproc;
2591 level = parent_team->t.t_level;
2592 #endif // OMP_30_ENABLED
2594 teams_level = master_th->th.th_teams_level;
2599 master_th->th.th_ident = loc;
2602 if ( master_th->th.th_team_microtask &&
2603 ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
2607 parent_team->t.t_ident = loc;
2608 parent_team->t.t_argc = argc;
2609 argv = (void**)parent_team->t.t_argv;
2610 for( i=argc-1; i >= 0; --i )
2612 #if KMP_ARCH_X86_64 && KMP_OS_LINUX
2613 *argv++ = va_arg( *ap, void * );
2615 *argv++ = va_arg( ap, void * );
2618 if ( parent_team == master_th->th.th_serial_team ) {
2621 KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
2622 parent_team->t.t_serialized--;
2624 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv );
2627 parent_team->t.t_pkfn = microtask;
2628 parent_team->t.t_invoke = invoker;
2629 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
2630 parent_team->t.t_active_level ++;
2631 parent_team->t.t_level ++;
2634 if ( master_set_numthreads ) {
2635 if ( master_set_numthreads < master_th->th.th_set_nth_teams ) {
2637 kmp_info_t **other_threads = parent_team->t.t_threads;
2638 parent_team->t.t_nproc = master_set_numthreads;
2639 for ( i = 0; i < master_set_numthreads; ++i ) {
2640 other_threads[i]->th.th_team_nproc = master_set_numthreads;
2644 master_th->th.th_set_nproc = 0;
        KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n",
                        root, parent_team, master_th, gtid ) );
        __kmp_internal_fork( loc, gtid, parent_team );
        KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n",
                        root, parent_team, master_th, gtid ) );

        KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
                        gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );

        if ( ! parent_team->t.t_invoke( gtid ) ) {
            KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
        KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
                        gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );

        KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ) );

#if OMP_30_ENABLED && KMP_DEBUG
    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team );
#endif // OMP_30_ENABLED

    __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

    if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
#endif // OMP_30_ENABLED
    nthreads = master_set_numthreads ?
        master_set_numthreads : get__nproc_2( parent_team, master_tid );
    nthreads = __kmp_reserve_threads( root, parent_team, master_tid, nthreads
                                      , ( ( ap == NULL && teams_level == 0 ) ||
                                          ( ap && teams_level > 0 && teams_level == level ) )
    KMP_DEBUG_ASSERT( nthreads > 0 );

    master_th -> th.th_set_nproc = 0;

    if ( nthreads == 1 ) {
#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
        void * args[ argc ];
        void * * args = (void**) alloca( argc * sizeof( void * ) );

        __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
        KA_TRACE( 20, ( "__kmp_fork_call: T#%d serializing parallel region\n", gtid ) );

        if ( exec_master == 0 ) {
            KA_TRACE( 20, ( "__kmp_fork_call: T#%d serial exit\n", gtid ) );
        }
        else if ( exec_master == 1 ) {
            master_th -> th.th_serial_team -> t.t_ident = loc;
            master_th -> th.th_serial_team -> t.t_level--;
            __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv );
        }
        else if ( microtask == (microtask_t)__kmp_teams_master ) {
            KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
            team = master_th->th.th_team;
            team->t.t_invoke = invoker;
            __kmp_alloc_argv_entries( argc, team, TRUE );
            team->t.t_argc = argc;
            argv = (void**) team->t.t_argv;
            for ( i = argc-1; i >= 0; --i )
#if KMP_ARCH_X86_64 && KMP_OS_LINUX
                *argv++ = va_arg( *ap, void * );
                *argv++ = va_arg( ap, void * );
            for ( i = 0; i < argc; ++i )
                argv[i] = parent_team->t.t_argv[i];

            for ( i = argc-1; i >= 0; --i )
#if KMP_ARCH_X86_64 && KMP_OS_LINUX
                *argv++ = va_arg( *ap, void * );
                *argv++ = va_arg( ap, void * );
            __kmp_invoke_microtask( microtask, gtid, 0, argc, args );

        KMP_ASSERT2( exec_master <= 1, "__kmp_fork_call: unknown parameter exec_master" );

        KA_TRACE( 20, ( "__kmp_fork_call: T#%d serial exit\n", gtid ) );
    KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
                    parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
                    master_th->th.th_current_task->td_icvs.max_active_levels ) );
    master_th->th.th_current_task->td_flags.executing = 0;

    if ( !master_th->th.th_team_microtask || level > teams_level )
        KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );

    int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
    if ( ( level + 1 < __kmp_nested_nth.used ) &&
         ( __kmp_nested_nth.nth[level + 1] != nthreads_icv ) ) {
        nthreads_icv = __kmp_nested_nth.nth[level + 1];

    kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
    kmp_proc_bind_t proc_bind_icv;
    if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;
        proc_bind_icv = proc_bind_default;
        proc_bind_icv = master_th->th.th_current_task->td_icvs.proc_bind;
        if ( proc_bind == proc_bind_default ) {
            proc_bind = proc_bind_icv;
        if ( ( level + 1 < __kmp_nested_proc_bind.used )
             && ( __kmp_nested_proc_bind.bind_types[level + 1] != proc_bind_icv ) ) {
            proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
            proc_bind_icv = proc_bind_default;

    master_th->th.th_set_proc_bind = proc_bind_default;

    if ( ( nthreads_icv > 0 )
         || ( proc_bind_icv != proc_bind_default )
        kmp_internal_control_t new_icvs;
        copy_icvs( & new_icvs, & master_th->th.th_current_task->td_icvs );
        new_icvs.next = NULL;
        if ( nthreads_icv > 0 ) {
            new_icvs.nproc = nthreads_icv;
        if ( proc_bind_icv != proc_bind_default ) {
            new_icvs.proc_bind = proc_bind_icv;
        KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
        team = __kmp_allocate_team(root, nthreads, nthreads,
        KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
        team = __kmp_allocate_team(root, nthreads, nthreads,
                                   &master_th->th.th_current_task->td_icvs,
                                   parent_team->t.t_set_nproc[master_tid],
                                   parent_team->t.t_set_dynamic[master_tid],
                                   parent_team->t.t_set_nested[master_tid],
                                   parent_team->t.t_set_blocktime[master_tid],
                                   parent_team->t.t_set_bt_intervals[master_tid],
                                   parent_team->t.t_set_bt_set[master_tid],
    KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n",

    team->t.t_master_tid = master_tid;
    team->t.t_master_this_cons = master_this_cons;
    team->t.t_master_last_cons = master_last_cons;
    team->t.t_parent = parent_team;
    TCW_SYNC_PTR(team->t.t_pkfn, microtask);
    team->t.t_invoke = invoker;
    team->t.t_ident  = loc;

    if ( !master_th->th.th_team_microtask || level > teams_level ) {
        team->t.t_level        = parent_team->t.t_level + 1;
        team->t.t_active_level = parent_team->t.t_active_level + 1;
        team->t.t_level        = parent_team->t.t_level;
        team->t.t_active_level = parent_team->t.t_active_level;
    team->t.t_sched = get__sched_2( parent_team, master_tid );

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if ( __kmp_inherit_fp_control ) {
        __kmp_store_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
        __kmp_store_mxcsr( &team->t.t_mxcsr );
        team->t.t_mxcsr &= KMP_X86_MXCSR_MASK;
        team->t.t_fp_control_saved = TRUE;
        team->t.t_fp_control_saved = FALSE;

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team );
        KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
                        __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
                        parent_team, team->t.t_task_team, team ) );
        master_th->th.th_task_team = team->t.t_task_team;
        KMP_DEBUG_ASSERT( ( master_th->th.th_task_team == NULL ) || ( team == root->r.r_hot_team ) );
#endif // OMP_30_ENABLED

    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
                    gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ) );
    KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
                      ( team->t.t_master_tid == 0 &&
                        ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ) );
    argv = (void**) team -> t.t_argv;
    for ( i = argc-1; i >= 0; --i )
#if KMP_ARCH_X86_64 && KMP_OS_LINUX
        *argv++ = va_arg( *ap, void * );
        *argv++ = va_arg( ap, void * );
    for ( i = 0; i < argc; ++i )
        argv[i] = team->t.t_parent->t.t_argv[i];

    team->t.t_master_active = master_active;
    if ( !root -> r.r_active )
        root -> r.r_active = TRUE;

    __kmp_fork_team_threads( root, team, master_th, gtid );

    __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

    if ( ( __itt_frame_begin_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG )
        if ( !master_th->th.th_team_microtask || microtask == (microtask_t)__kmp_teams_master )
            __kmp_itt_region_forking( gtid );

    if ( __kmp_forkjoin_frames_mode == 1 && __kmp_itt_csv_file )
        kmp_uint64 fr_begin;
#if defined( __GNUC__ )
# if !defined( __INTEL_COMPILER )
        fr_begin = __kmp_hardware_timestamp();
        fr_begin = __rdtsc();
        fr_begin = __rdtsc();
        if ( ! ( team->t.t_active_level > 1 ) ) {
            master_th->th.th_frame_time = fr_begin;

    KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );

    KF_TRACE( 10, ( "__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
                    root, team, master_th, gtid ) );

    if ( __itt_stack_caller_create_ptr ) {
        team->t.t_stack_id = __kmp_itt_stack_caller_create();

    __kmp_internal_fork( loc, gtid, team );
    KF_TRACE( 10, ( "__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
                    root, team, master_th, gtid ) );

    if ( ! exec_master ) {
        KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ) );

    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
                    gtid, team->t.t_id, team->t.t_pkfn ) );

    if ( ! team->t.t_invoke( gtid ) ) {
        KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );

    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
                    gtid, team->t.t_id, team->t.t_pkfn ) );

    KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ) );
/*
 * __kmp_join_call: tear down the parallel region started by __kmp_fork_call.
 * The master joins its workers, restores its team/dispatch/task state from the
 * parent team, releases the team, and resumes the enclosing serial task.
 */
__kmp_join_call( ident_t *loc, int gtid
    kmp_team_t *parent_team;
    kmp_info_t *master_th;

    KA_TRACE( 20, ( "__kmp_join_call: enter T#%d\n", gtid ) );

    master_th   = __kmp_threads[ gtid ];
    root        = master_th -> th.th_root;
    team        = master_th -> th.th_team;
    parent_team = team->t.t_parent;

    master_th->th.th_ident = loc;

#if OMP_30_ENABLED && KMP_DEBUG
    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
                        __kmp_gtid_from_thread( master_th ), team,
                        team -> t.t_task_team, master_th->th.th_task_team ) );
        KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team );
#endif // OMP_30_ENABLED

    if ( team->t.t_serialized ) {
        if ( master_th->th.th_team_microtask ) {
            int level  = team->t.t_level;
            int tlevel = master_th->th.th_teams_level;
            if ( level == tlevel ) {
            }
            else if ( level == tlevel + 1 ) {
                team->t.t_serialized++;
    master_active = team->t.t_master_active;

    __kmp_internal_join( loc, gtid, team );

    if ( __itt_stack_caller_create_ptr ) {
        __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id );

    if ( ( __itt_frame_end_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG )
        if ( !master_th->th.th_team_microtask ||
             ( !exit_teams && team->t.t_level == master_th->th.th_teams_level ) )
            __kmp_itt_region_joined( gtid );

    if ( master_th->th.th_team_microtask &&
         team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
         team->t.t_level == master_th->th.th_teams_level + 1 ) {
        team->t.t_active_level --;
        KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );

        if ( master_th->th.th_team_nproc < master_th->th.th_set_nth_teams ) {
            int old_num = master_th->th.th_team_nproc;
            int new_num = master_th->th.th_set_nth_teams;
            kmp_info_t **other_threads = team->t.t_threads;
            team->t.t_nproc = new_num;
            for ( i = 0; i < old_num; ++i ) {
                other_threads[i]->th.th_team_nproc = new_num;
            for ( i = old_num; i < new_num; ++i ) {
                kmp_balign_t * balign = other_threads[i]->th.th_bar;
                for ( b = 0; b < bs_last_barrier; ++ b ) {
                    balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
                other_threads[i]->th.th_task_state = master_th->th.th_task_state;

    master_th -> th.th_info.ds.ds_tid = team -> t.t_master_tid;
    master_th -> th.th_local.this_construct = team -> t.t_master_this_cons;
    master_th -> th.th_local.last_construct = team -> t.t_master_last_cons;

    master_th -> th.th_dispatch =
        & parent_team -> t.t_dispatch[ team -> t.t_master_tid ];
    __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

    if ( !master_th->th.th_team_microtask || team->t.t_level > master_th->th.th_teams_level )
        KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
    KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );

    KF_TRACE( 10, ( "__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
                    0, master_th, team ) );
    __kmp_pop_current_task_from_thread( master_th );
#endif // OMP_30_ENABLED

#if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
    master_th -> th.th_first_place = team -> t.t_first_place;
    master_th -> th.th_last_place  = team -> t.t_last_place;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
        __kmp_clear_x87_fpu_status_word();
        __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
        __kmp_load_mxcsr( &team->t.t_mxcsr );

    if ( root -> r.r_active != master_active )
        root -> r.r_active = master_active;

    __kmp_free_team( root, team );

    master_th -> th.th_team            = parent_team;
    master_th -> th.th_team_nproc      = parent_team -> t.t_nproc;
    master_th -> th.th_team_master     = parent_team -> t.t_threads[0];
    master_th -> th.th_team_serialized = parent_team -> t.t_serialized;

    if ( parent_team -> t.t_serialized &&
         parent_team != master_th->th.th_serial_team &&
         parent_team != root->r.r_root_team ) {
        __kmp_free_team( root, master_th -> th.th_serial_team );
        master_th -> th.th_serial_team = parent_team;

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        if ( ( master_th -> th.th_task_team = parent_team -> t.t_task_team ) != NULL ) {
            master_th -> th.th_task_state = master_th -> th.th_task_team -> tt.tt_state;
        KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
                        __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,

    master_th->th.th_current_task->td_flags.executing = 1;
#endif // OMP_30_ENABLED

    __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

    KA_TRACE( 20, ( "__kmp_join_call: exit T#%d\n", gtid ) );
/* Push a copy of the current internal control variables onto the team's
   control stack if this serialized nesting level has not saved them yet, so
   the values can be restored when the serialized region ends. */
__kmp_save_internal_controls ( kmp_info_t * thread )
    if ( thread -> th.th_team != thread -> th.th_serial_team ) {
    if ( thread -> th.th_team -> t.t_serialized > 1 ) {
        if ( thread -> th.th_team -> t.t_control_stack_top == NULL ) {
        if ( thread -> th.th_team -> t.t_control_stack_top -> serial_nesting_level !=
             thread -> th.th_team -> t.t_serialized ) {
        kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate( sizeof(kmp_internal_control_t) );

        copy_icvs( control, & thread->th.th_current_task->td_icvs );
        control->nproc        = thread->th.th_team->t.t_set_nproc[0];
        control->dynamic      = thread->th.th_team->t.t_set_dynamic[0];
        control->nested       = thread->th.th_team->t.t_set_nested[0];
        control->blocktime    = thread->th.th_team->t.t_set_blocktime[0];
        control->bt_intervals = thread->th.th_team->t.t_set_bt_intervals[0];
        control->bt_set       = thread->th.th_team->t.t_set_bt_set[0];
#endif // OMP_30_ENABLED
        control->serial_nesting_level = thread->th.th_team->t.t_serialized;

        control->next = thread -> th.th_team -> t.t_control_stack_top;
        thread -> th.th_team -> t.t_control_stack_top = control;
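/*
 * __kmp_set_num_threads: runtime side of omp_set_num_threads().  The request
 * is clamped to __kmp_max_nth, the current controls are saved, and the value
 * becomes the thread's nproc ICV.  If the root is idle and its hot team is
 * larger than the new value, the surplus hot-team threads are freed.
 */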
__kmp_set_num_threads( int new_nth, int gtid )
    KF_TRACE( 10, ( "__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    else if ( new_nth > __kmp_max_nth )
        new_nth = __kmp_max_nth;

    thread = __kmp_threads[gtid];

    __kmp_save_internal_controls( thread );

    set__nproc( thread, new_nth );

    root = thread->th.th_root;
    if ( __kmp_init_parallel && ( ! root->r.r_active )
         && ( root->r.r_hot_team->t.t_nproc > new_nth ) ) {
        kmp_team_t *hot_team = root->r.r_hot_team;

        __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
            kmp_task_team_t *task_team = hot_team->t.t_task_team;
            if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
                KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 );
                TCW_SYNC_4( task_team->tt.tt_active, FALSE );
                KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n",
                                &hot_team->t.t_task_team ) );
                hot_team->t.t_task_team = NULL;
                KMP_DEBUG_ASSERT( task_team == NULL );
#endif // OMP_30_ENABLED

        for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
            KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
            __kmp_free_thread( hot_team->t.t_threads[f] );
            hot_team->t.t_threads[f] = NULL;
        hot_team->t.t_nproc = new_nth;

        __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

        for ( f = 0; f < new_nth; f++ ) {
            KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
            hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;

        hot_team -> t.t_size_changed = -1;
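/*
 * __kmp_set_max_active_levels / __kmp_get_max_active_levels: validate and
 * store, or read back, the max-active-levels ICV for the calling thread.
 * Negative requests are ignored with a warning; values above
 * KMP_MAX_ACTIVE_LEVELS_LIMIT are clamped.
 */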
__kmp_set_max_active_levels( int gtid, int max_active_levels )
    KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n",
                    gtid, max_active_levels ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( max_active_levels < 0 ) {
        KMP_WARNING( ActiveLevelsNegative, max_active_levels );
        KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n",
                        gtid, max_active_levels ) );
    if ( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
        KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
        max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
    KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n",
                    gtid, max_active_levels ) );

    thread = __kmp_threads[ gtid ];

    __kmp_save_internal_controls( thread );

    set__max_active_levels( thread, max_active_levels );
__kmp_get_max_active_levels( int gtid )
    KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    thread = __kmp_threads[ gtid ];
    KMP_DEBUG_ASSERT( thread -> th.th_current_task );
    KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
                    gtid, thread -> th.th_current_task, thread -> th.th_current_task -> td_icvs.max_active_levels ) );
    return thread -> th.th_current_task -> td_icvs.max_active_levels;
__kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )
    KF_TRACE( 10, ( "__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                    gtid, (int)kind, chunk ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
         ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
            KMP_MSG( ScheduleKindOutOfRange, kind ),
            KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
        kind = kmp_sched_default;

    thread = __kmp_threads[ gtid ];

    __kmp_save_internal_controls( thread );

    if ( kind < kmp_sched_upper_std ) {
        if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
            thread -> th.th_current_task -> td_icvs.sched.r_sched_type = kmp_sch_static;
            thread -> th.th_current_task -> td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
        thread -> th.th_current_task -> td_icvs.sched.r_sched_type =
            __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
    if ( kind == kmp_sched_auto ) {
        thread -> th.th_current_task -> td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
        thread -> th.th_current_task -> td_icvs.sched.chunk = chunk;
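/*
 * __kmp_get_schedule: translate the thread's internal r_sched_type back to the
 * user-visible kmp_sched_t kind and report the current chunk size.
 */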
__kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk )
    KF_TRACE( 10, ( "__kmp_get_schedule: thread %d\n", gtid ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    thread = __kmp_threads[ gtid ];

    th_type = thread -> th.th_current_task -> td_icvs.sched.r_sched_type;

    switch ( th_type ) {
    case kmp_sch_static_greedy:
    case kmp_sch_static_balanced:
        *kind = kmp_sched_static;
    case kmp_sch_static_chunked:
        *kind = kmp_sched_static;
    case kmp_sch_dynamic_chunked:
        *kind = kmp_sched_dynamic;
    case kmp_sch_guided_iterative_chunked:
    case kmp_sch_guided_analytical_chunked:
        *kind = kmp_sched_guided;
        *kind = kmp_sched_auto;
    case kmp_sch_trapezoidal:
        *kind = kmp_sched_trapezoidal;
        KMP_FATAL( UnknownSchedulingType, th_type );

    *chunk = thread -> th.th_current_task -> td_icvs.sched.chunk;
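/*
 * __kmp_get_ancestor_thread_num / __kmp_get_team_size: walk up the chain of
 * parent teams, counting serialized levels, until the requested nesting level
 * is reached, then report the ancestor's thread number or that team's size.
 */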
__kmp_get_ancestor_thread_num( int gtid, int level ) {
    KF_TRACE( 10, ( "__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( level == 0 ) return 0;
    if ( level < 0 )  return -1;
    thr  = __kmp_threads[ gtid ];
    team = thr->th.th_team;
    ii   = team -> t.t_level;
    if ( level > ii ) return -1;

    if ( thr->th.th_team_microtask ) {
        int tlevel = thr->th.th_teams_level;
        if ( level <= tlevel ) {
            KMP_DEBUG_ASSERT( ii >= tlevel );
            if ( ii == tlevel ) {

    if ( ii == level ) return __kmp_tid_from_gtid( gtid );

    dd = team -> t.t_serialized;
        for ( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
        if ( ( team -> t.t_serialized ) && ( !dd ) ) {
            team = team->t.t_parent;
            team = team->t.t_parent;
            dd = team -> t.t_serialized;

    return ( dd > 1 ) ? ( 0 ) : ( team -> t.t_master_tid );
__kmp_get_team_size( int gtid, int level ) {
    KF_TRACE( 10, ( "__kmp_get_team_size: thread %d %d\n", gtid, level ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( level == 0 ) return 1;
    if ( level < 0 )  return -1;
    thr  = __kmp_threads[ gtid ];
    team = thr->th.th_team;
    ii   = team -> t.t_level;
    if ( level > ii ) return -1;

    if ( thr->th.th_team_microtask ) {
        int tlevel = thr->th.th_teams_level;
        if ( level <= tlevel ) {
            KMP_DEBUG_ASSERT( ii >= tlevel );
            if ( ii == tlevel ) {

        for ( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
        if ( team -> t.t_serialized && ( !dd ) ) {
            team = team->t.t_parent;
            team = team->t.t_parent;

    return team -> t.t_nproc;
#endif // OMP_30_ENABLED
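/*
 * __kmp_get_schedule_global: build the default kmp_r_sched_t from the global
 * schedule settings, using KMP_DEFAULT_CHUNK when __kmp_chunk is below it.
 */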
__kmp_get_schedule_global() {
    kmp_r_sched_t r_sched;

    r_sched.r_sched_type = __kmp_static;
    r_sched.r_sched_type = __kmp_guided;
    r_sched.r_sched_type = __kmp_sched;

    if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) {
        r_sched.chunk = KMP_DEFAULT_CHUNK;
        r_sched.chunk = __kmp_chunk;
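/*
 * __kmp_alloc_argv_entries: ensure team->t.t_argv can hold argc pointers.
 * Small argument lists use the team's inline array (KMP_INLINE_ARGV_ENTRIES);
 * larger ones get a page-allocated buffer of at least
 * KMP_MIN_MALLOC_ARGV_ENTRIES (or 2*argc) entries, freeing any previous heap
 * buffer when realloc is requested.
 */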
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc )
    KMP_DEBUG_ASSERT( team );
    if ( !realloc || argc > team -> t.t_max_argc ) {

        KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
                         team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ) );
#if (KMP_PERF_V106 == KMP_ON)
        if ( realloc && team -> t.t_argv != &team -> t.t_inline_argv[0] )
            __kmp_free( (void *) team -> t.t_argv );

        if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
            team -> t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
            KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
                             team->t.t_id, team->t.t_max_argc ) );
            team -> t.t_argv = &team -> t.t_inline_argv[0];
            if ( __kmp_storage_map ) {
                __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
                                              &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
                                              ( sizeof( void * ) * KMP_INLINE_ARGV_ENTRIES ),
                                              "team_%d.t_inline_argv",
            team -> t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1) ) ?
                KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
            KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
                             team->t.t_id, team->t.t_max_argc ) );
            team -> t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
            if ( __kmp_storage_map ) {
                __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
                                              sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
        __kmp_free( (void*) team -> t.t_argv );
        team -> t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1) ) ?
            KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
        KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
                         team->t.t_id, team->t.t_max_argc ) );
        team -> t.t_argv = __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
        if ( __kmp_storage_map ) {
            __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
                                          sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv", team->t.t_id );
3790 __kmp_allocate_team_arrays(kmp_team_t *team,
int max_nth)
3793 int num_disp_buff = max_nth > 1 ? KMP_MAX_DISP_BUF : 2;
3794 #if KMP_USE_POOLED_ALLOC
3796 char *ptr = __kmp_allocate(max_nth *
3797 (
sizeof(kmp_info_t*) +
sizeof(dispatch_shared_info_t)*num_disp_buf
3798 +
sizeof(kmp_disp_t) +
sizeof(
int)*6
3801 +
sizeof(kmp_r_sched_t)
3802 +
sizeof(kmp_taskdata_t)
3806 team -> t.t_threads = (kmp_info_t**) ptr; ptr +=
sizeof(kmp_info_t*) * max_nth;
3807 team -> t.t_disp_buffer = (dispatch_shared_info_t*) ptr;
3808 ptr +=
sizeof(dispatch_shared_info_t) * num_disp_buff;
3809 team -> t.t_dispatch = (kmp_disp_t*) ptr; ptr +=
sizeof(kmp_disp_t) * max_nth;
3810 team -> t.t_set_nproc = (
int*) ptr; ptr +=
sizeof(int) * max_nth;
3811 team -> t.t_set_dynamic = (
int*) ptr; ptr +=
sizeof(int) * max_nth;
3812 team -> t.t_set_nested = (
int*) ptr; ptr +=
sizeof(int) * max_nth;
3813 team -> t.t_set_blocktime = (
int*) ptr; ptr +=
sizeof(int) * max_nth;
3814 team -> t.t_set_bt_intervals = (
int*) ptr; ptr +=
sizeof(int) * max_nth;
3815 team -> t.t_set_bt_set = (
int*) ptr;
3817 ptr +=
sizeof(int) * max_nth;
3819 team -> t.t_set_sched = (kmp_r_sched_t*) ptr;
3820 ptr +=
sizeof(kmp_r_sched_t) * max_nth;
3821 team -> t.t_implicit_task_taskdata = (kmp_taskdata_t*) ptr;
3822 ptr +=
sizeof(kmp_taskdata_t) * max_nth;
3823 # endif // OMP_30_ENABLED
3826 team -> t.t_threads = (kmp_info_t**) __kmp_allocate(
sizeof(kmp_info_t*) * max_nth );
3827 team -> t.t_disp_buffer = (dispatch_shared_info_t*)
3828 __kmp_allocate(
sizeof(dispatch_shared_info_t) * num_disp_buff );
3829 team -> t.t_dispatch = (kmp_disp_t*) __kmp_allocate(
sizeof(kmp_disp_t) * max_nth );
3833 team -> t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate(
sizeof(kmp_taskdata_t) * max_nth );
3835 team -> t.t_set_nproc = (
int*) __kmp_allocate(
sizeof(
int) * max_nth );
3836 team -> t.t_set_dynamic = (
int*) __kmp_allocate(
sizeof(
int) * max_nth );
3837 team -> t.t_set_nested = (
int*) __kmp_allocate(
sizeof(
int) * max_nth );
3838 team -> t.t_set_blocktime = (
int*) __kmp_allocate(
sizeof(
int) * max_nth );
3839 team -> t.t_set_bt_intervals = (
int*) __kmp_allocate(
sizeof(
int) * max_nth );
3840 team -> t.t_set_bt_set = (
int*) __kmp_allocate(
sizeof(
int) * max_nth );
3841 # endif // OMP_30_ENABLED
3843 team->t.t_max_nproc = max_nth;
3846 for(i = 0 ; i < num_disp_buff; ++i)
3847 team -> t.t_disp_buffer[i].buffer_index = i;
/* Release the per-team arrays (thread pointers, dispatch buffers, and the
   implicit-task / per-thread control arrays) and reset the pointers. */
__kmp_free_team_arrays(kmp_team_t *team) {
    for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
        if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
            __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
            team->t.t_dispatch[ i ].th_disp_buffer = NULL;
    __kmp_free(team->t.t_threads);
#if !KMP_USE_POOLED_ALLOC
    __kmp_free(team->t.t_disp_buffer);
    __kmp_free(team->t.t_dispatch);
    __kmp_free(team->t.t_implicit_task_taskdata);
    __kmp_free(team->t.t_set_nproc);
    __kmp_free(team->t.t_set_dynamic);
    __kmp_free(team->t.t_set_nested);
    __kmp_free(team->t.t_set_blocktime);
    __kmp_free(team->t.t_set_bt_intervals);
    __kmp_free(team->t.t_set_bt_set);
# endif // OMP_30_ENABLED
    team->t.t_threads     = NULL;
    team->t.t_disp_buffer = NULL;
    team->t.t_dispatch    = NULL;
    team->t.t_implicit_task_taskdata = 0;
    team->t.t_set_nproc        = 0;
    team->t.t_set_dynamic      = 0;
    team->t.t_set_nested       = 0;
    team->t.t_set_blocktime    = 0;
    team->t.t_set_bt_intervals = 0;
    team->t.t_set_bt_set       = 0;
#endif // OMP_30_ENABLED
__kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
    kmp_info_t **oldThreads = team->t.t_threads;

#if !KMP_USE_POOLED_ALLOC
    __kmp_free(team->t.t_disp_buffer);
    __kmp_free(team->t.t_dispatch);
    __kmp_free(team->t.t_implicit_task_taskdata);
    __kmp_free(team->t.t_set_nproc);
    __kmp_free(team->t.t_set_dynamic);
    __kmp_free(team->t.t_set_nested);
    __kmp_free(team->t.t_set_blocktime);
    __kmp_free(team->t.t_set_bt_intervals);
    __kmp_free(team->t.t_set_bt_set);
# endif // OMP_30_ENABLED

    __kmp_allocate_team_arrays(team, max_nth);

    memcpy(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));

    __kmp_free(oldThreads);
3921 static kmp_internal_control_t
3922 __kmp_get_global_icvs(
void ) {
3925 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
3929 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
3932 kmp_internal_control_t g_icvs = {
3935 __kmp_global.g.g_dynamic,
3936 __kmp_dflt_team_nth,
3939 __kmp_dflt_blocktime,
3941 __kmp_env_blocktime,
3943 __kmp_dflt_max_active_levels,
3947 __kmp_nested_proc_bind.bind_types[0],
3955 static kmp_internal_control_t
3956 __kmp_get_x_global_icvs(
const kmp_team_t *team ) {
3959 kmp_internal_control_t gx_icvs;
3960 gx_icvs.serial_nesting_level = 0;
3961 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
3962 gx_icvs.next = NULL;
3964 kmp_internal_control_t gx_icvs =
3967 team->t.t_set_nested[0],
3968 team->t.t_set_dynamic[0],
3969 team->t.t_set_nproc[0],
3970 team->t.t_set_blocktime[0],
3971 team->t.t_set_bt_intervals[0],
3972 team->t.t_set_bt_set[0],
3975 #endif // OMP_30_ENABLED
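/*
 * __kmp_initialize_root: set up a freshly allocated kmp_root_t: its begin
 * lock and flags, a serialized root team, and a hot team that subsequent
 * parallel regions started from this root will reuse.
 */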
3981 __kmp_initialize_root( kmp_root_t *root )
3984 kmp_team_t *root_team;
3985 kmp_team_t *hot_team;
3986 size_t disp_size, dispatch_size, bar_size;
3987 int hot_team_max_nth;
3989 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
3990 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3991 #endif // OMP_30_ENABLED
3992 KMP_DEBUG_ASSERT( root );
3993 KMP_ASSERT( ! root->r.r_begin );
3996 __kmp_init_lock( &root->r.r_begin_lock );
3997 root -> r.r_begin = FALSE;
3998 root -> r.r_active = FALSE;
3999 root -> r.r_in_parallel = 0;
4000 root -> r.r_blocktime = __kmp_dflt_blocktime;
4001 root -> r.r_nested = __kmp_dflt_nested;
4005 KF_TRACE( 10, (
"__kmp_initialize_root: before root_team\n" ) );
4007 __kmp_allocate_team(
4012 __kmp_nested_proc_bind.bind_types[0],
4017 __kmp_dflt_team_nth_ub,
4018 __kmp_global.g.g_dynamic,
4020 __kmp_dflt_blocktime,
4022 __kmp_env_blocktime,
4027 KF_TRACE( 10, (
"__kmp_initialize_root: after root_team = %p\n", root_team ) );
4029 root -> r.r_root_team = root_team;
4030 root_team -> t.t_control_stack_top = NULL;
4033 root_team -> t.t_threads[0] = NULL;
4034 root_team -> t.t_nproc = 1;
4035 root_team -> t.t_serialized = 1;
4038 root_team -> t.t_sched.r_sched_type = r_sched.r_sched_type;
4039 root_team -> t.t_sched.chunk = r_sched.chunk;
4040 #endif // OMP_30_ENABLED
4041 KA_TRACE( 20, (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
4042 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4046 KF_TRACE( 10, (
"__kmp_initialize_root: before hot_team\n" ) );
4048 __kmp_allocate_team(
4051 __kmp_dflt_team_nth_ub * 2,
4053 __kmp_nested_proc_bind.bind_types[0],
4058 __kmp_dflt_team_nth_ub,
4059 __kmp_global.g.g_dynamic,
4061 __kmp_dflt_blocktime,
4063 __kmp_env_blocktime,
4067 KF_TRACE( 10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
4069 root -> r.r_hot_team = hot_team;
4070 root_team -> t.t_control_stack_top = NULL;
4073 hot_team -> t.t_parent = root_team;
4076 hot_team_max_nth = hot_team->t.t_max_nproc;
4077 for ( f = 0; f < hot_team_max_nth; ++ f ) {
4078 hot_team -> t.t_threads[ f ] = NULL;
4080 hot_team -> t.t_nproc = 1;
4083 hot_team -> t.t_sched.r_sched_type = r_sched.r_sched_type;
4084 hot_team -> t.t_sched.chunk = r_sched.chunk;
4085 #endif // OMP_30_ENABLED
4087 hot_team -> t.t_size_changed = 0;
4095 typedef struct kmp_team_list_item {
4096 kmp_team_p
const * entry;
4097 struct kmp_team_list_item * next;
4098 } kmp_team_list_item_t;
4099 typedef kmp_team_list_item_t * kmp_team_list_t;
4103 __kmp_print_structure_team_accum(
4104 kmp_team_list_t list,
4105 kmp_team_p
const * team
4115 KMP_DEBUG_ASSERT( list != NULL );
4116 if ( team == NULL ) {
4120 __kmp_print_structure_team_accum( list, team->t.t_parent );
4121 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
4125 while ( l->next != NULL && l->entry != team ) {
4128 if ( l->next != NULL ) {
4134 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
4140 kmp_team_list_item_t * item =
4141 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof( kmp_team_list_item_t ) );
4150 __kmp_print_structure_team(
4152 kmp_team_p
const * team
4155 __kmp_printf(
"%s", title );
4156 if ( team != NULL ) {
4157 __kmp_printf(
"%2x %p\n", team->t.t_id, team );
4159 __kmp_printf(
" - (nil)\n" );
4164 __kmp_print_structure_thread(
4166 kmp_info_p
const * thread
4169 __kmp_printf(
"%s", title );
4170 if ( thread != NULL ) {
4171 __kmp_printf(
"%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
4173 __kmp_printf(
" - (nil)\n" );
4178 __kmp_print_structure(
4182 kmp_team_list_t list;
4185 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof( kmp_team_list_item_t ) );
4189 __kmp_printf(
"\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
4192 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
4193 __kmp_printf(
"%2d", gtid );
4194 if ( __kmp_threads != NULL ) {
4195 __kmp_printf(
" %p", __kmp_threads[ gtid ] );
4197 if ( __kmp_root != NULL ) {
4198 __kmp_printf(
" %p", __kmp_root[ gtid ] );
4200 __kmp_printf(
"\n" );
4205 __kmp_printf(
"\n------------------------------\nThreads\n------------------------------\n" );
4206 if ( __kmp_threads != NULL ) {
4208 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
4209 kmp_info_t
const * thread = __kmp_threads[ gtid ];
4210 if ( thread != NULL ) {
4211 __kmp_printf(
"GTID %2d %p:\n", gtid, thread );
4212 __kmp_printf(
" Our Root: %p\n", thread->th.th_root );
4213 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team );
4214 __kmp_print_structure_team(
" Serial Team: ", thread->th.th_serial_team );
4215 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc );
4216 __kmp_print_structure_thread(
" Master: ", thread->th.th_team_master );
4217 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized );
4218 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc );
4220 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
4222 __kmp_print_structure_thread(
" Next in pool: ", thread->th.th_next_pool );
4223 __kmp_printf(
"\n" );
4224 __kmp_print_structure_team_accum( list, thread->th.th_team );
4225 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
4229 __kmp_printf(
"Threads array is not allocated.\n" );
4233 __kmp_printf(
"\n------------------------------\nUbers\n------------------------------\n" );
4234 if ( __kmp_root != NULL ) {
4236 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
4237 kmp_root_t
const * root = __kmp_root[ gtid ];
4238 if ( root != NULL ) {
4239 __kmp_printf(
"GTID %2d %p:\n", gtid, root );
4240 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team );
4241 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team );
4242 __kmp_print_structure_thread(
" Uber Thread: ", root->r.r_uber_thread );
4243 __kmp_printf(
" Active?: %2d\n", root->r.r_active );
4244 __kmp_printf(
" Nested?: %2d\n", root->r.r_nested );
4245 __kmp_printf(
" In Parallel: %2d\n", root->r.r_in_parallel );
4246 __kmp_printf(
"\n" );
4247 __kmp_print_structure_team_accum( list, root->r.r_root_team );
4248 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
4252 __kmp_printf(
"Ubers array is not allocated.\n" );
4255 __kmp_printf(
"\n------------------------------\nTeams\n------------------------------\n" );
4256 while ( list->next != NULL ) {
4257 kmp_team_p
const * team = list->entry;
4259 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team );
4260 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent );
4261 __kmp_printf(
" Master TID: %2d\n", team->t.t_master_tid );
4262 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc );
4263 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized );
4264 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc );
4265 for ( i = 0; i < team->t.t_nproc; ++ i ) {
4266 __kmp_printf(
" Thread %2d: ", i );
4267 __kmp_print_structure_thread(
"", team->t.t_threads[ i ] );
4269 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool );
4270 __kmp_printf(
"\n" );
4275 __kmp_printf(
"\n------------------------------\nPools\n------------------------------\n" );
4276 __kmp_print_structure_thread(
"Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
4277 __kmp_print_structure_team(
"Team pool: ", (kmp_team_t *)__kmp_team_pool );
4278 __kmp_printf(
"\n" );
4281 while ( list != NULL ) {
4282 kmp_team_list_item_t * item = list;
4284 KMP_INTERNAL_FREE( item );
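/*
 * Multiplier table for the per-thread random number generator used below.
 * __kmp_init_random seeds each thread from its tid and picks one of these
 * primes as the multiplier 'a'; __kmp_get_random then iterates the linear
 * congruential recurrence x = a*x + 1 (mod 2^32) and returns the high 16 bits.
 */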
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
    0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
    0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
    0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
    0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
    0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
    0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
    0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
    0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
    0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
    0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f

__kmp_get_random( kmp_info_t * thread )
    unsigned x = thread -> th.th_x;
    unsigned short r = x >> 16;

    thread -> th.th_x = x * thread->th.th_a + 1;

    KA_TRACE( 30, ( "__kmp_get_random: THREAD: %d, RETURN: %u\n",
                    thread->th.th_info.ds.ds_tid, r ) );

__kmp_init_random( kmp_info_t * thread )
    unsigned seed = thread->th.th_info.ds.ds_tid;

    thread -> th.th_a = __kmp_primes[ seed % ( sizeof(__kmp_primes) / sizeof(__kmp_primes[0]) ) ];
    thread -> th.th_x = ( seed + 1 ) * thread->th.th_a + 1;
    KA_TRACE( 30, ( "__kmp_init_random: THREAD: %u; A: %u\n", seed, thread -> th.th_a ) );
/* Scan the thread table for uber (root) threads whose OS thread is no longer
   running and whose root is inactive, and unregister them so their slots can
   be reused. */
__kmp_reclaim_dead_roots(void) {
    for ( i = 0; i < __kmp_threads_capacity; ++i ) {
        if ( KMP_UBER_GTID( i ) &&
             !__kmp_still_running( (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i]) ) &&
             !__kmp_root[i]->r.r_active ) {
            r += __kmp_unregister_root_other_thread(i);
4384 __kmp_expand_threads(
int nWish,
int nNeed) {
4387 int __kmp_actual_max_nth;
4391 #if KMP_OS_WINDOWS && !defined GUIDEDLL_EXPORTS
4394 added = __kmp_reclaim_dead_roots();
4412 int minimumRequiredCapacity;
4414 kmp_info_t **newThreads;
4415 kmp_root_t **newRoot;
4437 old_tp_cached = __kmp_tp_cached;
4438 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
4439 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
4443 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
4447 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
4453 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
4460 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
4462 newCapacity = __kmp_threads_capacity;
4465 newCapacity <= (__kmp_actual_max_nth >> 1) ?
4466 (newCapacity << 1) :
4467 __kmp_actual_max_nth;
4468 }
while(newCapacity < minimumRequiredCapacity);
4469 newThreads = (kmp_info_t**) __kmp_allocate((
sizeof(kmp_info_t*) +
sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
4470 newRoot = (kmp_root_t**) ((
char*)newThreads +
sizeof(kmp_info_t*) * newCapacity );
4471 memcpy(newThreads, __kmp_threads, __kmp_threads_capacity *
sizeof(kmp_info_t*));
4472 memcpy(newRoot, __kmp_root, __kmp_threads_capacity *
sizeof(kmp_root_t*));
4473 memset(newThreads + __kmp_threads_capacity, 0,
4474 (newCapacity - __kmp_threads_capacity) *
sizeof(kmp_info_t*));
4475 memset(newRoot + __kmp_threads_capacity, 0,
4476 (newCapacity - __kmp_threads_capacity) *
sizeof(kmp_root_t*));
4478 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
4484 __kmp_free(newThreads);
4487 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
4488 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
4490 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
4491 __kmp_free(newThreads);
4497 *(kmp_info_t**
volatile*)&__kmp_threads = newThreads;
4498 *(kmp_root_t**
volatile*)&__kmp_root = newRoot;
4499 added += newCapacity - __kmp_threads_capacity;
4500 *(
volatile int*)&__kmp_threads_capacity = newCapacity;
4501 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
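/*
 * __kmp_register_root: claim a slot in __kmp_threads for a new root thread,
 * growing the table if needed, create or reuse the root's kmp_root_t and uber
 * thread, build its serial team, initialize barrier state, and publish the
 * gtid through thread-specific storage.
 */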
4512 __kmp_register_root(
int initial_thread )
4514 kmp_info_t *root_thread;
4518 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
4519 KA_TRACE( 20, (
"__kmp_register_root: entered\n"));
4537 capacity = __kmp_threads_capacity;
4538 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
4543 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
4544 if ( __kmp_tp_cached ) {
4547 KMP_MSG( CantRegisterNewThread ),
4548 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
4549 KMP_HNT( PossibleSystemLimitOnThreads ),
4556 KMP_MSG( CantRegisterNewThread ),
4557 KMP_HNT( SystemLimitOnThreads ),
4566 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ );
4567 KA_TRACE( 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
4568 KMP_ASSERT( gtid < __kmp_threads_capacity );
4572 TCW_4(__kmp_nth, __kmp_nth + 1);
4579 if ( __kmp_adjust_gtid_mode ) {
4580 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4581 if ( TCR_4(__kmp_gtid_mode) != 2) {
4582 TCW_4(__kmp_gtid_mode, 2);
4586 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4587 TCW_4(__kmp_gtid_mode, 1);
4592 #ifdef KMP_ADJUST_BLOCKTIME
4595 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4596 if ( __kmp_nth > __kmp_avail_proc ) {
4597 __kmp_zero_bt = TRUE;
4603 if( ! ( root = __kmp_root[gtid] )) {
4604 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate(
sizeof(kmp_root_t) );
4605 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
4608 __kmp_initialize_root( root );
4611 if( root -> r.r_uber_thread ) {
4612 root_thread = root -> r.r_uber_thread;
4614 root_thread = (kmp_info_t*) __kmp_allocate(
sizeof(kmp_info_t) );
4615 if ( __kmp_storage_map ) {
4616 __kmp_print_thread_storage_map( root_thread, gtid );
4618 root_thread -> th.th_info .ds.ds_gtid = gtid;
4619 root_thread -> th.th_root = root;
4620 if( __kmp_env_consistency_check ) {
4621 root_thread -> th.th_cons = __kmp_allocate_cons_stack( gtid );
4624 __kmp_initialize_fast_memory( root_thread );
4628 KMP_DEBUG_ASSERT( root_thread -> th.th_local.bget_data == NULL );
4629 __kmp_initialize_bget( root_thread );
4631 __kmp_init_random( root_thread );
4635 if( ! root_thread -> th.th_serial_team ) {
4637 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
4638 #endif // OMP_30_ENABLED
4639 KF_TRACE( 10, (
"__kmp_register_root: before serial_team\n" ) );
4640 root_thread -> th.th_serial_team = __kmp_allocate_team( root, 1, 1,
4647 __kmp_dflt_team_nth_ub,
4648 __kmp_global.g.g_dynamic,
4650 __kmp_dflt_blocktime,
4652 __kmp_env_blocktime,
4656 KMP_ASSERT( root_thread -> th.th_serial_team );
4657 KF_TRACE( 10, (
"__kmp_register_root: after serial_team = %p\n",
4658 root_thread -> th.th_serial_team ) );
4661 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
4663 root -> r.r_root_team -> t.t_threads[0] = root_thread;
4664 root -> r.r_hot_team -> t.t_threads[0] = root_thread;
4665 root_thread -> th.th_serial_team -> t.t_threads[0] = root_thread;
4666 root -> r.r_uber_thread = root_thread;
4669 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
4672 __kmp_gtid_set_specific( gtid );
4673 #ifdef KMP_TDATA_GTID
4676 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
4677 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
4678 TCW_4(__kmp_init_gtid, TRUE);
4680 KA_TRACE( 20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
4681 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
4682 root -> r.r_hot_team -> t.t_id, 0, KMP_INIT_BARRIER_STATE,
4683 KMP_INIT_BARRIER_STATE ) );
4686 for ( b = 0; b < bs_last_barrier; ++ b ) {
4687 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
4690 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
4693 #if KMP_OS_WINDOWS || KMP_OS_LINUX
4694 if ( TCR_4(__kmp_init_middle) ) {
4695 __kmp_affinity_set_init_mask( gtid, TRUE );
4699 __kmp_root_counter ++;
4702 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
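/*
 * __kmp_reset_root and the __kmp_unregister_root_* entry points undo
 * __kmp_register_root: they free the root and hot teams, wait for task teams
 * to be dereferenced, reap the uber thread, and clear the gtid binding of the
 * exiting root.
 */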
__kmp_reset_root( int gtid, kmp_root_t *root )
    kmp_team_t * root_team = root->r.r_root_team;
    kmp_team_t * hot_team  = root->r.r_hot_team;
    int n = hot_team->t.t_nproc;

    KMP_DEBUG_ASSERT( ! root->r.r_active );

    root->r.r_root_team = NULL;
    root->r.r_hot_team  = NULL;

    __kmp_free_team( root, root_team );
    __kmp_free_team( root, hot_team );

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        __kmp_wait_to_unref_task_teams();

    KA_TRACE( 10, ( "__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
                    (LPVOID)&(root->r.r_uber_thread->th),
                    root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
    __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );

    TCW_4(__kmp_nth, __kmp_nth - 1);
    __kmp_reap_thread( root->r.r_uber_thread, 1 );

    root->r.r_uber_thread = NULL;
    root -> r.r_begin = FALSE;
__kmp_unregister_root_current_thread( int gtid )
    kmp_root_t *root = __kmp_root[gtid];

    KA_TRACE( 1, ( "__kmp_unregister_root_current_thread: enter T#%d\n", gtid ) );
    KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
    KMP_ASSERT( KMP_UBER_GTID( gtid ) );
    KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
    KMP_ASSERT( root->r.r_active == FALSE );

    __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

    __kmp_reset_root(gtid, root);

    __kmp_gtid_set_specific( KMP_GTID_DNE );
#ifdef KMP_TDATA_GTID
    __kmp_gtid = KMP_GTID_DNE;

    KC_TRACE( 10, ( "__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ) );

    __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

__kmp_unregister_root_other_thread( int gtid )
    kmp_root_t *root = __kmp_root[gtid];

    KA_TRACE( 1, ( "__kmp_unregister_root_other_thread: enter T#%d\n", gtid ) );
    KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
    KMP_ASSERT( KMP_UBER_GTID( gtid ) );
    KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
    KMP_ASSERT( root->r.r_active == FALSE );

    r = __kmp_reset_root(gtid, root);
    KC_TRACE( 10, ( "__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ) );
void __kmp_task_info() {

    kmp_int32 gtid = __kmp_entry_gtid();
    kmp_int32 tid  = __kmp_tid_from_gtid( gtid );
    kmp_info_t *this_thr = __kmp_threads[ gtid ];
    kmp_team_t *steam = this_thr -> th.th_serial_team;
    kmp_team_t *team  = this_thr -> th.th_team;

    __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
                  gtid, tid, this_thr, team, this_thr->th.th_current_task,
                  team->t.t_implicit_task_taskdata[tid].td_parent );
#endif // OMP_30_ENABLED
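/*
 * __kmp_initialize_info: (re)bind a worker thread to a team slot: set its tid,
 * root and team pointers, initialize its implicit task, allocate its
 * threadprivate common table and dispatch buffers on first use, and clear its
 * per-construct and sleep state.
 */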
4832 __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team,
int tid,
int gtid )
4837 KMP_DEBUG_ASSERT( this_thr != NULL );
4838 KMP_DEBUG_ASSERT( this_thr -> th.th_serial_team );
4839 KMP_DEBUG_ASSERT( team );
4840 KMP_DEBUG_ASSERT( team -> t.t_threads );
4841 KMP_DEBUG_ASSERT( team -> t.t_dispatch );
4842 KMP_DEBUG_ASSERT( team -> t.t_threads[0] );
4843 KMP_DEBUG_ASSERT( team -> t.t_threads[0] -> th.th_root );
4847 TCW_SYNC_PTR(this_thr->th.th_team, team);
4849 this_thr->th.th_info.ds.ds_tid = tid;
4850 this_thr->th.th_set_nproc = 0;
4852 this_thr->th.th_set_proc_bind = proc_bind_default;
4853 # if (KMP_OS_WINDOWS || KMP_OS_LINUX)
4854 this_thr->th.th_new_place = this_thr->th.th_current_place;
4857 this_thr->th.th_root = team -> t.t_threads[0] -> th.th_root;
4860 this_thr->th.th_team_nproc = team -> t.t_nproc;
4861 this_thr->th.th_team_master = team -> t.t_threads[0];
4862 this_thr->th.th_team_serialized = team -> t.t_serialized;
4864 this_thr->th.th_team_microtask = team -> t.t_threads[0] -> th.th_team_microtask;
4865 this_thr->th.th_teams_level = team -> t.t_threads[0] -> th.th_teams_level;
4866 this_thr->th.th_set_nth_teams = team -> t.t_threads[0] -> th.th_set_nth_teams;
4868 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4871 KMP_DEBUG_ASSERT( team -> t.t_implicit_task_taskdata );
4872 this_thr->th.th_task_state = 0;
4874 KF_TRACE( 10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4875 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4877 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
4879 KF_TRACE( 10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4880 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4882 #endif // OMP_30_ENABLED
4885 this_thr -> th.th_dispatch = &team -> t.t_dispatch[ tid ];
4887 this_thr->th.th_local.this_construct = 0;
4888 this_thr->th.th_local.last_construct = 0;
4891 this_thr->th.th_local.tv_data = 0;
4894 if ( ! this_thr->th.th_pri_common ) {
4895 this_thr->th.th_pri_common = (
struct common_table *) __kmp_allocate(
sizeof(
struct common_table) );
4896 if ( __kmp_storage_map ) {
4897 __kmp_print_storage_map_gtid(
4898 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4899 sizeof(
struct common_table ),
"th_%d.th_pri_common\n", gtid
4902 this_thr->th.th_pri_head = NULL;
4907 volatile kmp_disp_t *dispatch = this_thr -> th.th_dispatch;
4911 size_t disp_size =
sizeof( dispatch_private_info_t ) *
4912 ( team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF );
4913 KD_TRACE( 10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4914 KMP_ASSERT( dispatch );
4915 KMP_DEBUG_ASSERT( team -> t.t_dispatch );
4916 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4918 dispatch->th_disp_index = 0;
4920 if( ! dispatch -> th_disp_buffer ) {
4921 dispatch -> th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
4923 if ( __kmp_storage_map ) {
4924 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
4925 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF ],
4926 disp_size,
"th_%d.th_dispatch.th_disp_buffer "
4927 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4928 gtid, team->t.t_id, gtid );
4931 memset( & dispatch -> th_disp_buffer[0],
'\0', disp_size );
4934 dispatch -> th_dispatch_pr_current = 0;
4935 dispatch -> th_dispatch_sh_current = 0;
4937 dispatch -> th_deo_fcn = 0;
4938 dispatch -> th_dxo_fcn = 0;
4941 this_thr->th.th_next_pool = NULL;
4943 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4944 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
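/*
 * __kmp_allocate_thread: supply a worker for team slot new_tid, preferring a
 * thread parked in __kmp_thread_pool; otherwise pick a free gtid, allocate a
 * new kmp_info_t and serial team, start the monitor thread on first use, and
 * create the underlying OS worker thread.
 */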
4957 __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team,
int new_tid )
4959 kmp_team_t *serial_team;
4960 kmp_info_t *new_thr;
4963 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4964 KMP_DEBUG_ASSERT( root && team );
4965 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
4969 if ( __kmp_thread_pool ) {
4971 new_thr = (kmp_info_t*)__kmp_thread_pool;
4972 __kmp_thread_pool = (
volatile kmp_info_t *) new_thr->th.th_next_pool;
4973 if ( new_thr == __kmp_thread_pool_insert_pt ) {
4974 __kmp_thread_pool_insert_pt = NULL;
4976 TCW_4(new_thr->th.th_in_pool, FALSE);
4982 __kmp_thread_pool_nth--;
4984 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4985 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
4986 KMP_ASSERT( ! new_thr -> th.th_team );
4987 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4988 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4991 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4992 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4994 TCW_4(__kmp_nth, __kmp_nth + 1);
4996 #ifdef KMP_ADJUST_BLOCKTIME
4999 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5000 if ( __kmp_nth > __kmp_avail_proc ) {
5001 __kmp_zero_bt = TRUE;
5006 KF_TRACE( 10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
5007 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
5015 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
5016 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
5022 if ( ! TCR_4( __kmp_init_monitor ) ) {
5023 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5024 if ( ! TCR_4( __kmp_init_monitor ) ) {
5025 KF_TRACE( 10, (
"before __kmp_create_monitor\n" ) );
5026 TCW_4( __kmp_init_monitor, 1 );
5027 __kmp_create_monitor( & __kmp_monitor );
5028 KF_TRACE( 10, (
"after __kmp_create_monitor\n" ) );
5030 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5034 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
5035 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
5039 new_thr = (kmp_info_t*) __kmp_allocate(
sizeof(kmp_info_t) );
5041 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
5043 if ( __kmp_storage_map ) {
5044 __kmp_print_thread_storage_map( new_thr, new_gtid );
5050 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
5051 #endif // OMP_30_ENABLED
5052 KF_TRACE( 10, (
"__kmp_allocate_thread: before th_serial/serial_team\n" ) );
5053 new_thr -> th.th_serial_team = serial_team =
5054 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
5061 team->t.t_set_nproc[0],
5062 team->t.t_set_dynamic[0],
5063 team->t.t_set_nested[0],
5064 team->t.t_set_blocktime[0],
5065 team->t.t_set_bt_intervals[0],
5066 team->t.t_set_bt_set[0],
5070 KMP_ASSERT ( serial_team );
5071 serial_team -> t.t_threads[0] = new_thr;
5072 KF_TRACE( 10, (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
5076 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
5079 __kmp_initialize_fast_memory( new_thr );
5083 KMP_DEBUG_ASSERT( new_thr -> th.th_local.bget_data == NULL );
5084 __kmp_initialize_bget( new_thr );
5087 __kmp_init_random( new_thr );
5090 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
5091 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5093 new_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_go = KMP_INIT_BARRIER_STATE;
5094 new_thr->th.th_bar[ bs_plain_barrier ].bb.b_go = KMP_INIT_BARRIER_STATE;
5095 #if KMP_FAST_REDUCTION_BARRIER
5096 new_thr->th.th_bar[ bs_reduction_barrier ].bb.b_go = KMP_INIT_BARRIER_STATE;
5097 #endif // KMP_FAST_REDUCTION_BARRIER
5099 new_thr->th.th_spin_here = FALSE;
5100 new_thr->th.th_next_waiting = 0;
5102 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
5103 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
5104 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
5105 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
5106 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
5109 TCW_4(new_thr->th.th_in_pool, FALSE);
5110 new_thr->th.th_active_in_pool = FALSE;
5111 TCW_4(new_thr->th.th_active, TRUE);
5122 if ( __kmp_adjust_gtid_mode ) {
5123 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
5124 if ( TCR_4(__kmp_gtid_mode) != 2) {
5125 TCW_4(__kmp_gtid_mode, 2);
5129 if (TCR_4(__kmp_gtid_mode) != 1 ) {
5130 TCW_4(__kmp_gtid_mode, 1);
5135 #ifdef KMP_ADJUST_BLOCKTIME
5138 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5139 if ( __kmp_nth > __kmp_avail_proc ) {
5140 __kmp_zero_bt = TRUE;
5146 KF_TRACE( 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
5147 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
5148 KF_TRACE( 10, (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
5151 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
static void
__kmp_reinitialize_team(
    kmp_team_t *  team,
    int           new_nproc,
#if OMP_30_ENABLED
    kmp_internal_control_t * new_icvs,
    ident_t *                loc
#else
    int new_set_nproc, int new_set_dynamic, int new_set_nested,
    int new_set_blocktime, int new_bt_intervals, int new_bt_set
#endif // OMP_30_ENABLED
) {
    int f;

#if OMP_30_ENABLED
    KMP_DEBUG_ASSERT( team && new_nproc && new_icvs );
    KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
    team->t.t_ident = loc;
#else
    KMP_DEBUG_ASSERT( team && new_nproc && new_set_nproc );
#endif // OMP_30_ENABLED
5186 team->t.t_id = KMP_GEN_TEAM_ID();
5188 #if KMP_BARRIER_ICV_PULL
5193 load_icvs(new_icvs);
5194 store_icvs(&team->t.t_initial_icvs, new_icvs);
5202 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
5203 store_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
5205 KF_TRACE( 10, (
"__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n",
5206 0, team->t.t_threads[0], team ) );
5208 #elif KMP_BARRIER_ICV_PUSH
5213 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
5214 load_icvs(new_icvs);
5215 store_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
5218 KF_TRACE( 10, (
"__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n",
5219 0, team->t.t_threads[0], team ) );
5226 load_icvs(new_icvs);
5228 for( f=0 ; f<new_nproc ; f++) {
        KF_TRACE( 10, ( "__kmp_reinitialize_team1: T#%d this_thread=%p team=%p\n",
                        f, team->t.t_threads[f], team ) );
#if OMP_30_ENABLED
        __kmp_init_implicit_task( loc, team->t.t_threads[f], team, f, FALSE );
        store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
        KF_TRACE( 10, ( "__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n",
                        f, team->t.t_threads[f], team ) );
#else
        team -> t.t_set_nproc[f] = new_set_nproc;
5239 team -> t.t_set_dynamic[f] = new_set_dynamic;
5240 team -> t.t_set_nested[f] = new_set_nested;
5241 team -> t.t_set_blocktime[f] = new_set_blocktime;
5242 team -> t.t_set_bt_intervals[f] = new_bt_intervals;
5243 team -> t.t_set_bt_set[f] = new_bt_set;
5244 # endif // OMP_30_ENABLED
#endif // KMP_BARRIER_ICV_PUSH || KMP_BARRIER_ICV_PULL
}
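/* Initialize a team structure from scratch: reset master tid, serialization
   and barrier bookkeeping, locks, taskq and copyin state, then delegate ICV
   propagation to __kmp_reinitialize_team(). */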
static void
__kmp_initialize_team(
    kmp_team_t *  team,
    int           new_nproc,
#if OMP_30_ENABLED
    kmp_internal_control_t * new_icvs,
    ident_t *                loc
#else
    int new_set_nproc, int new_set_dynamic, int new_set_nested,
    int new_set_blocktime, int new_bt_intervals, int new_bt_set
#endif // OMP_30_ENABLED
) {
5269 KMP_DEBUG_ASSERT( team );
5270 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
5271 KMP_DEBUG_ASSERT( team->t.t_threads );
5274 team -> t.t_master_tid = 0;
5276 team -> t.t_serialized = new_nproc > 1 ? 0 : 1;
5277 team -> t.t_nproc = new_nproc;
5280 team -> t.t_next_pool = NULL;
5283 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
5284 team -> t.t_invoke = NULL;
5288 team -> t.t_sched = new_icvs->sched;
5289 #endif // OMP_30_ENABLED
5291 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
5292 team -> t.t_fp_control_saved = FALSE;
5293 team -> t.t_x87_fpu_control_word = 0;
5294 team -> t.t_mxcsr = 0;
5297 team -> t.t_construct = 0;
5298 __kmp_init_lock( & team -> t.t_single_lock );
5300 team -> t.t_ordered .dt.t_value = 0;
5301 team -> t.t_master_active = FALSE;
5303 memset( & team -> t.t_taskq,
'\0',
sizeof( kmp_taskq_t ));
5306 team -> t.t_copypriv_data = NULL;
5308 team -> t.t_copyin_counter = 0;
5310 team -> t.t_control_stack_top = NULL;
    __kmp_reinitialize_team( team, new_nproc,
#if OMP_30_ENABLED
        new_icvs, loc
#else
        new_set_nproc, new_set_dynamic, new_set_nested,
        new_set_blocktime, new_bt_intervals, new_bt_set
#endif // OMP_30_ENABLED
    );
}
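/* Temporarily give the calling thread the full initial affinity mask,
   saving the previous mask in *old_mask when requested, so that workers
   created while the mask is widened do not inherit a narrowed mask. */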
static void
__kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
{
    if ( KMP_AFFINITY_CAPABLE() ) {
        int status;
        if ( old_mask != NULL ) {
            status = __kmp_get_system_affinity( old_mask, TRUE );
            if ( status != 0 ) {
                __kmp_msg( kmp_ms_fatal, KMP_MSG( ChangeThreadAffMaskError ),
                           KMP_ERR( status ), __kmp_msg_null );
            }
        }
        __kmp_set_system_affinity( __kmp_affinity_get_fullMask(), TRUE );
    }
}
5350 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
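/* Distribute the master's place partition over the team according to the
   proc_bind policy: "master" keeps every thread on the master's place,
   "close" packs threads into consecutive places while keeping the full
   partition, and "spread" gives each thread its own sub-partition.  As an
   illustration, 4 threads spread over 8 places (master at place 0) end up
   with sub-partitions [0,1], [2,3], [4,5] and [6,7]. */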
static void
__kmp_partition_places( kmp_team_t *team )
{
    int f;
5364 kmp_info_t *master_th = team->t.t_threads[0];
5365 KMP_DEBUG_ASSERT( master_th != NULL );
5366 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
5367 int first_place = master_th->th.th_first_place;
5368 int last_place = master_th->th.th_last_place;
5369 int masters_place = master_th->th.th_current_place;
5370 team->t.t_first_place = first_place;
5371 team->t.t_last_place = last_place;
5373 KA_TRACE( 20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
5374 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
5375 masters_place, first_place, last_place ) );
5377 switch ( proc_bind ) {
5379 case proc_bind_default:
5385 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
5388 case proc_bind_master:
5391 int n_th = team->t.t_nproc;
5392 for ( f = 1; f < n_th; f++ ) {
5393 kmp_info_t *th = team->t.t_threads[f];
5394 KMP_DEBUG_ASSERT( th != NULL );
5395 th->th.th_first_place = first_place;
5396 th->th.th_last_place = last_place;
5397 th->th.th_new_place = masters_place;
5399 KA_TRACE( 100, (
"__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5400 __kmp_gtid_from_thread( team->t.t_threads[f] ),
5401 team->t.t_id, f, masters_place, first_place, last_place ) );
5406 case proc_bind_close:
5409 int n_th = team->t.t_nproc;
5411 if ( first_place <= last_place ) {
5412 n_places = last_place - first_place + 1;
5415 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
5417 if ( n_th <= n_places ) {
5418 int place = masters_place;
5419 for ( f = 1; f < n_th; f++ ) {
5420 kmp_info_t *th = team->t.t_threads[f];
5421 KMP_DEBUG_ASSERT( th != NULL );
5423 if ( place == last_place ) {
5424 place = first_place;
5426 else if ( place == __kmp_affinity_num_masks - 1) {
5432 th->th.th_first_place = first_place;
5433 th->th.th_last_place = last_place;
5434 th->th.th_new_place = place;
5436 KA_TRACE( 100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5437 __kmp_gtid_from_thread( team->t.t_threads[f] ),
5438 team->t.t_id, f, place, first_place, last_place ) );
5442 int S, rem, gap, s_count;
5443 S = n_th / n_places;
5445 rem = n_th - ( S * n_places );
5446 gap = rem > 0 ? n_places/rem : n_places;
5447 int place = masters_place;
5449 for ( f = 0; f < n_th; f++ ) {
5450 kmp_info_t *th = team->t.t_threads[f];
5451 KMP_DEBUG_ASSERT( th != NULL );
5453 th->th.th_first_place = first_place;
5454 th->th.th_last_place = last_place;
5455 th->th.th_new_place = place;
5458 if ( (s_count == S) && rem && (gap_ct == gap) ) {
5461 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
5463 if ( place == last_place ) {
5464 place = first_place;
5466 else if ( place == __kmp_affinity_num_masks - 1) {
5476 else if (s_count == S) {
5477 if ( place == last_place ) {
5478 place = first_place;
5480 else if ( place == __kmp_affinity_num_masks - 1) {
5490 KA_TRACE( 100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5491 __kmp_gtid_from_thread( team->t.t_threads[f] ),
5492 team->t.t_id, f, th->th.th_new_place, first_place,
5495 KMP_DEBUG_ASSERT( place == masters_place );
5500 case proc_bind_spread:
5503 int n_th = team->t.t_nproc;
5505 if ( first_place <= last_place ) {
5506 n_places = last_place - first_place + 1;
5509 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
5511 if ( n_th <= n_places ) {
5512 int place = masters_place;
5513 int S = n_places/n_th;
5514 int s_count, rem, gap, gap_ct;
5515 rem = n_places - n_th*S;
5516 gap = rem ? n_th/rem : 1;
5518 for ( f = 0; f < n_th; f++ ) {
5519 kmp_info_t *th = team->t.t_threads[f];
5520 KMP_DEBUG_ASSERT( th != NULL );
5522 th->th.th_first_place = place;
5523 th->th.th_new_place = place;
5525 while (s_count < S) {
5526 if ( place == last_place ) {
5527 place = first_place;
5529 else if ( place == __kmp_affinity_num_masks - 1) {
5537 if (rem && (gap_ct == gap)) {
5538 if ( place == last_place ) {
5539 place = first_place;
5541 else if ( place == __kmp_affinity_num_masks - 1) {
5550 th->th.th_last_place = place;
5553 if ( place == last_place ) {
5554 place = first_place;
5556 else if ( place == __kmp_affinity_num_masks - 1) {
5563 KA_TRACE( 100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5564 __kmp_gtid_from_thread( team->t.t_threads[f] ),
5565 team->t.t_id, f, th->th.th_new_place,
5566 th->th.th_first_place, th->th.th_last_place ) );
5568 KMP_DEBUG_ASSERT( place == masters_place );
5571 int S, rem, gap, s_count;
5572 S = n_th / n_places;
5574 rem = n_th - ( S * n_places );
5575 gap = rem > 0 ? n_places/rem : n_places;
5576 int place = masters_place;
5578 for ( f = 0; f < n_th; f++ ) {
5579 kmp_info_t *th = team->t.t_threads[f];
5580 KMP_DEBUG_ASSERT( th != NULL );
5582 th->th.th_first_place = place;
5583 th->th.th_last_place = place;
5584 th->th.th_new_place = place;
5587 if ( (s_count == S) && rem && (gap_ct == gap) ) {
5590 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
5592 if ( place == last_place ) {
5593 place = first_place;
5595 else if ( place == __kmp_affinity_num_masks - 1) {
5605 else if (s_count == S) {
5606 if ( place == last_place ) {
5607 place = first_place;
5609 else if ( place == __kmp_affinity_num_masks - 1) {
5619 KA_TRACE( 100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5620 __kmp_gtid_from_thread( team->t.t_threads[f] ),
5621 team->t.t_id, f, th->th.th_new_place,
5622 th->th.th_first_place, th->th.th_last_place) );
5624 KMP_DEBUG_ASSERT( place == masters_place );
    KA_TRACE( 20, ( "__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
}

#endif /* OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX) */
kmp_team_t *
__kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
#if OMP_40_ENABLED
    kmp_proc_bind_t new_proc_bind,
#endif
#if OMP_30_ENABLED
    kmp_internal_control_t *new_icvs,
#else
    int new_set_nproc, int new_set_dynamic, int new_set_nested,
    int new_set_blocktime, int new_bt_intervals, int new_bt_set,
#endif // OMP_30_ENABLED
    int argc )
{
    int f;
    kmp_team_t *team;

    KA_TRACE( 20, ( "__kmp_allocate_team: called\n" ));
5658 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
5659 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
5666 if ( ! root->r.r_active && new_nproc > 1 ) {
5668 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
5670 team = root -> r.r_hot_team;
5672 #if OMP_30_ENABLED && KMP_DEBUG
5673 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5674 KA_TRACE( 20, (
"__kmp_allocate_team: hot team task_team = %p before reinit\n",
5675 team -> t.t_task_team ));
5680 if( team -> t.t_nproc > new_nproc ) {
5681 KA_TRACE( 20, (
"__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
5684 team -> t.t_size_changed = 1;
5687 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5688 kmp_task_team_t *task_team = team->t.t_task_team;
5689 if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
5696 KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
5697 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
5700 KA_TRACE( 20, (
"__kmp_allocate_team: setting task_team %p to NULL\n",
5701 &team->t.t_task_team ) );
5702 team->t.t_task_team = NULL;
5705 KMP_DEBUG_ASSERT( task_team == NULL );
5708 #endif // OMP_30_ENABLED
5711 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
5712 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5713 __kmp_free_thread( team->t.t_threads[ f ] );
5714 team -> t.t_threads[ f ] = NULL;
5717 team -> t.t_nproc = new_nproc;
5720 team -> t.t_sched = new_icvs->sched;
            __kmp_reinitialize_team( team, new_nproc,
#if OMP_30_ENABLED
                new_icvs,
                root->r.r_uber_thread->th.th_ident
#else
                new_set_nproc, new_set_dynamic, new_set_nested,
                new_set_blocktime, new_bt_intervals, new_bt_set
#endif
            );
5733 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5734 kmp_task_team_t *task_team = team->t.t_task_team;
5735 if ( task_team != NULL ) {
5736 KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
5737 task_team->tt.tt_nproc = new_nproc;
5738 task_team->tt.tt_unfinished_threads = new_nproc;
5739 task_team->tt.tt_ref_ct = new_nproc - 1;
5745 for( f = 0 ; f < new_nproc ; f++ ) {
5746 team -> t.t_threads[ f ] -> th.th_team_nproc = team->t.t_nproc;
5751 KF_TRACE( 10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
5752 0, team->t.t_threads[0], team ) );
5754 __kmp_push_current_task_to_thread( team -> t.t_threads[ 0 ], team, 0 );
5758 for ( f = 0; f < team->t.t_nproc; f++ ) {
5759 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5760 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5765 team->t.t_proc_bind = new_proc_bind;
5766 # if KMP_OS_WINDOWS || KMP_OS_LINUX
5767 __kmp_partition_places( team );
5772 else if ( team -> t.t_nproc < new_nproc ) {
5774 kmp_affin_mask_t *old_mask;
5775 if ( KMP_AFFINITY_CAPABLE() ) {
5776 KMP_CPU_ALLOC(old_mask);
5780 KA_TRACE( 20, (
"__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
5783 team -> t.t_size_changed = 1;
5787 if(team -> t.t_max_nproc < new_nproc) {
5789 __kmp_reallocate_team_arrays(team, new_nproc);
                __kmp_reinitialize_team( team, new_nproc,
#if OMP_30_ENABLED
                    new_icvs,
                    root->r.r_uber_thread->th.th_ident
#else
                    new_set_nproc, new_set_dynamic, new_set_nested,
                    new_set_blocktime, new_bt_intervals, new_bt_set
#endif
                );
5808 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
5812 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
5813 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
5814 KMP_DEBUG_ASSERT( new_worker );
5815 team->t.t_threads[ f ] = new_worker;
5816 new_worker->th.th_team_nproc = team->t.t_nproc;
5818 KA_TRACE( 20, (
"__kmp_allocate_team: team %d init T#%d arrived: join=%u, plain=%u\n",
5819 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
5820 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5821 team->t.t_bar[bs_plain_barrier].b_arrived ) );
5825 kmp_balign_t * balign = new_worker->th.th_bar;
5826 for ( b = 0; b < bp_last_bar; ++ b ) {
5827 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5833 if ( KMP_AFFINITY_CAPABLE() ) {
5835 __kmp_set_system_affinity( old_mask, TRUE );
5836 KMP_CPU_FREE(old_mask);
            __kmp_initialize_team( team, new_nproc,
#if OMP_30_ENABLED
                new_icvs,
                root->r.r_uber_thread->th.th_ident
#else
                new_set_nproc, new_set_dynamic, new_set_nested,
                new_set_blocktime, new_bt_intervals, new_bt_set
#endif
            );
5852 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5853 kmp_task_team_t *task_team = team->t.t_task_team;
5854 if ( task_team != NULL ) {
5855 KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
5856 task_team->tt.tt_nproc = new_nproc;
5857 task_team->tt.tt_unfinished_threads = new_nproc;
5858 task_team->tt.tt_ref_ct = new_nproc - 1;
5864 for( f = 0 ; f < team->t.t_nproc ; f++ )
5865 __kmp_initialize_info( team->t.t_threads[ f ], team, f,
5866 __kmp_gtid_from_tid( f, team ) );
5868 for ( f = 0; f < team->t.t_nproc; ++ f ) {
5869 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5870 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5875 team->t.t_proc_bind = new_proc_bind;
5876 # if KMP_OS_WINDOWS || KMP_OS_LINUX
5877 __kmp_partition_places( team );
5883 KA_TRACE( 20, (
"__kmp_allocate_team: reusing hot team\n" ));
            if ( team -> t.t_size_changed == -1 ) {
                team -> t.t_size_changed = 1;
            }
            else {
                team -> t.t_size_changed = 0;
            }
5896 team -> t.t_sched = new_icvs->sched;
5899 __kmp_reinitialize_team( team, new_nproc,
5902 root->r.r_uber_thread->th.th_ident
5904 new_set_nproc, new_set_dynamic, new_set_nested,
5905 new_set_blocktime, new_bt_intervals, new_bt_set
5910 KF_TRACE( 10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
5911 0, team->t.t_threads[0], team ) );
5912 __kmp_push_current_task_to_thread( team -> t.t_threads[ 0 ], team, 0 );
5916 # if (KMP_OS_WINDOWS || KMP_OS_LINUX)
5917 if ( team->t.t_proc_bind == new_proc_bind ) {
5918 KA_TRACE( 200, (
"__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
5919 team->t.t_id, new_proc_bind, team->t.t_first_place,
5920 team->t.t_last_place ) );
5923 team->t.t_proc_bind = new_proc_bind;
5924 __kmp_partition_places( team );
5927 if ( team->t.t_proc_bind != new_proc_bind ) {
5928 team->t.t_proc_bind = new_proc_bind;
5935 __kmp_alloc_argv_entries( argc, team, TRUE );
5936 team -> t.t_argc = argc;
5942 KF_TRACE( 10, (
" hot_team = %p\n", team ) );
5944 #if OMP_30_ENABLED && KMP_DEBUG
5945 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
            KA_TRACE( 20, ( "__kmp_allocate_team: hot team task_team = %p after reinit\n",
                            team -> t.t_task_team ));
        }
#endif

        return team;
    }
5958 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5961 if ( team->t.t_max_nproc >= max_nproc ) {
5963 __kmp_team_pool = team->t.t_next_pool;
            __kmp_initialize_team( team, new_nproc,
#if OMP_30_ENABLED
                new_icvs,
                NULL
#else
                new_set_nproc, new_set_dynamic, new_set_nested,
                new_set_blocktime, new_bt_intervals, new_bt_set
#endif
            );
5977 KA_TRACE( 20, (
"__kmp_allocate_team: setting task_team %p to NULL\n",
5978 &team->t.t_task_team ) );
5979 team -> t.t_task_team = NULL;
5983 __kmp_alloc_argv_entries( argc, team, TRUE );
5984 team -> t.t_argc = argc;
5986 KA_TRACE( 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5987 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5990 for ( b = 0; b < bs_last_barrier; ++ b) {
5991 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
5996 team->t.t_proc_bind = new_proc_bind;
            KA_TRACE( 20, ( "__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));

            return team;
        }
6008 team = __kmp_reap_team( team );
6009 __kmp_team_pool = team;
6014 team = (kmp_team_t*) __kmp_allocate(
sizeof( kmp_team_t ) );
6017 team -> t.t_max_nproc = max_nproc;
6021 __kmp_allocate_team_arrays( team, max_nproc );
6022 __kmp_initialize_team( team, new_nproc,
6027 new_set_nproc, new_set_dynamic, new_set_nested,
6028 new_set_blocktime, new_bt_intervals, new_bt_set
6033 KA_TRACE( 20, (
"__kmp_allocate_team: setting task_team %p to NULL\n",
6034 &team->t.t_task_team ) );
6035 team -> t.t_task_team = NULL;
6038 if ( __kmp_storage_map ) {
6039 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc );
6043 __kmp_alloc_argv_entries( argc, team, FALSE );
6044 team -> t.t_argc = argc;
6046 KA_TRACE( 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
6047 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
6050 for ( b = 0; b < bs_last_barrier; ++ b ) {
6051 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
6056 team->t.t_proc_bind = new_proc_bind;
    KA_TRACE( 20, ( "__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));

    return team;
}
void
__kmp_free_team( kmp_root_t *root, kmp_team_t *team )
{
    int f;

    KA_TRACE( 20, ( "__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
6078 KMP_DEBUG_ASSERT( root );
6079 KMP_DEBUG_ASSERT( team );
6080 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
6081 KMP_DEBUG_ASSERT( team->t.t_threads );
6084 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
6085 team -> t.t_copyin_counter = 0;
6089 if( team != root->r.r_hot_team ) {
6092 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
6093 kmp_task_team_t *task_team = team->t.t_task_team;
6094 if ( task_team != NULL ) {
                KA_TRACE( 20, ( "__kmp_free_team: deactivating task_team %p\n",
                                task_team ) );
6103 KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
6104 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
6106 team->t.t_task_team = NULL;
6112 team -> t.t_parent = NULL;
6116 for ( f = 1; f < team->t.t_nproc; ++ f ) {
6117 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
6118 __kmp_free_thread( team->t.t_threads[ f ] );
6119 team->t.t_threads[ f ] = NULL;
6125 team -> t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
    __kmp_team_pool = (volatile kmp_team_t*) team;
}
kmp_team_t *
__kmp_reap_team( kmp_team_t *team )
{
    kmp_team_t *next_pool = team -> t.t_next_pool;
6139 KMP_DEBUG_ASSERT( team );
6140 KMP_DEBUG_ASSERT( team -> t.t_dispatch );
6141 KMP_DEBUG_ASSERT( team -> t.t_disp_buffer );
6142 KMP_DEBUG_ASSERT( team -> t.t_threads );
6145 KMP_DEBUG_ASSERT( team -> t.t_set_nproc );
6147 KMP_DEBUG_ASSERT( team -> t.t_argv );
6153 __kmp_free_team_arrays( team );
#if (KMP_PERF_V106 == KMP_ON)
    if ( team -> t.t_argv != &team -> t.t_inline_argv[0] )
        __kmp_free( (void*) team -> t.t_argv );
#else
    __kmp_free( (void*) team -> t.t_argv );
#endif
    __kmp_free( team );

    return next_pool;
}
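/* Return a worker to the thread free pool.  The pool is kept ordered by
   ascending gtid, and __kmp_thread_pool_insert_pt remembers the last
   insertion point so the ordered insert stays cheap when threads are freed
   in gtid order. */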
void
__kmp_free_thread( kmp_info_t *this_th )
{
    int gtid;
    kmp_info_t **scan;
6199 KA_TRACE( 20, (
"__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
6200 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
6202 KMP_DEBUG_ASSERT( this_th );
6206 TCW_PTR(this_th->th.th_team, NULL);
6207 TCW_PTR(this_th->th.th_root, NULL);
6208 TCW_PTR(this_th->th.th_dispatch, NULL);
6214 gtid = this_th->th.th_info.ds.ds_gtid;
6215 if ( __kmp_thread_pool_insert_pt != NULL ) {
6216 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
6217 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
6218 __kmp_thread_pool_insert_pt = NULL;
6229 if ( __kmp_thread_pool_insert_pt != NULL ) {
6230 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
6233 scan = (kmp_info_t **)&__kmp_thread_pool;
6235 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
6236 scan = &( (*scan)->th.th_next_pool ) );
6242 TCW_PTR(this_th->th.th_next_pool, *scan);
6243 __kmp_thread_pool_insert_pt = *scan = this_th;
6244 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
6245 || ( this_th->th.th_info.ds.ds_gtid
6246 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
6247 TCW_4(this_th->th.th_in_pool, TRUE);
6248 __kmp_thread_pool_nth++;
6250 TCW_4(__kmp_nth, __kmp_nth - 1);
6252 #ifdef KMP_ADJUST_BLOCKTIME
6255 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6256 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6257 if ( __kmp_nth <= __kmp_avail_proc ) {
6258 __kmp_zero_bt = FALSE;
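/* Internal fork/join barriers.  __kmp_join_barrier() is executed by every
   thread of a team at the end of a parallel region: all threads gather at
   the fork/join barrier, and the master additionally waits for the task
   team (if any) to drain before the region is torn down. */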
void
__kmp_join_barrier( int gtid )
{
    register kmp_info_t   *this_thr      = __kmp_threads[ gtid ];
    register kmp_team_t   *team;
    register kmp_uint      nproc;
    kmp_info_t            *master_thread;
    int                    tid;
    int                    team_id;
6278 void * itt_sync_obj = NULL;
6280 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
6281 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
6288 team = this_thr -> th.th_team;
6290 nproc = this_thr -> th.th_team_nproc;
6291 KMP_DEBUG_ASSERT( nproc == team->t.t_nproc );
6292 tid = __kmp_tid_from_gtid(gtid);
6294 team_id = team -> t.t_id;
6297 master_thread = this_thr -> th.th_team_master;
6299 if ( master_thread != team->t.t_threads[0] ) {
6300 __kmp_print_structure();
6303 KMP_DEBUG_ASSERT( master_thread == team->t.t_threads[0] );
6307 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
6308 KMP_DEBUG_ASSERT( TCR_PTR(this_thr->th.th_team) );
6309 KMP_DEBUG_ASSERT( TCR_PTR(this_thr->th.th_root) );
6310 KMP_DEBUG_ASSERT( this_thr == team -> t.t_threads[tid] );
6312 KA_TRACE( 10, (
"__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",
6313 gtid, team_id, tid ));
6317 if ( __kmp_tasking_mode == tskm_extra_barrier ) {
6318 __kmp_tasking_barrier( team, this_thr, gtid );
6320 KA_TRACE( 10, (
"__kmp_join_barrier: T#%d(%d:%d) past taking barrier\n",
6321 gtid, team_id, tid ));
6324 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
6325 KA_TRACE( 20, (
"__kmp_join_barrier: T#%d, old team = %d, old task_team = %p, th_task_team = %p\n",
6326 __kmp_gtid_from_thread( this_thr ), team_id, team -> t.t_task_team,
6327 this_thr->th.th_task_team ) );
6328 KMP_DEBUG_ASSERT( this_thr->th.th_task_team == team->t.t_task_team );
    if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
#if OMP_30_ENABLED
        this_thr -> th.th_team_bt_intervals = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
        this_thr -> th.th_team_bt_set       = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
#else
        this_thr -> th.th_team_bt_intervals = team -> t.t_set_bt_intervals[tid];
        this_thr -> th.th_team_bt_set       = team -> t.t_set_bt_set[tid];
#endif // OMP_30_ENABLED
    }
6363 if( KMP_MASTER_TID( tid ) && TCR_4(__kmp_init_monitor) < 2 ) {
6364 __kmp_wait_sleep( this_thr, (
volatile kmp_uint32*)&__kmp_init_monitor, 2, 0
6365 USE_ITT_BUILD_ARG( itt_sync_obj )
6371 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
6372 __kmp_itt_barrier_starting( gtid, itt_sync_obj );
    if ( __kmp_barrier_gather_pattern[ bs_forkjoin_barrier ] == bp_linear_bar || __kmp_barrier_gather_branch_bits[ bs_forkjoin_barrier ] == 0 ) {
        __kmp_linear_barrier_gather( bs_forkjoin_barrier, this_thr, gtid, tid, NULL
                                     USE_ITT_BUILD_ARG( itt_sync_obj )
                                     );
    }
    else if ( __kmp_barrier_gather_pattern[ bs_forkjoin_barrier ] == bp_tree_bar ) {
        __kmp_tree_barrier_gather( bs_forkjoin_barrier, this_thr, gtid, tid, NULL
                                   USE_ITT_BUILD_ARG( itt_sync_obj )
                                   );
    }
    else {
        __kmp_hyper_barrier_gather( bs_forkjoin_barrier, this_thr, gtid, tid, NULL
                                    USE_ITT_BUILD_ARG( itt_sync_obj )
                                    );
    }
6390 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
6391 __kmp_itt_barrier_middle( gtid, itt_sync_obj );
6404 if ( KMP_MASTER_TID( tid ) ) {
6405 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
6408 __kmp_task_team_wait( this_thr, team
6409 USE_ITT_BUILD_ARG( itt_sync_obj )
6415 if( __kmp_forkjoin_frames_mode == 1 && __kmp_itt_csv_file )
6417 ident_t * loc = this_thr->th.th_ident;
6421 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->
psource, 1 );
6424 #if defined( __GNUC__ )
6425 # if !defined( __INTEL_COMPILER )
6426 fr_end = __kmp_hardware_timestamp();
6433 K_DIAG( 3, (
"__kmp_join_barrier: T#%d(%d:%d) frame_begin = %llu, frame_end = %llu\n",
6434 gtid, ( team != NULL ) ? team->t.t_id : -1, tid, this_thr->th.th_frame_time, fr_end ) );
6436 __kmp_str_buf_print( &__kmp_itt_frame_buffer,
"%s$omp$frame@%s:%d:%d,%llu,%llu,,\n",
6437 str_loc.func, str_loc.file, str_loc.line, str_loc.col, this_thr->th.th_frame_time, fr_end );
6439 __kmp_str_loc_free( &str_loc );
6447 if( KMP_MASTER_TID( tid )) {
6448 KA_TRACE( 15, (
"__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
6449 gtid, team_id, tid, nproc ));
    KA_TRACE( 10, ( "__kmp_join_barrier: T#%d(%d:%d) leaving\n",
                    gtid, team_id, tid ));
}
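/* __kmp_fork_barrier() is executed at the start of a parallel region and by
   workers waiting between regions.  The master checks the workers' barrier
   state and sets up the task team; workers block until released, then pick
   up the new team's ICVs, task team and affinity place. */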
void
__kmp_fork_barrier( int gtid, int tid )
{
6466 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6467 kmp_team_t *team = ( tid == 0 ) ? this_thr -> th.th_team : NULL;
6469 void * itt_sync_obj = NULL;
6472 KA_TRACE( 10, (
"__kmp_fork_barrier: T#%d(%d:%d) has arrived\n",
6473 gtid, ( team != NULL ) ? team->t.t_id : -1, tid ));
6476 if ( KMP_MASTER_TID( tid ) ) {
6478 #if USE_ITT_BUILD && USE_ITT_NOTIFY
6479 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
6480 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 1 );
6482 __kmp_itt_barrier_middle( gtid, itt_sync_obj );
6488 register kmp_info_t **other_threads = team -> t.t_threads;
6494 for( i = 1; i < team -> t.t_nproc ; i++ ) {
6495 KA_TRACE( 500, (
"__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork "
6497 gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
6498 team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
6499 other_threads[i]->th.th_bar[ bs_forkjoin_barrier ].bb.b_go ) );
6501 KMP_DEBUG_ASSERT( ( TCR_4( other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go )
6502 & ~(KMP_BARRIER_SLEEP_STATE) )
6503 == KMP_INIT_BARRIER_STATE );
6504 KMP_DEBUG_ASSERT( other_threads[i]->th.th_team == team );
6510 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
6511 __kmp_task_team_setup( this_thr, team );
6525 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
6527 this_thr -> th.th_team_bt_intervals = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
6528 this_thr -> th.th_team_bt_set = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
6530 this_thr -> th.th_team_bt_intervals = team -> t.t_set_bt_intervals[tid];
6531 this_thr -> th.th_team_bt_set= team -> t.t_set_bt_set[tid];
6532 #endif // OMP_30_ENABLED
    if ( __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] == bp_linear_bar || __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] == 0 ) {
        __kmp_linear_barrier_release( bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
                                      USE_ITT_BUILD_ARG( itt_sync_obj )
                                      );
    }
    else if ( __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] == bp_tree_bar ) {
        __kmp_tree_barrier_release( bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
                                    USE_ITT_BUILD_ARG( itt_sync_obj )
                                    );
    }
    else {
        __kmp_hyper_barrier_release( bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
                                     USE_ITT_BUILD_ARG( itt_sync_obj )
                                     );
    }
6553 if ( TCR_4(__kmp_global.g.g_done) ) {
6556 if ( this_thr->th.th_task_team != NULL ) {
6557 if ( KMP_MASTER_TID( tid ) ) {
6558 TCW_PTR(this_thr->th.th_task_team, NULL);
6561 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
6566 #if USE_ITT_BUILD && USE_ITT_NOTIFY
6567 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
6568 if ( !KMP_MASTER_TID( tid ) ) {
6569 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
6571 __kmp_itt_barrier_finished( gtid, itt_sync_obj );
6575 KA_TRACE( 10, (
"__kmp_fork_barrier: T#%d is leaving early\n", gtid ));
6586 team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
6587 KMP_DEBUG_ASSERT( team != NULL );
6588 tid = __kmp_tid_from_gtid( gtid );
6592 # if KMP_BARRIER_ICV_PULL
6598 if (! KMP_MASTER_TID( tid ) ) {
        __kmp_init_implicit_task( team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE );
        load_icvs(&team->t.t_initial_icvs);
        store_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &team->t.t_initial_icvs);
    }
6609 # endif // KMP_BARRIER_ICV_PULL
6611 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
6612 __kmp_task_team_sync( this_thr, team );
6617 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
6618 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
6619 if ( proc_bind == proc_bind_intel ) {
6625 if( __kmp_affinity_type == affinity_balanced && team->t.t_size_changed ) {
6626 __kmp_balanced_affinity( tid, team->t.t_nproc );
6629 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
6631 else if ( ( proc_bind != proc_bind_false )
6632 && ( proc_bind != proc_bind_disabled )) {
6633 if ( this_thr->th.th_new_place == this_thr->th.th_current_place ) {
6634 KA_TRACE( 100, (
"__kmp_fork_barrier: T#%d already in correct place %d\n",
6635 __kmp_gtid_from_thread( this_thr ), this_thr->th.th_current_place ) );
6638 __kmp_affinity_set_place( gtid );
6643 #if USE_ITT_BUILD && USE_ITT_NOTIFY
6644 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
6645 if ( !KMP_MASTER_TID( tid ) ) {
6646 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
6647 __kmp_itt_barrier_finished( gtid, itt_sync_obj );
    KA_TRACE( 10, ( "__kmp_fork_barrier: T#%d(%d:%d) is leaving\n",
                    gtid, team->t.t_id, tid ));
}
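/* Main loop of a worker thread: wait at the fork barrier for work, invoke
   the team's microtask through t_invoke, pass through the join barrier, and
   repeat until global shutdown (g_done) is signalled. */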
6660 __kmp_launch_thread( kmp_info_t *this_thr )
6662 int gtid = this_thr->th.th_info.ds.ds_gtid;
    int rc;
    kmp_team_t *(*volatile pteam);
6667 KA_TRACE( 10, (
"__kmp_launch_thread: T#%d start\n", gtid ) );
6669 if( __kmp_env_consistency_check ) {
6670 this_thr -> th.th_cons = __kmp_allocate_cons_stack( gtid );
6674 while( ! TCR_4(__kmp_global.g.g_done) ) {
6675 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
6679 KA_TRACE( 20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid ));
6682 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
6684 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
6687 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
6689 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
6691 KA_TRACE( 20, (
"__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
6692 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn ));
6694 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6695 if ( __kmp_inherit_fp_control && (*pteam)->t.t_fp_control_saved ) {
6696 __kmp_clear_x87_fpu_status_word();
6697 __kmp_load_x87_fpu_control_word( &(*pteam)->t.t_x87_fpu_control_word );
6698 __kmp_load_mxcsr( &(*pteam)->t.t_mxcsr );
6702 rc = (*pteam) -> t.t_invoke( gtid );
6706 KA_TRACE( 20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6707 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn ));
6711 __kmp_join_barrier( gtid );
6714 TCR_SYNC_PTR(__kmp_global.g.g_done);
6717 if ( TCR_PTR( this_thr->th.th_task_team ) != NULL ) {
6718 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
6723 __kmp_common_destroy_gtid( gtid );
6725 KA_TRACE( 10, (
"__kmp_launch_thread: T#%d done\n", gtid ) );
void
__kmp_internal_end_dest( void *specific_gtid )
{
6738 #ifdef __INTEL_COMPILER
6739 #pragma warning( push )
6740 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
6743 int gtid = (kmp_intptr_t)specific_gtid - 1;
6744 #ifdef __INTEL_COMPILER
6745 #pragma warning( pop )
6748 KA_TRACE( 30, (
"__kmp_internal_end_dest: T#%d\n", gtid));
6762 if(gtid >= 0 && KMP_UBER_GTID(gtid))
6763 __kmp_gtid_set_specific( gtid );
6764 #ifdef KMP_TDATA_GTID
6767 __kmp_internal_end_thread( gtid );
6770 #if KMP_OS_UNIX && GUIDEDLL_EXPORTS
__attribute__(( destructor ))
void
__kmp_internal_end_dtor( void )
{
    __kmp_internal_end_atexit();
}

void
__kmp_internal_end_fini( void )
{
    __kmp_internal_end_atexit();
}

#endif
void
__kmp_internal_end_atexit( void )
{
    KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
6817 __kmp_internal_end_library( -1 );
6819 __kmp_close_console();
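/* Reap a single thread: wake it from the fork barrier if it may be sleeping,
   join the underlying OS thread, and free its per-thread resources (fast
   memory pool, bget arena, consistency-check stack, affinity mask, and its
   serial team). */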
static void
__kmp_reap_thread(
    kmp_info_t * thread,
    int is_root
) {
    int gtid;

    KMP_DEBUG_ASSERT( thread != NULL );

    gtid = thread->th.th_info.ds.ds_gtid;
6839 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
6841 KA_TRACE( 20, (
"__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
6845 &thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go,
6852 __kmp_reap_worker( thread );
6867 if ( thread->th.th_active_in_pool ) {
6868 thread->th.th_active_in_pool = FALSE;
6869 KMP_TEST_THEN_DEC32(
6870 (kmp_int32 *) &__kmp_thread_pool_active_nth );
6871 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
6875 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
6876 --__kmp_thread_pool_nth;
6881 __kmp_free_fast_memory( thread );
6884 __kmp_suspend_uninitialize_thread( thread );
6886 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
6887 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6892 #ifdef KMP_ADJUST_BLOCKTIME
6895 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6896 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6897 if ( __kmp_nth <= __kmp_avail_proc ) {
6898 __kmp_zero_bt = FALSE;
6904 if( __kmp_env_consistency_check ) {
6905 if ( thread->th.th_cons ) {
6906 __kmp_free_cons_stack( thread->th.th_cons );
6907 thread->th.th_cons = NULL;
6911 if ( thread->th.th_pri_common != NULL ) {
6912 __kmp_free( thread->th.th_pri_common );
6913 thread->th.th_pri_common = NULL;
6917 if ( thread->th.th_local.bget_data != NULL ) {
6918 __kmp_finalize_bget( thread );
6922 #if (KMP_OS_WINDOWS || KMP_OS_LINUX)
6923 if ( thread->th.th_affin_mask != NULL ) {
6924 KMP_CPU_FREE( thread->th.th_affin_mask );
6925 thread->th.th_affin_mask = NULL;
6929 __kmp_reap_team( thread->th.th_serial_team );
6930 thread->th.th_serial_team = NULL;
6931 __kmp_free( thread );
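/* Common shutdown worker: unregister the library, reap dead roots, and, if
   no root is still active, reap the monitor thread, all pooled worker
   threads and teams, and the task teams, then clear the init flags. */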
static void
__kmp_internal_end( void )
{
    int i;
6943 __kmp_unregister_library();
6951 __kmp_reclaim_dead_roots();
    for( i=0 ; i<__kmp_threads_capacity ; i++ )
        if( __kmp_root[i] )
            if( __kmp_root[i] -> r.r_active )
                break;
    TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6961 if ( i < __kmp_threads_capacity ) {
6979 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
6980 if ( TCR_4( __kmp_init_monitor ) ) {
6981 __kmp_reap_monitor( & __kmp_monitor );
6982 TCW_4( __kmp_init_monitor, 0 );
6984 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
6985 KA_TRACE( 10, (
"__kmp_internal_end: monitor reaped\n" ) );
6990 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6991 if( __kmp_root[i] ) {
6992 KMP_ASSERT( ! KMP_UBER_GTID( i ) );
6993 KMP_ASSERT( ! __kmp_root[i] -> r.r_active );
7002 while ( __kmp_thread_pool != NULL ) {
7004 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
7005 __kmp_thread_pool = thread->th.th_next_pool;
7007 thread->th.th_next_pool = NULL;
7008 thread->th.th_in_pool = FALSE;
7009 __kmp_reap_thread( thread, 0 );
7011 __kmp_thread_pool_insert_pt = NULL;
7014 while ( __kmp_team_pool != NULL ) {
7016 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
7017 __kmp_team_pool = team->t.t_next_pool;
7019 team->t.t_next_pool = NULL;
7020 __kmp_reap_team( team );
7024 __kmp_reap_task_teams( );
7027 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
7034 TCW_SYNC_4(__kmp_init_common, FALSE);
7036 KA_TRACE( 10, (
"__kmp_internal_end: all workers reaped\n" ) );
7045 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
7046 if ( TCR_4( __kmp_init_monitor ) ) {
7047 __kmp_reap_monitor( & __kmp_monitor );
7048 TCW_4( __kmp_init_monitor, 0 );
7050 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
7051 KA_TRACE( 10, (
"__kmp_internal_end: monitor reaped\n" ) );
7054 TCW_4(__kmp_init_gtid, FALSE);
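/* Entry point for full library shutdown (atexit()/destructor path).  The
   dispatch below works out who the caller is (an active root, in which case
   shutdown is aborted; an uber thread to unregister; or an unknown/worker
   thread) before taking the bootstrap locks and running __kmp_internal_end(). */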
void
__kmp_internal_end_library( int gtid_req )
{
    /* if we have already finished or aborted, don't try again */
    if( __kmp_global.g.g_abort ) {
        KA_TRACE( 11, ( "__kmp_internal_end_library: abort, exiting\n" ));
        return;
    }
    if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
        KA_TRACE( 10, ( "__kmp_internal_end_library: already finished\n" ));
        return;
    }

    /* find out who we are and what we should do */
    {
        int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
        KA_TRACE( 10, ( "__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
        if( gtid == KMP_GTID_SHUTDOWN ) {
            KA_TRACE( 10, ( "__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
            return;
        }
        else if( gtid == KMP_GTID_MONITOR ) {
            KA_TRACE( 10, ( "__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
            return;
        }
        else if( gtid == KMP_GTID_DNE ) {
            KA_TRACE( 10, ( "__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
            /* we don't know who we are, but we may still shut down the library */
        }
        else if( KMP_UBER_GTID( gtid )) {
            /* unregister ourselves as an uber thread; gtid is no longer valid */
            if( __kmp_root[gtid] -> r.r_active ) {
                __kmp_global.g.g_abort = -1;
                TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
                KA_TRACE( 10, ( "__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
                return;
            }
            else {
                KA_TRACE( 10, ( "__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
                __kmp_unregister_root_current_thread( gtid );
            }
        }
        else {
            /* worker threads may reach this through the atexit handler */
#ifdef DUMP_DEBUG_ON_EXIT
            if ( __kmp_debug_buf )
                __kmp_dump_debug_buffer( );
#endif
            return;
        }
    }
7122 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7125 if( __kmp_global.g.g_abort ) {
7126 KA_TRACE( 10, (
"__kmp_internal_end_library: abort, exiting\n" ));
7128 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7131 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
7132 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7142 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
7145 __kmp_internal_end();
7147 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
7148 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7150 KA_TRACE( 10, (
"__kmp_internal_end_library: exit\n" ) );
7152 #ifdef DUMP_DEBUG_ON_EXIT
7153 if ( __kmp_debug_buf )
7154 __kmp_dump_debug_buffer();
7158 __kmp_close_console();
7161 __kmp_fini_allocator();
void
__kmp_internal_end_thread( int gtid_req )
{
    int i;

    /* if we have already finished or aborted, don't try again */
    if( __kmp_global.g.g_abort ) {
        KA_TRACE( 11, ( "__kmp_internal_end_thread: abort, exiting\n" ));
        return;
    }
    if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
        KA_TRACE( 10, ( "__kmp_internal_end_thread: already finished\n" ));
        return;
    }

    /* find out who we are and what we should do */
    {
        int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
        KA_TRACE( 10, ( "__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
        if( gtid == KMP_GTID_SHUTDOWN ) {
            KA_TRACE( 10, ( "__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
            return;
        }
        else if( gtid == KMP_GTID_MONITOR ) {
            KA_TRACE( 10, ( "__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
            return;
        }
        else if( gtid == KMP_GTID_DNE ) {
            KA_TRACE( 10, ( "__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
            return;
        }
        else if( KMP_UBER_GTID( gtid )) {
            /* unregister ourselves as an uber thread; gtid is no longer valid */
            if( __kmp_root[gtid] -> r.r_active ) {
                __kmp_global.g.g_abort = -1;
                TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
                KA_TRACE( 10, ( "__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
                return;
            }
            else {
                KA_TRACE( 10, ( "__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
                __kmp_unregister_root_current_thread( gtid );
            }
        }
        else {
            /* just a worker thread; no need for the full shutdown */
            KA_TRACE( 10, ( "__kmp_internal_end_thread: worker thread T#%d\n", gtid ));

            if ( gtid >= 0 ) {
                kmp_info_t *this_thr = __kmp_threads[ gtid ];
                if (TCR_PTR(this_thr->th.th_task_team) != NULL) {
                    __kmp_unref_task_team(this_thr->th.th_task_team, this_thr);
                }
            }

            KA_TRACE( 10, ( "__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
            return;
        }
    }
7230 #if defined GUIDEDLL_EXPORTS
    KA_TRACE( 10, ( "__kmp_internal_end_thread: exiting\n") );
    return;
#endif
7242 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7245 if( __kmp_global.g.g_abort ) {
7246 KA_TRACE( 10, (
"__kmp_internal_end_thread: abort, exiting\n" ));
7248 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7251 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
7252 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7264 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
7266 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
7267 if ( KMP_UBER_GTID( i ) ) {
7268 KA_TRACE( 10, (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
7269 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
7270 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7277 __kmp_internal_end();
7279 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
7280 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7282 KA_TRACE( 10, (
"__kmp_internal_end_thread: exit\n" ) );
7284 #ifdef DUMP_DEBUG_ON_EXIT
7285 if ( __kmp_debug_buf )
7286 __kmp_dump_debug_buffer();
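/* Library registration.  To detect a second copy of the runtime in the same
   process, the first instance publishes an environment variable named
   "__KMP_REGISTERED_LIB_<pid>" whose value encodes the address and contents
   of __kmp_registration_flag together with the library file name (the
   "%p-%lx-%s" string built below).  A later instance that finds a live flag
   reports a duplicate-library error unless KMP_DUPLICATE_LIB_OK is set. */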
7293 static long __kmp_registration_flag = 0;
7295 static char * __kmp_registration_str = NULL;
static char *
__kmp_reg_status_name() {
    return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
}
void
__kmp_register_library_startup( void ) {

    char * name = __kmp_reg_status_name();
    union {
        double dtime;
        long   ltime;
    } time;
    __kmp_initialize_system_tick();
    __kmp_read_system_time( & time.dtime );
    __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
    __kmp_registration_str =
        __kmp_str_format(
            "%p-%lx-%s",
            & __kmp_registration_flag,
            __kmp_registration_flag,
            KMP_LIBRARY_FILE
        );

    KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
7339 char * value = NULL;
7342 __kmp_env_set( name, __kmp_registration_str, 0 );
7344 value = __kmp_env_get( name );
7345 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
            char * tail = value;
            char * flag_addr_str = NULL;
            char * flag_val_str  = NULL;
            char const * file_name = NULL;
            __kmp_str_split( tail, '-', & flag_addr_str, & tail );
            __kmp_str_split( tail, '-', & flag_val_str,  & tail );
            file_name = tail;
            if ( tail != NULL ) {
                long * flag_addr = 0;
                long   flag_val  = 0;
                sscanf( flag_addr_str, "%p",  & flag_addr );
                sscanf( flag_val_str,  "%lx", & flag_val  );
                if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
7370 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
7378 switch ( neighbor ) {
7383 file_name =
"unknown library";
7387 char * duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK" );
7388 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
                    __kmp_msg(
                        kmp_ms_fatal,
                        KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
                        KMP_HNT( DuplicateLibrary ),
                        __kmp_msg_null
                    );
                }
                KMP_INTERNAL_FREE( duplicate_ok );
7398 __kmp_duplicate_library_ok = 1;
7403 __kmp_env_unset( name );
7406 KMP_DEBUG_ASSERT( 0 );
7411 KMP_INTERNAL_FREE( (
void *) value );
7414 KMP_INTERNAL_FREE( (
void *) name );
static void
__kmp_unregister_library( void ) {
7422 char * name = __kmp_reg_status_name();
7423 char * value = __kmp_env_get( name );
7425 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
7426 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
7427 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
7429 __kmp_env_unset( name );
7432 KMP_INTERNAL_FREE( __kmp_registration_str );
7433 KMP_INTERNAL_FREE( value );
7434 KMP_INTERNAL_FREE( name );
7436 __kmp_registration_flag = 0;
7437 __kmp_registration_str = NULL;
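/* One-time serial initialization: validate primitive type sizes and locks,
   create the global bootstrap/atomic locks, establish default block time,
   barrier branch bits and patterns, allocate __kmp_threads/__kmp_root, and
   register the initial thread as the first (uber) root. */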
static void
__kmp_do_serial_initialize( void )
{
    int i, gtid;
    int size;

    KA_TRACE( 10, ( "__kmp_serial_initialize: enter\n" ) );
    KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
    KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
    KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
    KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
    KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
7459 __kmp_validate_locks();
7462 __kmp_init_allocator();
7468 __kmp_register_library_startup( );
7471 if( TCR_4(__kmp_global.g.g_done) ) {
7472 KA_TRACE( 10, (
"__kmp_do_serial_initialize: reinitialization of library\n" ) );
7475 __kmp_global.g.g_abort = 0;
7476 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7479 #if KMP_USE_ADAPTIVE_LOCKS
7480 #if KMP_DEBUG_ADAPTIVE_LOCKS
7481 __kmp_init_speculative_stats();
7484 __kmp_init_lock( & __kmp_global_lock );
7485 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
7486 __kmp_init_lock( & __kmp_debug_lock );
7487 __kmp_init_atomic_lock( & __kmp_atomic_lock );
7488 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
7489 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
7490 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
7491 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
7492 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
7493 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
7494 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
7495 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
7496 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
7497 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
7498 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
7499 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
7500 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
7501 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
7502 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
7503 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
7507 __kmp_runtime_initialize();
7513 __kmp_abort_delay = 0;
7517 __kmp_dflt_team_nth_ub = __kmp_xproc;
7518 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
7519 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7521 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
7522 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7524 __kmp_max_nth = __kmp_sys_max_nth;
7525 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
7528 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7529 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
7530 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
7532 __kmp_library = library_throughput;
7534 __kmp_static = kmp_sch_static_balanced;
7539 #endif // OMP_30_ENABLED
7542 #if KMP_FAST_REDUCTION_BARRIER
7543 #define kmp_reduction_barrier_gather_bb ((int)1)
7544 #define kmp_reduction_barrier_release_bb ((int)1)
7545 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
7546 #define kmp_reduction_barrier_release_pat bp_hyper_bar
7547 #endif // KMP_FAST_REDUCTION_BARRIER
7548 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
7549 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
7550 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
7551 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
7552 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
7553 #if KMP_FAST_REDUCTION_BARRIER
7554 if( i == bs_reduction_barrier ) {
7555 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
7556 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
7557 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
7558 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
7560 #endif // KMP_FAST_REDUCTION_BARRIER
7562 #if KMP_FAST_REDUCTION_BARRIER
7563 #undef kmp_reduction_barrier_release_pat
7564 #undef kmp_reduction_barrier_gather_pat
7565 #undef kmp_reduction_barrier_release_bb
7566 #undef kmp_reduction_barrier_gather_bb
7567 #endif // KMP_FAST_REDUCTION_BARRIER
7570 __kmp_barrier_gather_branch_bits [ 0 ] = 3;
7571 __kmp_barrier_release_branch_bits[ 1 ] = 1;
7576 __kmp_env_checks = TRUE;
7578 __kmp_env_checks = FALSE;
7582 __kmp_foreign_tp = TRUE;
7584 __kmp_global.g.g_dynamic = FALSE;
7585 __kmp_global.g.g_dynamic_mode = dynamic_default;
7587 __kmp_env_initialize( NULL );
7590 char const * val = __kmp_env_get(
"KMP_DUMP_CATALOG" );
7591 if ( __kmp_str_match_true( val ) ) {
7592 kmp_str_buf_t buffer;
7593 __kmp_str_buf_init( & buffer );
7594 __kmp_i18n_dump_catalog( buffer );
7595 __kmp_printf(
"%s", buffer.str );
7596 __kmp_str_buf_free( & buffer );
7598 __kmp_env_free( & val );
7602 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7609 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
7610 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
7611 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
7612 __kmp_thread_pool = NULL;
7613 __kmp_thread_pool_insert_pt = NULL;
7614 __kmp_team_pool = NULL;
    size = ( sizeof(kmp_info_t*) + sizeof(kmp_root_t*) ) * __kmp_threads_capacity + CACHE_LINE;
    __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
    __kmp_root    = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
7624 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 );
7625 KMP_DEBUG_ASSERT( __kmp_nth == 0 );
7630 gtid = __kmp_register_root( TRUE );
7631 KA_TRACE( 10, (
"__kmp_do_serial_initialize T#%d\n", gtid ));
7632 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
7633 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
7637 __kmp_common_initialize();
7641 __kmp_register_atfork();
7644 #if ! defined GUIDEDLL_EXPORTS
7649 int rc = atexit( __kmp_internal_end_atexit );
7651 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError,
"atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
7656 #if KMP_HANDLE_SIGNALS
7663 __kmp_install_signals( FALSE );
7666 __kmp_install_signals( TRUE );
7671 __kmp_init_counter ++;
7673 __kmp_init_serial = TRUE;
7675 if (__kmp_settings) {
7680 if (__kmp_display_env || __kmp_display_env_verbose) {
7681 __kmp_env_print_2();
7683 #endif // OMP_40_ENABLED
    KA_TRACE( 10, ( "__kmp_do_serial_initialize: exit\n" ) );
}
void
__kmp_serial_initialize( void )
{
    if ( __kmp_init_serial ) {
        return;
    }
    __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
    if ( __kmp_init_serial ) {
        __kmp_release_bootstrap_lock( &__kmp_initz_lock );
        return;
    }
    __kmp_do_serial_initialize();
    __kmp_release_bootstrap_lock( &__kmp_initz_lock );
}
static void
__kmp_do_middle_initialize( void )
{
    int i, j;
    int prev_dflt_team_nth;
7711 if( !__kmp_init_serial ) {
7712 __kmp_do_serial_initialize();
7715 KA_TRACE( 10, (
"__kmp_middle_initialize: enter\n" ) );
7721 prev_dflt_team_nth = __kmp_dflt_team_nth;
7723 #if KMP_OS_WINDOWS || KMP_OS_LINUX
7728 __kmp_affinity_initialize();
7734 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
7735 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
7736 __kmp_affinity_set_init_mask( i, TRUE );
7741 KMP_ASSERT( __kmp_xproc > 0 );
7742 if ( __kmp_avail_proc == 0 ) {
7743 __kmp_avail_proc = __kmp_xproc;
7748 while ( __kmp_nested_nth.used && ! __kmp_nested_nth.nth[ j ] ) {
7749 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
7753 if ( __kmp_dflt_team_nth == 0 ) {
7754 #ifdef KMP_DFLT_NTH_CORES
7758 __kmp_dflt_team_nth = __kmp_ncores;
7759 KA_TRACE( 20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
7760 __kmp_dflt_team_nth ) );
7765 __kmp_dflt_team_nth = __kmp_avail_proc;
7766 KA_TRACE( 20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
7767 __kmp_dflt_team_nth ) );
7771 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
7772 __kmp_dflt_team_nth = KMP_MIN_NTH;
7774 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
7775 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7782 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
7784 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
        for ( i = 0; i < __kmp_threads_capacity; i++ ) {
            kmp_info_t *thread = __kmp_threads[ i ];
            if ( thread == NULL ) continue;
#if OMP_30_ENABLED
            if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
#else
            if ( thread->th.th_team->t.t_set_nproc[ thread->th.th_info.ds.ds_tid ] != 0 ) continue;
#endif
            set__nproc_p( __kmp_threads[ i ], __kmp_dflt_team_nth );
        }
7803 KA_TRACE( 20, (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7804 __kmp_dflt_team_nth) );
7806 #ifdef KMP_ADJUST_BLOCKTIME
7809 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
7810 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
7811 if ( __kmp_nth > __kmp_avail_proc ) {
7812 __kmp_zero_bt = TRUE;
7818 TCW_SYNC_4(__kmp_init_middle, TRUE);
7820 KA_TRACE( 10, (
"__kmp_do_middle_initialize: exit\n" ) );
void
__kmp_middle_initialize( void )
{
    if ( __kmp_init_middle ) {
        return;
    }
    __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
    if ( __kmp_init_middle ) {
        __kmp_release_bootstrap_lock( &__kmp_initz_lock );
        return;
    }
    __kmp_do_middle_initialize();
    __kmp_release_bootstrap_lock( &__kmp_initz_lock );
}
void
__kmp_parallel_initialize( void )
{
    int gtid = __kmp_entry_gtid();

    if( TCR_4(__kmp_init_parallel) ) return;
    __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
    if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
7849 if( TCR_4(__kmp_global.g.g_done) ) {
7850 KA_TRACE( 10, (
"__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
7851 __kmp_infinite_loop();
7857 if( !__kmp_init_middle ) {
7858 __kmp_do_middle_initialize();
7862 KA_TRACE( 10, (
"__kmp_parallel_initialize: enter\n" ) );
7863 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
7865 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
7870 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
7871 __kmp_store_mxcsr( &__kmp_init_mxcsr );
7872 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7876 # if KMP_HANDLE_SIGNALS
7878 __kmp_install_signals( TRUE );
7882 __kmp_suspend_initialize();
7884 # if defined(USE_LOAD_BALANCE)
7885 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
7886 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7889 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
7890 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7894 if ( __kmp_version ) {
7895 __kmp_print_version_2();
7900 if( __kmp_forkjoin_frames_mode == 1 )
7903 const char * csv_file;
        char * host_name = __kmp_env_get( "AMPLXE_HOSTNAME" );
        char * out_dir   = __kmp_env_get( "AMPLXE_DATA_DIR" );

        if( out_dir && host_name ) {
            csv_file = __kmp_str_format( "%s/omp-frames-hostname-%s.csv", out_dir, host_name );
            __kmp_itt_csv_file = fopen( csv_file, "w" );
            __kmp_str_free( &csv_file );
        }
        else {
            csv_file = __kmp_str_format( "./omp-frames-hostname-xxx.csv" );
            __kmp_itt_csv_file = fopen( csv_file, "w" );
            __kmp_str_free( &csv_file );
        }
7922 if( __kmp_itt_csv_file ) {
7923 __kmp_str_buf_init( & __kmp_itt_frame_buffer );
7924 __kmp_str_buf_print( & __kmp_itt_frame_buffer,
"name,start_tsc.TSC,end_tsc,pid,tid\n" );
7931 TCW_SYNC_4(__kmp_init_parallel, TRUE);
    KA_TRACE( 10, ( "__kmp_parallel_initialize: exit\n" ) );

    __kmp_release_bootstrap_lock( &__kmp_initz_lock );
}
void
__kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
                               kmp_team_t *team )
{
    kmp_disp_t *dispatch;
7951 this_thr->th.th_local.this_construct = 0;
7952 this_thr->th.th_local.last_construct = 0;
7953 #if KMP_CACHE_MANAGE
7954 KMP_CACHE_PREFETCH( &this_thr -> th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
7956 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7957 KMP_DEBUG_ASSERT( dispatch );
7958 KMP_DEBUG_ASSERT( team -> t.t_dispatch );
7961 dispatch -> th_disp_index = 0;
7963 if( __kmp_env_consistency_check )
7964 __kmp_push_parallel( gtid, team->t.t_ident );
void
__kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
                              kmp_team_t *team )
{
    if( __kmp_env_consistency_check )
        __kmp_pop_parallel( gtid, team->t.t_ident );
}
int
__kmp_invoke_task_func( int gtid )
{
    int rc;
7981 int tid = __kmp_tid_from_gtid( gtid );
7982 kmp_info_t *this_thr = __kmp_threads[ gtid ];
7983 kmp_team_t *team = this_thr -> th.th_team;
7985 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
7987 if ( __itt_stack_caller_create_ptr ) {
7988 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id );
7991 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
7992 gtid, tid, (
int) team->t.t_argc, (
void **) team->t.t_argv );
7995 if ( __itt_stack_caller_create_ptr ) {
7996 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id );
    __kmp_run_after_invoked_task( gtid, tid, this_thr, team );

    return rc;
}
void
__kmp_teams_master( microtask_t microtask, int gtid )
{
8009 kmp_info_t *this_thr = __kmp_threads[ gtid ];
8010 kmp_team_t *team = this_thr -> th.th_team;
8011 ident_t *loc = team->t.t_ident;
8014 int tid = __kmp_tid_from_gtid( gtid );
8015 KA_TRACE( 20, (
"__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
8016 gtid, tid, microtask) );
8021 this_thr->th.th_set_nproc = this_thr->th.th_set_nth_teams;
8022 __kmp_fork_call( loc, gtid, TRUE,
8025 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
8027 __kmp_join_call( loc, gtid, 1 );
8032 __kmp_invoke_teams_master(
int gtid )
8035 if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized )
8036 KMP_DEBUG_ASSERT( (
void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (
void*)__kmp_teams_master );
8039 __kmp_teams_master( (microtask_t)__kmp_threads[gtid]->th.th_team_microtask, gtid );
void
__kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
{
    kmp_info_t *thr = __kmp_threads[gtid];

    if( num_threads > 0 )
        thr -> th.th_set_nproc = num_threads;
}
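/*
 * __kmp_push_num_teams() records the num_teams() and thread_limit() clause values
 * for an upcoming teams construct.  If no thread limit is given, the default is
 * derived from the machine size: for example (illustrative numbers only), with
 * __kmp_avail_proc == 16 and num_teams == 4, each team defaults to 16 / 4 = 4
 * threads.
 */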
void
__kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
{
    kmp_info_t *thr = __kmp_threads[gtid];

    /* remember the number of teams for the teams construct */
    if( num_teams > 0 ) {
        thr -> th.th_set_nproc = num_teams;
    } else {
        thr -> th.th_set_nproc = 1;   /* default is a single team */
    }

    /* remember the number of threads for the parallel regions inside the teams */
    if( num_threads > 0 ) {
        thr -> th.th_set_nth_teams = num_threads;
    } else {
        if( !TCR_4(__kmp_init_middle) )
            __kmp_middle_initialize();     /* get __kmp_avail_proc calculated */
        thr -> th.th_set_nth_teams = __kmp_avail_proc / thr -> th.th_set_nproc;
    }
}
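/*
 * __kmp_push_proc_bind() records the proc_bind clause value to be applied to the
 * next parallel region forked by this thread.
 */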
void
__kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
{
    kmp_info_t *thr = __kmp_threads[gtid];
    thr -> th.th_set_proc_bind = proc_bind;
}
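/*
 * __kmp_internal_fork() is executed by the team master: it resets the per-team
 * construct and dispatch-buffer state and then releases the workers at the fork
 * barrier so they can start executing the microtask.
 */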
void
__kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
{
    kmp_info_t *this_thr = __kmp_threads[gtid];
    int f;

    KMP_DEBUG_ASSERT( team );
    KMP_DEBUG_ASSERT( this_thr -> th.th_team == team );
    KMP_ASSERT( KMP_MASTER_GTID(gtid) );

    team -> t.t_construct = 0;           /* no single directives seen yet */
    team -> t.t_ordered.dt.t_value = 0;  /* thread 0 enters the ordered section first */

    /* reset the identifiers on the dispatch buffers */
    KMP_DEBUG_ASSERT( team -> t.t_disp_buffer );
    if ( team->t.t_max_nproc > 1 ) {
        int i;
        for (i = 0; i < KMP_MAX_DISP_BUF; ++i)
            team -> t.t_disp_buffer[ i ].buffer_index = i;
    } else {
        team -> t.t_disp_buffer[ 0 ].buffer_index = 0;
    }

    KMP_ASSERT( this_thr -> th.th_team == team );

    for( f=0 ; f<team->t.t_nproc ; f++ ) {
        KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
                          team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
    }

    /* release the worker threads so they may begin working */
    __kmp_fork_barrier( gtid, 0 );
}
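/*
 * __kmp_internal_join() is the master-side counterpart: it waits on the join
 * barrier until every worker of the team has finished the microtask.
 */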
void
__kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
{
    kmp_info_t *this_thr = __kmp_threads[gtid];

    KMP_DEBUG_ASSERT( team );
    KMP_DEBUG_ASSERT( this_thr -> th.th_team == team );
    KMP_ASSERT( KMP_MASTER_GTID(gtid) );

    if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
        __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid, __kmp_threads[gtid]);
        __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
                     gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
        __kmp_print_structure();
    }
    KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
                      __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );

    __kmp_join_barrier( gtid );   /* wait for everyone */

    KMP_ASSERT( this_thr -> th.th_team == team );
}
#ifdef USE_LOAD_BALANCE

/* Return the number of workers actively spinning in the hot team when we are at
   the outermost level of parallelism; otherwise return 0. */
static int
__kmp_active_hot_team_nproc( kmp_root_t *root )
{
    int i;
    int retval = 0;
    kmp_team_t *hot_team;

    if ( root->r.r_active ) {
        return 0;
    }
    hot_team = root->r.r_hot_team;
    if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
        return hot_team->t.t_nproc - 1;   /* don't count the master thread */
    }
    /* skip the master thread - it is accounted for elsewhere */
    for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
        if ( hot_team->t.t_threads[i]->th.th_active ) {
            retval++;
        }
    }
    return retval;
}
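/*
 * __kmp_load_balance_nproc() implements KMP_DYNAMIC_MODE=load_balance: it trims the
 * requested team size so that runnable threads do not oversubscribe the machine.
 * Roughly, retval = __kmp_avail_proc - system_active + team_curr_active, clamped to
 * [KMP_MIN_NTH, set_nproc].  Illustrative numbers only: with 16 available procs, a
 * system load of 14 runnable threads of which 6 already belong to this runtime
 * (pool + hot team + master), the next team would get 16 - 14 + 6 = 8 threads,
 * capped by the requested set_nproc.
 */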
static int
__kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
{
    int retval;
    int pool_active;
    int hot_team_active;
    int team_curr_active;
    int system_active;

    KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
                   root, set_nproc ) );
    KMP_DEBUG_ASSERT( root );
#if OMP_30_ENABLED
    KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
#else
    KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_set_dynamic[0] == TRUE );
#endif
    KMP_DEBUG_ASSERT( set_nproc > 1 );

    if ( set_nproc == 1) {
        KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) );
        return 1;
    }

    /* Threads that are active in the thread pool, active in the hot team for this
       root (if we are at the outer parallel level), and the currently executing
       thread (the future master) can be added to the new team, but they already
       contribute to the system load and must be accounted for. */
    pool_active = TCR_4(__kmp_thread_pool_active_nth);
    hot_team_active = __kmp_active_hot_team_nproc( root );
    team_curr_active = pool_active + hot_team_active + 1;

    /* check the system load */
    system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
    KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
                   system_active, pool_active, hot_team_active ) );

    if ( system_active < 0 ) {
        /* There was an error reading the necessary info from /proc, so use the
           thread-limit algorithm instead.  Once the mode has been switched we
           should not get back here. */
        __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
        KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );

        /* make this call behave like the thread-limit algorithm */
        retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
                 : root->r.r_hot_team->t.t_nproc);
        if ( retval > set_nproc ) {
            retval = set_nproc;
        }
        if ( retval < KMP_MIN_NTH ) {
            retval = KMP_MIN_NTH;
        }
        KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
        return retval;
    }

    /* There is a slight delay in the load-balance algorithm when detecting newly
       running procs, so the system load at this instant should be at least as
       large as the number of threads we can contribute to the team. */
    if ( system_active < team_curr_active ) {
        system_active = team_curr_active;
    }
    retval = __kmp_avail_proc - system_active + team_curr_active;
    if ( retval > set_nproc ) {
        retval = set_nproc;
    }
    if ( retval < KMP_MIN_NTH ) {
        retval = KMP_MIN_NTH;
    }

    KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
    return retval;
} // __kmp_load_balance_nproc()

#endif /* USE_LOAD_BALANCE */
void
__kmp_cleanup( void )
{
    int f;

    KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) );

    if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
        __kmp_remove_signals();
#endif
        TCW_4(__kmp_init_parallel, FALSE);
    }

    if (TCR_4(__kmp_init_middle)) {
#if KMP_OS_WINDOWS || KMP_OS_LINUX
        __kmp_affinity_uninitialize();
#endif /* KMP_OS_WINDOWS || KMP_OS_LINUX */
        TCW_4(__kmp_init_middle, FALSE);
    }

    KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) );

    if (__kmp_init_serial) {
        __kmp_runtime_destroy();
        __kmp_init_serial = FALSE;
    }

    for ( f = 0; f < __kmp_threads_capacity; f++ ) {
        if ( __kmp_root[ f ] != NULL ) {
            __kmp_free( __kmp_root[ f ] );
            __kmp_root[ f ] = NULL;
        }
    }
    __kmp_free( __kmp_threads );
    /* __kmp_threads and __kmp_root were allocated as a single block, so freeing
       __kmp_threads releases both */
    __kmp_threads = NULL;
    __kmp_root = NULL;
    __kmp_threads_capacity = 0;

    __kmp_cleanup_user_locks();

#if KMP_OS_LINUX || KMP_OS_WINDOWS
    KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
    __kmp_cpuinfo_file = NULL;
#endif /* KMP_OS_LINUX || KMP_OS_WINDOWS */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
    __kmp_print_speculative_stats();
#endif
#endif
    KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
    __kmp_nested_nth.nth = NULL;
    __kmp_nested_nth.size = 0;
    __kmp_nested_nth.used = 0;

    __kmp_i18n_catclose();

    if( __kmp_forkjoin_frames_mode && __kmp_itt_csv_file ) {
        /* flush the collected frame records and close the CSV file */
        fprintf( __kmp_itt_csv_file, __kmp_itt_frame_buffer.str );
        __kmp_str_buf_free( & __kmp_itt_frame_buffer );
        fclose( __kmp_itt_csv_file );
    }

    KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
}
int
__kmp_ignore_mppbeg( void )
{
    char *env;
    if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
        if (__kmp_str_match_false( env ))
            return FALSE;   /* honor __kmpc_begin() */
    }
    return TRUE;            /* by default __kmpc_begin() is a no-op */
}
int
__kmp_ignore_mppend( void )
{
    char *env;
    if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
        if (__kmp_str_match_false( env ))
            return FALSE;   /* honor __kmpc_end() */
    }
    return TRUE;            /* by default __kmpc_end() is a no-op */
}
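/*
 * __kmp_internal_begin() marks the calling root as "begun".  The double-checked
 * locking below keeps the common already-begun path lock-free while still making
 * the transition exactly once per root.
 */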
void
__kmp_internal_begin( void )
{
    int gtid;
    kmp_root_t *root;

    /* this is a very important step as it will register new sibling threads and
       assign these new uber threads a new gtid */
    gtid = __kmp_entry_gtid();
    root = __kmp_threads[ gtid ] -> th.th_root;
    KMP_ASSERT( KMP_UBER_GTID( gtid ));

    if( root->r.r_begin )
        return;
    __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
    if( root->r.r_begin ) {
        __kmp_release_lock( & root->r.r_begin_lock, gtid );
        return;
    }

    root -> r.r_begin = TRUE;

    __kmp_release_lock( & root->r.r_begin_lock, gtid );
}
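/*
 * __kmp_user_set_library() implements the user-visible KMP_LIBRARY / kmp_set_library()
 * setting: "serial" pins the default team size to 1, while "turnaround" and
 * "throughput" restore the default team size and differ mainly in how idle threads
 * wait (spinning vs. yielding), which is applied via __kmp_aux_set_library() below.
 */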
void
__kmp_user_set_library (enum library_type arg)
{
    int gtid;
    kmp_root_t *root;
    kmp_info_t *thread;

    /* first, make sure we are initialized so we can get our gtid */
    gtid = __kmp_entry_gtid();
    thread = __kmp_threads[ gtid ];

    root = thread -> th.th_root;

    KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
    if (root->r.r_in_parallel) {   /* must be called in a serial section of the top-level thread */
        KMP_WARNING( SetLibraryIncorrectCall );
        return;
    }

    switch ( arg ) {
    case library_serial :
        thread -> th.th_set_nproc = 0;
        set__nproc_p( thread, 1 );
        break;
    case library_turnaround :
        thread -> th.th_set_nproc = 0;
        set__nproc_p( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
        break;
    case library_throughput :
        thread -> th.th_set_nproc = 0;
        set__nproc_p( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
        break;
    default:
        KMP_FATAL( UnknownLibraryType, arg );
    }

    __kmp_aux_set_library ( arg );
}
void
__kmp_aux_set_stacksize( size_t arg )
{
    if (! __kmp_init_serial)
        __kmp_serial_initialize();

    /* round the stack size up to a 4 KB page boundary if it is not aligned */
    if (arg & (0x1000 - 1)) {
        arg &= ~(0x1000 - 1);
        if (arg + 0x1000)        /* check for overflow if we round up */
            arg += 0x1000;
    }

    __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

    /* only change the default stacksize before the first parallel region */
    if (! TCR_4(__kmp_init_parallel)) {
        size_t value = arg;      /* argument is in bytes */

        if (value < __kmp_sys_min_stksize )
            value = __kmp_sys_min_stksize ;
        else if (value > KMP_MAX_STKSIZE)
            value = KMP_MAX_STKSIZE;

        __kmp_stksize = value;

        __kmp_env_stksize = TRUE;   /* treat it as if KMP_STACKSIZE had been specified */
    }

    __kmp_release_bootstrap_lock( &__kmp_initz_lock );
}
/* set the behaviour of the runtime library */
void
__kmp_aux_set_library (enum library_type arg)
{
    __kmp_library = arg;

    switch ( __kmp_library ) {
    case library_serial :
        KMP_INFORM( LibraryIsSerial );
        (void) __kmp_change_library( TRUE );
        break;
    case library_turnaround :
        (void) __kmp_change_library( TRUE );
        break;
    case library_throughput :
        (void) __kmp_change_library( FALSE );
        break;
    default:
        KMP_FATAL( UnknownLibraryType, arg );
    }
}
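/*
 * __kmp_aux_set_blocktime() backs kmp_set_blocktime(): it clamps the requested
 * block time (in milliseconds) and stores it, together with the derived monitor
 * interval count, into the ICVs of both the current team and the serial team.
 * As a rough illustration (assuming KMP_INTERVALS_FROM_BLOCKTIME divides by the
 * monitor wakeup period), a 200 ms block time with 10 monitor wakeups per second
 * maps to about 2 monitor intervals.
 */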
void
__kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid)
{
    int blocktime = arg;     /* argument is in milliseconds */
    int bt_intervals;
    int bt_set;

    __kmp_save_internal_controls( thread );

    /* normalize the blocktime to the allowed range */
    if (blocktime < KMP_MIN_BLOCKTIME)
        blocktime = KMP_MIN_BLOCKTIME;
    else if (blocktime > KMP_MAX_BLOCKTIME)
        blocktime = KMP_MAX_BLOCKTIME;

    set__blocktime_team( thread -> th.th_team, tid, blocktime );
    set__blocktime_team( thread -> th.th_serial_team, 0, blocktime );

    /* calculate and set the blocktime intervals for the teams */
    bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

    set__bt_intervals_team( thread -> th.th_team, tid, bt_intervals );
    set__bt_intervals_team( thread -> th.th_serial_team, 0, bt_intervals );

    /* record that the blocktime was set explicitly */
    bt_set = TRUE;

    set__bt_set_team( thread -> th.th_team, tid, bt_set );
    set__bt_set_team( thread -> th.th_serial_team, 0, bt_set );
    KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n",
                  __kmp_gtid_from_tid(tid, thread->th.th_team),
                  thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) );
}
void
__kmp_aux_set_defaults( char const * str, int len )
{
    if ( ! __kmp_init_serial ) {
        __kmp_serial_initialize();
    }
    __kmp_env_initialize( str );

    if ( __kmp_settings || __kmp_display_env || __kmp_display_env_verbose ) {
        __kmp_env_print();
    }
} // __kmp_aux_set_defaults
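/*
 * __kmp_determine_reduction_method() picks the fastest reduction implementation the
 * compiler and platform allow for a given reduction: an empty block for a team of
 * one, atomics when the compiler emitted an atomic reduce routine and the data is
 * small, a tree reduction for larger teams/data, and a critical section as the
 * universal fallback.  The choice is packed together with the barrier kind into a
 * single PACKED_REDUCTION_METHOD_T value.
 */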
PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
        kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
        void (*reduce_func)(void *lhs_data, void *rhs_data),
        kmp_critical_name *lck )
{
    /* default: critical-section reduction */
    PACKED_REDUCTION_METHOD_T retval;

    int team_size;

    KMP_DEBUG_ASSERT( loc );
    KMP_DEBUG_ASSERT( lck );

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
#define FAST_REDUCTION_TREE_METHOD_GENERATED   ( ( reduce_data ) && ( reduce_func ) )

    retval = critical_reduce_block;

    team_size = __kmp_get_team_num_threads( global_tid );

    if( team_size == 1 ) {

        retval = empty_reduce_block;

    } else {

        int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
        int tree_available   = FAST_REDUCTION_TREE_METHOD_GENERATED;

#if KMP_ARCH_X86_64   /* 64-bit tuning; the architecture list in the original guard may be wider */

    #if KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_DARWIN
        #if KMP_MIC
            #define REDUCTION_TEAMSIZE_CUTOFF 8
        #else
            #define REDUCTION_TEAMSIZE_CUTOFF 4
        #endif
        if( tree_available ) {
            if( team_size <= REDUCTION_TEAMSIZE_CUTOFF ) {
                if ( atomic_available ) {
                    retval = atomic_reduce_block;
                }
            } else {
                retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
            }
        } else if ( atomic_available ) {
            retval = atomic_reduce_block;
        }
    #else
        #error "Unknown or unsupported OS"
    #endif // KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_DARWIN

#elif KMP_ARCH_X86    /* 32-bit tuning; the original guard may also cover other 32-bit targets */

    #if KMP_OS_LINUX || KMP_OS_WINDOWS
        /* basic tuning */
        if( atomic_available ) {
            if( num_vars <= 2 ) {
                retval = atomic_reduce_block;
            }
        } /* otherwise: use the critical-section method */
    #elif KMP_OS_DARWIN
        if( atomic_available && ( num_vars <= 3 ) ) {
            retval = atomic_reduce_block;
        } else if( tree_available ) {
            if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) &&
                ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
                retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
            }
        } /* otherwise: use the critical-section method */
    #else
        #error "Unknown or unsupported OS"
    #endif

#else
    #error "Unknown or unsupported architecture"
#endif

    }

    /* a forced method (KMP_FORCE_REDUCTION) overrides the automatic choice */
    if( __kmp_force_reduction_method != reduction_method_not_defined ) {

        PACKED_REDUCTION_METHOD_T forced_retval;

        int atomic_available, tree_available;

        switch( ( forced_retval = __kmp_force_reduction_method ) )
        {
            case critical_reduce_block:
                KMP_ASSERT( lck );
                if( team_size <= 1 ) {
                    forced_retval = empty_reduce_block;
                }
                break;

            case atomic_reduce_block:
                atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
                KMP_ASSERT( atomic_available );
                break;

            case tree_reduce_block:
                tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
                KMP_ASSERT( tree_available );
#if KMP_FAST_REDUCTION_BARRIER
                forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
                break;

            default:
                KMP_ASSERT( 0 );   /* unsupported method specified */
        }

        retval = forced_retval;
    }

    KA_TRACE(10, ("reduction method selected=%08x\n", retval ) );

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

    return ( retval );
}
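/*
 * __kmp_get_reduce_method() exists for testing: it returns the method half of the
 * packed value stored on the current thread.  The ">> 8" relies on the packing
 * scheme in which the low bits hold the barrier kind and the reduction method
 * starts at bit 8 (see the PACKED_REDUCTION_METHOD_T definitions).
 */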
kmp_int32
__kmp_get_reduce_method( void ) {
    return ( ( __kmp_entry_thread() -> th.th_local.packed_reduction_method ) >> 8 );
}