#include "kmp_atomic.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_environment.h"
#include "kmp_settings.h"
#include "kmp_error.h"

#define KMP_USE_PRCTL 0
#define KMP_USE_POOLED_ALLOC 0

#if KMP_MIC
#include <immintrin.h>
#define USE_NGO_STORES 1   /* use non-globally-ordered (streaming) stores */
#endif

#if KMP_MIC && USE_NGO_STORES
/* ICV copying done with 512-bit non-temporal stores plus an explicit fence */
#define load_icvs(src)       __m512d Vt_icvs = _mm512_load_pd((void *)(src))
#define store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt_icvs)
#define sync_icvs()          __asm__ volatile ("lock; addl $0,0(%%rsp)" ::: "memory")
#else
#define load_icvs(src)       ((void)0)
#define store_icvs(dst, src) copy_icvs((dst), (src))
#define sync_icvs()          ((void)0)
#endif
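/*
 * The load_icvs/store_icvs/sync_icvs trio above is used when a team's internal
 * control variables (ICVs) are pushed to its worker threads during a barrier
 * release.  On KMP_MIC the copy is one 512-bit load plus a non-globally-ordered
 * streaming store, followed by a fence; elsewhere it presumably falls back to a
 * plain copy_icvs() structure copy with no fencing required.
 */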
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "

char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";

char const __kmp_version_perf_v19[] = KMP_VERSION_PREFIX "perf v19: "
#if KMP_PERF_V19 == KMP_ON
    "on";
#elif KMP_PERF_V19 == KMP_OFF
    "off";
#else
    #error "Must specify KMP_PERF_V19 option"
#endif

char const __kmp_version_perf_v106[] = KMP_VERSION_PREFIX "perf v106: "
#if KMP_PERF_V106 == KMP_ON
    "on";
#elif KMP_PERF_V106 == KMP_OFF
    "off";
#else
    #error "Must specify KMP_PERF_V106 option"
#endif
#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )

kmp_info_t __kmp_monitor;
/* Forward declarations */

void __kmp_cleanup( void );

static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
static void __kmp_initialize_team(
#if OMP_30_ENABLED
    kmp_internal_control_t * new_icvs,
#else
    int new_set_nproc, int new_set_dynamic, int new_set_nested,
    int new_set_blocktime, int new_bt_intervals, int new_bt_set
#endif
    );
static void __kmp_partition_places( kmp_team_t *team );
static void __kmp_do_serial_initialize( void );

#ifdef USE_LOAD_BALANCE
static int  __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
#endif

static int  __kmp_expand_threads( int nWish, int nNeed );
static int  __kmp_unregister_root_other_thread( int gtid );
static void __kmp_unregister_library( void );
static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
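/*
 * __kmp_get_global_thread_id() below resolves the calling thread's global
 * thread id (gtid) without registering it.  The lookup is tiered by
 * __kmp_gtid_mode:
 *   mode >= 3 : gtid kept in native thread-local data (KMP_TDATA_GTID);
 *   mode >= 2 : gtid kept in keyed TLS (__kmp_gtid_get_specific());
 *   otherwise : "internal algorithm" -- take the address of a local variable
 *               and find which registered thread's [stackbase - stacksize,
 *               stackbase] range contains it, since stacks grow downward.
 * If the stack search fails, the code falls back to keyed TLS and, for an
 * uber (root) thread whose stack may grow, refines the recorded stack bounds.
 */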
__kmp_get_global_thread_id( )
    kmp_info_t **other_threads;

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
                      __kmp_nth, __kmp_all_nth ));
    if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));

    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
        return __kmp_gtid_get_specific();

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));

    stack_addr    = (char*) & stack_data;
    other_threads = __kmp_threads;
    for ( i = 0 ; i < __kmp_threads_capacity ; i++ ) {

        kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);

        stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
        stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

        if ( stack_addr <= stack_base ) {
            size_t stack_diff = stack_base - stack_addr;

            if ( stack_diff <= stack_size ) {
                KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
                      "thread, using TLS\n" ));
    i = __kmp_gtid_get_specific();
    if ( !TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
        KMP_FATAL( StackOverflow, i );
    }

    stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
    if ( stack_addr > stack_base ) {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
                other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
    } else {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
    }

    /* Reprint stack bounds for the uber thread since they have been refined */
    if ( __kmp_storage_map ) {
        char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
        char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
        __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
                                      other_threads[i]->th.th_info.ds.ds_stacksize,
                                      "th_%d stack (refinement)", i );
    }
__kmp_get_global_thread_id_reg( )

    if ( !__kmp_init_serial ) {

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));

    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
        gtid = __kmp_gtid_get_specific();

        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
        gtid = __kmp_get_global_thread_id();

    /* we must be a new uber master sibling thread */
    if ( gtid == KMP_GTID_DNE ) {
        KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
                        "Registering a new gtid.\n" ));
        __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
        if ( !__kmp_init_serial ) {
            __kmp_do_serial_initialize();
            gtid = __kmp_gtid_get_specific();
        } else {
            gtid = __kmp_register_root(FALSE);
        }
        __kmp_release_bootstrap_lock( &__kmp_initz_lock );
    }

    KMP_DEBUG_ASSERT( gtid >= 0 );
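/*
 * __kmp_check_stack_overlap() below is a diagnostic pass: with KMP_STORAGE_MAP
 * it prints the thread's current stack extent, and when extensive checking is
 * enabled (__kmp_env_checks) it compares [stack_beg, stack_end) against every
 * other registered thread's recorded stack range and raises a fatal
 * StackOverlap message (with a hint to change the stack limit) on any overlap.
 * Uber-master threads appear to be skipped because their bounds are refined
 * lazily and can legitimately intersect the initial guess.
 */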
__kmp_check_stack_overlap( kmp_info_t *th )

    char *stack_beg = NULL;
    char *stack_end = NULL;

    KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
    if ( __kmp_storage_map ) {
        stack_end = (char *) th->th.th_info.ds.ds_stackbase;
        stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        gtid = __kmp_gtid_from_thread( th );

        if (gtid == KMP_GTID_MONITOR) {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%s stack (%s)", "mon",
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        } else {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%d stack (%s)", gtid,
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
        }
    }

    if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid = __kmp_gtid_from_thread( th )))
    {
        KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
        if ( stack_beg == NULL ) {
            stack_end = (char *) th->th.th_info.ds.ds_stackbase;
            stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
        }
        for ( f = 0 ; f < __kmp_threads_capacity ; f++ ) {
            kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

            if ( f_th && f_th != th ) {
                char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
                char *other_stack_beg = other_stack_end -
                                        (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
                if ( (stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
                     (stack_end > other_stack_beg && stack_end < other_stack_end) ) {

                    /* Print the other stack values before the abort */
                    if ( __kmp_storage_map )
                        __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
                                                      (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                                                      "th_%d stack (overlapped)",
                                                      __kmp_gtid_from_thread( f_th ) );

                    __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
                }
            }
        }
    }

    KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
# define __kmp_static_delay( arg )

__kmp_static_delay( int arg )

# if KMP_ARCH_X86_64 && KMP_OS_LINUX
    KMP_ASSERT( arg != 0 );
# else
    KMP_ASSERT( arg >= 0 );
# endif

__kmp_static_yield( int arg )
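/*
 * __kmp_wait_sleep() below is the central spin-wait primitive: it waits for
 * *spinner to reach the value `checker`.  The visible loop structure is:
 *   - help with tasking first (__kmp_execute_tasks) when task teams are active,
 *   - spin with __kmp_static_delay / KMP_YIELD / KMP_YIELD_SPIN, throttling
 *     yields by whether the machine is oversubscribed (__kmp_nth > avail_proc),
 *   - once the blocktime interval ("hibernate", measured in global time ticks)
 *     expires and KMP_BLOCKTIME is not "infinite", suspend the thread via
 *     __kmp_suspend() so it stops burning a core.
 * A matching __kmp_release()/__kmp_resume() on another thread ends the wait.
 */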
__kmp_wait_sleep( kmp_info_t *this_thr,
                  volatile kmp_uint *spinner,
                  USE_ITT_BUILD_ARG( void * itt_sync_obj )

    register volatile kmp_uint *spin  = spinner;
    register kmp_uint           check = checker;
    register kmp_uint32         spins;
    register kmp_uint32         hibernate;

    KMP_FSYNC_SPIN_INIT( spin, NULL );
    if ( TCR_4(*spin) == check ) {
        KMP_FSYNC_SPIN_ACQUIRED( spin );

    th_gtid = this_thr->th.th_info.ds.ds_gtid;

    KA_TRACE( 20, ( "__kmp_wait_sleep: T#%d waiting for spin(%p) == %d\n",
                    th_gtid, spin, check ) );

    KMP_INIT_YIELD( spins );
432 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
437 #ifdef KMP_ADJUST_BLOCKTIME
438 if ( __kmp_zero_bt && ! this_thr->th.th_team_bt_set ) {
442 hibernate = this_thr->th.th_team_bt_intervals;
445 hibernate = this_thr->th.th_team_bt_intervals;
454 if ( hibernate != 0 ) {
461 hibernate += TCR_4( __kmp_global.g.g_time.dt.t_value );
            KF_TRACE( 20, ( "__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                            th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                            hibernate - __kmp_global.g.g_time.dt.t_value ));
    while ( TCR_4(*spin) != check ) {

        kmp_task_team_t * task_team = NULL;
        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
            task_team = this_thr->th.th_task_team;
            if ( task_team != NULL ) {
                if ( ! TCR_SYNC_4( task_team->tt.tt_active ) ) {
                    KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( this_thr->th.th_info.ds.ds_tid ) );
                    __kmp_unref_task_team( task_team, this_thr );
                } else if ( KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) {
                    __kmp_execute_tasks( this_thr, th_gtid, spin, check, final_spin, &flag
                                         USE_ITT_BUILD_ARG( itt_sync_obj ), 0);
                }
            }
        }

        KMP_FSYNC_SPIN_PREPARE( spin );
503 if( TCR_4(__kmp_global.g.g_done) ) {
504 if( __kmp_global.g.g_abort )
505 __kmp_abort_thread( );
509 __kmp_static_delay( 1 );
513 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
517 KMP_YIELD_SPIN( spins );
523 in_pool = !!TCR_4(this_thr->th.th_in_pool);
524 if ( in_pool != !!this_thr->th.th_active_in_pool ) {
530 (kmp_int32 *) &__kmp_thread_pool_active_nth );
531 this_thr->th.th_active_in_pool = TRUE;
551 (kmp_int32 *) &__kmp_thread_pool_active_nth );
552 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
553 this_thr->th.th_active_in_pool = FALSE;
559 if ( ( task_team != NULL ) && TCR_4(task_team->tt.tt_found_tasks) ) {
565 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
570 if ( TCR_4( __kmp_global.g.g_time.dt.t_value ) < hibernate ) {
                KF_TRACE( 50, ( "__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid ) );
576 __kmp_suspend( th_gtid, spin, check );
578 if( TCR_4( __kmp_global.g.g_done ) && __kmp_global.g.g_abort ) {
579 __kmp_abort_thread( );
586 KMP_FSYNC_SPIN_ACQUIRED( spin );
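/*
 * __kmp_release() below is the producer side of the handshake above: it bumps
 * the flag with an atomic fetch-and-add of 4 (KMP_TEST_THEN_ADD4_ACQ32 or
 * KMP_TEST_THEN_ADD4_32, acquire or release flavor as requested) and, when a
 * finite blocktime is in effect and the old value carried
 * KMP_BARRIER_SLEEP_STATE, wakes the sleeping waiter with __kmp_resume().
 * Bumping by 4 apparently leaves the low bits free to encode the sleep state
 * while the upper bits count barrier generations.
 */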
__kmp_release( kmp_info_t *target_thr, volatile kmp_uint *spin,
               enum kmp_mem_fence_type fetchadd_fence )

    int target_gtid = target_thr->th.th_info.ds.ds_gtid;
    int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;

    KF_TRACE( 20, ( "__kmp_release: T#%d releasing T#%d spin(%p) fence_type(%d)\n",
                    gtid, target_gtid, spin, fetchadd_fence ));

    KMP_DEBUG_ASSERT( spin );

    KMP_DEBUG_ASSERT( fetchadd_fence == kmp_acquire_fence ||
                      fetchadd_fence == kmp_release_fence );

    KMP_FSYNC_RELEASING( spin );

    old_spin = ( fetchadd_fence == kmp_acquire_fence )
                   ? KMP_TEST_THEN_ADD4_ACQ32( (volatile kmp_int32 *) spin )
                   : KMP_TEST_THEN_ADD4_32( (volatile kmp_int32 *) spin );

    KF_TRACE( 100, ( "__kmp_release: T#%d old spin(%p)=%d, set new spin=%d\n",
                     gtid, spin, old_spin, *spin ) );

    if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
        /* Only need to check sleep state if infinite block time is not set */
        if ( old_spin & KMP_BARRIER_SLEEP_STATE ) {
            int target_gtid = target_thr->th.th_info.ds.ds_gtid;

            KF_TRACE( 50, ( "__kmp_release: T#%d waking up thread T#%d since sleep spin(%p) set\n",
                            gtid, target_gtid, spin ));
            __kmp_resume( target_gtid, spin );
        } else {
            KF_TRACE( 50, ( "__kmp_release: T#%d don't wake up thread T#%d since sleep spin(%p) not set\n",
                            gtid, target_gtid, spin ));
        }
    }
__kmp_infinite_loop( void )

    static int done = FALSE;

#define MAX_MESSAGE 512
__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ... ) {
    char buffer[MAX_MESSAGE];
    int node;
    va_list ap;

    va_start( ap, format);
    sprintf( buffer, "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
#if KMP_PRINT_DATA_PLACEMENT
    if ( p1 <= p2 && (char*)p2 - (char*)p1 == size ) {
        if ( __kmp_storage_map_verbose ) {
            node = __kmp_get_host_node(p1);

            __kmp_storage_map_verbose = FALSE;

            int localProc = __kmp_get_cpu_from_gtid(gtid);

            p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
            p2 = (void *)( ((size_t)p2 - 1) & ~((size_t)PAGE_SIZE - 1) );

            __kmp_printf_no_lock( " GTID %d localNode %d\n", gtid, localProc>>1 );

            __kmp_printf_no_lock( " GTID %d\n", gtid );

                (char*)p1 += PAGE_SIZE;
            } while ( p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode );
            __kmp_printf_no_lock( " %p-%p memNode %d\n", last, (char*)p1 - 1, lastNode );

            __kmp_printf_no_lock( " %p-%p memNode %d\n", p1, (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1) );
            __kmp_printf_no_lock( " %p-%p memNode %d\n", p2, (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2) );

    __kmp_printf_no_lock( " %s\n", KMP_I18N_STR( StorageMapWarning ) );
#endif /* KMP_PRINT_DATA_PLACEMENT */

    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
__kmp_warn( char const * format, ... )

    char buffer[MAX_MESSAGE];
    va_list ap;

    if ( __kmp_generate_warnings == kmp_warnings_off ) {
        return;
    }

    va_start( ap, format );

    snprintf( buffer, sizeof(buffer), "OMP warning: %s\n", format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
734 __kmp_abort_process()
738 __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );
740 if ( __kmp_debug_buf ) {
741 __kmp_dump_debug_buffer();
744 if ( KMP_OS_WINDOWS ) {
747 __kmp_global.g.g_abort = SIGABRT;
765 __kmp_infinite_loop();
766 __kmp_release_bootstrap_lock( & __kmp_exit_lock );
__kmp_abort_thread( void )

    __kmp_infinite_loop();
__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )

    __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
                                  "th_%d.th_info", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
                                  "th_%d.th_local", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
                                  sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier], &thr->th.th_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier], &thr->th.th_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid );

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier], &thr->th.th_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid );
#endif // KMP_FAST_REDUCTION_BARRIER
__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )

    int num_disp_buff = team->t.t_max_nproc > 1 ? KMP_MAX_DISP_BUF : 2;
    __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
                                  sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
#endif // KMP_FAST_REDUCTION_BARRIER

    __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
                                  sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
                                  sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
                                  sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
                                  sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
887 static void __kmp_init_allocator() {}
888 static void __kmp_fini_allocator() {}
889 static void __kmp_fini_allocator_thread() {}
893 #ifdef GUIDEDLL_EXPORTS
898 __kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
900 __kmp_init_bootstrap_lock( lck );
__kmp_reset_locks_on_process_detach( int gtid_req ) {

    for ( i = 0; i < __kmp_threads_capacity; ++i ) {
        if ( !__kmp_threads ) continue;
        kmp_info_t* th = __kmp_threads[ i ];
        if ( th == NULL ) continue;
        int gtid = th->th.th_info.ds.ds_gtid;
        if ( gtid == gtid_req ) continue;
        if ( gtid < 0 ) continue;

        int alive = __kmp_is_thread_alive( th, &exit_val );

    if ( thread_count == 0 ) break;

    __kmp_reset_lock( &__kmp_forkjoin_lock );

    __kmp_reset_lock( &__kmp_stdio_lock );
DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {

    switch ( fdwReason ) {

        case DLL_PROCESS_ATTACH:
            KA_TRACE( 10, ( "DllMain: PROCESS_ATTACH\n" ));

        case DLL_PROCESS_DETACH:
            KA_TRACE( 10, ( "DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific() ));

            if ( lpReserved != NULL )

            __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );

            __kmp_internal_end_library( __kmp_gtid_get_specific() );

        case DLL_THREAD_ATTACH:
            KA_TRACE( 10, ( "DllMain: THREAD_ATTACH\n" ));

        case DLL_THREAD_DETACH:
            KA_TRACE( 10, ( "DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific() ));

            __kmp_internal_end_thread( __kmp_gtid_get_specific() );
__kmp_change_library( int status )
1029 old_status = __kmp_yield_init & 1;
1032 __kmp_yield_init |= 1;
1035 __kmp_yield_init &= ~1;
__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
1050 int gtid = *gtid_ref;
1051 #ifdef BUILD_PARALLEL_ORDERED
1052 kmp_team_t *team = __kmp_team_from_gtid( gtid );
1055 if( __kmp_env_consistency_check ) {
1056 if( __kmp_threads[gtid] -> th.th_root -> r.r_active )
1057 __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
1059 #ifdef BUILD_PARALLEL_ORDERED
1060 if( !team -> t.t_serialized ) {
1064 KMP_WAIT_YIELD(&team -> t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
1077 int gtid = *gtid_ref;
1078 #ifdef BUILD_PARALLEL_ORDERED
1079 int tid = __kmp_tid_from_gtid( gtid );
1080 kmp_team_t *team = __kmp_team_from_gtid( gtid );
1083 if( __kmp_env_consistency_check ) {
1084 if( __kmp_threads[gtid] -> th.th_root -> r.r_active )
1085 __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
1087 #ifdef BUILD_PARALLEL_ORDERED
1088 if ( ! team -> t.t_serialized ) {
1093 team -> t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );
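/*
 * __kmp_enter_single() / __kmp_exit_single() below implement the "single"
 * worksharing construct.  The winner is chosen with one
 * KMP_COMPARE_AND_STORE_ACQ32 on team->t.t_construct: each thread tries to
 * advance the per-team construct counter from its own previous value
 * (th_local.this_construct) to the new one, and only the thread whose CAS
 * succeeds executes the single region.  Consistency checking pushes/pops a
 * ct_psingle record when enabled.
 */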
__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
1115 if( ! TCR_4(__kmp_init_parallel) )
1116 __kmp_parallel_initialize();
1118 th = __kmp_threads[ gtid ];
1119 team = th -> th.th_team;
1122 th->th.th_ident = id_ref;
1124 if ( team -> t.t_serialized ) {
1127 kmp_int32 old_this = th->th.th_local.this_construct;
1129 ++th->th.th_local.this_construct;
1134 status = KMP_COMPARE_AND_STORE_ACQ32(&team -> t.t_construct, old_this,
1135 th->th.th_local.this_construct);
1138 if( __kmp_env_consistency_check ) {
1139 if (status && push_ws) {
1140 __kmp_push_workshare( gtid, ct_psingle, id_ref );
1142 __kmp_check_workshare( gtid, ct_psingle, id_ref );
1147 __kmp_itt_single_start( gtid );
__kmp_exit_single( int gtid )
1157 __kmp_itt_single_end( gtid );
1159 if( __kmp_env_consistency_check )
1160 __kmp_pop_workshare( gtid, ct_psingle, NULL );
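/*
 * The next several routines implement the gather (arrival) half of the runtime
 * barriers in three shapes, selected by __kmp_barrier_gather_pattern /
 * __kmp_barrier_gather_branch_bits:
 *   - linear: every worker signals the master's b_arrived flag directly and
 *     the master waits for all of them in a simple loop;
 *   - tree:   workers signal their parent at (tid - 1) >> branch_bits after
 *     first gathering their own children;
 *   - hyper:  a hypercube-style pairing done level by level (see below).
 * Arrival is recorded by bumping b_arrived by KMP_BARRIER_STATE_BUMP, and an
 * optional (*reduce)() callback folds th_local.reduce_data while gathering.
 */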
__kmp_linear_barrier_gather( enum barrier_type bt,
                             kmp_info_t *this_thr,
                             void (*reduce)( void *, void * )
                             USE_ITT_BUILD_ARG( void * itt_sync_obj )

    register kmp_team_t    *team          = this_thr->th.th_team;
    register kmp_bstate_t  *thr_bar       = &this_thr->th.th_bar[ bt ].bb;
    register kmp_info_t   **other_threads = team->t.t_threads;

    KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
                    gtid, team->t.t_id, tid, bt ) );
1183 KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
    if ( ! KMP_MASTER_TID( tid )) {

        KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
                        "arrived(%p): %u => %u\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
                        &thr_bar->b_arrived, thr_bar->b_arrived,
                        thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP ) );

        __kmp_release( other_threads[0], &thr_bar->b_arrived, kmp_release_fence );
1210 register kmp_balign_team_t *team_bar = & team -> t.t_bar[ bt ];
1211 register int nproc = this_thr -> th.th_team_nproc;
1214 register kmp_uint new_state = team_bar -> b_arrived + KMP_BARRIER_STATE_BUMP;
1217 for (i = 1; i < nproc; i++) {
1218 #if KMP_CACHE_MANAGE
1221 KMP_CACHE_PREFETCH( &other_threads[ i+1 ] -> th.th_bar[ bt ].bb.b_arrived );
            KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                            "arrived(%p) == %u\n",
                            gtid, team->t.t_id, tid,
                            __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
                            &other_threads[i]->th.th_bar[ bt ].bb.b_arrived, new_state ) );

            __kmp_wait_sleep( this_thr,
                              &other_threads[ i ]->th.th_bar[ bt ].bb.b_arrived,
                              new_state, FALSE
                              USE_ITT_BUILD_ARG( itt_sync_obj )
                KA_TRACE( 100, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
                                 gtid, team->t.t_id, tid,
                                 __kmp_gtid_from_tid( i, team ), team->t.t_id, i ) );
                (*reduce)( this_thr->th.th_local.reduce_data,
                           other_threads[ i ]->th.th_local.reduce_data );
        team_bar->b_arrived = new_state;
        KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "
                        "arrived(%p) = %u\n",
                        gtid, team->t.t_id, tid, team->t.t_id,
                        &team_bar->b_arrived, new_state ) );

    KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                    gtid, team->t.t_id, tid, bt ) );
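/*
 * Tree gather: with branch_factor = 1 << branch_bits, thread `tid` waits for
 * its children, whose ids are (tid << branch_bits) + 1, +2, ..., and then (if
 * it is not the master) reports its own arrival to parent (tid - 1) >>
 * branch_bits.  For example, with branch_bits = 2 and 16 threads, thread 1
 * gathers threads 5..8 before signaling thread 0.
 */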
__kmp_tree_barrier_gather( enum barrier_type bt,
                           kmp_info_t *this_thr,
                           void (*reduce)( void *, void * )
                           USE_ITT_BUILD_ARG( void * itt_sync_obj )

    register kmp_team_t    *team          = this_thr->th.th_team;
    register kmp_bstate_t  *thr_bar       = &this_thr->th.th_bar[ bt ].bb;
    register kmp_info_t   **other_threads = team->t.t_threads;
    register kmp_uint32     nproc         = this_thr->th.th_team_nproc;
    register kmp_uint32     branch_bits   = __kmp_barrier_gather_branch_bits[ bt ];
    register kmp_uint32     branch_factor = 1 << branch_bits;
    register kmp_uint32     child;
    register kmp_uint32     child_tid;
    register kmp_uint       new_state;

    KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
                    gtid, team->t.t_id, tid, bt ) );
1285 KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
1293 child_tid = (tid << branch_bits) + 1;
1295 if ( child_tid < nproc ) {
1298 new_state = team -> t.t_bar[ bt ].b_arrived + KMP_BARRIER_STATE_BUMP;
1302 register kmp_info_t *child_thr = other_threads[ child_tid ];
1303 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
1304 #if KMP_CACHE_MANAGE
1306 if ( child+1 <= branch_factor && child_tid+1 < nproc )
1307 KMP_CACHE_PREFETCH( &other_threads[ child_tid+1 ] -> th.th_bar[ bt ].bb.b_arrived );
            KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                            "arrived(%p) == %u\n",
                            gtid, team->t.t_id, tid,
                            __kmp_gtid_from_tid( child_tid, team ), team->t.t_id, child_tid,
                            &child_bar->b_arrived, new_state ) );
1316 __kmp_wait_sleep( this_thr, &child_bar -> b_arrived, new_state, FALSE
1317 USE_ITT_BUILD_ARG( itt_sync_obj)
                KA_TRACE( 100, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                                 gtid, team->t.t_id, tid,
                                 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id, child_tid ) );

                (*reduce)( this_thr->th.th_local.reduce_data,
                           child_thr->th.th_local.reduce_data );
1335 while ( child <= branch_factor && child_tid < nproc );
1338 if ( !KMP_MASTER_TID(tid) ) {
1340 register kmp_int32 parent_tid = (tid - 1) >> branch_bits;
        KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                        "arrived(%p): %u => %u\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid( parent_tid, team ), team->t.t_id, parent_tid,
                        &thr_bar->b_arrived, thr_bar->b_arrived,
                        thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP ) );

        __kmp_release( other_threads[parent_tid], &thr_bar->b_arrived, kmp_release_fence );
1363 team -> t.t_bar[ bt ].b_arrived = new_state;
1365 team -> t.t_bar[ bt ].b_arrived += KMP_BARRIER_STATE_BUMP;
    KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %u\n",
                    gtid, team->t.t_id, tid, team->t.t_id,
                    &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived ) );

    KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                    gtid, team->t.t_id, tid, bt ) );
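/*
 * Hypercube gather: on each level the threads are grouped in blocks of
 * branch_factor = 1 << branch_bits.  A thread whose bits (tid >> level) &
 * (branch_factor - 1) are non-zero signals the block leader
 * tid & ~((1 << (level + branch_bits)) - 1) and is done; block leaders keep
 * gathering ever-larger blocks as `offset` is shifted left by branch_bits.
 * For example, with branch_bits = 1, thread 5 (binary 101) signals thread 4 at
 * level 0, and thread 4 signals thread 0 at level 2 after gathering its own
 * children.
 */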
__kmp_hyper_barrier_gather( enum barrier_type bt,
                            kmp_info_t *this_thr,
                            void (*reduce)( void *, void * )
                            USE_ITT_BUILD_ARG( void * itt_sync_obj )

    register kmp_team_t    *team          = this_thr->th.th_team;
    register kmp_bstate_t  *thr_bar       = &this_thr->th.th_bar[ bt ].bb;
    register kmp_info_t   **other_threads = team->t.t_threads;
    register kmp_uint       new_state     = KMP_BARRIER_UNUSED_STATE;
    register kmp_uint32     num_threads   = this_thr->th.th_team_nproc;
    register kmp_uint32     branch_bits   = __kmp_barrier_gather_branch_bits[ bt ];
    register kmp_uint32     branch_factor = 1 << branch_bits;
    register kmp_uint32     offset;
    register kmp_uint32     level;

    KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
                    gtid, team->t.t_id, tid, bt ) );
1399 KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
1401 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1403 if( __kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3 ) {
1404 this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
1413 for ( level=0, offset =1;
1414 offset < num_threads;
1415 level += branch_bits, offset <<= branch_bits )
1417 register kmp_uint32 child;
1418 register kmp_uint32 child_tid;
1420 if ( ((tid >> level) & (branch_factor - 1)) != 0 ) {
1421 register kmp_int32 parent_tid = tid & ~( (1 << (level + branch_bits)) -1 );
            KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                            "arrived(%p): %u => %u\n",
                            gtid, team->t.t_id, tid,
                            __kmp_gtid_from_tid( parent_tid, team ), team->t.t_id, parent_tid,
                            &thr_bar->b_arrived, thr_bar->b_arrived,
                            thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP ) );

            __kmp_release( other_threads[parent_tid], &thr_bar->b_arrived, kmp_release_fence );
1444 if (new_state == KMP_BARRIER_UNUSED_STATE)
1445 new_state = team -> t.t_bar[ bt ].b_arrived + KMP_BARRIER_STATE_BUMP;
1447 for ( child = 1, child_tid = tid + (1 << level);
1448 child < branch_factor && child_tid < num_threads;
1449 child++, child_tid += (1 << level) )
1451 register kmp_info_t *child_thr = other_threads[ child_tid ];
1452 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
1453 #if KMP_CACHE_MANAGE
1454 register kmp_uint32 next_child_tid = child_tid + (1 << level);
1456 if ( child+1 < branch_factor && next_child_tid < num_threads )
1457 KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ] -> th.th_bar[ bt ].bb.b_arrived );
1459 KA_TRACE( 20, (
"__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
1460 "arrived(%p) == %u\n",
1461 gtid, team->t.t_id, tid,
1462 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id, child_tid,
1463 &child_bar -> b_arrived, new_state ) );
1466 __kmp_wait_sleep( this_thr, &child_bar -> b_arrived, new_state, FALSE
1467 USE_ITT_BUILD_ARG (itt_sync_obj)
1472 if( __kmp_forkjoin_frames_mode == 2 || __kmp_forkjoin_frames_mode == 3 ) {
1473 this_thr->th.th_bar_arrive_time = KMP_MIN( this_thr->th.th_bar_arrive_time, child_thr->th.th_bar_arrive_time );
                KA_TRACE( 100, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                                 gtid, team->t.t_id, tid,
                                 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id, child_tid ) );

                (*reduce)( this_thr->th.th_local.reduce_data,
                           child_thr->th.th_local.reduce_data );
1491 if ( KMP_MASTER_TID(tid) ) {
1494 if (new_state == KMP_BARRIER_UNUSED_STATE)
1495 team -> t.t_bar[ bt ].b_arrived += KMP_BARRIER_STATE_BUMP;
1497 team -> t.t_bar[ bt ].b_arrived = new_state;
        KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %u\n",
                        gtid, team->t.t_id, tid, team->t.t_id,
                        &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived ) );

    KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                    gtid, team->t.t_id, tid, bt ) );
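/*
 * The release half mirrors the gather: the master (or an interior tree node)
 * bumps each child's b_go flag by KMP_BARRIER_STATE_BUMP via __kmp_release()
 * with an acquire fence, while every other thread sits in __kmp_wait_sleep()
 * on its own b_go and then resets it to KMP_INIT_BARRIER_STATE.  When
 * KMP_BARRIER_ICV_PUSH is enabled, the releasing thread first copies the
 * master's implicit-task ICVs into the child's taskdata (via the
 * load_icvs/store_icvs macros above) so the worker picks up any new control
 * values before it starts the next region.
 */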
__kmp_linear_barrier_release( enum barrier_type bt,
                              kmp_info_t *this_thr,
                              USE_ITT_BUILD_ARG( void * itt_sync_obj )

    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[ bt ].bb;
    register kmp_team_t   *team;

    if ( KMP_MASTER_TID( tid )) {
        register unsigned int i;
        register kmp_uint32 nproc = this_thr->th.th_team_nproc;
        register kmp_info_t **other_threads;

        team = __kmp_threads[ gtid ]->th.th_team;
        KMP_DEBUG_ASSERT( team != NULL );
        other_threads = team->t.t_threads;

        KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
                        gtid, team->t.t_id, tid, bt ) );
1534 #if KMP_BARRIER_ICV_PUSH
1535 if ( propagate_icvs ) {
1536 load_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs);
1537 for (i = 1; i < nproc; i++) {
1538 __kmp_init_implicit_task( team->t.t_ident,
1539 team->t.t_threads[i], team, i, FALSE );
1540 store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs, &team->t.t_implicit_task_taskdata[0].td_icvs);
1544 #endif // KMP_BARRIER_ICV_PUSH
1547 for (i = 1; i < nproc; i++) {
1548 #if KMP_CACHE_MANAGE
1551 KMP_CACHE_PREFETCH( &other_threads[ i+1 ]-> th.th_bar[ bt ].bb.b_go );
1553 KA_TRACE( 20, (
"__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
1554 "go(%p): %u => %u\n",
1555 gtid, team->t.t_id, tid,
1556 other_threads[i]->th.th_info.ds.ds_gtid, team->t.t_id, i,
1557 &other_threads[i]->th.th_bar[bt].bb.b_go,
1558 other_threads[i]->th.th_bar[bt].bb.b_go,
1559 other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP
1562 __kmp_release( other_threads[ i ],
1563 &other_threads[ i ]-> th.th_bar[ bt ].bb.b_go, kmp_acquire_fence );
        KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
                        gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP ) );

        __kmp_wait_sleep( this_thr, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP, TRUE
                          USE_ITT_BUILD_ARG( itt_sync_obj )
1576 #if USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY
1577 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1579 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 0, -1 );
1581 __kmp_itt_task_starting( itt_sync_obj );
1583 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1586 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1587 if ( itt_sync_obj != NULL )
1588 __kmp_itt_task_finished( itt_sync_obj );
1595 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1601 #if USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY
1603 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1605 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1606 __kmp_itt_barrier_starting( gtid, itt_sync_obj );
1610 tid = __kmp_tid_from_gtid( gtid );
1611 team = __kmp_threads[ gtid ]-> th.th_team;
1613 KMP_DEBUG_ASSERT( team != NULL );
1615 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
                    gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );

    KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
                    gtid, team->t.t_id, tid, bt ) );
__kmp_tree_barrier_release( enum barrier_type bt,
                            kmp_info_t *this_thr,
                            USE_ITT_BUILD_ARG( void * itt_sync_obj )

    register kmp_team_t   *team;
    register kmp_bstate_t *thr_bar       = &this_thr->th.th_bar[ bt ].bb;
    register kmp_uint32    nproc;
    register kmp_uint32    branch_bits   = __kmp_barrier_release_branch_bits[ bt ];
    register kmp_uint32    branch_factor = 1 << branch_bits;
    register kmp_uint32    child;
    register kmp_uint32    child_tid;

    if ( ! KMP_MASTER_TID( tid )) {

        KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n",
                        gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP ) );
1657 __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE
1658 USE_ITT_BUILD_ARG(itt_sync_obj)
1661 #if USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY
1662 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1664 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 0, -1 );
1666 __kmp_itt_task_starting( itt_sync_obj );
1668 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1671 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1672 if ( itt_sync_obj != NULL )
1673 __kmp_itt_task_finished( itt_sync_obj );
1680 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1686 #if USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY
1688 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1690 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1691 __kmp_itt_barrier_starting( gtid, itt_sync_obj );
1694 team = __kmp_threads[ gtid ]-> th.th_team;
1695 KMP_DEBUG_ASSERT( team != NULL );
1696 tid = __kmp_tid_from_gtid( gtid );
1698 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
1699 KA_TRACE( 20, (
"__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1700 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
1705 team = __kmp_threads[ gtid ]-> th.th_team;
1706 KMP_DEBUG_ASSERT( team != NULL );
1708 KA_TRACE( 20, (
"__kmp_tree_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
1709 gtid, team->t.t_id, tid, bt ) );
1712 nproc = this_thr -> th.th_team_nproc;
1713 child_tid = ( tid << branch_bits ) + 1;
1715 if ( child_tid < nproc ) {
1716 register kmp_info_t **other_threads = team -> t.t_threads;
1721 register kmp_info_t *child_thr = other_threads[ child_tid ];
1722 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
1723 #if KMP_CACHE_MANAGE
1725 if ( child+1 <= branch_factor && child_tid+1 < nproc )
1726 KMP_CACHE_PREFETCH( &other_threads[ child_tid+1 ] -> th.th_bar[ bt ].bb.b_go );
1729 #if KMP_BARRIER_ICV_PUSH
1730 if ( propagate_icvs ) {
1731 __kmp_init_implicit_task( team->t.t_ident,
1732 team->t.t_threads[child_tid], team, child_tid, FALSE );
1733 load_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs);
1734 store_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs, &team->t.t_implicit_task_taskdata[0].td_icvs);
1737 #endif // KMP_BARRIER_ICV_PUSH
1739 KA_TRACE( 20, (
"__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
1740 "go(%p): %u => %u\n",
1741 gtid, team->t.t_id, tid,
1742 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
1743 child_tid, &child_bar -> b_go, child_bar -> b_go,
1744 child_bar -> b_go + KMP_BARRIER_STATE_BUMP ) );
1747 __kmp_release( child_thr, &child_bar -> b_go, kmp_acquire_fence );
1752 while ( child <= branch_factor && child_tid < nproc );
    KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
                    gtid, team->t.t_id, tid, bt ) );
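/*
 * Hypercube release.  With KMP_REVERSE_HYPER_BAR defined (as it is here), the
 * wake-up walks the hypercube in the opposite order of the gather: the
 * releasing thread first locates its topmost level, then descends, releasing
 * children from the highest child id downward at each level.  The reverse
 * ordering presumably releases last-gathered (and therefore likely still
 * spinning) threads first and avoids hitting the go flags in the same order
 * that the gather hammered the arrive flags.
 */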
1760 #define KMP_REVERSE_HYPER_BAR
__kmp_hyper_barrier_release( enum barrier_type bt,
                             kmp_info_t *this_thr,
                             USE_ITT_BUILD_ARG( void * itt_sync_obj )

    register kmp_team_t   *team;
    register kmp_bstate_t *thr_bar       = &this_thr->th.th_bar[ bt ].bb;
    register kmp_info_t  **other_threads;
    register kmp_uint32    num_threads;
    register kmp_uint32    branch_bits   = __kmp_barrier_release_branch_bits[ bt ];
    register kmp_uint32    branch_factor = 1 << branch_bits;
    register kmp_uint32    child;
    register kmp_uint32    child_tid;
    register kmp_uint32    offset;
    register kmp_uint32    level;

    if ( ! KMP_MASTER_TID( tid )) {

        KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
                        gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP ) );
1793 __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE
1794 USE_ITT_BUILD_ARG( itt_sync_obj )
1797 #if USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY
1798 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1800 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 0, -1 );
1802 __kmp_itt_task_starting( itt_sync_obj );
1804 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1807 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1808 if ( itt_sync_obj != NULL )
1809 __kmp_itt_task_finished( itt_sync_obj );
1816 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1822 #if USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY
1824 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1826 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1827 __kmp_itt_barrier_starting( gtid, itt_sync_obj );
1830 team = __kmp_threads[ gtid ]-> th.th_team;
1831 KMP_DEBUG_ASSERT( team != NULL );
1832 tid = __kmp_tid_from_gtid( gtid );
1834 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
1835 KA_TRACE( 20, (
"__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1836 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
1841 team = __kmp_threads[ gtid ]-> th.th_team;
1842 KMP_DEBUG_ASSERT( team != NULL );
1844 KA_TRACE( 20, (
"__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
1845 gtid, team->t.t_id, tid, bt ) );
1848 num_threads = this_thr -> th.th_team_nproc;
1849 other_threads = team -> t.t_threads;
1851 #ifdef KMP_REVERSE_HYPER_BAR
1853 for ( level = 0, offset = 1;
1854 offset < num_threads && (((tid >> level) & (branch_factor-1)) == 0);
1855 level += branch_bits, offset <<= branch_bits );
1858 for ( level -= branch_bits, offset >>= branch_bits;
1860 level -= branch_bits, offset >>= branch_bits )
1863 for ( level = 0, offset = 1;
1864 offset < num_threads;
1865 level += branch_bits, offset <<= branch_bits )
1868 #ifdef KMP_REVERSE_HYPER_BAR
1871 child = num_threads >> ((level==0)?level:level-1);
1872 for ( child = (child < branch_factor-1) ? child : branch_factor-1,
1873 child_tid = tid + (child << level);
1875 child--, child_tid -= (1 << level) )
1877 if (((tid >> level) & (branch_factor - 1)) != 0)
1883 for ( child = 1, child_tid = tid + (1 << level);
1884 child < branch_factor && child_tid < num_threads;
1885 child++, child_tid += (1 << level) )
1886 #endif // KMP_REVERSE_HYPER_BAR
            if ( child_tid >= num_threads ) continue;
1890 register kmp_info_t *child_thr = other_threads[ child_tid ];
1891 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
1892 #if KMP_CACHE_MANAGE
1893 register kmp_uint32 next_child_tid = child_tid - (1 << level);
1895 #ifdef KMP_REVERSE_HYPER_BAR
1896 if ( child-1 >= 1 && next_child_tid < num_threads )
1898 if ( child+1 < branch_factor && next_child_tid < num_threads )
1899 #endif // KMP_REVERSE_HYPER_BAR
1900 KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ]->th.th_bar[ bt ].bb.b_go );
1903 #if KMP_BARRIER_ICV_PUSH
1904 if ( propagate_icvs ) {
1905 KMP_DEBUG_ASSERT( team != NULL );
1906 __kmp_init_implicit_task( team->t.t_ident,
1907 team->t.t_threads[child_tid], team, child_tid, FALSE );
1908 load_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs);
1909 store_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs, &team->t.t_implicit_task_taskdata[0].td_icvs);
1912 #endif // KMP_BARRIER_ICV_PUSH
1914 KA_TRACE( 20, (
"__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
1915 "go(%p): %u => %u\n",
1916 gtid, team->t.t_id, tid,
1917 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
1918 child_tid, &child_bar -> b_go, child_bar -> b_go,
1919 child_bar -> b_go + KMP_BARRIER_STATE_BUMP ) );
1922 __kmp_release( child_thr, &child_bar -> b_go, kmp_acquire_fence );
    KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
                    gtid, team->t.t_id, tid, bt ) );
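/*
 * __kmp_barrier() below is the driver the rest of the runtime calls.  For a
 * non-serialized team it: optionally runs an extra tasking barrier, refreshes
 * the per-thread blocktime from the task's ICVs, stores reduce_data, and runs
 * the configured gather pattern (linear/tree/hyper); on the master it then
 * waits for or re-arms the task team, emits ITT frame notifications when
 * enabled, and -- unless this is the gather-only half of a split barrier --
 * runs the matching release pattern and re-syncs task teams.  The return value
 * (status) appears to flag the thread that should later call
 * __kmp_end_split_barrier().
 */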
__kmp_barrier( enum barrier_type bt, int gtid, int is_split,
               size_t reduce_size, void *reduce_data,
               void (*reduce)( void *, void * ) )

    register int          tid      = __kmp_tid_from_gtid( gtid );
    register kmp_info_t  *this_thr = __kmp_threads[ gtid ];
    register kmp_team_t  *team     = this_thr->th.th_team;
    register int          status   = 0;

    ident_t * tmp_loc = __kmp_threads[ gtid ]->th.th_ident;

    KA_TRACE( 15, ( "__kmp_barrier: T#%d(%d:%d) has arrived\n",
                    gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid) ) );
1951 if ( ! team->t.t_serialized ) {
1954 void * itt_sync_obj = NULL;
1956 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
1957 itt_sync_obj = __kmp_itt_barrier_object( gtid, bt, 1 );
1961 if ( __kmp_tasking_mode == tskm_extra_barrier ) {
1962 __kmp_tasking_barrier( team, this_thr, gtid );
1963 KA_TRACE( 15, (
"__kmp_barrier: T#%d(%d:%d) past tasking barrier\n",
1964 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid) ) );
1975 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
1977 this_thr -> th.th_team_bt_intervals = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1978 this_thr -> th.th_team_bt_set = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
1980 this_thr -> th.th_team_bt_intervals = team -> t.t_set_bt_intervals[tid];
1981 this_thr -> th.th_team_bt_set= team -> t.t_set_bt_set[tid];
1982 #endif // OMP_30_ENABLED
1986 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
1987 __kmp_itt_barrier_starting( gtid, itt_sync_obj );
1990 if ( reduce != NULL ) {
1992 this_thr -> th.th_local.reduce_data = reduce_data;
1994 if ( __kmp_barrier_gather_pattern[ bt ] == bp_linear_bar || __kmp_barrier_gather_branch_bits[ bt ] == 0 ) {
1995 __kmp_linear_barrier_gather( bt, this_thr, gtid, tid, reduce
1996 USE_ITT_BUILD_ARG( itt_sync_obj )
        } else if ( __kmp_barrier_gather_pattern[ bt ] == bp_tree_bar ) {
1999 __kmp_tree_barrier_gather( bt, this_thr, gtid, tid, reduce
2000 USE_ITT_BUILD_ARG( itt_sync_obj )
2003 __kmp_hyper_barrier_gather( bt, this_thr, gtid, tid, reduce
2004 USE_ITT_BUILD_ARG( itt_sync_obj )
2012 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
2013 __kmp_itt_barrier_middle( gtid, itt_sync_obj );
2018 if ( KMP_MASTER_TID( tid ) ) {
2022 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2023 __kmp_task_team_wait( this_thr, team
2024 USE_ITT_BUILD_ARG( itt_sync_obj )
2026 __kmp_task_team_setup( this_thr, team );
2031 #if USE_ITT_BUILD && USE_ITT_NOTIFY
2033 if( __itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode ) {
2034 kmp_uint64 tmp = __itt_get_timestamp();
2035 switch( __kmp_forkjoin_frames_mode ) {
2037 __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, tmp_loc );
2038 this_thr->th.th_frame_time = tmp;
2041 __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, tmp_loc );
2044 __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, tmp_loc );
2045 __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, tmp_loc );
2046 this_thr->th.th_frame_time = tmp;
2054 if ( status == 1 || ! is_split ) {
2055 if ( __kmp_barrier_release_pattern[ bt ] == bp_linear_bar || __kmp_barrier_release_branch_bits[ bt ] == 0 ) {
2056 __kmp_linear_barrier_release( bt, this_thr, gtid, tid, FALSE
2057 USE_ITT_BUILD_ARG( itt_sync_obj )
            } else if ( __kmp_barrier_release_pattern[ bt ] == bp_tree_bar ) {
2060 __kmp_tree_barrier_release( bt, this_thr, gtid, tid, FALSE
2061 USE_ITT_BUILD_ARG( itt_sync_obj )
2064 __kmp_hyper_barrier_release( bt, this_thr, gtid, tid, FALSE
2065 USE_ITT_BUILD_ARG( itt_sync_obj )
2069 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2070 __kmp_task_team_sync( this_thr, team );
2078 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
2079 __kmp_itt_barrier_finished( gtid, itt_sync_obj );
2087 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2092 KMP_DEBUG_ASSERT( team->t.t_task_team == NULL );
2093 KMP_DEBUG_ASSERT( this_thr->th.th_task_team == NULL );
2098 KA_TRACE( 15, (
"__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
2099 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid),
__kmp_end_split_barrier( enum barrier_type bt, int gtid )
2108 int tid = __kmp_tid_from_gtid( gtid );
2109 kmp_info_t *this_thr = __kmp_threads[ gtid ];
2110 kmp_team_t *team = this_thr -> th.th_team;
2112 if( ! team -> t.t_serialized ) {
2113 if( KMP_MASTER_GTID( gtid ) ) {
2114 if ( __kmp_barrier_release_pattern[ bt ] == bp_linear_bar || __kmp_barrier_release_branch_bits[ bt ] == 0 ) {
2115 __kmp_linear_barrier_release( bt, this_thr, gtid, tid, FALSE
            } else if ( __kmp_barrier_release_pattern[ bt ] == bp_tree_bar ) {
2121 __kmp_tree_barrier_release( bt, this_thr, gtid, tid, FALSE
2127 __kmp_hyper_barrier_release( bt, this_thr, gtid, tid, FALSE
2134 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2135 __kmp_task_team_sync( this_thr, team );
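/*
 * __kmp_reserve_threads() below decides how many threads a new parallel region
 * may actually get.  Working from the requested set_nthreads it applies, in
 * order: nesting/serialization rules, the dynamic adjustment mode
 * (load-balance, thread-limit, or random), the KMP_ALL_THREADS/__kmp_max_nth
 * cap, and finally the capacity of the __kmp_threads array (expanding it with
 * __kmp_expand_threads when possible).  A result of 1 means the region will be
 * serialized; CantFormThrTeam warnings are only issued when dynamic adjustment
 * is off.
 */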
__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
                       int master_tid, int set_nthreads
2163 int use_rml_to_adjust_nth;
2164 KMP_DEBUG_ASSERT( __kmp_init_serial );
2165 KMP_DEBUG_ASSERT( root && parent_team );
2170 if ( set_nthreads == 1 ) {
2171 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d reserving 1 thread; requested %d threads\n",
2172 __kmp_get_gtid(), set_nthreads ));
2175 if ( ( !get__nested_2(parent_team,master_tid) && (root->r.r_in_parallel
2179 ) ) || ( __kmp_library == library_serial ) ) {
2180 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d serializing team; requested %d threads\n",
2181 __kmp_get_gtid(), set_nthreads ));
2189 new_nthreads = set_nthreads;
2190 use_rml_to_adjust_nth = FALSE;
2191 if ( ! get__dynamic_2( parent_team, master_tid ) ) {
2194 #ifdef USE_LOAD_BALANCE
2195 else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
2196 new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
2197 if ( new_nthreads == 1 ) {
2198 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
2202 if ( new_nthreads < set_nthreads ) {
2203 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
2204 master_tid, new_nthreads ));
2208 else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
2209 new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
2210 : root->r.r_hot_team->t.t_nproc);
2211 if ( new_nthreads <= 1 ) {
2212 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
2216 if ( new_nthreads < set_nthreads ) {
2217 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
2218 master_tid, new_nthreads ));
2221 new_nthreads = set_nthreads;
2224 else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
2225 if ( set_nthreads > 2 ) {
2226 new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
2227 new_nthreads = ( new_nthreads % set_nthreads ) + 1;
2228 if ( new_nthreads == 1 ) {
2229 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
2233 if ( new_nthreads < set_nthreads ) {
2234 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
2235 master_tid, new_nthreads ));
2246 if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
2247 root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
2248 int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
2249 root->r.r_hot_team->t.t_nproc );
2250 if ( tl_nthreads <= 0 ) {
2257 if ( ! get__dynamic_2( parent_team, master_tid )
2258 && ( ! __kmp_reserve_warn ) ) {
2259 __kmp_reserve_warn = 1;
2262 KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
2263 KMP_HNT( Unset_ALL_THREADS ),
2267 if ( tl_nthreads == 1 ) {
2268 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
2272 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
2273 master_tid, tl_nthreads ));
2274 new_nthreads = tl_nthreads;
2284 capacity = __kmp_threads_capacity;
2285 if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
2288 if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
2289 root->r.r_hot_team->t.t_nproc ) > capacity ) {
2293 int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
2294 root->r.r_hot_team->t.t_nproc ) - capacity;
2295 int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
2296 if ( slotsAdded < slotsRequired ) {
2300 new_nthreads -= ( slotsRequired - slotsAdded );
2301 KMP_ASSERT( new_nthreads >= 1 );
2306 if ( ! get__dynamic_2( parent_team, master_tid )
2307 && ( ! __kmp_reserve_warn ) ) {
2308 __kmp_reserve_warn = 1;
2309 if ( __kmp_tp_cached ) {
2312 KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
2313 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
2314 KMP_HNT( PossibleSystemLimitOnThreads ),
2321 KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
2322 KMP_HNT( SystemLimitOnThreads ),
2330 if ( new_nthreads == 1 ) {
2331 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
2332 __kmp_get_gtid(), set_nthreads ) );
2336 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
2337 __kmp_get_gtid(), new_nthreads, set_nthreads ));
2338 return new_nthreads;
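/*
 * __kmp_fork_team_threads() below binds the master to slot 0 of the new team
 * and then fills slots 1..nproc-1 via __kmp_allocate_thread(), re-priming each
 * worker's barrier b_arrived counters from the team's current values so nobody
 * trips a stale barrier generation.  On OMP 4.0 builds with affinity support
 * it finishes by calling __kmp_partition_places() to spread the team over the
 * place list.
 */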
__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
                         kmp_info_t *master_th, int master_gtid )

    KA_TRACE( 10, ( "__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
2355 KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
2359 master_th -> th.th_info.ds.ds_tid = 0;
2360 master_th -> th.th_team = team;
2361 master_th -> th.th_team_nproc = team -> t.t_nproc;
2362 master_th -> th.th_team_master = master_th;
2363 master_th -> th.th_team_serialized = FALSE;
2364 master_th -> th.th_dispatch = & team -> t.t_dispatch[ 0 ];
2367 if ( team != root->r.r_hot_team ) {
2370 team -> t.t_threads[ 0 ] = master_th;
2371 __kmp_initialize_info( master_th, team, 0, master_gtid );
2374 for ( i=1 ; i < team->t.t_nproc ; i++ ) {
2377 team -> t.t_threads[ i ] = __kmp_allocate_thread( root, team, i );
2378 KMP_DEBUG_ASSERT( team->t.t_threads[i] );
2379 KMP_DEBUG_ASSERT( team->t.t_threads[i]->th.th_team == team );
2381 KA_TRACE( 20, (
"__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%u, plain=%u\n",
2382 __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
2383 __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
2384 team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
2385 team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
2389 kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
2390 for ( b = 0; b < bs_last_barrier; ++ b ) {
2391 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
2396 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
2397 __kmp_partition_places( team );
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc );

__kmp_setup_icv_copy( kmp_team_t *team, int new_nproc,
#if OMP_30_ENABLED
                      kmp_internal_control_t * new_icvs,
#else
                      int new_set_nproc, int new_set_dynamic, int new_set_nested,
                      int new_set_blocktime, int new_bt_intervals, int new_bt_set
#endif
                      );
    microtask_t microtask,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX

    int master_this_cons;
    int master_last_cons;
    kmp_team_t *parent_team;
    kmp_info_t *master_th;
    int master_set_numthreads;

    KA_TRACE( 20, ( "__kmp_fork_call: enter T#%d\n", gtid ));

    KMP_DEBUG_ASSERT( __kmp_init_serial );
2460 if( ! TCR_4(__kmp_init_parallel) )
2461 __kmp_parallel_initialize();
2464 master_th = __kmp_threads[ gtid ];
2465 parent_team = master_th -> th.th_team;
2466 master_tid = master_th -> th.th_info.ds.ds_tid;
2467 master_this_cons = master_th -> th.th_local.this_construct;
2468 master_last_cons = master_th -> th.th_local.last_construct;
2469 root = master_th -> th.th_root;
2470 master_active = root -> r.r_active;
2471 master_set_numthreads = master_th -> th.th_set_nproc;
2474 level = parent_team->t.t_level;
2475 #endif // OMP_30_ENABLED
2477 teams_level = master_th->th.th_teams_level;
2481 master_th->th.th_ident = loc;
2484 if ( master_th->th.th_team_microtask &&
2485 ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
2489 parent_team->t.t_ident = loc;
2490 parent_team->t.t_argc = argc;
        argv = (void**)parent_team->t.t_argv;
        for ( i = argc - 1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
            *argv++ = va_arg( *ap, void * );
#else
            *argv++ = va_arg( ap, void * );
#endif
2500 if ( parent_team == master_th->th.th_serial_team ) {
2503 KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
2504 parent_team->t.t_serialized--;
2506 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv );
2509 parent_team->t.t_pkfn = microtask;
2510 parent_team->t.t_invoke = invoker;
2511 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
2512 parent_team->t.t_active_level ++;
2513 parent_team->t.t_level ++;
2516 if ( master_set_numthreads ) {
2517 if ( master_set_numthreads < master_th->th.th_set_nth_teams ) {
2519 kmp_info_t **other_threads = parent_team->t.t_threads;
2520 parent_team->t.t_nproc = master_set_numthreads;
2521 for ( i = 0; i < master_set_numthreads; ++i ) {
2522 other_threads[i]->th.th_team_nproc = master_set_numthreads;
2526 master_th->th.th_set_nproc = 0;
2530 KF_TRACE( 10, (
"__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
2531 __kmp_internal_fork( loc, gtid, parent_team );
2532 KF_TRACE( 10, (
"__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
2535 KA_TRACE( 20, (
"__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
2536 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
2538 if (! parent_team->t.t_invoke( gtid )) {
2539 KMP_ASSERT2( 0,
"cannot invoke microtask for MASTER thread" );
2541 KA_TRACE( 20, (
"__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
2542 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
2545 KA_TRACE( 20, (
"__kmp_fork_call: parallel exit T#%d\n", gtid ));
2551 #if OMP_30_ENABLED && KMP_DEBUG
2552 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2553 KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team );
2555 #endif // OMP_30_ENABLED
2558 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2561 if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
2565 #endif // OMP_30_ENABLED
2568 nthreads = master_set_numthreads ?
2569 master_set_numthreads : get__nproc_2( parent_team, master_tid );
2570 nthreads = __kmp_reserve_threads( root, parent_team, master_tid, nthreads
2576 ,( ( ap == NULL && teams_level == 0 ) ||
2577 ( ap && teams_level > 0 && teams_level == level ) )
2581 KMP_DEBUG_ASSERT( nthreads > 0 );
2584 master_th -> th.th_set_nproc = 0;
2588 if ( nthreads == 1 ) {
#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM )
        void * args[ argc ];
#else
        void * * args = (void**) alloca( argc * sizeof( void * ) );
#endif

        __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
        KA_TRACE( 20, ( "__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
        if ( exec_master == 0 ) {

            KA_TRACE( 20, ( "__kmp_fork_call: T#%d serial exit\n", gtid ));

        } else if ( exec_master == 1 ) {
2607 master_th -> th.th_serial_team -> t.t_ident = loc;
2611 master_th -> th.th_serial_team -> t.t_level--;
2613 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv );
        } else if ( microtask == (microtask_t)__kmp_teams_master ) {
2615 KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
2616 team = master_th->th.th_team;
2618 team->t.t_invoke = invoker;
2619 __kmp_alloc_argv_entries( argc, team, TRUE );
2620 team->t.t_argc = argc;
            argv = (void**) team->t.t_argv;

            for ( i = argc - 1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
                *argv++ = va_arg( *ap, void * );
#else
                *argv++ = va_arg( ap, void * );
#endif
2631 for( i=0; i < argc; ++i )
2633 argv[i] = parent_team->t.t_argv[i];
2643 for( i=argc-1; i >= 0; --i )
2645 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
2646 *argv++ = va_arg( *ap, void * );
#else
2648 *argv++ = va_arg( ap, void * );
#endif
2651 __kmp_invoke_microtask( microtask, gtid, 0, argc, args );
2657 KMP_ASSERT2( exec_master <= 1, "__kmp_fork_call: unknown parameter exec_master" );
2660 KA_TRACE( 20, ( "__kmp_fork_call: T#%d serial exit\n", gtid ));
2669 KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
2670 parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
2671 master_th->th.th_current_task->td_icvs.max_active_levels ) );
2674 master_th->th.th_current_task->td_flags.executing = 0;
2678 if ( !master_th->th.th_team_microtask || level > teams_level )
2682 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
2689 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
2690 if ( ( level + 1 < __kmp_nested_nth.used ) &&
2691 ( __kmp_nested_nth.nth[level + 1] != nthreads_icv ) ) {
2692 nthreads_icv = __kmp_nested_nth.nth[level + 1];
2702 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
2703 kmp_proc_bind_t proc_bind_icv;
2705 if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
2706 proc_bind = proc_bind_false;
2707 proc_bind_icv = proc_bind_default;
2710 proc_bind_icv = master_th->th.th_current_task->td_icvs.proc_bind;
2711 if ( proc_bind == proc_bind_default ) {
2716 proc_bind = proc_bind_icv;
2729 if ( ( level + 1 < __kmp_nested_proc_bind.used )
2730 && ( __kmp_nested_proc_bind.bind_types[level + 1] != proc_bind_icv ) ) {
2731 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2734 proc_bind_icv = proc_bind_default;
2741 master_th->th.th_set_proc_bind = proc_bind_default;
2744 if ( ( nthreads_icv > 0 )
2746 || ( proc_bind_icv != proc_bind_default )
2750 kmp_internal_control_t new_icvs;
2751 copy_icvs( & new_icvs, & master_th->th.th_current_task->td_icvs );
2752 new_icvs.next = NULL;
2754 if ( nthreads_icv > 0 ) {
2755 new_icvs.nproc = nthreads_icv;
2759 if ( proc_bind_icv != proc_bind_default ) {
2760 new_icvs.proc_bind = proc_bind_icv;
2765 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
2766 team = __kmp_allocate_team(root, nthreads, nthreads,
2775 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
2776 team = __kmp_allocate_team(root, nthreads, nthreads,
2781 &master_th->th.th_current_task->td_icvs,
2783 parent_team->t.t_set_nproc[master_tid],
2784 parent_team->t.t_set_dynamic[master_tid],
2785 parent_team->t.t_set_nested[master_tid],
2786 parent_team->t.t_set_blocktime[master_tid],
2787 parent_team->t.t_set_bt_intervals[master_tid],
2788 parent_team->t.t_set_bt_set[master_tid],
2793 KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n",
2797 team->t.t_master_tid = master_tid;
2798 team->t.t_master_this_cons = master_this_cons;
2799 team->t.t_master_last_cons = master_last_cons;
2801 team->t.t_parent = parent_team;
2802 TCW_SYNC_PTR(team->t.t_pkfn, microtask);
2803 team->t.t_invoke = invoker;
2804 team->t.t_ident = loc;
2808 if ( !master_th->th.th_team_microtask || level > teams_level ) {
2810 team->t.t_level = parent_team->t.t_level + 1;
2811 team->t.t_active_level = parent_team->t.t_active_level + 1;
2815 team->t.t_level = parent_team->t.t_level;
2816 team->t.t_active_level = parent_team->t.t_active_level;
2819 team->t.t_sched = get__sched_2( parent_team, master_tid );
2821 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2822 if ( __kmp_inherit_fp_control ) {
2823 __kmp_store_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
2824 __kmp_store_mxcsr( &team->t.t_mxcsr );
2825 team->t.t_mxcsr &= KMP_X86_MXCSR_MASK;
2826 team->t.t_fp_control_saved = TRUE;
2829 team->t.t_fp_control_saved = FALSE;
2833 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2838 KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team );
2839 KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
2840 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
2841 parent_team, team->t.t_task_team, team ) );
2842 master_th->th.th_task_team = team->t.t_task_team;
2843 KMP_DEBUG_ASSERT( ( master_th->th.th_task_team == NULL ) || ( team == root->r.r_hot_team ) ) ;
2845 #endif // OMP_30_ENABLED
2847 KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2848 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
2849 KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
2850 ( team->t.t_master_tid == 0 &&
2851 ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
2855 argv = (void**) team -> t.t_argv;
2859 for( i=argc-1; i >= 0; --i )
2861 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM) && KMP_OS_LINUX
2862 *argv++ = va_arg( *ap, void * );
#else
2864 *argv++ = va_arg( ap, void * );
#endif
2868 for( i=0; i < argc; ++i )
2870 argv[i] = team->t.t_parent->t.t_argv[i];
2876 team->t.t_master_active = master_active;
2877 if (!root -> r.r_active)
2878 root -> r.r_active = TRUE;
2880 __kmp_fork_team_threads( root, team, master_th, gtid );
2881 __kmp_setup_icv_copy(team, nthreads
2883 , &master_th->th.th_current_task->td_icvs, loc
2885 , parent_team->t.t_set_nproc[master_tid],
2886 parent_team->t.t_set_dynamic[master_tid],
2887 parent_team->t.t_set_nested[master_tid],
2888 parent_team->t.t_set_blocktime[master_tid],
2889 parent_team->t.t_set_bt_intervals[master_tid],
2890 parent_team->t.t_set_bt_set[master_tid]
2895 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2900 if ( ( __itt_frame_begin_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG )
2902 if ( !master_th->th.th_team_microtask || microtask == (microtask_t)__kmp_teams_master )
2905 __kmp_itt_region_forking( gtid );
2908 #if USE_ITT_BUILD && USE_ITT_NOTIFY && OMP_30_ENABLED
2910 if( ( __kmp_forkjoin_frames_mode == 1 || __kmp_forkjoin_frames_mode == 3 ) && __itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr )
2912 if( ! ( team->t.t_active_level > 1 ) ) {
2913 master_th->th.th_frame_time = __itt_get_timestamp();
2919 KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
2922 KF_TRACE( 10, ( "__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", root, team, master_th, gtid ) );
2925 if ( __itt_stack_caller_create_ptr ) {
2926 team->t.t_stack_id = __kmp_itt_stack_caller_create();
2934 __kmp_internal_fork( loc, gtid, team );
2935 KF_TRACE( 10, ( "__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n", root, team, master_th, gtid ) );
2938 if (! exec_master) {
2939 KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ));
2944 KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid, team->t.t_id, team->t.t_pkfn ) );
2947 if (! team->t.t_invoke( gtid )) {
2948 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
2950 KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid, team->t.t_id, team->t.t_pkfn ) );
2954 KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ));
2961 __kmp_join_call( ident_t *loc, int gtid
2968 kmp_team_t *parent_team;
2969 kmp_info_t *master_th;
2974 KA_TRACE( 20, ( "__kmp_join_call: enter T#%d\n", gtid ));
2977 master_th = __kmp_threads[ gtid ];
2978 root = master_th -> th.th_root;
2979 team = master_th -> th.th_team;
2980 parent_team = team->t.t_parent;
2982 master_th->th.th_ident = loc;
2984 #if OMP_30_ENABLED && KMP_DEBUG
2985 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2986 KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
2987 __kmp_gtid_from_thread( master_th ), team,
2988 team -> t.t_task_team, master_th->th.th_task_team) );
2989 KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team );
2991 #endif // OMP_30_ENABLED
2993 if( team->t.t_serialized ) {
2995 if ( master_th->th.th_team_microtask ) {
2997 int level = team->t.t_level;
2998 int tlevel = master_th->th.th_teams_level;
2999 if ( level == tlevel ) {
3003 } else if ( level == tlevel + 1 ) {
3006 team->t.t_serialized++;
3014 master_active = team->t.t_master_active;
3022 __kmp_internal_join( loc, gtid, team );
3027 if ( __itt_stack_caller_create_ptr ) {
3028 __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id );
3032 if ( ( __itt_frame_end_v3_ptr && __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode ) || KMP_ITT_DEBUG )
3034 if ( !master_th->th.th_team_microtask ||
3035 ( !exit_teams && team->t.t_level == master_th->th.th_teams_level ) )
3040 master_th->th.th_ident = loc;
3041 __kmp_itt_region_joined( gtid );
3046 if ( master_th->th.th_team_microtask &&
3048 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
3049 team->t.t_level == master_th->th.th_teams_level + 1 ) {
3056 team->t.t_active_level --;
3057 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
3060 if ( master_th->th.th_team_nproc < master_th->th.th_set_nth_teams ) {
3061 int old_num = master_th->th.th_team_nproc;
3062 int new_num = master_th->th.th_set_nth_teams;
3063 kmp_info_t **other_threads = team->t.t_threads;
3064 team->t.t_nproc = new_num;
3065 for ( i = 0; i < old_num; ++i ) {
3066 other_threads[i]->th.th_team_nproc = new_num;
3069 for ( i = old_num; i < new_num; ++i ) {
3072 kmp_balign_t * balign = other_threads[i]->th.th_bar;
3073 for ( b = 0; b < bp_last_bar; ++ b ) {
3074 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
3077 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
3084 master_th -> th.th_info .ds.ds_tid = team -> t.t_master_tid;
3085 master_th -> th.th_local.this_construct = team -> t.t_master_this_cons;
3086 master_th -> th.th_local.last_construct = team -> t.t_master_last_cons;
3088 master_th -> th.th_dispatch =
3089 & parent_team -> t.t_dispatch[ team -> t.t_master_tid ];
3095 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3098 if ( !master_th->th.th_team_microtask || team->t.t_level > master_th->th.th_teams_level )
3102 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
3104 KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );
3107 KF_TRACE( 10, ( "__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0, master_th, team ) );
3109 __kmp_pop_current_task_from_thread( master_th );
3110 #endif // OMP_30_ENABLED
3112 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
3116 master_th -> th.th_first_place = team -> t.t_first_place;
3117 master_th -> th.th_last_place = team -> t.t_last_place;
3120 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3121 if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
3122 __kmp_clear_x87_fpu_status_word();
3123 __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
3124 __kmp_load_mxcsr( &team->t.t_mxcsr );
3128 if ( root -> r.r_active != master_active )
3129 root -> r.r_active = master_active;
3131 __kmp_free_team( root, team );
3139 master_th -> th.th_team = parent_team;
3140 master_th -> th.th_team_nproc = parent_team -> t.t_nproc;
3141 master_th -> th.th_team_master = parent_team -> t.t_threads[0];
3142 master_th -> th.th_team_serialized = parent_team -> t.t_serialized;
3145 if( parent_team -> t.t_serialized &&
3146 parent_team != master_th->th.th_serial_team &&
3147 parent_team != root->r.r_root_team ) {
3148 __kmp_free_team( root, master_th -> th.th_serial_team );
3149 master_th -> th.th_serial_team = parent_team;
3153 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3158 if ( ( master_th -> th.th_task_team = parent_team -> t.t_task_team ) != NULL ) {
3159 master_th -> th.th_task_state = master_th -> th.th_task_team -> tt.tt_state;
3161 KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
3162 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
3170 master_th->th.th_current_task->td_flags.executing = 1;
3171 #endif // OMP_30_ENABLED
3173 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3176 KA_TRACE( 20, ( "__kmp_join_call: exit T#%d\n", gtid ));
3185 __kmp_save_internal_controls ( kmp_info_t * thread )
3188 if ( thread -> th.th_team != thread -> th.th_serial_team ) {
3191 if (thread -> th.th_team -> t.t_serialized > 1) {
3194 if (thread -> th.th_team -> t.t_control_stack_top == NULL) {
3197 if ( thread -> th.th_team -> t.t_control_stack_top -> serial_nesting_level !=
3198 thread -> th.th_team -> t.t_serialized ) {
3203 kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate( sizeof(kmp_internal_control_t) );
3206 copy_icvs( control, & thread->th.th_current_task->td_icvs );
3208 control->nproc = thread->th.th_team->t.t_set_nproc[0];
3209 control->dynamic = thread->th.th_team->t.t_set_dynamic[0];
3210 control->nested = thread->th.th_team->t.t_set_nested[0];
3211 control->blocktime = thread->th.th_team->t.t_set_blocktime[0];
3212 control->bt_intervals = thread->th.th_team->t.t_set_bt_intervals[0];
3213 control->bt_set = thread->th.th_team->t.t_set_bt_set[0];
3214 #endif // OMP_30_ENABLED
3216 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
3218 control->next = thread -> th.th_team -> t.t_control_stack_top;
3219 thread -> th.th_team -> t.t_control_stack_top = control;
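/* __kmp_set_num_threads: implements omp_set_num_threads().  Clamps new_nth to
   the supported range, saves the current controls, updates the nproc ICV and,
   if the root's hot team is inactive and larger than the new value, releases
   the surplus hot-team threads back to the thread pool. */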
3226 __kmp_set_num_threads( int new_nth, int gtid )
3231 KF_TRACE( 10, ( "__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
3232 KMP_DEBUG_ASSERT( __kmp_init_serial );
3236 else if (new_nth > __kmp_max_nth)
3237 new_nth = __kmp_max_nth;
3239 thread = __kmp_threads[gtid];
3241 __kmp_save_internal_controls( thread );
3243 set__nproc( thread, new_nth );
3250 root = thread->th.th_root;
3251 if ( __kmp_init_parallel && ( ! root->r.r_active )
3252 && ( root->r.r_hot_team->t.t_nproc > new_nth ) ) {
3253 kmp_team_t *hot_team = root->r.r_hot_team;
3256 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3260 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3261 kmp_task_team_t *task_team = hot_team->t.t_task_team;
3262 if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
3269 KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 );
3270 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
3273 KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n",
3274 &hot_team->t.t_task_team ) );
3275 hot_team->t.t_task_team = NULL;
3278 KMP_DEBUG_ASSERT( task_team == NULL );
3281 #endif // OMP_30_ENABLED
3286 for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
3287 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
3288 __kmp_free_thread( hot_team->t.t_threads[f] );
3289 hot_team->t.t_threads[f] = NULL;
3291 hot_team->t.t_nproc = new_nth;
3294 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3299 for( f=0 ; f < new_nth; f++ ) {
3300 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
3301 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
3305 hot_team -> t.t_size_changed = -1;
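/* __kmp_set_max_active_levels: implements omp_set_max_active_levels().
   Negative values are rejected with a warning and the call is ignored; values
   above KMP_MAX_ACTIVE_LEVELS_LIMIT are clipped before the ICV is updated. */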
3314 __kmp_set_max_active_levels( int gtid, int max_active_levels )
3318 KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
3319 KMP_DEBUG_ASSERT( __kmp_init_serial );
3322 if( max_active_levels < 0 ) {
3323 KMP_WARNING( ActiveLevelsNegative, max_active_levels );
3327 KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
3330 if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
3334 KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
3335 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
3340 KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
3342 thread = __kmp_threads[ gtid ];
3344 __kmp_save_internal_controls( thread );
3346 set__max_active_levels( thread, max_active_levels );
3352 __kmp_get_max_active_levels( int gtid )
3356 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
3357 KMP_DEBUG_ASSERT( __kmp_init_serial );
3359 thread = __kmp_threads[ gtid ];
3360 KMP_DEBUG_ASSERT( thread -> th.th_current_task );
3361 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
3362 gtid, thread -> th.th_current_task, thread -> th.th_current_task -> td_icvs.max_active_levels ) );
3363 return thread -> th.th_current_task -> td_icvs.max_active_levels;
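/* __kmp_set_schedule / __kmp_get_schedule: implement omp_set_schedule() and
   omp_get_schedule().  The public kmp_sched_t kinds are translated to the
   internal kmp_sch_* enumeration through __kmp_sch_map, and the chunk size is
   stored in the sched ICV (auto always uses KMP_DEFAULT_CHUNK). */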
3368 __kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )
3373 KF_TRACE( 10, ( "__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk ));
3374 KMP_DEBUG_ASSERT( __kmp_init_serial );
3380 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
3381 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
3386 KMP_MSG( ScheduleKindOutOfRange, kind ),
3387 KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
3390 kind = kmp_sched_default;
3394 thread = __kmp_threads[ gtid ];
3396 __kmp_save_internal_controls( thread );
3398 if ( kind < kmp_sched_upper_std ) {
3399 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
3402 thread -> th.th_current_task -> td_icvs.sched.r_sched_type = kmp_sch_static;
3404 thread -> th.th_current_task -> td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
3408 thread -> th.th_current_task -> td_icvs.sched.r_sched_type =
3409 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
3411 if ( kind == kmp_sched_auto ) {
3413 thread -> th.th_current_task -> td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
3415 thread -> th.th_current_task -> td_icvs.sched.chunk = chunk;
3421 __kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk )
3427 KF_TRACE( 10, ( "__kmp_get_schedule: thread %d\n", gtid ));
3428 KMP_DEBUG_ASSERT( __kmp_init_serial );
3430 thread = __kmp_threads[ gtid ];
3433 th_type = thread -> th.th_current_task -> td_icvs.sched.r_sched_type;
3435 switch ( th_type ) {
3437 case kmp_sch_static_greedy:
3438 case kmp_sch_static_balanced:
3439 *kind = kmp_sched_static;
3442 case kmp_sch_static_chunked:
3443 *kind = kmp_sched_static;
3445 case kmp_sch_dynamic_chunked:
3446 *kind = kmp_sched_dynamic;
3449 case kmp_sch_guided_iterative_chunked:
3450 case kmp_sch_guided_analytical_chunked:
3451 *kind = kmp_sched_guided;
3454 *kind = kmp_sched_auto;
3456 case kmp_sch_trapezoidal:
3457 *kind = kmp_sched_trapezoidal;
3465 KMP_FATAL( UnknownSchedulingType, th_type );
3469 *chunk = thread -> th.th_current_task -> td_icvs.sched.chunk;
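/* __kmp_get_ancestor_thread_num / __kmp_get_team_size: implement
   omp_get_ancestor_thread_num() and omp_get_team_size().  Both walk up the
   t_parent chain from the current team, stepping through serialized levels via
   t_serialized, until the requested nesting level is reached; -1 is returned
   for out-of-range levels. */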
3473 __kmp_get_ancestor_thread_num( int gtid, int level ) {
3479 KF_TRACE( 10, ( "__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
3480 KMP_DEBUG_ASSERT( __kmp_init_serial );
3483 if( level == 0 ) return 0;
3484 if( level < 0 ) return -1;
3485 thr = __kmp_threads[ gtid ];
3486 team = thr->th.th_team;
3487 ii = team -> t.t_level;
3488 if( level > ii ) return -1;
3491 if( thr->th.th_team_microtask ) {
3493 int tlevel = thr->th.th_teams_level;
3494 if( level <= tlevel ) {
3495 KMP_DEBUG_ASSERT( ii >= tlevel );
3497 if ( ii == tlevel ) {
3506 if( ii == level ) return __kmp_tid_from_gtid( gtid );
3508 dd = team -> t.t_serialized;
3512 for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
3515 if( ( team -> t.t_serialized ) && ( !dd ) ) {
3516 team = team->t.t_parent;
3520 team = team->t.t_parent;
3521 dd = team -> t.t_serialized;
3526 return ( dd > 1 ) ? ( 0 ) : ( team -> t.t_master_tid );
3530 __kmp_get_team_size( int gtid, int level ) {
3536 KF_TRACE( 10, ( "__kmp_get_team_size: thread %d %d\n", gtid, level ));
3537 KMP_DEBUG_ASSERT( __kmp_init_serial );
3540 if( level == 0 ) return 1;
3541 if( level < 0 ) return -1;
3542 thr = __kmp_threads[ gtid ];
3543 team = thr->th.th_team;
3544 ii = team -> t.t_level;
3545 if( level > ii ) return -1;
3548 if( thr->th.th_team_microtask ) {
3550 int tlevel = thr->th.th_teams_level;
3551 if( level <= tlevel ) {
3552 KMP_DEBUG_ASSERT( ii >= tlevel );
3554 if ( ii == tlevel ) {
3565 for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
3568 if( team -> t.t_serialized && ( !dd ) ) {
3569 team = team->t.t_parent;
3573 team = team->t.t_parent;
3578 return team -> t.t_nproc;
3581 #endif // OMP_30_ENABLED
3584 __kmp_get_schedule_global() {
3588 kmp_r_sched_t r_sched;
3594 r_sched.r_sched_type = __kmp_static;
3596 r_sched.r_sched_type = __kmp_guided;
3598 r_sched.r_sched_type = __kmp_sched;
3601 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) {
3602 r_sched.chunk = KMP_DEFAULT_CHUNK;
3604 r_sched.chunk = __kmp_chunk;
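/* __kmp_alloc_argv_entries: make sure team->t.t_argv can hold argc entries.
   Small argument lists use the inline t_inline_argv buffer; larger ones get a
   page-allocated array of at least KMP_MIN_MALLOC_ARGV_ENTRIES (or 2*argc)
   entries, freeing any previously allocated array when realloc is set. */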
3619 __kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc )
3622 KMP_DEBUG_ASSERT( team );
3623 if( !realloc || argc > team -> t.t_max_argc ) {
3625 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
3626 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
3627 #if (KMP_PERF_V106 == KMP_ON)
3629 if ( realloc && team -> t.t_argv != &team -> t.t_inline_argv[0] )
3630 __kmp_free( (void *) team -> t.t_argv );
3632 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
3634 team -> t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3635 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
3636 team->t.t_id, team->t.t_max_argc ));
3637 team -> t.t_argv = &team -> t.t_inline_argv[0];
3638 if ( __kmp_storage_map ) {
3639 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
3640 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3641 ( sizeof(void *) * KMP_INLINE_ARGV_ENTRIES ),
3642 "team_%d.t_inline_argv",
3647 team -> t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
3648 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
3649 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
3650 team->t.t_id, team->t.t_max_argc ));
3651 team -> t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
3652 if ( __kmp_storage_map ) {
3653 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
3654 sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
3660 __kmp_free( (void*) team -> t.t_argv );
3661 team -> t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
3662 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
3663 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
3664 team->t.t_id, team->t.t_max_argc ));
3665 team -> t.t_argv = __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
3666 if ( __kmp_storage_map ) {
3667 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
3668 sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv", team->t.t_id );
3676 __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
3679 int num_disp_buff = max_nth > 1 ? KMP_MAX_DISP_BUF : 2;
3680 #if KMP_USE_POOLED_ALLOC
3682 char *ptr = __kmp_allocate(max_nth *
3683 ( sizeof(kmp_info_t*) + sizeof(dispatch_shared_info_t)*num_disp_buff
3684 + sizeof(kmp_disp_t) + sizeof(int)*6
3687 + sizeof(kmp_r_sched_t)
3688 + sizeof(kmp_taskdata_t)
3692 team -> t.t_threads = (kmp_info_t**) ptr; ptr += sizeof(kmp_info_t*) * max_nth;
3693 team -> t.t_disp_buffer = (dispatch_shared_info_t*) ptr;
3694 ptr += sizeof(dispatch_shared_info_t) * num_disp_buff;
3695 team -> t.t_dispatch = (kmp_disp_t*) ptr; ptr += sizeof(kmp_disp_t) * max_nth;
3696 team -> t.t_set_nproc = (int*) ptr; ptr += sizeof(int) * max_nth;
3697 team -> t.t_set_dynamic = (int*) ptr; ptr += sizeof(int) * max_nth;
3698 team -> t.t_set_nested = (int*) ptr; ptr += sizeof(int) * max_nth;
3699 team -> t.t_set_blocktime = (int*) ptr; ptr += sizeof(int) * max_nth;
3700 team -> t.t_set_bt_intervals = (int*) ptr; ptr += sizeof(int) * max_nth;
3701 team -> t.t_set_bt_set = (int*) ptr;
3703 ptr += sizeof(int) * max_nth;
3705 team -> t.t_set_sched = (kmp_r_sched_t*) ptr;
3706 ptr += sizeof(kmp_r_sched_t) * max_nth;
3707 team -> t.t_implicit_task_taskdata = (kmp_taskdata_t*) ptr;
3708 ptr += sizeof(kmp_taskdata_t) * max_nth;
3709 # endif // OMP_30_ENABLED
3712 team -> t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
3713 team -> t.t_disp_buffer = (dispatch_shared_info_t*)
3714 __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
3715 team -> t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
3719 team -> t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
3721 team -> t.t_set_nproc = (int*) __kmp_allocate( sizeof(int) * max_nth );
3722 team -> t.t_set_dynamic = (int*) __kmp_allocate( sizeof(int) * max_nth );
3723 team -> t.t_set_nested = (int*) __kmp_allocate( sizeof(int) * max_nth );
3724 team -> t.t_set_blocktime = (int*) __kmp_allocate( sizeof(int) * max_nth );
3725 team -> t.t_set_bt_intervals = (int*) __kmp_allocate( sizeof(int) * max_nth );
3726 team -> t.t_set_bt_set = (int*) __kmp_allocate( sizeof(int) * max_nth );
3727 # endif // OMP_30_ENABLED
3729 team->t.t_max_nproc = max_nth;
3732 for(i = 0 ; i < num_disp_buff; ++i)
3733 team -> t.t_disp_buffer[i].buffer_index = i;
3737 __kmp_free_team_arrays(kmp_team_t *team) {
3740 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
3741 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
3742 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
3743 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
3746 __kmp_free(team->t.t_threads);
3747 #if !KMP_USE_POOLED_ALLOC
3748 __kmp_free(team->t.t_disp_buffer);
3749 __kmp_free(team->t.t_dispatch);
3753 __kmp_free(team->t.t_implicit_task_taskdata);
3755 __kmp_free(team->t.t_set_nproc);
3756 __kmp_free(team->t.t_set_dynamic);
3757 __kmp_free(team->t.t_set_nested);
3758 __kmp_free(team->t.t_set_blocktime);
3759 __kmp_free(team->t.t_set_bt_intervals);
3760 __kmp_free(team->t.t_set_bt_set);
3761 # endif // OMP_30_ENABLED
3763 team->t.t_threads = NULL;
3764 team->t.t_disp_buffer = NULL;
3765 team->t.t_dispatch = NULL;
3769 team->t.t_implicit_task_taskdata = 0;
3771 team->t.t_set_nproc = 0;
3772 team->t.t_set_dynamic = 0;
3773 team->t.t_set_nested = 0;
3774 team->t.t_set_blocktime = 0;
3775 team->t.t_set_bt_intervals = 0;
3776 team->t.t_set_bt_set = 0;
3777 #endif // OMP_30_ENABLED
3781 __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3782 kmp_info_t **oldThreads = team->t.t_threads;
3784 #if !KMP_USE_POOLED_ALLOC
3785 __kmp_free(team->t.t_disp_buffer);
3786 __kmp_free(team->t.t_dispatch);
3790 __kmp_free(team->t.t_implicit_task_taskdata);
3792 __kmp_free(team->t.t_set_nproc);
3793 __kmp_free(team->t.t_set_dynamic);
3794 __kmp_free(team->t.t_set_nested);
3795 __kmp_free(team->t.t_set_blocktime);
3796 __kmp_free(team->t.t_set_bt_intervals);
3797 __kmp_free(team->t.t_set_bt_set);
3798 # endif // OMP_30_ENABLED
3800 __kmp_allocate_team_arrays(team, max_nth);
3802 memcpy(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
3804 __kmp_free(oldThreads);
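/* __kmp_get_global_icvs: build an ICV block from the global defaults
   (dynamic mode, default team size, blocktime, max active levels and, for
   OMP 4.0, the outermost proc-bind setting).  __kmp_get_x_global_icvs returns
   the corresponding snapshot taken from a team's master thread. */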
3807 static kmp_internal_control_t
3808 __kmp_get_global_icvs( void ) {
3811 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
3815 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
3818 kmp_internal_control_t g_icvs = {
3821 __kmp_global.g.g_dynamic,
3822 __kmp_dflt_team_nth,
3825 __kmp_dflt_blocktime,
3827 __kmp_env_blocktime,
3829 __kmp_dflt_max_active_levels,
3833 __kmp_nested_proc_bind.bind_types[0],
3841 static kmp_internal_control_t
3842 __kmp_get_x_global_icvs( const kmp_team_t *team ) {
3845 kmp_internal_control_t gx_icvs;
3846 gx_icvs.serial_nesting_level = 0;
3847 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
3848 gx_icvs.next = NULL;
3850 kmp_internal_control_t gx_icvs =
3853 team->t.t_set_nested[0],
3854 team->t.t_set_dynamic[0],
3855 team->t.t_set_nproc[0],
3856 team->t.t_set_blocktime[0],
3857 team->t.t_set_bt_intervals[0],
3858 team->t.t_set_bt_set[0],
3861 #endif // OMP_30_ENABLED
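/* __kmp_initialize_root: set up a fresh root: allocate the single-threaded
   root team and the hot team that will run this root's parallel regions, and
   seed both with the global scheduling defaults. */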
3867 __kmp_initialize_root( kmp_root_t *root )
3870 kmp_team_t *root_team;
3871 kmp_team_t *hot_team;
3872 size_t disp_size, dispatch_size, bar_size;
3873 int hot_team_max_nth;
3875 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
3876 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3877 #endif // OMP_30_ENABLED
3878 KMP_DEBUG_ASSERT( root );
3879 KMP_ASSERT( ! root->r.r_begin );
3882 __kmp_init_lock( &root->r.r_begin_lock );
3883 root -> r.r_begin = FALSE;
3884 root -> r.r_active = FALSE;
3885 root -> r.r_in_parallel = 0;
3886 root -> r.r_blocktime = __kmp_dflt_blocktime;
3887 root -> r.r_nested = __kmp_dflt_nested;
3891 KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
3893 __kmp_allocate_team(
3898 __kmp_nested_proc_bind.bind_types[0],
3903 __kmp_dflt_team_nth_ub,
3904 __kmp_global.g.g_dynamic,
3906 __kmp_dflt_blocktime,
3908 __kmp_env_blocktime,
3913 KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
3915 root -> r.r_root_team = root_team;
3916 root_team -> t.t_control_stack_top = NULL;
3919 root_team -> t.t_threads[0] = NULL;
3920 root_team -> t.t_nproc = 1;
3921 root_team -> t.t_serialized = 1;
3924 root_team -> t.t_sched.r_sched_type = r_sched.r_sched_type;
3925 root_team -> t.t_sched.chunk = r_sched.chunk;
3926 #endif // OMP_30_ENABLED
3927 KA_TRACE( 20, ( "__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3928 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
3932 KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
3934 __kmp_allocate_team(
3937 __kmp_dflt_team_nth_ub * 2,
3939 __kmp_nested_proc_bind.bind_types[0],
3944 __kmp_dflt_team_nth_ub,
3945 __kmp_global.g.g_dynamic,
3947 __kmp_dflt_blocktime,
3949 __kmp_env_blocktime,
3953 KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
3955 root -> r.r_hot_team = hot_team;
3956 root_team -> t.t_control_stack_top = NULL;
3959 hot_team -> t.t_parent = root_team;
3962 hot_team_max_nth = hot_team->t.t_max_nproc;
3963 for ( f = 0; f < hot_team_max_nth; ++ f ) {
3964 hot_team -> t.t_threads[ f ] = NULL;
3966 hot_team -> t.t_nproc = 1;
3969 hot_team -> t.t_sched.r_sched_type = r_sched.r_sched_type;
3970 hot_team -> t.t_sched.chunk = r_sched.chunk;
3971 #endif // OMP_30_ENABLED
3973 hot_team -> t.t_size_changed = 0;
3981 typedef struct kmp_team_list_item {
3982 kmp_team_p const * entry;
3983 struct kmp_team_list_item * next;
3984 } kmp_team_list_item_t;
3985 typedef kmp_team_list_item_t * kmp_team_list_t;
3989 __kmp_print_structure_team_accum(
3990 kmp_team_list_t list,
3991 kmp_team_p const * team
4001 KMP_DEBUG_ASSERT( list != NULL );
4002 if ( team == NULL ) {
4006 __kmp_print_structure_team_accum( list, team->t.t_parent );
4007 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
4011 while ( l->next != NULL && l->entry != team ) {
4014 if ( l->next != NULL ) {
4020 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
4026 kmp_team_list_item_t * item =
4027 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
4036 __kmp_print_structure_team(
4038 kmp_team_p const * team
4041 __kmp_printf( "%s", title );
4042 if ( team != NULL ) {
4043 __kmp_printf( "%2x %p\n", team->t.t_id, team );
4045 __kmp_printf( " - (nil)\n" );
4050 __kmp_print_structure_thread(
4052 kmp_info_p const * thread
4055 __kmp_printf( "%s", title );
4056 if ( thread != NULL ) {
4057 __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
4059 __kmp_printf( " - (nil)\n" );
4064 __kmp_print_structure(
4068 kmp_team_list_t list;
4071 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
4075 __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
4078 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
4079 __kmp_printf( "%2d", gtid );
4080 if ( __kmp_threads != NULL ) {
4081 __kmp_printf( " %p", __kmp_threads[ gtid ] );
4083 if ( __kmp_root != NULL ) {
4084 __kmp_printf( " %p", __kmp_root[ gtid ] );
4086 __kmp_printf( "\n" );
4091 __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
4092 if ( __kmp_threads != NULL ) {
4094 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
4095 kmp_info_t const * thread = __kmp_threads[ gtid ];
4096 if ( thread != NULL ) {
4097 __kmp_printf( "GTID %2d %p:\n", gtid, thread );
4098 __kmp_printf( " Our Root: %p\n", thread->th.th_root );
4099 __kmp_print_structure_team( " Our Team: ", thread->th.th_team );
4100 __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team );
4101 __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc );
4102 __kmp_print_structure_thread( " Master: ", thread->th.th_team_master );
4103 __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized );
4104 __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc );
4106 __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
4108 __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool );
4109 __kmp_printf( "\n" );
4110 __kmp_print_structure_team_accum( list, thread->th.th_team );
4111 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
4115 __kmp_printf( "Threads array is not allocated.\n" );
4119 __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
4120 if ( __kmp_root != NULL ) {
4122 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
4123 kmp_root_t const * root = __kmp_root[ gtid ];
4124 if ( root != NULL ) {
4125 __kmp_printf( "GTID %2d %p:\n", gtid, root );
4126 __kmp_print_structure_team( " Root Team: ", root->r.r_root_team );
4127 __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team );
4128 __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread );
4129 __kmp_printf( " Active?: %2d\n", root->r.r_active );
4130 __kmp_printf( " Nested?: %2d\n", root->r.r_nested );
4131 __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel );
4132 __kmp_printf( "\n" );
4133 __kmp_print_structure_team_accum( list, root->r.r_root_team );
4134 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
4138 __kmp_printf( "Ubers array is not allocated.\n" );
4141 __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
4142 while ( list->next != NULL ) {
4143 kmp_team_p const * team = list->entry;
4145 __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
4146 __kmp_print_structure_team( " Parent Team: ", team->t.t_parent );
4147 __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid );
4148 __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc );
4149 __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized );
4150 __kmp_printf( " Number threads: %2d\n", team->t.t_nproc );
4151 for ( i = 0; i < team->t.t_nproc; ++ i ) {
4152 __kmp_printf( " Thread %2d: ", i );
4153 __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
4155 __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool );
4156 __kmp_printf( "\n" );
4161 __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
4162 __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
4163 __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool );
4164 __kmp_printf( "\n" );
4167 while ( list != NULL ) {
4168 kmp_team_list_item_t * item = list;
4170 KMP_INTERNAL_FREE( item );
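/* Table of multipliers (primes) for the per-thread linear congruential
   generator used by __kmp_get_random below; __kmp_init_random picks one,
   indexed by the thread's tid, so different threads get decorrelated
   sequences. */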
4182 static const unsigned __kmp_primes[] = {
4183 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
4184 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
4185 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
4186 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
4187 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
4188 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
4189 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
4190 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
4191 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
4192 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
4193 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
4194 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
4195 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
4196 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
4197 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
4198 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
4205 __kmp_get_random( kmp_info_t * thread )
4207 unsigned x = thread -> th.th_x;
4208 unsigned short r = x>>16;
4210 thread -> th.th_x = x*thread->th.th_a+1;
4212 KA_TRACE(30, ( "__kmp_get_random: THREAD: %d, RETURN: %u\n",
4213 thread->th.th_info.ds.ds_tid, r) );
4221 __kmp_init_random( kmp_info_t * thread )
4223 unsigned seed = thread->th.th_info.ds.ds_tid;
4225 thread -> th.th_a = __kmp_primes[ seed % ( sizeof(__kmp_primes) / sizeof(__kmp_primes[0]) ) ];
4226 thread -> th.th_x = (seed+1)*thread->th.th_a+1;
4227 KA_TRACE(30, ( "__kmp_init_random: THREAD: %u; A: %u\n", seed, thread -> th.th_a) );
4234 __kmp_reclaim_dead_roots( void ) {
4237 for(i = 0; i < __kmp_threads_capacity; ++i) {
4238 if( KMP_UBER_GTID( i ) &&
4239 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
4240 !__kmp_root[i]->r.r_active ) {
4241 r += __kmp_unregister_root_other_thread(i);
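/* __kmp_expand_threads: grow the __kmp_threads / __kmp_root arrays by at least
   nNeed entries (nWish preferred), doubling the capacity until it is large
   enough.  Growth is capped at __kmp_tp_capacity once a threadprivate cache
   has been created, since existing caches were sized for the old capacity.
   Returns the number of entries actually added. */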
4270 __kmp_expand_threads( int nWish, int nNeed ) {
4273 int __kmp_actual_max_nth;
4277 #if KMP_OS_WINDOWS && !defined GUIDEDLL_EXPORTS
4280 added = __kmp_reclaim_dead_roots();
4298 int minimumRequiredCapacity;
4300 kmp_info_t **newThreads;
4301 kmp_root_t **newRoot;
4323 old_tp_cached = __kmp_tp_cached;
4324 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
4325 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
4329 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
4333 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
4339 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
4346 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
4348 newCapacity = __kmp_threads_capacity;
4351 newCapacity <= (__kmp_actual_max_nth >> 1) ?
4352 (newCapacity << 1) :
4353 __kmp_actual_max_nth;
4354 } while(newCapacity < minimumRequiredCapacity);
4355 newThreads = (kmp_info_t**) __kmp_allocate( (sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE );
4356 newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
4357 memcpy(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
4358 memcpy(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
4359 memset(newThreads + __kmp_threads_capacity, 0,
4360 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
4361 memset(newRoot + __kmp_threads_capacity, 0,
4362 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
4364 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
4370 __kmp_free(newThreads);
4373 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
4374 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
4376 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
4377 __kmp_free(newThreads);
4383 *(kmp_info_t** volatile*)&__kmp_threads = newThreads;
4384 *(kmp_root_t** volatile*)&__kmp_root = newRoot;
4385 added += newCapacity - __kmp_threads_capacity;
4386 *(volatile int*)&__kmp_threads_capacity = newCapacity;
4387 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
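/* __kmp_register_root: register the calling (or initial) thread as a new root:
   find a free gtid slot (expanding the threads array if necessary), allocate
   or reuse the kmp_root_t and its uber thread, create the root's serial team,
   and publish the thread in __kmp_threads under the forkjoin lock. */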
4398 __kmp_register_root( int initial_thread )
4400 kmp_info_t *root_thread;
4404 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
4405 KA_TRACE( 20, ( "__kmp_register_root: entered\n"));
4423 capacity = __kmp_threads_capacity;
4424 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
4429 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
4430 if ( __kmp_tp_cached ) {
4433 KMP_MSG( CantRegisterNewThread ),
4434 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
4435 KMP_HNT( PossibleSystemLimitOnThreads ),
4442 KMP_MSG( CantRegisterNewThread ),
4443 KMP_HNT( SystemLimitOnThreads ),
4452 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ );
4453 KA_TRACE( 1, ( "__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
4454 KMP_ASSERT( gtid < __kmp_threads_capacity );
4458 TCW_4(__kmp_nth, __kmp_nth + 1);
4465 if ( __kmp_adjust_gtid_mode ) {
4466 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4467 if ( TCR_4(__kmp_gtid_mode) != 2) {
4468 TCW_4(__kmp_gtid_mode, 2);
4472 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4473 TCW_4(__kmp_gtid_mode, 1);
4478 #ifdef KMP_ADJUST_BLOCKTIME
4481 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4482 if ( __kmp_nth > __kmp_avail_proc ) {
4483 __kmp_zero_bt = TRUE;
4489 if( ! ( root = __kmp_root[gtid] )) {
4490 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
4491 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
4494 __kmp_initialize_root( root );
4497 if( root -> r.r_uber_thread ) {
4498 root_thread = root -> r.r_uber_thread;
4500 root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
4501 if ( __kmp_storage_map ) {
4502 __kmp_print_thread_storage_map( root_thread, gtid );
4504 root_thread -> th.th_info .ds.ds_gtid = gtid;
4505 root_thread -> th.th_root = root;
4506 if( __kmp_env_consistency_check ) {
4507 root_thread -> th.th_cons = __kmp_allocate_cons_stack( gtid );
4510 __kmp_initialize_fast_memory( root_thread );
4514 KMP_DEBUG_ASSERT( root_thread -> th.th_local.bget_data == NULL );
4515 __kmp_initialize_bget( root_thread );
4517 __kmp_init_random( root_thread );
4521 if( ! root_thread -> th.th_serial_team ) {
4523 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
4524 #endif // OMP_30_ENABLED
4525 KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
4526 root_thread -> th.th_serial_team = __kmp_allocate_team( root, 1, 1,
4533 __kmp_dflt_team_nth_ub,
4534 __kmp_global.g.g_dynamic,
4536 __kmp_dflt_blocktime,
4538 __kmp_env_blocktime,
4542 KMP_ASSERT( root_thread -> th.th_serial_team );
4543 KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
4544 root_thread -> th.th_serial_team ) );
4547 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
4549 root -> r.r_root_team -> t.t_threads[0] = root_thread;
4550 root -> r.r_hot_team -> t.t_threads[0] = root_thread;
4551 root_thread -> th.th_serial_team -> t.t_threads[0] = root_thread;
4552 root_thread -> th.th_serial_team -> t.t_serialized = 0;
4553 root -> r.r_uber_thread = root_thread;
4556 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
4559 __kmp_gtid_set_specific( gtid );
4560 #ifdef KMP_TDATA_GTID
4563 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
4564 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
4565 TCW_4(__kmp_init_gtid, TRUE);
4567 KA_TRACE( 20, ( "__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
4568 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
4569 root -> r.r_hot_team -> t.t_id, 0, KMP_INIT_BARRIER_STATE,
4570 KMP_INIT_BARRIER_STATE ) );
4573 for ( b = 0; b < bs_last_barrier; ++ b ) {
4574 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
4577 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
4580 #if KMP_OS_WINDOWS || KMP_OS_LINUX
4581 if ( TCR_4(__kmp_init_middle) ) {
4582 __kmp_affinity_set_init_mask( gtid, TRUE );
4586 __kmp_root_counter ++;
4589 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
4598 __kmp_reset_root(int gtid, kmp_root_t *root)
4600 kmp_team_t * root_team = root->r.r_root_team;
4601 kmp_team_t * hot_team = root->r.r_hot_team;
4602 int n = hot_team->t.t_nproc;
4605 KMP_DEBUG_ASSERT( ! root->r.r_active );
4607 root->r.r_root_team = NULL;
4608 root->r.r_hot_team = NULL;
4611 __kmp_free_team( root, root_team );
4612 __kmp_free_team( root, hot_team );
4619 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4620 __kmp_wait_to_unref_task_teams();
4626 KA_TRACE( 10, ( "__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
4627 (LPVOID)&(root->r.r_uber_thread->th),
4628 root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
4629 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
4632 TCW_4(__kmp_nth, __kmp_nth - 1);
4633 __kmp_reap_thread( root->r.r_uber_thread, 1 );
4636 root->r.r_uber_thread = NULL;
4638 root -> r.r_begin = FALSE;
4644 __kmp_unregister_root_current_thread( int gtid )
4646 kmp_root_t *root = __kmp_root[gtid];
4648 KA_TRACE( 1, ( "__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
4649 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
4650 KMP_ASSERT( KMP_UBER_GTID( gtid ));
4651 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
4652 KMP_ASSERT( root->r.r_active == FALSE );
4658 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
4662 __kmp_reset_root(gtid, root);
4665 __kmp_gtid_set_specific( KMP_GTID_DNE );
4666 #ifdef KMP_TDATA_GTID
4667 __kmp_gtid = KMP_GTID_DNE;
4671 KC_TRACE( 10, ( "__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
4673 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
4681 __kmp_unregister_root_other_thread( int gtid )
4683 kmp_root_t *root = __kmp_root[gtid];
4686 KA_TRACE( 1, ( "__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
4687 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
4688 KMP_ASSERT( KMP_UBER_GTID( gtid ));
4689 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
4690 KMP_ASSERT( root->r.r_active == FALSE );
4692 r = __kmp_reset_root(gtid, root);
4693 KC_TRACE( 10, ( "__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
4700 void __kmp_task_info() {
4702 kmp_int32 gtid = __kmp_entry_gtid();
4703 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
4704 kmp_info_t *this_thr = __kmp_threads[ gtid ];
4705 kmp_team_t *steam = this_thr -> th.th_serial_team;
4706 kmp_team_t *team = this_thr -> th.th_team;
4708 __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
4709 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
4713 #endif // OMP_30_ENABLED
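/* __kmp_initialize_info: (re)bind a kmp_info_t to a team slot: set its team,
   tid, root and master pointers, initialize its implicit task and dispatch
   buffers, and allocate the per-thread private-common table on first use. */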
4719 __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
4724 KMP_DEBUG_ASSERT( this_thr != NULL );
4725 KMP_DEBUG_ASSERT( this_thr -> th.th_serial_team );
4726 KMP_DEBUG_ASSERT( team );
4727 KMP_DEBUG_ASSERT( team -> t.t_threads );
4728 KMP_DEBUG_ASSERT( team -> t.t_dispatch );
4729 KMP_DEBUG_ASSERT( team -> t.t_threads[0] );
4730 KMP_DEBUG_ASSERT( team -> t.t_threads[0] -> th.th_root );
4734 TCW_SYNC_PTR(this_thr->th.th_team, team);
4736 this_thr->th.th_info.ds.ds_tid = tid;
4737 this_thr->th.th_set_nproc = 0;
4739 this_thr->th.th_set_proc_bind = proc_bind_default;
4740 # if (KMP_OS_WINDOWS || KMP_OS_LINUX)
4741 this_thr->th.th_new_place = this_thr->th.th_current_place;
4744 this_thr->th.th_root = team -> t.t_threads[0] -> th.th_root;
4747 this_thr->th.th_team_nproc = team -> t.t_nproc;
4748 this_thr->th.th_team_master = team -> t.t_threads[0];
4749 this_thr->th.th_team_serialized = team -> t.t_serialized;
4751 this_thr->th.th_team_microtask = team -> t.t_threads[0] -> th.th_team_microtask;
4752 this_thr->th.th_teams_level = team -> t.t_threads[0] -> th.th_teams_level;
4753 this_thr->th.th_set_nth_teams = team -> t.t_threads[0] -> th.th_set_nth_teams;
4755 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4758 KMP_DEBUG_ASSERT( team -> t.t_implicit_task_taskdata );
4759 this_thr->th.th_task_state = 0;
4761 KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4762 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4764 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
4766 KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4767 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4769 #endif // OMP_30_ENABLED
4772 this_thr -> th.th_dispatch = &team -> t.t_dispatch[ tid ];
4774 this_thr->th.th_local.this_construct = 0;
4775 this_thr->th.th_local.last_construct = 0;
4778 this_thr->th.th_local.tv_data = 0;
4781 if ( ! this_thr->th.th_pri_common ) {
4782 this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
4783 if ( __kmp_storage_map ) {
4784 __kmp_print_storage_map_gtid(
4785 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4786 sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
4789 this_thr->th.th_pri_head = NULL;
4794 volatile kmp_disp_t *dispatch = this_thr -> th.th_dispatch;
4798 size_t disp_size = sizeof( dispatch_private_info_t ) *
4799 ( team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF );
4800 KD_TRACE( 10, ( "__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4801 KMP_ASSERT( dispatch );
4802 KMP_DEBUG_ASSERT( team -> t.t_dispatch );
4803 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4805 dispatch->th_disp_index = 0;
4807 if( ! dispatch -> th_disp_buffer ) {
4808 dispatch -> th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
4810 if ( __kmp_storage_map ) {
4811 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
4812 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF ],
4813 disp_size,
"th_%d.th_dispatch.th_disp_buffer "
4814 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4815 gtid, team->t.t_id, gtid );
4818 memset( & dispatch -> th_disp_buffer[0], '\0', disp_size );
4821 dispatch -> th_dispatch_pr_current = 0;
4822 dispatch -> th_dispatch_sh_current = 0;
4824 dispatch -> th_deo_fcn = 0;
4825 dispatch -> th_dxo_fcn = 0;
4828 this_thr->th.th_next_pool = NULL;
4830 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4831 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
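/* __kmp_allocate_thread: give the forking master a worker for slot new_tid of
   team.  Reuses a thread from __kmp_thread_pool when one is available;
   otherwise it picks a fresh gtid, allocates and initializes a new kmp_info_t
   plus its serial team, starts the monitor thread if it is not yet running,
   and launches the OS thread via __kmp_create_worker. */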
4844 __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
4846 kmp_team_t *serial_team;
4847 kmp_info_t *new_thr;
4850 KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4851 KMP_DEBUG_ASSERT( root && team );
4852 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
4856 if ( __kmp_thread_pool ) {
4858 new_thr = (kmp_info_t*)__kmp_thread_pool;
4859 __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
4860 if ( new_thr == __kmp_thread_pool_insert_pt ) {
4861 __kmp_thread_pool_insert_pt = NULL;
4863 TCW_4(new_thr->th.th_in_pool, FALSE);
4869 __kmp_thread_pool_nth--;
4871 KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d using thread T#%d\n",
4872 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
4873 KMP_ASSERT( ! new_thr -> th.th_team );
4874 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4875 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4878 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4879 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4881 TCW_4(__kmp_nth, __kmp_nth + 1);
4883 #ifdef KMP_ADJUST_BLOCKTIME
4886 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4887 if ( __kmp_nth > __kmp_avail_proc ) {
4888 __kmp_zero_bt = TRUE;
4893 KF_TRACE( 10, ( "__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4894 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
4902 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
4903 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
4909 if ( ! TCR_4( __kmp_init_monitor ) ) {
4910 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
4911 if ( ! TCR_4( __kmp_init_monitor ) ) {
4912 KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
4913 TCW_4( __kmp_init_monitor, 1 );
4914 __kmp_create_monitor( & __kmp_monitor );
4915 KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
4924 while ( TCR_4(__kmp_init_monitor) < 2 ) {
4927 KF_TRACE( 10, ( "after monitor thread has started\n" ) );
4930 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
4934 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
4935 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
4939 new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
4941 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4943 if ( __kmp_storage_map ) {
4944 __kmp_print_thread_storage_map( new_thr, new_gtid );
4950 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
4951 #endif // OMP_30_ENABLED
4952 KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
4953 new_thr -> th.th_serial_team = serial_team =
4954 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
4961 team->t.t_set_nproc[0],
4962 team->t.t_set_dynamic[0],
4963 team->t.t_set_nested[0],
4964 team->t.t_set_blocktime[0],
4965 team->t.t_set_bt_intervals[0],
4966 team->t.t_set_bt_set[0],
4970 KMP_ASSERT ( serial_team );
4971 serial_team -> t.t_serialized = 0;
4972 serial_team -> t.t_threads[0] = new_thr;
4973 KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4977 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
4980 __kmp_initialize_fast_memory( new_thr );
4984 KMP_DEBUG_ASSERT( new_thr -> th.th_local.bget_data == NULL );
4985 __kmp_initialize_bget( new_thr );
4988 __kmp_init_random( new_thr );
4991 KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4992 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4994 new_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_go = KMP_INIT_BARRIER_STATE;
4995 new_thr->th.th_bar[ bs_plain_barrier ].bb.b_go = KMP_INIT_BARRIER_STATE;
4996 #if KMP_FAST_REDUCTION_BARRIER
4997 new_thr->th.th_bar[ bs_reduction_barrier ].bb.b_go = KMP_INIT_BARRIER_STATE;
4998 #endif // KMP_FAST_REDUCTION_BARRIER
5000 new_thr->th.th_spin_here = FALSE;
5001 new_thr->th.th_next_waiting = 0;
5003 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
5004 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
5005 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
5006 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
5007 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
5010 TCW_4(new_thr->th.th_in_pool, FALSE);
5011 new_thr->th.th_active_in_pool = FALSE;
5012 TCW_4(new_thr->th.th_active, TRUE);
5023 if ( __kmp_adjust_gtid_mode ) {
5024 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
5025 if ( TCR_4(__kmp_gtid_mode) != 2) {
5026 TCW_4(__kmp_gtid_mode, 2);
5030 if (TCR_4(__kmp_gtid_mode) != 1 ) {
5031 TCW_4(__kmp_gtid_mode, 1);
5036 #ifdef KMP_ADJUST_BLOCKTIME
5039 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5040 if ( __kmp_nth > __kmp_avail_proc ) {
5041 __kmp_zero_bt = TRUE;
5047 KF_TRACE( 10, ( "__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
5048 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
5049 KF_TRACE( 10, ( "__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
5052 KA_TRACE( 20, ( "__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
5067 __kmp_reinitialize_team( kmp_team_t *team,
5069 kmp_internal_control_t *new_icvs, ident_t *loc
5071 int new_set_nproc, int new_set_dynamic, int new_set_nested,
5072 int new_set_blocktime, int new_bt_intervals, int new_bt_set
5075 KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
5076 team->t.t_threads[0], team ) );
5078 KMP_DEBUG_ASSERT( team && new_icvs);
5079 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
5080 team->t.t_ident = loc;
5082 KMP_DEBUG_ASSERT( team && new_set_nproc );
5083 #endif // OMP_30_ENABLED
5085 team->t.t_id = KMP_GEN_TEAM_ID();
5089 load_icvs(new_icvs);
5090 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
5091 store_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
5094 team -> t.t_set_nproc[0] = new_set_nproc;
5095 team -> t.t_set_dynamic[0] = new_set_dynamic;
5096 team -> t.t_set_nested[0] = new_set_nested;
5097 team -> t.t_set_blocktime[0] = new_set_blocktime;
5098 team -> t.t_set_bt_intervals[0] = new_bt_intervals;
5099 team -> t.t_set_bt_set[0] = new_bt_set;
5100 # endif // OMP_30_ENABLED
5102 KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
5103 team->t.t_threads[0], team ) );
5107 __kmp_setup_icv_copy(kmp_team_t * team, int new_nproc,
5109 kmp_internal_control_t * new_icvs,
5112 int new_set_nproc, int new_set_dynamic, int new_set_nested,
5113 int new_set_blocktime, int new_bt_intervals, int new_bt_set
5120 KMP_DEBUG_ASSERT( team && new_nproc && new_icvs );
5121 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
5123 KMP_DEBUG_ASSERT( team && new_nproc && new_set_nproc );
5124 #endif // OMP_30_ENABLED
5128 #if KMP_BARRIER_ICV_PULL
5131 load_icvs(new_icvs);
5132 KMP_DEBUG_ASSERT(team->t.t_threads[0]);
5133 store_icvs(&team->t.t_threads[0]->th.th_fixed_icvs, new_icvs);
5135 KF_TRACE(10, ( "__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0, team->t.t_threads[0], team));
5137 #elif KMP_BARRIER_ICV_PUSH
5139 KF_TRACE(10, ( "__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0, team->t.t_threads[0], team));
5144 load_icvs(new_icvs);
5145 # endif // OMP_30_ENABLED
5146 KMP_DEBUG_ASSERT(team->t.t_threads[0]);
5147 for(f=1 ; f<new_nproc ; f++) {
# if OMP_30_ENABLED
5150 KF_TRACE( 10, ( "__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
5151 f, team->t.t_threads[f], team ) );
5152 __kmp_init_implicit_task( loc, team->t.t_threads[f], team, f, FALSE );
5153 store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
5154 KF_TRACE( 10, ( "__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
5155 f, team->t.t_threads[f], team ) );
# else
5157 team -> t.t_set_nproc[f] = new_set_nproc;
5158 team -> t.t_set_dynamic[f] = new_set_dynamic;
5159 team -> t.t_set_nested[f] = new_set_nested;
5160 team -> t.t_set_blocktime[f] = new_set_blocktime;
5161 team -> t.t_set_bt_intervals[f] = new_bt_intervals;
5162 team -> t.t_set_bt_set[f] = new_bt_set;
5163 # endif // OMP_30_ENABLED
}
5167 # endif // OMP_30_ENABLED
5168 #endif // KMP_BARRIER_ICV_PULL
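/* __kmp_initialize_team: full (re)initialization of a team for a new parallel
   region: reset counters, barrier and dispatch state, then delegate the
   control-variable setup to __kmp_reinitialize_team(). */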
static void
5175 __kmp_initialize_team(
kmp_team_t * team,
int new_nproc,
#if OMP_30_ENABLED
5179 kmp_internal_control_t * new_icvs,
ident_t * loc
#else
5182 int new_set_nproc, int new_set_dynamic, int new_set_nested,
5183 int new_set_blocktime, int new_bt_intervals, int new_bt_set
#endif // OMP_30_ENABLED
) {
5186 KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );
5189 KMP_DEBUG_ASSERT( team );
5190 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
5191 KMP_DEBUG_ASSERT( team->t.t_threads );
5194 team -> t.t_master_tid = 0;
5196 team -> t.t_serialized = new_nproc > 1 ? 0 : 1;
5197 team -> t.t_nproc = new_nproc;
5200 team -> t.t_next_pool = NULL;
5203 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
5204 team -> t.t_invoke = NULL;
5208 team -> t.t_sched = new_icvs->sched;
5209 #endif // OMP_30_ENABLED
5211 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
5212 team -> t.t_fp_control_saved = FALSE;
5213 team -> t.t_x87_fpu_control_word = 0;
5214 team -> t.t_mxcsr = 0;
5217 team -> t.t_construct = 0;
5218 __kmp_init_lock( & team -> t.t_single_lock );
5220 team -> t.t_ordered .dt.t_value = 0;
5221 team -> t.t_master_active = FALSE;
5223 memset( & team -> t.t_taskq, '\0', sizeof( kmp_taskq_t ));
5226 team -> t.t_copypriv_data = NULL;
5228 team -> t.t_copyin_counter = 0;
5230 team -> t.t_control_stack_top = NULL;
5232 __kmp_reinitialize_team( team,
#if OMP_30_ENABLED
new_icvs, loc
#else
5236 new_set_nproc, new_set_dynamic, new_set_nested,
5237 new_set_blocktime, new_bt_intervals, new_bt_set
#endif // OMP_30_ENABLED
);
5243 KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
}
static void
5249 __kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
{
5251 if ( KMP_AFFINITY_CAPABLE() ) {
int status;
5253 if ( old_mask != NULL ) {
5254 status = __kmp_get_system_affinity( old_mask, TRUE );
5256 if ( status != 0 ) {
__kmp_msg( kmp_ms_fatal, KMP_MSG( ChangeThreadAffMaskError ), KMP_ERR( status ), __kmp_msg_null );
} }
5265 __kmp_set_system_affinity( __kmp_affinity_get_fullMask(), TRUE );
} }
5270 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
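/* __kmp_partition_places: distribute the master's place partition over the
   team according to the proc_bind policy (master, close, or spread), giving
   each worker its first/last place and the place it should move to. */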
5279 __kmp_partition_places( kmp_team_t *team )
5284 kmp_info_t *master_th = team->t.t_threads[0];
5285 KMP_DEBUG_ASSERT( master_th != NULL );
5286 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
5287 int first_place = master_th->th.th_first_place;
5288 int last_place = master_th->th.th_last_place;
5289 int masters_place = master_th->th.th_current_place;
5290 team->t.t_first_place = first_place;
5291 team->t.t_last_place = last_place;
5293 KA_TRACE( 20, ( "__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
5294 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
5295 masters_place, first_place, last_place ) );
5297 switch ( proc_bind ) {
5299 case proc_bind_default:
5305 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
5308 case proc_bind_master:
5311 int n_th = team->t.t_nproc;
5312 for ( f = 1; f < n_th; f++ ) {
5313 kmp_info_t *th = team->t.t_threads[f];
5314 KMP_DEBUG_ASSERT( th != NULL );
5315 th->th.th_first_place = first_place;
5316 th->th.th_last_place = last_place;
5317 th->th.th_new_place = masters_place;
5319 KA_TRACE( 100, ( "__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5320 __kmp_gtid_from_thread( team->t.t_threads[f] ),
5321 team->t.t_id, f, masters_place, first_place, last_place ) );
5326 case proc_bind_close:
5329 int n_th = team->t.t_nproc;
5331 if ( first_place <= last_place ) {
5332 n_places = last_place - first_place + 1;
5335 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
5337 if ( n_th <= n_places ) {
5338 int place = masters_place;
5339 for ( f = 1; f < n_th; f++ ) {
5340 kmp_info_t *th = team->t.t_threads[f];
5341 KMP_DEBUG_ASSERT( th != NULL );
5343 if ( place == last_place ) {
5344 place = first_place;
5346 else if ( place == __kmp_affinity_num_masks - 1) {
5352 th->th.th_first_place = first_place;
5353 th->th.th_last_place = last_place;
5354 th->th.th_new_place = place;
5356 KA_TRACE( 100, ( "__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5357 __kmp_gtid_from_thread( team->t.t_threads[f] ),
5358 team->t.t_id, f, place, first_place, last_place ) );
5362 int S, rem, gap, s_count;
5363 S = n_th / n_places;
5365 rem = n_th - ( S * n_places );
5366 gap = rem > 0 ? n_places/rem : n_places;
5367 int place = masters_place;
5369 for ( f = 0; f < n_th; f++ ) {
5370 kmp_info_t *th = team->t.t_threads[f];
5371 KMP_DEBUG_ASSERT( th != NULL );
5373 th->th.th_first_place = first_place;
5374 th->th.th_last_place = last_place;
5375 th->th.th_new_place = place;
5378 if ( (s_count == S) && rem && (gap_ct == gap) ) {
5381 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
5383 if ( place == last_place ) {
5384 place = first_place;
5386 else if ( place == __kmp_affinity_num_masks - 1) {
5396 else if (s_count == S) {
5397 if ( place == last_place ) {
5398 place = first_place;
5400 else if ( place == __kmp_affinity_num_masks - 1) {
5410 KA_TRACE( 100, ( "__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5411 __kmp_gtid_from_thread( team->t.t_threads[f] ),
5412 team->t.t_id, f, th->th.th_new_place, first_place,
5415 KMP_DEBUG_ASSERT( place == masters_place );
5420 case proc_bind_spread:
5423 int n_th = team->t.t_nproc;
5425 if ( first_place <= last_place ) {
5426 n_places = last_place - first_place + 1;
5429 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
5431 if ( n_th <= n_places ) {
5432 int place = masters_place;
5433 int S = n_places/n_th;
5434 int s_count, rem, gap, gap_ct;
5435 rem = n_places - n_th*S;
5436 gap = rem ? n_th/rem : 1;
5438 for ( f = 0; f < n_th; f++ ) {
5439 kmp_info_t *th = team->t.t_threads[f];
5440 KMP_DEBUG_ASSERT( th != NULL );
5442 th->th.th_first_place = place;
5443 th->th.th_new_place = place;
5445 while (s_count < S) {
5446 if ( place == last_place ) {
5447 place = first_place;
5449 else if ( place == __kmp_affinity_num_masks - 1) {
5457 if (rem && (gap_ct == gap)) {
5458 if ( place == last_place ) {
5459 place = first_place;
5461 else if ( place == __kmp_affinity_num_masks - 1) {
5470 th->th.th_last_place = place;
5473 if ( place == last_place ) {
5474 place = first_place;
5476 else if ( place == __kmp_affinity_num_masks - 1) {
5483 KA_TRACE( 100, ( "__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5484 __kmp_gtid_from_thread( team->t.t_threads[f] ),
5485 team->t.t_id, f, th->th.th_new_place,
5486 th->th.th_first_place, th->th.th_last_place ) );
5488 KMP_DEBUG_ASSERT( place == masters_place );
5491 int S, rem, gap, s_count;
5492 S = n_th / n_places;
5494 rem = n_th - ( S * n_places );
5495 gap = rem > 0 ? n_places/rem : n_places;
5496 int place = masters_place;
5498 for ( f = 0; f < n_th; f++ ) {
5499 kmp_info_t *th = team->t.t_threads[f];
5500 KMP_DEBUG_ASSERT( th != NULL );
5502 th->th.th_first_place = place;
5503 th->th.th_last_place = place;
5504 th->th.th_new_place = place;
5507 if ( (s_count == S) && rem && (gap_ct == gap) ) {
5510 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
5512 if ( place == last_place ) {
5513 place = first_place;
5515 else if ( place == __kmp_affinity_num_masks - 1) {
5525 else if (s_count == S) {
5526 if ( place == last_place ) {
5527 place = first_place;
5529 else if ( place == __kmp_affinity_num_masks - 1) {
5539 KA_TRACE( 100, ( "__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5540 __kmp_gtid_from_thread( team->t.t_threads[f] ),
5541 team->t.t_id, f, th->th.th_new_place,
5542 th->th.th_first_place, th->th.th_last_place) );
5544 KMP_DEBUG_ASSERT( place == masters_place );
5553 KA_TRACE( 20, ( "__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
kmp_team_t *
5560 __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
#if OMP_40_ENABLED
5562 kmp_proc_bind_t new_proc_bind,
#endif // OMP_40_ENABLED
#if OMP_30_ENABLED
5565 kmp_internal_control_t *new_icvs,
#else
5567 int new_set_nproc, int new_set_dynamic, int new_set_nested,
5568 int new_set_blocktime, int new_bt_intervals, int new_bt_set,
#endif // OMP_30_ENABLED
int argc )
{
kmp_team_t *team;
int f;
5577 KA_TRACE( 20, ( "__kmp_allocate_team: called\n"));
5578 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
5579 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
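/* Allocation strategy: reuse the root's hot team when the master forks from
   the sequential part (resizing it if the requested thread count changed),
   otherwise take a suitable team from the team pool, and only allocate a
   brand new team when neither is possible. */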
5586 if ( ! root->r.r_active && new_nproc > 1 ) {
5588 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
5590 team = root -> r.r_hot_team;
5592 #if OMP_30_ENABLED && KMP_DEBUG
5593 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5594 KA_TRACE( 20, ( "__kmp_allocate_team: hot team task_team = %p before reinit\n",
5595 team -> t.t_task_team ));
5600 if( team -> t.t_nproc > new_nproc ) {
5601 KA_TRACE( 20, ( "__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
5604 team -> t.t_size_changed = 1;
5607 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5608 kmp_task_team_t *task_team = team->t.t_task_team;
5609 if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
5616 KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
5617 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
5620 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
5621 &team->t.t_task_team ) );
5622 team->t.t_task_team = NULL;
5625 KMP_DEBUG_ASSERT( task_team == NULL );
5628 #endif // OMP_30_ENABLED
5631 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
5632 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5633 __kmp_free_thread( team->t.t_threads[ f ] );
5634 team -> t.t_threads[ f ] = NULL;
5637 team -> t.t_nproc = new_nproc;
5640 team -> t.t_sched = new_icvs->sched;
5642 __kmp_reinitialize_team( team,
5644 new_icvs, root->r.r_uber_thread->th.th_ident
5646 new_set_nproc, new_set_dynamic, new_set_nested,
5647 new_set_blocktime, new_bt_intervals, new_bt_set
5653 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5654 kmp_task_team_t *task_team = team->t.t_task_team;
5655 if ( task_team != NULL ) {
5656 KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
5657 task_team->tt.tt_nproc = new_nproc;
5658 task_team->tt.tt_unfinished_threads = new_nproc;
5659 task_team->tt.tt_ref_ct = new_nproc - 1;
5665 for( f = 0 ; f < new_nproc ; f++ ) {
5666 team -> t.t_threads[ f ] -> th.th_team_nproc = team->t.t_nproc;
5671 KF_TRACE( 10, ( "__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
5672 0, team->t.t_threads[0], team ) );
5674 __kmp_push_current_task_to_thread( team -> t.t_threads[ 0 ], team, 0 );
5678 for ( f = 0; f < team->t.t_nproc; f++ ) {
5679 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5680 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5685 team->t.t_proc_bind = new_proc_bind;
5686 # if KMP_OS_WINDOWS || KMP_OS_LINUX
5687 __kmp_partition_places( team );
5692 else if ( team -> t.t_nproc < new_nproc ) {
5694 kmp_affin_mask_t *old_mask;
5695 if ( KMP_AFFINITY_CAPABLE() ) {
5696 KMP_CPU_ALLOC(old_mask);
5700 KA_TRACE( 20, ( "__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
5703 team -> t.t_size_changed = 1;
5707 if(team -> t.t_max_nproc < new_nproc) {
5709 __kmp_reallocate_team_arrays(team, new_nproc);
5710 __kmp_reinitialize_team( team,
5714 new_set_nproc, new_set_dynamic, new_set_nested,
5715 new_set_blocktime, new_bt_intervals, new_bt_set
5727 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
5731 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
5732 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
5733 KMP_DEBUG_ASSERT( new_worker );
5734 team->t.t_threads[ f ] = new_worker;
5735 new_worker->th.th_team_nproc = team->t.t_nproc;
5737 KA_TRACE( 20, ( "__kmp_allocate_team: team %d init T#%d arrived: join=%u, plain=%u\n",
5738 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
5739 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5740 team->t.t_bar[bs_plain_barrier].b_arrived ) );
5744 kmp_balign_t * balign = new_worker->th.th_bar;
5745 for ( b = 0; b < bs_last_barrier; ++ b ) {
5746 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5752 if ( KMP_AFFINITY_CAPABLE() ) {
5754 __kmp_set_system_affinity( old_mask, TRUE );
5755 KMP_CPU_FREE(old_mask);
5760 __kmp_initialize_team( team, new_nproc,
5763 root->r.r_uber_thread->th.th_ident
5765 new_set_nproc, new_set_dynamic, new_set_nested,
5766 new_set_blocktime, new_bt_intervals, new_bt_set
5771 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5772 kmp_task_team_t *task_team = team->t.t_task_team;
5773 if ( task_team != NULL ) {
5774 KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
5775 task_team->tt.tt_nproc = new_nproc;
5776 task_team->tt.tt_unfinished_threads = new_nproc;
5777 task_team->tt.tt_ref_ct = new_nproc - 1;
5783 for( f = 0 ; f < team->t.t_nproc ; f++ )
5784 __kmp_initialize_info( team->t.t_threads[ f ], team, f,
5785 __kmp_gtid_from_tid( f, team ) );
5787 for ( f = 0; f < team->t.t_nproc; ++ f ) {
5788 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5789 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5794 team->t.t_proc_bind = new_proc_bind;
5795 # if KMP_OS_WINDOWS || KMP_OS_LINUX
5796 __kmp_partition_places( team );
5802 KA_TRACE( 20, ( "__kmp_allocate_team: reusing hot team\n" ));
5806 if ( team -> t.t_size_changed == -1 ) {
5807 team -> t.t_size_changed = 1;
5809 team -> t.t_size_changed = 0;
5815 team -> t.t_sched = new_icvs->sched;
5818 __kmp_reinitialize_team( team,
5820 new_icvs, root->r.r_uber_thread->th.th_ident
5822 new_set_nproc, new_set_dynamic, new_set_nested,
5823 new_set_blocktime, new_bt_intervals, new_bt_set
5828 KF_TRACE( 10, ( "__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
5829 0, team->t.t_threads[0], team ) );
5830 __kmp_push_current_task_to_thread( team -> t.t_threads[ 0 ], team, 0 );
5834 # if (KMP_OS_WINDOWS || KMP_OS_LINUX)
5835 if ( team->t.t_proc_bind == new_proc_bind ) {
5836 KA_TRACE( 200, ( "__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
5837 team->t.t_id, new_proc_bind, team->t.t_first_place,
5838 team->t.t_last_place ) );
5841 team->t.t_proc_bind = new_proc_bind;
5842 __kmp_partition_places( team );
5845 if ( team->t.t_proc_bind != new_proc_bind ) {
5846 team->t.t_proc_bind = new_proc_bind;
5853 __kmp_alloc_argv_entries( argc, team, TRUE );
5854 team -> t.t_argc = argc;
5860 KF_TRACE( 10, ( " hot_team = %p\n", team ) );
5862 #if OMP_30_ENABLED && KMP_DEBUG
5863 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5864 KA_TRACE( 20, ( "__kmp_allocate_team: hot team task_team = %p after reinit\n",
5865 team -> t.t_task_team ));
5876 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5879 if ( team->t.t_max_nproc >= max_nproc ) {
5881 __kmp_team_pool = team->t.t_next_pool;
5884 __kmp_initialize_team( team, new_nproc,
5889 new_set_nproc, new_set_dynamic, new_set_nested,
5890 new_set_blocktime, new_bt_intervals, new_bt_set
5895 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
5896 &team->t.t_task_team ) );
5897 team -> t.t_task_team = NULL;
5901 __kmp_alloc_argv_entries( argc, team, TRUE );
5902 team -> t.t_argc = argc;
5904 KA_TRACE( 20, ( "__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5905 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5908 for ( b = 0; b < bs_last_barrier; ++ b) {
5909 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
5914 team->t.t_proc_bind = new_proc_bind;
5917 KA_TRACE( 20, ( "__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
5926 team = __kmp_reap_team( team );
5927 __kmp_team_pool = team;
5932 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
5935 team -> t.t_max_nproc = max_nproc;
5939 __kmp_allocate_team_arrays( team, max_nproc );
5941 KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
5942 __kmp_initialize_team( team, new_nproc,
5947 new_set_nproc, new_set_dynamic, new_set_nested,
5948 new_set_blocktime, new_bt_intervals, new_bt_set
5953 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
5954 &team->t.t_task_team ) );
5955 team -> t.t_task_team = NULL;
5958 if ( __kmp_storage_map ) {
5959 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
5963 __kmp_alloc_argv_entries( argc, team, FALSE );
5964 team -> t.t_argc = argc;
5966 KA_TRACE( 20, ( "__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5967 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5970 for ( b = 0; b < bs_last_barrier; ++ b ) {
5971 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
5976 team->t.t_proc_bind = new_proc_bind;
5981 KA_TRACE( 20, ( "__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
return team;
}
5992 __kmp_free_team( kmp_root_t *root, kmp_team_t *team )
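/* __kmp_free_team: return a team (other than the root's hot team) to the team
   pool, after releasing its worker threads back to the thread pool. */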
5995 KA_TRACE( 20, ( "__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
5998 KMP_DEBUG_ASSERT( root );
5999 KMP_DEBUG_ASSERT( team );
6000 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
6001 KMP_DEBUG_ASSERT( team->t.t_threads );
6004 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
6005 team -> t.t_copyin_counter = 0;
6009 if( team != root->r.r_hot_team ) {
6012 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
6013 kmp_task_team_t *task_team = team->t.t_task_team;
6014 if ( task_team != NULL ) {
6021 KA_TRACE( 20, ( "__kmp_free_team: deactivating task_team %p\n", task_team ) );
6023 KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
6024 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
6026 team->t.t_task_team = NULL;
6032 team -> t.t_parent = NULL;
6036 for ( f = 1; f < team->t.t_nproc; ++ f ) {
6037 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
6038 __kmp_free_thread( team->t.t_threads[ f ] );
6039 team->t.t_threads[ f ] = NULL;
6045 team -> t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
6046 __kmp_team_pool = (volatile kmp_team_t*) team;
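/* __kmp_reap_team: free all memory owned by a team structure (thread, dispatch
   and argv arrays) and return the next team in the pool chain. */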
kmp_team_t *
6055 __kmp_reap_team( kmp_team_t *team )
{
6057 kmp_team_t *next_pool = team -> t.t_next_pool;
6059 KMP_DEBUG_ASSERT( team );
6060 KMP_DEBUG_ASSERT( team -> t.t_dispatch );
6061 KMP_DEBUG_ASSERT( team -> t.t_disp_buffer );
6062 KMP_DEBUG_ASSERT( team -> t.t_threads );
6065 KMP_DEBUG_ASSERT( team -> t.t_set_nproc );
6067 KMP_DEBUG_ASSERT( team -> t.t_argv );
6073 __kmp_free_team_arrays( team );
6074 #if (KMP_PERF_V106 == KMP_ON)
6075 if ( team -> t.t_argv != &team -> t.t_inline_argv[0] )
6076 __kmp_free( (void*) team -> t.t_argv );
#else
6078 __kmp_free( (void*) team -> t.t_argv );
#endif
__kmp_free( team );
return next_pool;
}
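/* __kmp_free_thread: put a worker back on the global thread pool, keeping the
   pool sorted by gtid so that the lowest gtids are reused first. */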
6114 __kmp_free_thread( kmp_info_t *this_th )
6119 KA_TRACE( 20, ( "__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
6120 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
6122 KMP_DEBUG_ASSERT( this_th );
6126 TCW_PTR(this_th->th.th_team, NULL);
6127 TCW_PTR(this_th->th.th_root, NULL);
6128 TCW_PTR(this_th->th.th_dispatch, NULL);
6134 gtid = this_th->th.th_info.ds.ds_gtid;
6135 if ( __kmp_thread_pool_insert_pt != NULL ) {
6136 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
6137 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
6138 __kmp_thread_pool_insert_pt = NULL;
6149 if ( __kmp_thread_pool_insert_pt != NULL ) {
6150 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
6153 scan = (kmp_info_t **)&__kmp_thread_pool;
6155 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
6156 scan = &( (*scan)->th.th_next_pool ) );
6162 TCW_PTR(this_th->th.th_next_pool, *scan);
6163 __kmp_thread_pool_insert_pt = *scan = this_th;
6164 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
6165 || ( this_th->th.th_info.ds.ds_gtid
6166 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
6167 TCW_4(this_th->th.th_in_pool, TRUE);
6168 __kmp_thread_pool_nth++;
6170 TCW_4(__kmp_nth, __kmp_nth - 1);
6172 #ifdef KMP_ADJUST_BLOCKTIME
6175 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6176 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6177 if ( __kmp_nth <= __kmp_avail_proc ) {
6178 __kmp_zero_bt = FALSE;
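/* __kmp_join_barrier: barrier executed by every team member at the end of a
   parallel region; the master additionally waits for the task team to drain
   before the workers are released back to the fork barrier. */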
void
6187 __kmp_join_barrier( int gtid )
{
6189 register kmp_info_t *this_thr = __kmp_threads[ gtid ];
6190 register kmp_team_t *team;
6191 register kmp_uint nproc;
6192 kmp_info_t *master_thread;
6198 void * itt_sync_obj = NULL;
6200 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
6201 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
6208 team = this_thr -> th.th_team;
6210 nproc = this_thr -> th.th_team_nproc;
6211 KMP_DEBUG_ASSERT( nproc == team->t.t_nproc );
6212 tid = __kmp_tid_from_gtid(gtid);
6214 team_id = team -> t.t_id;
6217 master_thread = this_thr -> th.th_team_master;
6219 if ( master_thread != team->t.t_threads[0] ) {
6220 __kmp_print_structure();
6223 KMP_DEBUG_ASSERT( master_thread == team->t.t_threads[0] );
6227 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
6228 KMP_DEBUG_ASSERT( TCR_PTR(this_thr->th.th_team) );
6229 KMP_DEBUG_ASSERT( TCR_PTR(this_thr->th.th_root) );
6230 KMP_DEBUG_ASSERT( this_thr == team -> t.t_threads[tid] );
6232 KA_TRACE( 10, ( "__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",
6233 gtid, team_id, tid ));
6236 if ( __kmp_tasking_mode == tskm_extra_barrier ) {
6237 __kmp_tasking_barrier( team, this_thr, gtid );
6239 KA_TRACE( 10, ( "__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n",
6240 gtid, team_id, tid ));
6243 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
6244 KA_TRACE( 20, ( "__kmp_join_barrier: T#%d, old team = %d, old task_team = %p, th_task_team = %p\n",
6245 __kmp_gtid_from_thread( this_thr ), team_id, team -> t.t_task_team,
6246 this_thr->th.th_task_team ) );
6247 KMP_DEBUG_ASSERT( this_thr->th.th_task_team == team->t.t_task_team );
6260 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
#if OMP_30_ENABLED
6262 this_thr -> th.th_team_bt_intervals = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
6263 this_thr -> th.th_team_bt_set = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
#else
6265 this_thr -> th.th_team_bt_intervals = team -> t.t_set_bt_intervals[tid];
6266 this_thr -> th.th_team_bt_set = team -> t.t_set_bt_set[tid];
6267 #endif // OMP_30_ENABLED
}
6271 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
6272 __kmp_itt_barrier_starting( gtid, itt_sync_obj );
6275 if ( __kmp_barrier_gather_pattern[ bs_forkjoin_barrier ] == bp_linear_bar || __kmp_barrier_gather_branch_bits[ bs_forkjoin_barrier ] == 0 ) {
6276 __kmp_linear_barrier_gather( bs_forkjoin_barrier, this_thr, gtid, tid, NULL
6277 USE_ITT_BUILD_ARG( itt_sync_obj )
);
6279 } else if ( __kmp_barrier_gather_pattern[ bs_forkjoin_barrier ] == bp_tree_bar ) {
6280 __kmp_tree_barrier_gather( bs_forkjoin_barrier, this_thr, gtid, tid, NULL
6281 USE_ITT_BUILD_ARG( itt_sync_obj )
);
} else {
6284 __kmp_hyper_barrier_gather( bs_forkjoin_barrier, this_thr, gtid, tid, NULL
6285 USE_ITT_BUILD_ARG( itt_sync_obj )
);
}
6290 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
6291 __kmp_itt_barrier_middle( gtid, itt_sync_obj );
6304 if ( KMP_MASTER_TID( tid ) ) {
6305 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
6308 __kmp_task_team_wait( this_thr, team
6309 USE_ITT_BUILD_ARG( itt_sync_obj )
6312 #if USE_ITT_BUILD && USE_ITT_NOTIFY
6314 if( __itt_frame_submit_v3_ptr && __kmp_forkjoin_frames_mode ) {
6315 kmp_uint64 tmp = __itt_get_timestamp();
6316 ident_t * loc = team->t.t_ident;
6317 switch( __kmp_forkjoin_frames_mode ) {
6319 __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, loc );
6322 __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, loc );
6325 __kmp_itt_frame_submit( gtid, this_thr->th.th_frame_time, tmp, 0, loc );
6326 __kmp_itt_frame_submit( gtid, this_thr->th.th_bar_arrive_time, tmp, 1, loc );
6335 if( KMP_MASTER_TID( tid )) {
6336 KA_TRACE( 15, ( "__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
6337 gtid, team_id, tid, nproc ));
6344 KA_TRACE( 10, ( "__kmp_join_barrier: T#%d(%d:%d) leaving\n",
6345 gtid, team_id, tid ));
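/* __kmp_fork_barrier: barrier in which workers wait between parallel regions;
   once released, each worker syncs its task team, ICVs, and affinity before
   running the new region's microtask. */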
void
6352 __kmp_fork_barrier( int gtid, int tid )
{
6354 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6355 kmp_team_t *team = ( tid == 0 ) ? this_thr -> th.th_team : NULL;
6357 void * itt_sync_obj = NULL;
6360 KA_TRACE( 10, ( "__kmp_fork_barrier: T#%d(%d:%d) has arrived\n",
6361 gtid, ( team != NULL ) ? team->t.t_id : -1, tid ));
6364 if ( KMP_MASTER_TID( tid ) ) {
6366 #if USE_ITT_BUILD && USE_ITT_NOTIFY
6367 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
6368 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 1 );
6370 __kmp_itt_barrier_middle( gtid, itt_sync_obj );
6376 register kmp_info_t **other_threads = team -> t.t_threads;
6382 for( i = 1; i < team -> t.t_nproc ; i++ ) {
6383 KA_TRACE( 500, ( "__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork "
"go == %u.\n",
6385 gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
6386 team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
6387 other_threads[i]->th.th_bar[ bs_forkjoin_barrier ].bb.b_go ) );
6389 KMP_DEBUG_ASSERT( ( TCR_4( other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go )
6390 & ~(KMP_BARRIER_SLEEP_STATE) )
6391 == KMP_INIT_BARRIER_STATE );
6392 KMP_DEBUG_ASSERT( other_threads[i]->th.th_team == team );
6398 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
6399 __kmp_task_team_setup( this_thr, team );
6413 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
#if OMP_30_ENABLED
6415 this_thr -> th.th_team_bt_intervals = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
6416 this_thr -> th.th_team_bt_set = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
#else
6418 this_thr -> th.th_team_bt_intervals = team -> t.t_set_bt_intervals[tid];
6419 this_thr -> th.th_team_bt_set = team -> t.t_set_bt_set[tid];
6420 #endif // OMP_30_ENABLED
}
6424 if ( __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] == bp_linear_bar || __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] == 0 ) {
6425 __kmp_linear_barrier_release( bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
6426 USE_ITT_BUILD_ARG( itt_sync_obj )
);
6428 } else if ( __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] == bp_tree_bar ) {
6429 __kmp_tree_barrier_release( bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
6430 USE_ITT_BUILD_ARG( itt_sync_obj )
);
} else {
6433 __kmp_hyper_barrier_release( bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
6434 USE_ITT_BUILD_ARG( itt_sync_obj )
);
}
6441 if ( TCR_4(__kmp_global.g.g_done) ) {
6444 if ( this_thr->th.th_task_team != NULL ) {
6445 if ( KMP_MASTER_TID( tid ) ) {
6446 TCW_PTR(this_thr->th.th_task_team, NULL);
6449 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
6454 #if USE_ITT_BUILD && USE_ITT_NOTIFY
6455 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
6456 if ( !KMP_MASTER_TID( tid ) ) {
6457 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
6459 __kmp_itt_barrier_finished( gtid, itt_sync_obj );
6463 KA_TRACE( 10, ( "__kmp_fork_barrier: T#%d is leaving early\n", gtid ));
return;
}
6474 team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
6475 KMP_DEBUG_ASSERT( team != NULL );
6476 tid = __kmp_tid_from_gtid( gtid );
6480 # if KMP_BARRIER_ICV_PULL
6485 if (! KMP_MASTER_TID( tid ) ) {
6487 KA_TRACE( 10, ( "__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid ));
6488 load_icvs(&team->t.t_threads[0]->th.th_fixed_icvs);
6489 __kmp_init_implicit_task( team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE );
6490 store_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &team->t.t_threads[0]->th.th_fixed_icvs);
6493 # endif // KMP_BARRIER_ICV_PULL
6495 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
6496 __kmp_task_team_sync( this_thr, team );
6501 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
6502 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
6503 if ( proc_bind == proc_bind_intel ) {
6509 if( __kmp_affinity_type == affinity_balanced && team->t.t_size_changed ) {
6510 __kmp_balanced_affinity( tid, team->t.t_nproc );
6513 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
6515 else if ( ( proc_bind != proc_bind_false )
6516 && ( proc_bind != proc_bind_disabled )) {
6517 if ( this_thr->th.th_new_place == this_thr->th.th_current_place ) {
6518 KA_TRACE( 100, ( "__kmp_fork_barrier: T#%d already in correct place %d\n",
6519 __kmp_gtid_from_thread( this_thr ), this_thr->th.th_current_place ) );
6522 __kmp_affinity_set_place( gtid );
6527 #if USE_ITT_BUILD && USE_ITT_NOTIFY
6528 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
6529 if ( !KMP_MASTER_TID( tid ) ) {
6530 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
6531 __kmp_itt_barrier_finished( gtid, itt_sync_obj );
6535 KA_TRACE( 10, ( "__kmp_fork_barrier: T#%d(%d:%d) is leaving\n",
6536 gtid, team->t.t_id, tid ));
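/* __kmp_launch_thread: main loop of a worker thread: wait in the fork barrier
   for work, invoke the team's microtask, join, and repeat until shutdown. */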
6544 __kmp_launch_thread( kmp_info_t *this_thr )
{
6546 int gtid = this_thr->th.th_info.ds.ds_gtid;
int rc;
6548 kmp_team_t *(*volatile pteam);
6551 KA_TRACE( 10, ( "__kmp_launch_thread: T#%d start\n", gtid ) );
6553 if( __kmp_env_consistency_check ) {
6554 this_thr -> th.th_cons = __kmp_allocate_cons_stack( gtid );
6558 while( ! TCR_4(__kmp_global.g.g_done) ) {
6559 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
6563 KA_TRACE( 20, ( "__kmp_launch_thread: T#%d waiting for work\n", gtid ));
6566 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
6568 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
6571 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
6573 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
6575 KA_TRACE( 20, ( "__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
6576 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn ));
6578 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6579 if ( __kmp_inherit_fp_control && (*pteam)->t.t_fp_control_saved ) {
6580 __kmp_clear_x87_fpu_status_word();
6581 __kmp_load_x87_fpu_control_word( &(*pteam)->t.t_x87_fpu_control_word );
6582 __kmp_load_mxcsr( &(*pteam)->t.t_mxcsr );
6586 rc = (*pteam) -> t.t_invoke( gtid );
6590 KA_TRACE( 20, ( "__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6591 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn ));
6595 __kmp_join_barrier( gtid );
6598 TCR_SYNC_PTR(__kmp_global.g.g_done);
6601 if ( TCR_PTR( this_thr->th.th_task_team ) != NULL ) {
6602 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
6607 __kmp_common_destroy_gtid( gtid );
6609 KA_TRACE( 10, ( "__kmp_launch_thread: T#%d done\n", gtid ) );
void
6620 __kmp_internal_end_dest( void *specific_gtid )
{
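/* TLS destructor: runs when a thread that registered a gtid exits and routes
   into __kmp_internal_end_thread() for per-thread cleanup. */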
6622 #if KMP_COMPILER_ICC
6623 #pragma warning( push )
6624 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
6627 int gtid = (kmp_intptr_t)specific_gtid - 1;
6628 #if KMP_COMPILER_ICC
6629 #pragma warning( pop )
6632 KA_TRACE( 30, ( "__kmp_internal_end_dest: T#%d\n", gtid));
6646 if(gtid >= 0 && KMP_UBER_GTID(gtid))
6647 __kmp_gtid_set_specific( gtid );
6648 #ifdef KMP_TDATA_GTID
6651 __kmp_internal_end_thread( gtid );
6654 #if KMP_OS_UNIX && GUIDEDLL_EXPORTS
6660 __attribute__(( destructor ))
6662 __kmp_internal_end_dtor(
void )
6664 __kmp_internal_end_atexit();
6668 __kmp_internal_end_fini(
void )
6670 __kmp_internal_end_atexit();
6677 __kmp_internal_end_atexit(
void )
6679 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
6701 __kmp_internal_end_library( -1 );
6703 __kmp_close_console();
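/* __kmp_reap_thread: release a pooled worker's OS thread and all of its
   runtime resources; called with __kmp_forkjoin_lock held during shutdown. */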
static void
__kmp_reap_thread(
6709 kmp_info_t * thread,
int is_root ) {
int gtid;
6717 KMP_DEBUG_ASSERT( thread != NULL );
6719 gtid = thread->th.th_info.ds.ds_gtid;
6723 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
6725 KA_TRACE( 20, ( "__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
6729 &thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go,
6736 __kmp_reap_worker( thread );
6751 if ( thread->th.th_active_in_pool ) {
6752 thread->th.th_active_in_pool = FALSE;
6753 KMP_TEST_THEN_DEC32(
6754 (kmp_int32 *) &__kmp_thread_pool_active_nth );
6755 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
6759 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
6760 --__kmp_thread_pool_nth;
6765 __kmp_free_fast_memory( thread );
6768 __kmp_suspend_uninitialize_thread( thread );
6770 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
6771 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6776 #ifdef KMP_ADJUST_BLOCKTIME
6779 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6780 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6781 if ( __kmp_nth <= __kmp_avail_proc ) {
6782 __kmp_zero_bt = FALSE;
6788 if( __kmp_env_consistency_check ) {
6789 if ( thread->th.th_cons ) {
6790 __kmp_free_cons_stack( thread->th.th_cons );
6791 thread->th.th_cons = NULL;
6795 if ( thread->th.th_pri_common != NULL ) {
6796 __kmp_free( thread->th.th_pri_common );
6797 thread->th.th_pri_common = NULL;
6801 if ( thread->th.th_local.bget_data != NULL ) {
6802 __kmp_finalize_bget( thread );
6806 #if (KMP_OS_WINDOWS || KMP_OS_LINUX)
6807 if ( thread->th.th_affin_mask != NULL ) {
6808 KMP_CPU_FREE( thread->th.th_affin_mask );
6809 thread->th.th_affin_mask = NULL;
6813 __kmp_reap_team( thread->th.th_serial_team );
6814 thread->th.th_serial_team = NULL;
6815 __kmp_free( thread );
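/* __kmp_internal_end: common shutdown path; once no root is active it reaps
   the monitor thread, the pooled workers and teams, and the task teams. */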
static void
6822 __kmp_internal_end( void )
{
int i;
6827 __kmp_unregister_library();
6835 __kmp_reclaim_dead_roots();
6838 for( i=0 ; i<__kmp_threads_capacity ; i++ )
if( __kmp_root[i] )
6840 if( __kmp_root[i] -> r.r_active )
break;
6843 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6845 if ( i < __kmp_threads_capacity ) {
6863 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
6864 if ( TCR_4( __kmp_init_monitor ) ) {
6865 __kmp_reap_monitor( & __kmp_monitor );
6866 TCW_4( __kmp_init_monitor, 0 );
6868 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
6869 KA_TRACE( 10, ( "__kmp_internal_end: monitor reaped\n" ) );
6874 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6875 if( __kmp_root[i] ) {
6876 KMP_ASSERT( ! KMP_UBER_GTID( i ) );
6877 KMP_ASSERT( ! __kmp_root[i] -> r.r_active );
6886 while ( __kmp_thread_pool != NULL ) {
6888 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
6889 __kmp_thread_pool = thread->th.th_next_pool;
6891 thread->th.th_next_pool = NULL;
6892 thread->th.th_in_pool = FALSE;
6893 __kmp_reap_thread( thread, 0 );
6895 __kmp_thread_pool_insert_pt = NULL;
6898 while ( __kmp_team_pool != NULL ) {
6900 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
6901 __kmp_team_pool = team->t.t_next_pool;
6903 team->t.t_next_pool = NULL;
6904 __kmp_reap_team( team );
6908 __kmp_reap_task_teams( );
6911 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
6918 TCW_SYNC_4(__kmp_init_common, FALSE);
6920 KA_TRACE( 10, ( "__kmp_internal_end: all workers reaped\n" ) );
6929 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
6930 if ( TCR_4( __kmp_init_monitor ) ) {
6931 __kmp_reap_monitor( & __kmp_monitor );
6932 TCW_4( __kmp_init_monitor, 0 );
6934 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
6935 KA_TRACE( 10, ( "__kmp_internal_end: monitor reaped\n" ) );
6938 TCW_4(__kmp_init_gtid, FALSE);
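/* __kmp_internal_end_library: process-wide shutdown entry point (atexit /
   library unload); unregisters the calling root and runs __kmp_internal_end()
   under the initialization and fork/join bootstrap locks. */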
void
6946 __kmp_internal_end_library( int gtid_req )
{
6956 if( __kmp_global.g.g_abort ) {
6957 KA_TRACE( 11, ( "__kmp_internal_end_library: abort, exiting\n" ));
return;
}
6961 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6962 KA_TRACE( 10, ( "__kmp_internal_end_library: already finished\n" ));
return;
}
6971 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
6972 KA_TRACE( 10, ( "__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
6973 if( gtid == KMP_GTID_SHUTDOWN ) {
6974 KA_TRACE( 10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
6976 }
else if( gtid == KMP_GTID_MONITOR ) {
6977 KA_TRACE( 10, (
"__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
6979 }
else if( gtid == KMP_GTID_DNE ) {
6980 KA_TRACE( 10, (
"__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
6982 }
else if( KMP_UBER_GTID( gtid )) {
6984 if( __kmp_root[gtid] -> r.r_active ) {
6985 __kmp_global.g.g_abort = -1;
6986 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6987 KA_TRACE( 10, ( "__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
return;
}
else {
6990 KA_TRACE( 10, ( "__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
6991 __kmp_unregister_root_current_thread( gtid );
6998 #ifdef DUMP_DEBUG_ON_EXIT
6999 if ( __kmp_debug_buf )
7000 __kmp_dump_debug_buffer( );
7006 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7009 if( __kmp_global.g.g_abort ) {
7010 KA_TRACE( 10, (
"__kmp_internal_end_library: abort, exiting\n" ));
7012 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7015 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
7016 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7026 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
7029 __kmp_internal_end();
7031 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
7032 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7034 KA_TRACE( 10, (
"__kmp_internal_end_library: exit\n" ) );
7036 #ifdef DUMP_DEBUG_ON_EXIT
7037 if ( __kmp_debug_buf )
7038 __kmp_dump_debug_buffer();
7042 __kmp_close_console();
7045 __kmp_fini_allocator();
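/* __kmp_internal_end_thread: per-thread shutdown path (thread exit or TLS
   destructor); the library itself is only torn down once every uber thread
   has finished. */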
void
7050 __kmp_internal_end_thread( int gtid_req )
{
7060 if( __kmp_global.g.g_abort ) {
7061 KA_TRACE( 11, ( "__kmp_internal_end_thread: abort, exiting\n" ));
return;
}
7065 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
7066 KA_TRACE( 10, ( "__kmp_internal_end_thread: already finished\n" ));
return;
}
7074 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
7075 KA_TRACE( 10, ( "__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
7076 if( gtid == KMP_GTID_SHUTDOWN ) {
7077 KA_TRACE( 10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
7079 }
else if( gtid == KMP_GTID_MONITOR ) {
7080 KA_TRACE( 10, (
"__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
7082 }
else if( gtid == KMP_GTID_DNE ) {
7083 KA_TRACE( 10, (
"__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
7086 }
else if( KMP_UBER_GTID( gtid )) {
7088 if( __kmp_root[gtid] -> r.r_active ) {
7089 __kmp_global.g.g_abort = -1;
7090 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
7091 KA_TRACE( 10, ( "__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
return;
}
else {
7094 KA_TRACE( 10, ( "__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
7095 __kmp_unregister_root_current_thread( gtid );
7099 KA_TRACE( 10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
7103 kmp_info_t *this_thr = __kmp_threads[ gtid ];
7104 if (TCR_PTR(this_thr->th.th_task_team) != NULL) {
7105 __kmp_unref_task_team(this_thr->th.th_task_team, this_thr);
7110 KA_TRACE( 10, (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
7114 #if defined GUIDEDLL_EXPORTS
7122 KA_TRACE( 10, (
"__kmp_internal_end_thread: exiting\n") );
7126 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7129 if( __kmp_global.g.g_abort ) {
7130 KA_TRACE( 10, (
"__kmp_internal_end_thread: abort, exiting\n" ));
7132 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7135 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
7136 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7148 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
7150 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
7151 if ( KMP_UBER_GTID( i ) ) {
7152 KA_TRACE( 10, (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
7153 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
7154 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7161 __kmp_internal_end();
7163 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
7164 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7166 KA_TRACE( 10, (
"__kmp_internal_end_thread: exit\n" ) );
7168 #ifdef DUMP_DEBUG_ON_EXIT
7169 if ( __kmp_debug_buf )
7170 __kmp_dump_debug_buffer();
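/* Library registration: the first runtime in the process publishes a
   __KMP_REGISTERED_LIB_<pid> environment variable encoding the address and
   value of __kmp_registration_flag, so a second copy of the library can
   detect the duplicate and abort unless KMP_DUPLICATE_LIB_OK is set. */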
7177 static long __kmp_registration_flag = 0;
7179 static char * __kmp_registration_str = NULL;
static char *
7185 __kmp_reg_status_name() {
7191 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
}
void
7196 __kmp_register_library_startup( void ) {
7200 char * name = __kmp_reg_status_name();
union { double dtime; long ltime; } time;
7207 __kmp_initialize_system_tick();
7209 __kmp_read_system_time( & time.dtime );
7210 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
7211 __kmp_registration_str =
__kmp_str_format( "%p-%lx-%s",
7214 & __kmp_registration_flag,
7215 __kmp_registration_flag,
KMP_LIBRARY_FILE );
7219 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
7223 char * value = NULL;
7226 __kmp_env_set( name, __kmp_registration_str, 0 );
7228 value = __kmp_env_get( name );
7229 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
7238 char * tail = value;
7239 char * flag_addr_str = NULL;
7240 char * flag_val_str = NULL;
7241 char const * file_name = NULL;
7242 __kmp_str_split( tail, '-', & flag_addr_str, & tail );
7243 __kmp_str_split( tail, '-', & flag_val_str, & tail );
file_name = tail;
7245 if ( tail != NULL ) {
7246 long * flag_addr = 0;
long flag_val = 0;
7248 sscanf( flag_addr_str, "%p", & flag_addr );
7249 sscanf( flag_val_str, "%lx", & flag_val );
7250 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
7254 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
7262 switch ( neighbor ) {
7267 file_name = "unknown library";
7271 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
7272 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
7276 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
7277 KMP_HNT( DuplicateLibrary ),
7281 KMP_INTERNAL_FREE( duplicate_ok );
7282 __kmp_duplicate_library_ok = 1;
7287 __kmp_env_unset( name );
7290 KMP_DEBUG_ASSERT( 0 );
7295 KMP_INTERNAL_FREE( (void *) value );
7298 KMP_INTERNAL_FREE( (void *) name );
static void
7304 __kmp_unregister_library( void ) {
7306 char * name = __kmp_reg_status_name();
7307 char * value = __kmp_env_get( name );
7309 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
7310 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
7311 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
7313 __kmp_env_unset( name );
7316 KMP_INTERNAL_FREE( __kmp_registration_str );
7317 KMP_INTERNAL_FREE( value );
7318 KMP_INTERNAL_FREE( name );
7320 __kmp_registration_flag = 0;
7321 __kmp_registration_str = NULL;
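/* __kmp_do_serial_initialize: one-time process initialization: locks, default
   ICVs and barrier patterns, the __kmp_threads/__kmp_root arrays, registration
   of the initial root thread, and the atexit/signal handlers. */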
static void
7330 __kmp_do_serial_initialize( void )
{
7335 KA_TRACE( 10, ( "__kmp_serial_initialize: enter\n" ) );
7337 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
7338 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
7339 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
7340 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
7341 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
7343 __kmp_validate_locks();
7346 __kmp_init_allocator();
7352 __kmp_register_library_startup( );
7355 if( TCR_4(__kmp_global.g.g_done) ) {
7356 KA_TRACE( 10, ( "__kmp_do_serial_initialize: reinitialization of library\n" ) );
7359 __kmp_global.g.g_abort = 0;
7360 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7363 #if KMP_USE_ADAPTIVE_LOCKS
7364 #if KMP_DEBUG_ADAPTIVE_LOCKS
7365 __kmp_init_speculative_stats();
7368 __kmp_init_lock( & __kmp_global_lock );
7369 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
7370 __kmp_init_lock( & __kmp_debug_lock );
7371 __kmp_init_atomic_lock( & __kmp_atomic_lock );
7372 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
7373 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
7374 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
7375 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
7376 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
7377 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
7378 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
7379 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
7380 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
7381 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
7382 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
7383 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
7384 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
7385 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
7386 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
7387 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
7391 __kmp_runtime_initialize();
7397 __kmp_abort_delay = 0;
7401 __kmp_dflt_team_nth_ub = __kmp_xproc;
7402 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
7403 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7405 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
7406 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7408 __kmp_max_nth = __kmp_sys_max_nth;
7411 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7412 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
7413 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
7415 __kmp_library = library_throughput;
7417 __kmp_static = kmp_sch_static_balanced;
7422 #endif // OMP_30_ENABLED
7425 #if KMP_FAST_REDUCTION_BARRIER
7426 #define kmp_reduction_barrier_gather_bb ((int)1)
7427 #define kmp_reduction_barrier_release_bb ((int)1)
7428 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
7429 #define kmp_reduction_barrier_release_pat bp_hyper_bar
7430 #endif // KMP_FAST_REDUCTION_BARRIER
7431 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
7432 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
7433 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
7434 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
7435 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
7436 #if KMP_FAST_REDUCTION_BARRIER
7437 if( i == bs_reduction_barrier ) {
7438 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
7439 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
7440 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
7441 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
7443 #endif // KMP_FAST_REDUCTION_BARRIER
7445 #if KMP_FAST_REDUCTION_BARRIER
7446 #undef kmp_reduction_barrier_release_pat
7447 #undef kmp_reduction_barrier_gather_pat
7448 #undef kmp_reduction_barrier_release_bb
7449 #undef kmp_reduction_barrier_gather_bb
7450 #endif // KMP_FAST_REDUCTION_BARRIER
7453 __kmp_barrier_gather_branch_bits [ 0 ] = 3;
7454 __kmp_barrier_release_branch_bits[ 1 ] = 1;
7459 __kmp_env_checks = TRUE;
7461 __kmp_env_checks = FALSE;
7465 __kmp_foreign_tp = TRUE;
7467 __kmp_global.g.g_dynamic = FALSE;
7468 __kmp_global.g.g_dynamic_mode = dynamic_default;
7470 __kmp_env_initialize( NULL );
7473 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
7474 if ( __kmp_str_match_true( val ) ) {
7475 kmp_str_buf_t buffer;
7476 __kmp_str_buf_init( & buffer );
7477 __kmp_i18n_dump_catalog( & buffer );
7478 __kmp_printf( "%s", buffer.str );
7479 __kmp_str_buf_free( & buffer );
7481 __kmp_env_free( & val );
7484 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
7486 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7491 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
7492 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
7493 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
7494 __kmp_thread_pool = NULL;
7495 __kmp_thread_pool_insert_pt = NULL;
7496 __kmp_team_pool = NULL;
7501 size = ( sizeof(kmp_info_t*) + sizeof(kmp_root_t*) ) * __kmp_threads_capacity + CACHE_LINE;
7502 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
7503 __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
7506 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 );
7507 KMP_DEBUG_ASSERT( __kmp_nth == 0 );
7512 gtid = __kmp_register_root( TRUE );
7513 KA_TRACE( 10, ( "__kmp_do_serial_initialize T#%d\n", gtid ));
7514 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
7515 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
7519 __kmp_common_initialize();
7523 __kmp_register_atfork();
7526 #if ! defined GUIDEDLL_EXPORTS
7531 int rc = atexit( __kmp_internal_end_atexit );
if ( rc != 0 ) {
7533 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
}
7538 #if KMP_HANDLE_SIGNALS
7545 __kmp_install_signals( FALSE );
7548 __kmp_install_signals( TRUE );
7553 __kmp_init_counter ++;
7555 __kmp_init_serial = TRUE;
7557 if (__kmp_settings) {
7562 if (__kmp_display_env || __kmp_display_env_verbose) {
7563 __kmp_env_print_2();
7565 #endif // OMP_40_ENABLED
7569 KA_TRACE( 10, ( "__kmp_do_serial_initialize: exit\n" ) );
void
7573 __kmp_serial_initialize( void )
{
7575 if ( __kmp_init_serial ) {
7578 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7579 if ( __kmp_init_serial ) {
7580 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7583 __kmp_do_serial_initialize();
7584 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
static void
7588 __kmp_do_middle_initialize( void )
{
7591 int prev_dflt_team_nth;
7593 if( !__kmp_init_serial ) {
7594 __kmp_do_serial_initialize();
7597 KA_TRACE( 10, ( "__kmp_middle_initialize: enter\n" ) );
7603 prev_dflt_team_nth = __kmp_dflt_team_nth;
7605 #if KMP_OS_WINDOWS || KMP_OS_LINUX
7610 __kmp_affinity_initialize();
7616 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
7617 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
7618 __kmp_affinity_set_init_mask( i, TRUE );
7623 KMP_ASSERT( __kmp_xproc > 0 );
7624 if ( __kmp_avail_proc == 0 ) {
7625 __kmp_avail_proc = __kmp_xproc;
7630 while ( __kmp_nested_nth.used && ! __kmp_nested_nth.nth[ j ] ) {
7631 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
7635 if ( __kmp_dflt_team_nth == 0 ) {
7636 #ifdef KMP_DFLT_NTH_CORES
7640 __kmp_dflt_team_nth = __kmp_ncores;
7641 KA_TRACE( 20, ( "__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
7642 __kmp_dflt_team_nth ) );
7647 __kmp_dflt_team_nth = __kmp_avail_proc;
7648 KA_TRACE( 20, ( "__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
7649 __kmp_dflt_team_nth ) );
7653 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
7654 __kmp_dflt_team_nth = KMP_MIN_NTH;
7656 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
7657 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7664 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
7666 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
7673 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
7674 kmp_info_t *thread = __kmp_threads[ i ];
7675 if ( thread == NULL ) continue;
#if OMP_30_ENABLED
7677 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
#else
7679 if ( thread->th.th_team->t.t_set_nproc[ thread->th.th_info.ds.ds_tid ] != 0 ) continue;
#endif // OMP_30_ENABLED
7682 set__nproc_p( __kmp_threads[ i ], __kmp_dflt_team_nth );
}
7685 KA_TRACE( 20, ( "__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7686 __kmp_dflt_team_nth) );
7688 #ifdef KMP_ADJUST_BLOCKTIME
7691 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
7692 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
7693 if ( __kmp_nth > __kmp_avail_proc ) {
7694 __kmp_zero_bt = TRUE;
7700 TCW_SYNC_4(__kmp_init_middle, TRUE);
7702 KA_TRACE( 10, ( "__kmp_do_middle_initialize: exit\n" ) );
void
7706 __kmp_middle_initialize( void )
{
7708 if ( __kmp_init_middle ) {
7711 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7712 if ( __kmp_init_middle ) {
7713 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7716 __kmp_do_middle_initialize();
7717 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
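/* __kmp_parallel_initialize: final initialization stage, run by the first
   thread to fork a parallel region; captures the FP control state, installs
   signal handlers, and selects the dynamic thread-adjustment mode. */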
void
7721 __kmp_parallel_initialize( void )
{
7723 int gtid = __kmp_entry_gtid();
7726 if( TCR_4(__kmp_init_parallel) ) return;
7727 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7728 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
7731 if( TCR_4(__kmp_global.g.g_done) ) {
7732 KA_TRACE( 10, ( "__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
7733 __kmp_infinite_loop();
7739 if( !__kmp_init_middle ) {
7740 __kmp_do_middle_initialize();
7744 KA_TRACE( 10, ( "__kmp_parallel_initialize: enter\n" ) );
7745 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
7747 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
7752 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
7753 __kmp_store_mxcsr( &__kmp_init_mxcsr );
7754 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7758 # if KMP_HANDLE_SIGNALS
7760 __kmp_install_signals( TRUE );
7764 __kmp_suspend_initialize();
7766 # if defined(USE_LOAD_BALANCE)
7767 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
7768 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7771 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
7772 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7776 if ( __kmp_version ) {
7777 __kmp_print_version_2();
7781 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7784 KA_TRACE( 10, ( "__kmp_parallel_initialize: exit\n" ) );
7786 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
void
7793 __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
kmp_team_t *team )
{
7796 kmp_disp_t *dispatch;
7801 this_thr->th.th_local.this_construct = 0;
7802 this_thr->th.th_local.last_construct = 0;
7803 #if KMP_CACHE_MANAGE
7804 KMP_CACHE_PREFETCH( &this_thr -> th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
7806 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7807 KMP_DEBUG_ASSERT( dispatch );
7808 KMP_DEBUG_ASSERT( team -> t.t_dispatch );
7811 dispatch -> th_disp_index = 0;
7813 if( __kmp_env_consistency_check )
7814 __kmp_push_parallel( gtid, team->t.t_ident );
void
7820 __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
kmp_team_t *team )
{
7823 if( __kmp_env_consistency_check )
7824 __kmp_pop_parallel( gtid, team->t.t_ident );
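/* __kmp_invoke_task_func: run the compiler-generated microtask for the current
   parallel region on this thread, bracketed by the consistency-check push/pop
   helpers above and the optional ITT stack notifications. */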
int
7828 __kmp_invoke_task_func( int gtid )
{
int rc;
7831 int tid = __kmp_tid_from_gtid( gtid );
7832 kmp_info_t *this_thr = __kmp_threads[ gtid ];
7833 kmp_team_t *team = this_thr -> th.th_team;
7835 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
7837 if ( __itt_stack_caller_create_ptr ) {
7838 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id );
7841 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
7842 gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv );
7845 if ( __itt_stack_caller_create_ptr ) {
7846 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id );
7849 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
return rc;
}
7856 __kmp_teams_master( microtask_t microtask,
int gtid )
7859 kmp_info_t *this_thr = __kmp_threads[ gtid ];
7860 kmp_team_t *team = this_thr -> th.th_team;
7861 ident_t *loc = team->t.t_ident;
7864 int tid = __kmp_tid_from_gtid( gtid );
7865 KA_TRACE( 20, ( "__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
7866 gtid, tid, microtask) );
7871 this_thr->th.th_set_nproc = this_thr->th.th_set_nth_teams;
7872 __kmp_fork_call( loc, gtid, TRUE,
7875 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
7877 __kmp_join_call( loc, gtid, 1 );
7882 __kmp_invoke_teams_master(
int gtid )
7885 if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized )
7886 KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]->th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
7889 __kmp_teams_master( (microtask_t)__kmp_threads[gtid]->th.th_team_microtask, gtid );
void
7901 __kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
{
7903 kmp_info_t *thr = __kmp_threads[gtid];
7905 if( num_threads > 0 )
7906 thr -> th.th_set_nproc = num_threads;
void
7914 __kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
{
7916 kmp_info_t *thr = __kmp_threads[gtid];
7918 if( num_teams > 0 ) {
7919 thr -> th.th_set_nproc = num_teams;
7921 thr -> th.th_set_nproc = 1;
7925 if( num_threads > 0 ) {
7926 thr -> th.th_set_nth_teams = num_threads;
7928 if( !TCR_4(__kmp_init_middle) )
7929 __kmp_middle_initialize();
7930 thr -> th.th_set_nth_teams = __kmp_avail_proc / thr -> th.th_set_nproc;
void
7939 __kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
{
7941 kmp_info_t *thr = __kmp_threads[gtid];
7942 thr -> th.th_set_proc_bind = proc_bind;
void
7950 __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
{
7952 kmp_info_t *this_thr = __kmp_threads[gtid];
7958 KMP_DEBUG_ASSERT( team );
7959 KMP_DEBUG_ASSERT( this_thr -> th.th_team == team );
7960 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7963 team -> t.t_construct = 0;
7964 team -> t.t_ordered.dt.t_value = 0;
7967 KMP_DEBUG_ASSERT( team -> t.t_disp_buffer );
7968 if ( team->t.t_max_nproc > 1 ) {
7970 for (i = 0; i < KMP_MAX_DISP_BUF; ++i)
7971 team -> t.t_disp_buffer[ i ].buffer_index = i;
7973 team -> t.t_disp_buffer[ 0 ].buffer_index = 0;
7977 KMP_ASSERT( this_thr -> th.th_team == team );
7980 for( f=0 ; f<team->t.t_nproc ; f++ ) {
7981 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
7982 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
7987 __kmp_fork_barrier( gtid, 0 );
void
7992 __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
{
7994 kmp_info_t *this_thr = __kmp_threads[gtid];
7996 KMP_DEBUG_ASSERT( team );
7997 KMP_DEBUG_ASSERT( this_thr -> th.th_team == team );
7998 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
8004 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
8005 __kmp_printf(
"GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
8006 __kmp_printf(
"__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
8007 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
8008 __kmp_print_structure();
8010 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
8011 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
8014 __kmp_join_barrier( gtid );
8017 KMP_ASSERT( this_thr -> th.th_team == team );
#ifdef USE_LOAD_BALANCE

/* Count the workers actively spinning in the hot team when we are at the
   outermost level of parallelism; otherwise return 0. */
static int
__kmp_active_hot_team_nproc( kmp_root_t *root )
{
    int i, retval = 0;
    kmp_team_t *hot_team;

    if ( root->r.r_active )                     // inside a parallel region
        return 0;
    hot_team = root->r.r_hot_team;
    if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME )
        return hot_team->t.t_nproc - 1;         // all workers spin; skip the master
    for ( i = 1; i < hot_team->t.t_nproc; i++ ) // skip the master thread
        if ( hot_team->t.t_threads[i]->th.th_active )
            retval++;
    return retval;
}
/* Dynamically adjust the number of threads used by the next parallel region,
   based on the current system load. */
static int
__kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
{
    int retval;
    int pool_active;
    int hot_team_active;
    int team_curr_active;
    int system_active;

    KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
                   root, set_nproc ) );
    KMP_DEBUG_ASSERT( root );
#if OMP_30_ENABLED
    KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
#else
    KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_set_dynamic[0] == TRUE );
#endif
    KMP_DEBUG_ASSERT( set_nproc > 1 );

    if ( set_nproc == 1 ) {
        KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) );
        return 1;
    }

    /* Threads in the pool, threads active in this root's hot team, and the
       current thread (the future master) can all join the new team, but they
       also contribute to the current system load and must be accounted for. */
    pool_active = TCR_4(__kmp_thread_pool_active_nth);
    hot_team_active = __kmp_active_hot_team_nproc( root );
    team_curr_active = pool_active + hot_team_active + 1;

    /* Check the system load. */
    system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
    KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
                   system_active, pool_active, hot_team_active ) );

    if ( system_active < 0 ) {
        /* The system load could not be determined; fall back to the
           thread-limit algorithm for this and all subsequent calls. */
        __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
        KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );

        retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
                  : root->r.r_hot_team->t.t_nproc);
        if ( retval > set_nproc ) {
            retval = set_nproc;
        }
        if ( retval < KMP_MIN_NTH ) {
            retval = KMP_MIN_NTH;
        }

        KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
        return retval;
    }

    /* The load-balance algorithm is slightly delayed in detecting newly
       running procs; the real system load should be at least as large as the
       number of OpenMP threads available to join the team. */
    if ( system_active < team_curr_active ) {
        system_active = team_curr_active;
    }
    retval = __kmp_avail_proc - system_active + team_curr_active;
    if ( retval > set_nproc ) {
        retval = set_nproc;
    }
    if ( retval < KMP_MIN_NTH ) {
        retval = KMP_MIN_NTH;
    }

    KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
    return retval;
}

#endif /* USE_LOAD_BALANCE */
/* Tear down runtime state; called during library shutdown. */
void
__kmp_cleanup( void )
{
    int f;

    KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) );

    if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
        __kmp_remove_signals();
#endif
        TCW_4(__kmp_init_parallel, FALSE);
    }

    if (TCR_4(__kmp_init_middle)) {
#if KMP_OS_WINDOWS || KMP_OS_LINUX
        __kmp_affinity_uninitialize();
#endif
        TCW_4(__kmp_init_middle, FALSE);
    }

    KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) );

    if (__kmp_init_serial) {
        __kmp_runtime_destroy();
        __kmp_init_serial = FALSE;
    }

    for ( f = 0; f < __kmp_threads_capacity; f++ ) {
        if ( __kmp_root[ f ] != NULL ) {
            __kmp_free( __kmp_root[ f ] );
            __kmp_root[ f ] = NULL;
        }
    }
    __kmp_free( __kmp_threads );
    // __kmp_threads and __kmp_root were allocated as a single block, so freeing
    // __kmp_threads releases both.
    __kmp_threads = NULL;
    __kmp_root = NULL;
    __kmp_threads_capacity = 0;

    __kmp_cleanup_user_locks();

#if KMP_OS_LINUX || KMP_OS_WINDOWS
    KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
    __kmp_cpuinfo_file = NULL;
#endif

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
    __kmp_print_speculative_stats();
#endif
#endif
    KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
    __kmp_nested_nth.nth = NULL;
    __kmp_nested_nth.size = 0;
    __kmp_nested_nth.used = 0;

    __kmp_i18n_catclose();

    KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
}
/* Check whether __kmpc_begin() / __kmpc_end() should be ignored (the default). */
int
__kmp_ignore_mppbeg( void )
{
    char *env;
    if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
        if (__kmp_str_match_false( env ))
            return FALSE;
    }
    return TRUE;    // by default __kmpc_begin() is a no-op
}

int
__kmp_ignore_mppend( void )
{
    char *env;
    if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
        if (__kmp_str_match_false( env ))
            return FALSE;
    }
    return TRUE;    // by default __kmpc_end() is a no-op
}
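/*
    Illustration (the environment settings below are an assumed example): with

        KMP_IGNORE_MPPBEG=false
        KMP_IGNORE_MPPEND=false

    the two functions above return FALSE, so __kmpc_begin()/__kmpc_end() are
    expected to perform their real work instead of being treated as no-ops.
*/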
void
__kmp_internal_begin( void )
{
    int gtid;
    kmp_root_t *root;

    /* This is an important step: it registers the calling thread and, if
       needed, assigns the new uber thread a gtid. */
    gtid = __kmp_entry_gtid();
    root = __kmp_threads[ gtid ] -> th.th_root;
    KMP_ASSERT( KMP_UBER_GTID( gtid ));

    if( root->r.r_begin )
        return;
    __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
    if( root->r.r_begin ) {
        __kmp_release_lock( &root->r.r_begin_lock, gtid );
        return;
    }

    root -> r.r_begin = TRUE;

    __kmp_release_lock( &root->r.r_begin_lock, gtid );
}
/* Change the library behaviour (serial / turnaround / throughput) for the
   calling thread; must be called outside a parallel region. */
void
__kmp_user_set_library( enum library_type arg )
{
    int gtid;
    kmp_root_t *root;
    kmp_info_t *thread;

    /* Make sure we are initialized so we can get our gtid. */
    gtid = __kmp_entry_gtid();
    thread = __kmp_threads[ gtid ];
    root = thread -> th.th_root;

    KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
    if (root->r.r_in_parallel) {    /* must be called in a serial section of the top-level thread */
        KMP_WARNING( SetLibraryIncorrectCall );
        return;
    }

    switch ( arg ) {
    case library_serial :
        thread -> th.th_set_nproc = 0;
        set__nproc_p( thread, 1 );
        break;
    case library_turnaround :
        thread -> th.th_set_nproc = 0;
        set__nproc_p( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
        break;
    case library_throughput :
        thread -> th.th_set_nproc = 0;
        set__nproc_p( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
        break;
    default:
        KMP_FATAL( UnknownLibraryType, arg );
    }

    __kmp_aux_set_library( arg );
}
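/*
    Usage sketch (illustration only, not compiled here): the user-visible
    kmp_set_library_*() entry points are expected to reduce to calls of the
    routine above, roughly:

        kmp_set_library_serial()      ->  __kmp_user_set_library( library_serial );
        kmp_set_library_turnaround()  ->  __kmp_user_set_library( library_turnaround );
        kmp_set_library_throughput()  ->  __kmp_user_set_library( library_throughput );

    The actual wrappers live in the C/Fortran entry-point files, not here.
*/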
void
__kmp_aux_set_stacksize( size_t arg )
{
    if (! __kmp_init_serial)
        __kmp_serial_initialize();

    if (arg & (0x1000 - 1)) {
        arg &= ~(0x1000 - 1);   // align the requested size to a 4K boundary
    }

    __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

    /* Only change the default stacksize before the first parallel region. */
    if (! TCR_4(__kmp_init_parallel)) {
        size_t value = arg;     /* argument is in bytes */

        if (value < __kmp_sys_min_stksize )
            value = __kmp_sys_min_stksize;
        else if (value > KMP_MAX_STKSIZE)
            value = KMP_MAX_STKSIZE;

        __kmp_stksize = value;
        __kmp_env_stksize = TRUE;   /* record that the stacksize was explicitly set */
    }

    __kmp_release_bootstrap_lock( &__kmp_initz_lock );
}
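/*
    Usage sketch (illustration only): the user-level kmp_set_stacksize_s()
    entry point is expected to funnel into the routine above, e.g.

        // request 4 MB worker stacks before the first parallel region
        kmp_set_stacksize_s( (size_t)4 * 1024 * 1024 );

    Because of the __kmp_init_parallel check above, the value is only honored
    if it is set before the first parallel region, so the call must come early
    in the program.
*/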
/* Set the overall behaviour of the runtime library. */
void
__kmp_aux_set_library( enum library_type arg )
{
    __kmp_library = arg;

    switch ( __kmp_library ) {
    case library_serial :
        KMP_INFORM( LibraryIsSerial );
        (void) __kmp_change_library( TRUE );
        break;
    case library_turnaround :
        (void) __kmp_change_library( TRUE );
        break;
    case library_throughput :
        (void) __kmp_change_library( FALSE );
        break;
    default:
        KMP_FATAL( UnknownLibraryType, arg );
    }
}
/* Set the blocktime (in milliseconds) for "thread" in its current and serial teams. */
void
__kmp_aux_set_blocktime( int arg, kmp_info_t *thread, int tid )
{
    int blocktime = arg;    /* argument is in milliseconds */
    int bt_intervals;
    int bt_set;

    __kmp_save_internal_controls( thread );

    /* Normalize and set the blocktime for the teams. */
    if (blocktime < KMP_MIN_BLOCKTIME)
        blocktime = KMP_MIN_BLOCKTIME;
    else if (blocktime > KMP_MAX_BLOCKTIME)
        blocktime = KMP_MAX_BLOCKTIME;

    set__blocktime_team( thread -> th.th_team, tid, blocktime );
    set__blocktime_team( thread -> th.th_serial_team, 0, blocktime );

    /* Calculate and set the blocktime intervals (in monitor wakeups) for the teams. */
    bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

    set__bt_intervals_team( thread -> th.th_team, tid, bt_intervals );
    set__bt_intervals_team( thread -> th.th_serial_team, 0, bt_intervals );

    /* Record that the blocktime was explicitly set. */
    bt_set = TRUE;

    set__bt_set_team( thread -> th.th_team, tid, bt_set );
    set__bt_set_team( thread -> th.th_serial_team, 0, bt_set );

    KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n",
                  __kmp_gtid_from_tid(tid, thread->th.th_team),
                  thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) );
}
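/*
    Illustration (wakeup rate assumed for the example): KMP_INTERVALS_FROM_BLOCKTIME
    converts the blocktime from milliseconds into a count of monitor wakeups.
    If the monitor wakes roughly 10 times per second (about one wakeup every
    100 ms), a call such as kmp_set_blocktime(250) yields a blocktime of 250 ms
    and on the order of 3 wakeup intervals before a spinning worker is put to sleep.
*/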
void
__kmp_aux_set_defaults( char const *str, int len )
{
    if ( ! __kmp_init_serial )
        __kmp_serial_initialize();
    __kmp_env_initialize( str );

    if ( __kmp_settings
         || __kmp_display_env || __kmp_display_env_verbose )
        __kmp_env_print();
}
/* ------------------------------------------------------------------------ */
/* Internal fast reduction routines. */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
        kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
        void (*reduce_func)(void *lhs_data, void *rhs_data),
        kmp_critical_name *lck )
{
    // Default: the critical-section method (lck != NULL).
    // If reduce_data != NULL && reduce_func != NULL: the tree method is available.
    // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE: the atomic method is available.
    // Once the method is chosen, the proper barrier type is selected as well.

    PACKED_REDUCTION_METHOD_T retval;
    int team_size;

    KMP_DEBUG_ASSERT( loc );
    KMP_DEBUG_ASSERT( lck );

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
#define FAST_REDUCTION_TREE_METHOD_GENERATED   ( ( reduce_data ) && ( reduce_func ) )

    retval = critical_reduce_block;

    team_size = __kmp_get_team_num_threads( global_tid );

    if( team_size == 1 ) {

        retval = empty_reduce_block;

    } else {

        int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
        int tree_available   = FAST_REDUCTION_TREE_METHOD_GENERATED;

#if KMP_ARCH_X86_64

    #if KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_DARWIN
        #if KMP_MIC
            #define REDUCTION_TEAMSIZE_CUTOFF 8
        #else
            #define REDUCTION_TEAMSIZE_CUTOFF 4
        #endif
        if( tree_available ) {
            if( team_size <= REDUCTION_TEAMSIZE_CUTOFF ) {
                if ( atomic_available ) {
                    retval = atomic_reduce_block;
                }
            } else {
                retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
            }
        } else if ( atomic_available ) {
            retval = atomic_reduce_block;
        }
    #else
        #error "Unknown or unsupported OS"
    #endif // KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_DARWIN

#elif KMP_ARCH_X86 || KMP_ARCH_ARM

    #if KMP_OS_LINUX || KMP_OS_WINDOWS
        // basic tuning
        if( atomic_available ) {
            if( num_vars <= 2 ) {
                retval = atomic_reduce_block;
            }
        } // otherwise: use the critical-section method

    #elif KMP_OS_DARWIN
        if( atomic_available && ( num_vars <= 3 ) ) {
            retval = atomic_reduce_block;
        } else if( tree_available ) {
            if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) &&
                ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
                retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
            }
        } // otherwise: use the critical-section method
    #else
        #error "Unknown or unsupported OS"
    #endif

#else
    #error "Unknown or unsupported architecture"
#endif

    }

    // A method forced via KMP_FORCE_REDUCTION overrides the choice above.
    if( __kmp_force_reduction_method != reduction_method_not_defined ) {

        PACKED_REDUCTION_METHOD_T forced_retval;
        int atomic_available, tree_available;

        switch( ( forced_retval = __kmp_force_reduction_method ) )
        {
            case critical_reduce_block:
                if( team_size <= 1 ) {
                    forced_retval = empty_reduce_block;
                }
                break;

            case atomic_reduce_block:
                atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
                KMP_ASSERT( atomic_available );
                break;

            case tree_reduce_block:
                tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
                KMP_ASSERT( tree_available );
#if KMP_FAST_REDUCTION_BARRIER
                forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
                break;

            default:
                KMP_ASSERT( 0 );    // unsupported method specified
        }

        retval = forced_retval;
    }

    KA_TRACE(10, ("reduction method selected=%08x\n", retval ) );

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

    return ( retval );
}
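/*
    Example walk-through (platform and team sizes assumed for illustration): on
    an x86_64 Linux build where the compiler emitted both the atomic and the
    tree reduction code (so atomic_available and tree_available are nonzero),

        team_size == 1   ->  empty_reduce_block
        team_size == 4   ->  atomic_reduce_block             (<= cutoff, atomics win)
        team_size == 16  ->  TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER

    If neither fast path was generated, the default critical_reduce_block stands.
*/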
kmp_int32
__kmp_get_reduce_method( void )
{
    return ( ( __kmp_entry_thread() -> th.th_local.packed_reduction_method ) >> 8 );
}
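/*
    Note (descriptive, inferred from the shift above): th_local.packed_reduction_method
    keeps the barrier variant in its low bits and the reduction method proper in the
    higher bits, so shifting right by 8 strips the barrier part and returns just the
    method; this accessor exists mainly for testing the set/get/determine logic.
*/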