#include "kmp_error.h"
#include "kmp_stats.h"

#define MAX_MESSAGE 512
    if (__kmp_ignore_mppbeg() == FALSE) {
        __kmp_internal_begin();
        KC_TRACE( 10, ("__kmpc_begin: called\n" ) );

    if (__kmp_ignore_mppend() == FALSE) {
        KC_TRACE( 10, ("__kmpc_end: called\n" ) );
        KA_TRACE( 30, ("__kmpc_end\n" ));
        __kmp_internal_end_thread( -1 );

    kmp_int32 gtid = __kmp_entry_gtid();
    KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) );

    KC_TRACE( 10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_nth ) );
    return TCR_4(__kmp_nth);

    KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) );
    return __kmp_tid_from_gtid( __kmp_entry_gtid() );

    KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) );
    return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
    if (__kmp_par_range == 0) {

    semi2 = strchr(semi2, ';');

    semi2 = strchr(semi2 + 1, ';');

    if (__kmp_par_range_filename[0]) {
        const char *name = semi2 - 1;
        while ((name > loc->psource) && (*name != '/') && (*name != ';')) {

        if ((*name == '/') || (*name == ';')) {

        if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
            return __kmp_par_range < 0;

    semi3 = strchr(semi2 + 1, ';');
    if (__kmp_par_range_routine[0]) {
        if ((semi3 != NULL) && (semi3 > semi2)
            && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
            return __kmp_par_range < 0;

    if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
        if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
            return __kmp_par_range > 0;

        return __kmp_par_range < 0;
    return __kmp_entry_thread() -> th.th_root -> r.r_active;

    KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                   global_tid, num_threads ) );

    __kmp_push_num_threads( loc, global_tid, num_threads );

__kmpc_pop_num_threads( ident_t *loc, kmp_int32 global_tid )

    KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );

__kmpc_push_proc_bind( ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )

    KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
                   global_tid, proc_bind ) );

    __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
    int gtid = __kmp_entry_gtid();

    va_start( ap, microtask );

#if INCLUDE_SSC_MARKS

    __kmp_fork_call( loc, gtid, fork_context_intel,
                     VOLATILE_CAST(microtask_t) microtask,
                     VOLATILE_CAST(launch_t)    __kmp_invoke_task_func,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
#if INCLUDE_SSC_MARKS

    __kmp_join_call( loc, gtid );
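/*
  Illustrative sketch only (not part of the runtime): roughly how a compiler
  is expected to lower "#pragma omp parallel" onto __kmpc_fork_call.  The
  outlined routine name and the shared variable are hypothetical.

      void __outlined_body( kmp_int32 *gtid, kmp_int32 *btid, int *shared_x )
      {
          // ... body of the parallel region, executed by every thread ...
      }

      int x = 0;
      __kmpc_fork_call( &loc, 1, (kmpc_micro)__outlined_body, &x );
*/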
    KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
                   global_tid, num_teams, num_threads ) );

    __kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
    int gtid = __kmp_entry_gtid();
    kmp_info_t *this_thr = __kmp_threads[ gtid ];

    va_start( ap, microtask );

    this_thr->th.th_teams_microtask = microtask;
    this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level;

    if ( this_thr->th.th_teams_size.nteams == 0 ) {
        __kmp_push_num_teams( loc, gtid, 0, 0 );

    KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
    KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
    KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

    __kmp_fork_call( loc, gtid, fork_context_intel,
                     VOLATILE_CAST(microtask_t) __kmp_teams_master,
                     VOLATILE_CAST(launch_t)    __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX

    __kmp_join_call( loc, gtid );
    this_thr->th.th_teams_microtask = NULL;
    this_thr->th.th_teams_level = 0;
    *(kmp_int64*)(&this_thr->th.th_teams_size) = 0L;
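/*
  Illustrative sketch only: a plausible lowering of
  "#pragma omp teams num_teams(4) thread_limit(8)" onto the entry points
  above.  The outlined routine name is hypothetical.

      void __outlined_teams_body( kmp_int32 *gtid, kmp_int32 *btid )
      {
          // ... body executed by the master thread of each team ...
      }

      kmp_int32 gtid = __kmpc_global_thread_num( &loc );
      __kmpc_push_num_teams( &loc, gtid, 4, 8 );
      __kmpc_fork_teams( &loc, 0, (kmpc_micro)__outlined_teams_body );
*/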
__kmpc_invoke_task_func( int gtid )

    return __kmp_invoke_task_func( gtid );

    __kmp_serialized_parallel(loc, global_tid);
    kmp_internal_control_t *top;
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr    = __kmp_threads[ global_tid ];
    serial_team = this_thr->th.th_serial_team;

    kmp_task_team_t * task_team = this_thr->th.th_task_team;

    if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks )
        __kmp_task_team_wait(this_thr, serial_team, NULL );

    KMP_DEBUG_ASSERT( serial_team );
    KMP_ASSERT( serial_team -> t.t_serialized );
    KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
    KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
    KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
    KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );

    top = serial_team -> t.t_control_stack_top;
    if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
        copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top );
        serial_team -> t.t_control_stack_top = top -> next;

    serial_team -> t.t_level--;

    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);

    dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free( disp_buffer );

    -- serial_team -> t.t_serialized;
    if ( serial_team -> t.t_serialized == 0 ) {

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
        if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
            __kmp_load_mxcsr( &serial_team->t.t_mxcsr );

        this_thr -> th.th_team           = serial_team -> t.t_parent;
        this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;

        this_thr -> th.th_team_nproc      = serial_team -> t.t_parent -> t.t_nproc;
        this_thr -> th.th_team_master     = serial_team -> t.t_parent -> t.t_threads[0];
        this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;

        this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
            t.t_dispatch[ serial_team -> t.t_master_tid ];

        __kmp_pop_current_task_from_thread( this_thr );

        KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
        this_thr -> th.th_current_task -> td_flags.executing = 1;

        if ( __kmp_tasking_mode != tskm_immediate_exec ) {

            this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
            KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
                          global_tid, this_thr->th.th_task_team, this_thr->th.th_team));

        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
            KA_TRACE( 20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
                           global_tid, serial_team, serial_team -> t.t_serialized ) );

    kmp_uint64 cur_time = 0;

    if ( __itt_get_timestamp_ptr ) {
        cur_time = __itt_get_timestamp();

    if ( this_thr->th.th_team->t.t_level == 0
         && this_thr->th.th_teams_microtask == NULL

        this_thr->th.th_ident = loc;
        if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
             ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )

            __kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized,
                                    cur_time, 0, loc, this_thr->th.th_team_nproc, 0 );
        if ( __kmp_forkjoin_frames_mode == 3 )

            __kmp_itt_frame_submit( global_tid, serial_team->t.t_region_time,
                                    cur_time, 0, loc, this_thr->th.th_team_nproc, 2 );
    } else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
                ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )

        __kmp_itt_region_joined( global_tid, 1 );

    if ( __kmp_env_consistency_check )
        __kmp_pop_parallel( global_tid, NULL );
    KC_TRACE( 10, ("__kmpc_flush: called\n" ) );

#if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )

    if ( ! __kmp_cpuinfo.initialized ) {
        __kmp_query_cpuid( & __kmp_cpuinfo );

    if ( ! __kmp_cpuinfo.sse2 ) {

#if KMP_COMPILER_ICC || KMP_COMPILER_MSVC

    __sync_synchronize();
#endif // KMP_COMPILER_ICC

#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64)

#error Unknown or unsupported architecture
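/*
  Illustrative sketch only: "#pragma omp flush" is expected to lower to a
  single runtime call,

      __kmpc_flush( &loc );

  which emits the architecture-appropriate memory fence selected above.
*/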
    int explicit_barrier_flag;
    KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    if ( __kmp_env_consistency_check ) {

            KMP_WARNING( ConstructIdentInvalid );

        __kmp_check_barrier( global_tid, ct_barrier, loc );

    __kmp_threads[ global_tid ]->th.th_ident = loc;

    __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
    KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    if( KMP_MASTER_GTID( global_tid ))

    if ( __kmp_env_consistency_check ) {
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 );

            __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 );

            __kmp_push_sync( global_tid, ct_master, loc, NULL );

            __kmp_check_sync( global_tid, ct_master, loc, NULL );

    KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );

    KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));

    if ( __kmp_env_consistency_check ) {

            KMP_WARNING( ThreadIdentInvalid );

        if( KMP_MASTER_GTID( global_tid ))
            __kmp_pop_sync( global_tid, ct_master, loc );
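/*
  Illustrative sketch only: the expected lowering of "#pragma omp master".
  __kmpc_master returns 1 on the thread that should execute the block.

      if ( __kmpc_master( &loc, gtid ) ) {
          // ... master-only code ...
          __kmpc_end_master( &loc, gtid );
      }
*/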
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    __kmp_itt_ordered_prep( gtid );

    th = __kmp_threads[ gtid ];

    if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
        (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );

        __kmp_parallel_deo( & gtid, & cid, loc );

    __kmp_itt_ordered_start( gtid );

    KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );

    __kmp_itt_ordered_end( gtid );

    th = __kmp_threads[ gtid ];

    if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
        (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );

        __kmp_parallel_dxo( & gtid, & cid, loc );
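/*
  Illustrative sketch only: inside an ordered loop the compiler brackets the
  ordered block with these calls, once per iteration on the executing thread:

      __kmpc_ordered( &loc, gtid );
      // ... code that must run in iteration order ...
      __kmpc_end_ordered( &loc, gtid );
*/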
#if KMP_USE_DYNAMIC_LOCK

static __forceinline kmp_indirect_lock_t *
__kmp_get_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_dyna_lockseq_t seq)

    kmp_indirect_lock_t **lck, *ret;
    lck = (kmp_indirect_lock_t **)crit;
    ret = (kmp_indirect_lock_t *)TCR_PTR(*lck);

        kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq);
        kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);

        DYNA_I_LOCK_FUNC(ilk, init)(ilk->lock);
        DYNA_SET_I_LOCK_LOCATION(ilk, loc);
        DYNA_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
        KA_TRACE(20, ("__kmp_get_indirect_csptr: initialized indirect lock #%d\n", tag));

        __kmp_itt_critical_creating(ilk->lock, loc);

        int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk);

            __kmp_itt_critical_destroyed(ilk->lock);

            ret = (kmp_indirect_lock_t *)TCR_PTR(*lck);
            KMP_DEBUG_ASSERT(ret != NULL);

#define DYNA_ACQUIRE_TAS_LOCK(lock, gtid) {                                                               \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                                           \
    if (l->lk.poll != DYNA_LOCK_FREE(tas) ||                                                              \
        ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas))) { \
        KMP_FSYNC_PREPARE(l);                                                                             \
        KMP_INIT_YIELD(spins);                                                                            \
        if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {                     \
            KMP_YIELD_SPIN(spins);                                                                        \
        while (l->lk.poll != DYNA_LOCK_FREE(tas) ||                                                       \
               ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas))) { \
            if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {                 \
                KMP_YIELD_SPIN(spins);                                                                    \
    KMP_FSYNC_ACQUIRED(l);                                                                                \

#define DYNA_TEST_TAS_LOCK(lock, gtid, rc) {                                                              \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                                           \
    rc = l->lk.poll == DYNA_LOCK_FREE(tas) &&                                                             \
         KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas));    \

#define DYNA_RELEASE_TAS_LOCK(lock, gtid) {                                                               \
    TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, DYNA_LOCK_FREE(tas));                                        \

# include <sys/syscall.h>
# define FUTEX_WAIT 0
# define FUTEX_WAKE 1

#define DYNA_ACQUIRE_FUTEX_LOCK(lock, gtid) {                                                             \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                                                     \
    kmp_int32 gtid_code = (gtid+1) << 1;                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                                               \
    kmp_int32 poll_val;                                                                                   \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex),                \
                                                   DYNA_LOCK_BUSY(gtid_code, futex))) != DYNA_LOCK_FREE(futex)) { \
        kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1;                                                   \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex))) { \
            poll_val |= DYNA_LOCK_BUSY(1, futex);                                                         \
        if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) {      \
    KMP_FSYNC_ACQUIRED(ftx);                                                                              \

#define DYNA_TEST_FUTEX_LOCK(lock, gtid, rc) {                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                                                     \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1)) { \
        KMP_FSYNC_ACQUIRED(ftx);                                                                          \

#define DYNA_RELEASE_FUTEX_LOCK(lock, gtid) {                                                             \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                                                     \
    KMP_FSYNC_RELEASING(ftx);                                                                             \
    kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex));                        \
    if (DYNA_LOCK_STRIP(poll_val) & 1) {                                                                  \
        syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0);        \
    KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));                    \

#endif // DYNA_HAS_FUTEX
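/*
  Illustrative sketch only (standard C11 atomics, not the runtime's own
  primitives): the test-and-set fast path implemented by the macros above
  amounts to the following pattern.

      #include <stdatomic.h>

      static atomic_int poll_word;        // 0 == free, otherwise owner gtid + 1

      static void tas_acquire( int gtid ) {
          int expected = 0;
          while ( !atomic_compare_exchange_weak( &poll_word, &expected, gtid + 1 ) ) {
              expected = 0;               // reset and spin until the lock is free
          }
      }

      static void tas_release( void ) {
          atomic_store( &poll_word, 0 );
      }
*/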
#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p
__kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )

    kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

    kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );

        lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
        __kmp_init_user_lock_with_checks( lck );
        __kmp_set_user_lock_location( lck, loc );

        __kmp_itt_critical_creating( lck );

        int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );

            __kmp_itt_critical_destroyed( lck );

            __kmp_destroy_user_lock_with_checks( lck );
            __kmp_user_lock_free( &idx, gtid, lck );
            lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
            KMP_DEBUG_ASSERT( lck != NULL );

#endif // KMP_USE_DYNAMIC_LOCK
    kmp_user_lock_p lck;

    KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );

#if KMP_USE_DYNAMIC_LOCK

    if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
        lck = (kmp_user_lock_p)crit;

        if (*((kmp_dyna_lock_t *)lck) == 0) {
            KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)lck, 0, DYNA_GET_D_TAG(__kmp_user_lock_seq));

        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);

        __kmp_itt_critical_acquiring(lck);

# if DYNA_USE_FAST_TAS
        if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
            DYNA_ACQUIRE_TAS_LOCK(lck, global_tid);
# elif DYNA_USE_FAST_FUTEX
        if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
            DYNA_ACQUIRE_FUTEX_LOCK(lck, global_tid);

            DYNA_D_LOCK_FUNC(lck, set)((kmp_dyna_lock_t *)lck, global_tid);

        kmp_indirect_lock_t *ilk = __kmp_get_indirect_csptr(crit, loc, global_tid, __kmp_user_lock_seq);

        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);

        __kmp_itt_critical_acquiring(lck);

        DYNA_I_LOCK_FUNC(ilk, set)(lck, global_tid);

#else // KMP_USE_DYNAMIC_LOCK

    KMP_CHECK_USER_LOCK_INIT();

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }

        lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );

    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_critical, loc, lck );

    __kmp_itt_critical_acquiring( lck );

    __kmp_acquire_user_lock_with_checks( lck, global_tid );

#endif // KMP_USE_DYNAMIC_LOCK

    __kmp_itt_critical_acquired( lck );

    KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
    kmp_user_lock_p lck;

    KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));

#if KMP_USE_DYNAMIC_LOCK
    if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
        lck = (kmp_user_lock_p)crit;
        KMP_ASSERT(lck != NULL);
        if (__kmp_env_consistency_check) {
            __kmp_pop_sync(global_tid, ct_critical, loc);

        __kmp_itt_critical_releasing( lck );

# if DYNA_USE_FAST_TAS
        if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
            DYNA_RELEASE_TAS_LOCK(lck, global_tid);
# elif DYNA_USE_FAST_FUTEX
        if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
            DYNA_RELEASE_FUTEX_LOCK(lck, global_tid);

            DYNA_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);

        kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
        KMP_ASSERT(ilk != NULL);

        if (__kmp_env_consistency_check) {
            __kmp_pop_sync(global_tid, ct_critical, loc);

        __kmp_itt_critical_releasing( lck );

        DYNA_I_LOCK_FUNC(ilk, unset)(lck, global_tid);

#else // KMP_USE_DYNAMIC_LOCK

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
        lck = (kmp_user_lock_p)crit;
    }

        lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));

    KMP_ASSERT(lck != NULL);

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( global_tid, ct_critical, loc );

    __kmp_itt_critical_releasing( lck );

    __kmp_release_user_lock_with_checks( lck, global_tid );

#endif // KMP_USE_DYNAMIC_LOCK

    KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
    KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    if ( __kmp_env_consistency_check )
        __kmp_check_barrier( global_tid, ct_barrier, loc );

    __kmp_threads[global_tid]->th.th_ident = loc;

    status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );

    return (status != 0) ? 0 : 1;

    KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid ));

    __kmp_end_split_barrier ( bs_plain_barrier, global_tid );
    KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));

    if (! TCR_4(__kmp_init_parallel))
        __kmp_parallel_initialize();

    if ( __kmp_env_consistency_check ) {

            KMP_WARNING( ConstructIdentInvalid );

        __kmp_check_barrier( global_tid, ct_barrier, loc );

    __kmp_threads[global_tid]->th.th_ident = loc;

    __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

    if ( __kmp_env_consistency_check ) {

        if ( global_tid < 0 ) {
            KMP_WARNING( ThreadIdentInvalid );

        __kmp_pop_sync( global_tid, ct_master, loc );
    kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );

    __kmp_exit_single( global_tid );
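/*
  Illustrative sketch only: the expected lowering of "#pragma omp single"
  (without a copyprivate clause).  __kmpc_single returns 1 on the one thread
  that executes the block.

      if ( __kmpc_single( &loc, gtid ) ) {
          // ... single-thread code ...
          __kmpc_end_single( &loc, gtid );
      }
      __kmpc_barrier( &loc, gtid );   // implicit barrier unless nowait
*/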
    KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

    if ( __kmp_env_consistency_check )
        __kmp_pop_workshare( global_tid, ct_pdo, loc );
ompc_set_num_threads( int arg )

    __kmp_set_num_threads( arg, __kmp_entry_gtid() );

ompc_set_dynamic( int flag )

    thread = __kmp_entry_thread();

    __kmp_save_internal_controls( thread );

    set__dynamic( thread, flag ? TRUE : FALSE );

ompc_set_nested( int flag )

    thread = __kmp_entry_thread();

    __kmp_save_internal_controls( thread );

    set__nested( thread, flag ? TRUE : FALSE );

ompc_set_max_active_levels( int max_active_levels )

    __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels );

ompc_set_schedule( omp_sched_t kind, int modifier )

    __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );

ompc_get_ancestor_thread_num( int level )

    return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level );

ompc_get_team_size( int level )

    return __kmp_get_team_size( __kmp_entry_gtid(), level );

kmpc_set_stacksize( int arg )

    __kmp_aux_set_stacksize( arg );

kmpc_set_stacksize_s( size_t arg )

    __kmp_aux_set_stacksize( arg );

kmpc_set_blocktime( int arg )

    gtid = __kmp_entry_gtid();
    tid = __kmp_tid_from_gtid(gtid);
    thread = __kmp_thread_from_gtid(gtid);

    __kmp_aux_set_blocktime( arg, thread, tid );

kmpc_set_library( int arg )

    __kmp_user_set_library( (enum library_type)arg );

kmpc_set_defaults( char const * str )

    __kmp_aux_set_defaults( str, KMP_STRLEN( str ) );

kmpc_set_affinity_mask_proc( int proc, void **mask )

#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED

    if ( ! TCR_4(__kmp_init_middle) ) {
        __kmp_middle_initialize();

    return __kmp_aux_set_affinity_mask_proc( proc, mask );

kmpc_unset_affinity_mask_proc( int proc, void **mask )

#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED

    if ( ! TCR_4(__kmp_init_middle) ) {
        __kmp_middle_initialize();

    return __kmp_aux_unset_affinity_mask_proc( proc, mask );

kmpc_get_affinity_mask_proc( int proc, void **mask )

#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED

    if ( ! TCR_4(__kmp_init_middle) ) {
        __kmp_middle_initialize();

    return __kmp_aux_get_affinity_mask_proc( proc, mask );
    KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid ));

    data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;

    if ( __kmp_env_consistency_check ) {

            KMP_WARNING( ConstructIdentInvalid );

    if (didit) *data_ptr = cpy_data;

    __kmp_threads[gtid]->th.th_ident = loc;

    __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );

    if (! didit) (*cpy_func)( cpy_data, *data_ptr );

    __kmp_threads[gtid]->th.th_ident = loc;

    __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
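/*
  Illustrative sketch only: a plausible lowering of
  "#pragma omp single copyprivate(x)".  The copy callback and variable are
  hypothetical; "didit" is 1 only on the thread that executed the single.

      static void copy_x( void *dst, void *src ) { *(int *)dst = *(int *)src; }

      int x;
      kmp_int32 didit = __kmpc_single( &loc, gtid );
      if ( didit ) {
          x = 42;                       // single-thread code produces x
          __kmpc_end_single( &loc, gtid );
      }
      __kmpc_copyprivate( &loc, gtid, sizeof(x), &x, copy_x, didit );
*/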
#define INIT_LOCK                 __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK          __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK              __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED        __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK       __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK              __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK       __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK                 __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK          __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK              __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK       __kmp_destroy_nested_user_lock_with_checks
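/*
  Illustrative sketch only: how the lock entry points below are expected to
  be driven (the user-level omp_*_lock routines funnel into them).  The
  pointer-sized slot stands in for the user's omp_lock_t storage.

      void *lk = NULL;                   // pointer-sized lock storage

      __kmpc_init_lock( &loc, gtid, &lk );
      __kmpc_set_lock( &loc, gtid, &lk );
      // ... code protected by the lock ...
      __kmpc_unset_lock( &loc, gtid, &lk );
      __kmpc_destroy_lock( &loc, gtid, &lk );
*/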
__kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK
    KMP_DEBUG_ASSERT(__kmp_init_serial);
    if (__kmp_env_consistency_check && user_lock == NULL) {
        KMP_FATAL(LockIsUninitialized, "omp_init_lock");

    if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
        DYNA_INIT_D_LOCK(user_lock, __kmp_user_lock_seq);

        __kmp_itt_lock_creating((kmp_user_lock_p)user_lock, NULL);

        DYNA_INIT_I_LOCK(user_lock, __kmp_user_lock_seq);
        kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
        DYNA_SET_I_LOCK_LOCATION(ilk, loc);

        __kmp_itt_lock_creating(ilk->lock, loc);

#else // KMP_USE_DYNAMIC_LOCK

    static char const * const func = "omp_init_lock";
    kmp_user_lock_p lck;
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( __kmp_env_consistency_check ) {
        if ( user_lock == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );

    KMP_CHECK_USER_LOCK_INIT();

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }

        lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );

    __kmp_set_user_lock_location( lck, loc );

    __kmp_itt_lock_creating( lck );

#endif // KMP_USE_DYNAMIC_LOCK
__kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK

    KMP_DEBUG_ASSERT(__kmp_init_serial);
    if (__kmp_env_consistency_check && user_lock == NULL) {
        KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");

    kmp_dyna_lockseq_t nested_seq;
    switch (__kmp_user_lock_seq) {
        case lockseq_tas:     nested_seq = lockseq_nested_tas;     break;
        case lockseq_futex:   nested_seq = lockseq_nested_futex;   break;
        case lockseq_ticket:  nested_seq = lockseq_nested_ticket;  break;
        case lockseq_queuing: nested_seq = lockseq_nested_queuing; break;
        case lockseq_drdpa:   nested_seq = lockseq_nested_drdpa;   break;
        default:              nested_seq = lockseq_nested_queuing; break;

    DYNA_INIT_I_LOCK(user_lock, nested_seq);

    kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
    DYNA_SET_I_LOCK_LOCATION(ilk, loc);

    __kmp_itt_lock_creating(ilk->lock, loc);

#else // KMP_USE_DYNAMIC_LOCK

    static char const * const func = "omp_init_nest_lock";
    kmp_user_lock_p lck;
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( __kmp_env_consistency_check ) {
        if ( user_lock == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );

    KMP_CHECK_USER_LOCK_INIT();

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
                   <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }

        lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );

    INIT_NESTED_LOCK( lck );
    __kmp_set_user_lock_location( lck, loc );

    __kmp_itt_lock_creating( lck );

#endif // KMP_USE_DYNAMIC_LOCK
__kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;
    if (DYNA_EXTRACT_D_TAG(user_lock) == 0) {
        lck = ((kmp_indirect_lock_t *)DYNA_LOOKUP_I_LOCK(user_lock))->lock;

        lck = (kmp_user_lock_p)user_lock;

    __kmp_itt_lock_destroyed(lck);

    DYNA_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }

        lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );

    __kmp_itt_lock_destroyed( lck );

    DESTROY_LOCK( lck );

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {

        __kmp_user_lock_free( user_lock, gtid, lck );

#endif // KMP_USE_DYNAMIC_LOCK
__kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK

    kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
    __kmp_itt_lock_destroyed(ilk->lock);

    DYNA_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
                   <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }

        lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );

    __kmp_itt_lock_destroyed( lck );

    DESTROY_NESTED_LOCK( lck );

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
                   <= OMP_NEST_LOCK_T_SIZE ) ) {

        __kmp_user_lock_free( user_lock, gtid, lck );

#endif // KMP_USE_DYNAMIC_LOCK
__kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {

#if KMP_USE_DYNAMIC_LOCK
    int tag = DYNA_EXTRACT_D_TAG(user_lock);

    __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

# if DYNA_USE_FAST_TAS
    if (tag == locktag_tas && !__kmp_env_consistency_check) {
        DYNA_ACQUIRE_TAS_LOCK(user_lock, gtid);
# elif DYNA_USE_FAST_FUTEX
    if (tag == locktag_futex && !__kmp_env_consistency_check) {
        DYNA_ACQUIRE_FUTEX_LOCK(user_lock, gtid);

        __kmp_direct_set_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);

    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }

        lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );

    __kmp_itt_lock_acquiring( lck );

    ACQUIRE_LOCK( lck, gtid );

    __kmp_itt_lock_acquired( lck );

#endif // KMP_USE_DYNAMIC_LOCK
__kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK

    __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

    DYNA_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);

    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#else // KMP_USE_DYNAMIC_LOCK
    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
                   <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }

        lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );

    __kmp_itt_lock_acquiring( lck );

    ACQUIRE_NESTED_LOCK( lck, gtid );

    __kmp_itt_lock_acquired( lck );

#endif // KMP_USE_DYNAMIC_LOCK
__kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )

#if KMP_USE_DYNAMIC_LOCK

    int tag = DYNA_EXTRACT_D_TAG(user_lock);

    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);

# if DYNA_USE_FAST_TAS
    if (tag == locktag_tas && !__kmp_env_consistency_check) {
        DYNA_RELEASE_TAS_LOCK(user_lock, gtid);
# elif DYNA_USE_FAST_FUTEX
    if (tag == locktag_futex && !__kmp_env_consistency_check) {
        DYNA_RELEASE_FUTEX_LOCK(user_lock, gtid);

        __kmp_direct_unset_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

        __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );

        TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);

        lck = (kmp_user_lock_p)user_lock;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }

        lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );

    __kmp_itt_lock_releasing( lck );

    RELEASE_LOCK( lck, gtid );

#endif // KMP_USE_DYNAMIC_LOCK
__kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )

#if KMP_USE_DYNAMIC_LOCK

    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);

    DYNA_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

        kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;

        __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );

        if ( --(tl->lk.depth_locked) == 0 ) {
            TCW_4(tl->lk.poll, 0);

        lck = (kmp_user_lock_p)user_lock;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
                   <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }

        lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );

    __kmp_itt_lock_releasing( lck );

    RELEASE_NESTED_LOCK( lck, gtid );

#endif // KMP_USE_DYNAMIC_LOCK
__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )

#if KMP_USE_DYNAMIC_LOCK

    int tag = DYNA_EXTRACT_D_TAG(user_lock);

    __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

# if DYNA_USE_FAST_TAS
    if (tag == locktag_tas && !__kmp_env_consistency_check) {
        DYNA_TEST_TAS_LOCK(user_lock, gtid, rc);
# elif DYNA_USE_FAST_FUTEX
    if (tag == locktag_futex && !__kmp_env_consistency_check) {
        DYNA_TEST_FUTEX_LOCK(user_lock, gtid, rc);

        rc = __kmp_direct_test_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);

        __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

        __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }

        lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );

    __kmp_itt_lock_acquiring( lck );

    rc = TEST_LOCK( lck, gtid );

        __kmp_itt_lock_acquired( lck );

        __kmp_itt_lock_cancelled( lck );

    return ( rc ? FTN_TRUE : FTN_FALSE );

#endif // KMP_USE_DYNAMIC_LOCK
__kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )

#if KMP_USE_DYNAMIC_LOCK

    __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

    rc = DYNA_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);

        __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

        __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
                   <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }

        lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );

    __kmp_itt_lock_acquiring( lck );

    rc = TEST_NESTED_LOCK( lck, gtid );

        __kmp_itt_lock_acquired( lck );

        __kmp_itt_lock_cancelled( lck );

#endif // KMP_USE_DYNAMIC_LOCK
#define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
    ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) )

#define __KMP_GET_REDUCTION_METHOD(gtid) \
    ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method )

static __forceinline void
__kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {

    kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

    if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
        lck = (kmp_user_lock_p)crit;
        if (*((kmp_dyna_lock_t *)lck) == 0) {
            KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)lck, 0, DYNA_GET_D_TAG(__kmp_user_lock_seq));

        KMP_DEBUG_ASSERT(lck != NULL);
        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);

        DYNA_D_LOCK_FUNC(lck, set)((kmp_dyna_lock_t *)lck, global_tid);

        kmp_indirect_lock_t *ilk = __kmp_get_indirect_csptr(crit, loc, global_tid, __kmp_user_lock_seq);
        KMP_DEBUG_ASSERT(ilk != NULL);
        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, ilk->lock, __kmp_user_lock_seq);

        DYNA_I_LOCK_FUNC(ilk, set)(ilk->lock, global_tid);

#else // KMP_USE_DYNAMIC_LOCK

    if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
        lck = (kmp_user_lock_p)crit;

        lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );

    KMP_DEBUG_ASSERT( lck != NULL );

    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_critical, loc, lck );

    __kmp_acquire_user_lock_with_checks( lck, global_tid );

#endif // KMP_USE_DYNAMIC_LOCK
static __forceinline void
__kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {

    kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

    if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
        lck = (kmp_user_lock_p)crit;
        if (__kmp_env_consistency_check)
            __kmp_pop_sync(global_tid, ct_critical, loc);
        DYNA_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);

        kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
        if (__kmp_env_consistency_check)
            __kmp_pop_sync(global_tid, ct_critical, loc);
        DYNA_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);

#else // KMP_USE_DYNAMIC_LOCK

    if ( __kmp_base_user_lock_size > 32 ) {
        lck = *( (kmp_user_lock_p *) crit );
        KMP_ASSERT( lck != NULL );

        lck = (kmp_user_lock_p) crit;

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( global_tid, ct_critical, loc );

    __kmp_release_user_lock_with_checks( lck, global_tid );

#endif // KMP_USE_DYNAMIC_LOCK
    ident_t *loc, kmp_int32 global_tid,
    kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
    void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck ) {

    PACKED_REDUCTION_METHOD_T packed_reduction_method;

    int teams_swapped = 0, task_state;

    KA_TRACE( 10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

#if KMP_USE_DYNAMIC_LOCK
    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );

    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_reduce, loc, NULL );

    th = __kmp_thread_from_gtid(global_tid);
    if( th->th.th_teams_microtask ) {
        team = th->th.th_team;
        if( team->t.t_level == th->th.th_teams_level ) {

            KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid);

            th->th.th_info.ds.ds_tid = team->t.t_master_tid;
            th->th.th_team = team->t.t_parent;
            th->th.th_team_nproc = th->th.th_team->t.t_nproc;
            th->th.th_task_team = th->th.th_team->t.t_task_team[0];
            task_state = th->th.th_task_state;
            th->th.th_task_state = 0;

#endif // OMP_40_ENABLED

    packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
    __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );

    if( packed_reduction_method == critical_reduce_block ) {

        __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );

    } else if( packed_reduction_method == empty_reduce_block ) {

    } else if( packed_reduction_method == atomic_reduce_block ) {

        if ( __kmp_env_consistency_check )
            __kmp_pop_sync( global_tid, ct_reduce, loc );

    } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {

        __kmp_threads[global_tid]->th.th_ident = loc;

        retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
        retval = ( retval != 0 ) ? ( 0 ) : ( 1 );

        if ( __kmp_env_consistency_check ) {

                __kmp_pop_sync( global_tid, ct_reduce, loc );

    if( teams_swapped ) {

        th->th.th_info.ds.ds_tid = 0;
        th->th.th_team = team;
        th->th.th_team_nproc = team->t.t_nproc;
        th->th.th_task_team = team->t.t_task_team[task_state];
        th->th.th_task_state = task_state;

    KA_TRACE( 10, ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
    PACKED_REDUCTION_METHOD_T packed_reduction_method;

    KA_TRACE( 10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );

    packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );

    if( packed_reduction_method == critical_reduce_block ) {

        __kmp_end_critical_section_reduce_block( loc, global_tid, lck );

    } else if( packed_reduction_method == empty_reduce_block ) {

    } else if( packed_reduction_method == atomic_reduce_block ) {

    } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( global_tid, ct_reduce, loc );

    KA_TRACE( 10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
    ident_t *loc, kmp_int32 global_tid,
    kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
    void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck )

    PACKED_REDUCTION_METHOD_T packed_reduction_method;

    KA_TRACE( 10, ("__kmpc_reduce() enter: called T#%d\n", global_tid ) );

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

#if KMP_USE_DYNAMIC_LOCK
    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );

    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_reduce, loc, NULL );

    packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
    __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );

    if( packed_reduction_method == critical_reduce_block ) {

        __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );

    } else if( packed_reduction_method == empty_reduce_block ) {

    } else if( packed_reduction_method == atomic_reduce_block ) {

    } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {

        __kmp_threads[global_tid]->th.th_ident = loc;

        retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
        retval = ( retval != 0 ) ? ( 0 ) : ( 1 );

        if ( __kmp_env_consistency_check ) {

                __kmp_pop_sync( global_tid, ct_reduce, loc );

    KA_TRACE( 10, ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
    PACKED_REDUCTION_METHOD_T packed_reduction_method;

    KA_TRACE( 10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );

    packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );

    if( packed_reduction_method == critical_reduce_block ) {

        __kmp_end_critical_section_reduce_block( loc, global_tid, lck );

        __kmp_threads[global_tid]->th.th_ident = loc;

        __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

    } else if( packed_reduction_method == empty_reduce_block ) {

        __kmp_threads[global_tid]->th.th_ident = loc;

        __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

    } else if( packed_reduction_method == atomic_reduce_block ) {

        __kmp_threads[global_tid]->th.th_ident = loc;

        __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

    } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {

        __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( global_tid, ct_reduce, loc );

    KA_TRACE( 10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );

#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD
__kmpc_get_taskid() {

    kmp_info_t * thread;

    gtid = __kmp_get_gtid();

    thread = __kmp_thread_from_gtid( gtid );
    return thread->th.th_current_task->td_task_id;

__kmpc_get_parent_taskid() {

    kmp_info_t * thread;
    kmp_taskdata_t * parent_task;

    gtid = __kmp_get_gtid();

    thread = __kmp_thread_from_gtid( gtid );
    parent_task = thread->th.th_current_task->td_parent;
    return ( parent_task == NULL ? 0 : parent_task->td_task_id );
void __kmpc_place_threads(int nC, int nT, int nO)

    if ( ! __kmp_init_serial ) {
        __kmp_serial_initialize();

    __kmp_place_num_cores = nC;
    __kmp_place_num_threads_per_core = nT;
    __kmp_place_core_offset = nO;
/*
  Entry points and macros referenced above, collected here for reference:
*/

typedef void (*kmpc_micro)( kmp_int32 *global_tid, kmp_int32 *bound_tid, ... );

void      __kmpc_begin( ident_t *loc, kmp_int32 flags );
void      __kmpc_end( ident_t *loc );
kmp_int32 __kmpc_global_thread_num( ident_t *loc );
kmp_int32 __kmpc_global_num_threads( ident_t *loc );
kmp_int32 __kmpc_bound_thread_num( ident_t *loc );
kmp_int32 __kmpc_bound_num_threads( ident_t *loc );
kmp_int32 __kmpc_ok_to_fork( ident_t *loc );
kmp_int32 __kmpc_in_parallel( ident_t *loc );
void      __kmpc_push_num_threads( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads );
void      __kmpc_push_num_teams( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads );
void      __kmpc_fork_call( ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ... );
void      __kmpc_fork_teams( ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ... );
void      __kmpc_serialized_parallel( ident_t *loc, kmp_int32 global_tid );
void      __kmpc_end_serialized_parallel( ident_t *loc, kmp_int32 global_tid );
void      __kmpc_flush( ident_t *loc );
void      __kmpc_barrier( ident_t *loc, kmp_int32 global_tid );
kmp_int32 __kmpc_master( ident_t *loc, kmp_int32 global_tid );
void      __kmpc_end_master( ident_t *loc, kmp_int32 global_tid );
void      __kmpc_ordered( ident_t *loc, kmp_int32 gtid );
void      __kmpc_end_ordered( ident_t *loc, kmp_int32 gtid );
void      __kmpc_critical( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit );
void      __kmpc_end_critical( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit );
kmp_int32 __kmpc_barrier_master( ident_t *loc, kmp_int32 global_tid );
void      __kmpc_end_barrier_master( ident_t *loc, kmp_int32 global_tid );
kmp_int32 __kmpc_barrier_master_nowait( ident_t *loc, kmp_int32 global_tid );
kmp_int32 __kmpc_single( ident_t *loc, kmp_int32 global_tid );
void      __kmpc_end_single( ident_t *loc, kmp_int32 global_tid );
void      __kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid );
void      __kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data,
                              void (*cpy_func)(void *, void *), kmp_int32 didit );
kmp_int32 __kmpc_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
                                void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
                                kmp_critical_name *lck );
void      __kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck );
kmp_int32 __kmpc_reduce( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
                         void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
                         kmp_critical_name *lck );
void      __kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck );

/*
  KMP_IDENT_AUTOPAR                - ident_t flag (see kmp.h).
  KMP_COUNT_BLOCK(name)            - increments the named statistics counter.
  KMP_TIME_BLOCK(name)             - uses the named timer to time the enclosing code block.
  KMP_START_EXPLICIT_TIMER(name) / KMP_STOP_EXPLICIT_TIMER(name)
                                   - start/stop an explicit timer; every start needs a matching stop.
*/