#include "kmp_error.h"
#include "kmp_stats.h"

#include "ompt-internal.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmp_ignore_mppbeg() returns TRUE.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

void __kmpc_end(ident_t *loc) {
  // By default __kmp_ignore_mppend() returns TRUE, making this call a no-op.
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));
    __kmp_internal_end_thread(-1);
  }
}
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;
}

kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  KMP_COUNT_BLOCK(OMP_PARALLEL);
#endif

  va_list ap;
  va_start(ap, microtask);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled) {
    kmp_info_t *master_th = __kmp_threads[gtid];
    kmp_team_t *parent_team = master_th->th.th_team;
    ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
    if (lwt)
      ompt_frame = &(lwt->ompt_task_info.frame);
    else {
      int tid = __kmp_tid_from_gtid(gtid);
      ompt_frame = &(
          parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
    }
    ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
  }
#endif

#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
#if OMPT_SUPPORT
                  VOLATILE_CAST(void *) microtask, // "unwrapped" task
#endif
                  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#endif
                  );
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  __kmp_join_call(loc, gtid);
}
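// Illustrative sketch (not part of the runtime): roughly how a compiler might
// lower "#pragma omp parallel" onto __kmpc_fork_call. The outlined function
// name and the argument layout below are hypothetical; real code generation
// differs in detail. Kept under "#if 0" so it is never compiled.
#if 0
static void outlined_parallel_body(kmp_int32 *gtid, kmp_int32 *btid, int *a) {
  // ... user-written parallel region body, operating on *a ...
}

void lowered_parallel_caller(ident_t *loc_ref, int a) {
  // one shared argument (argc == 1) is forwarded to each thread's invocation
  // of the outlined microtask
  __kmpc_fork_call(loc_ref, 1, (kmpc_micro)outlined_parallel_body, &a);
}
#endif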
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level;

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.reenter_runtime_frame =
        __builtin_frame_address(1);
  }
#endif

  // check if __kmpc_push_num_teams was called, set default numbers otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
#if OMPT_SUPPORT
                  VOLATILE_CAST(void *) microtask, // "unwrapped" task
#endif
                  VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped"
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#endif
                  );
  __kmp_join_call(loc, gtid);

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
}
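// Illustrative sketch (not part of the runtime): lowering of
// "#pragma omp teams num_teams(4) thread_limit(8)". The clause values are
// pushed first, then the outlined teams region is forked through
// __kmpc_fork_teams. Hypothetical names, kept under "#if 0".
#if 0
static void outlined_teams_body(kmp_int32 *gtid, kmp_int32 *btid, int *a) {
  // ... teams region body ...
}

void lowered_teams_region(ident_t *loc_ref, int a) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc_ref);
  __kmpc_push_num_teams(loc_ref, gtid, 4, 8);
  __kmpc_fork_teams(loc_ref, 1, (kmpc_micro)outlined_teams_body, &a);
}
#endif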
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  __kmp_serialized_parallel(loc, global_tid);
}
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

  // If necessary, pop the internal control stack values and replace the team
  // values.
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  serial_team->t.t_level--;

  // pop dispatch buffers stack
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

// return to the parallel section

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    // restore values cached in the thread
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc;
    this_thr->th.th_team_master = serial_team->t.t_parent->t.t_threads[0];
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
}
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
  // mfence is an SSE2 instruction; do not execute it if the CPU lacks SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
    KMP_MB();
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
  // Nothing to do here.
#else
#error Unknown or unsupported architecture
#endif
}

void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT && OMPT_TRACE
  ompt_frame_t *ompt_frame;
  if (ompt_enabled) {
    ompt_frame = __ompt_get_task_frame_internal(0);
    if (ompt_frame->reenter_runtime_frame == NULL)
      ompt_frame->reenter_runtime_frame = __builtin_frame_address(1);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_TRACE
  if (ompt_enabled) {
    ompt_frame->reenter_runtime_frame = NULL;
  }
#endif
}
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (KMP_MASTER_GTID(global_tid)) {
    status = 1;
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
  }

#if OMPT_SUPPORT && OMPT_TRACE
  if (status) {
    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_event_master_begin)(
          team->t.ompt_team_info.parallel_id,
          team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_TRACE
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_master_end)) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_event_master_end)(
        team->t.ompt_team_info.parallel_id,
        team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}
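// Illustrative sketch (not part of the runtime): the usual pairing of
// __kmpc_master / __kmpc_end_master that a compiler might emit for
// "#pragma omp master". Hypothetical names, kept under "#if 0".
#if 0
void lowered_master_region(ident_t *loc_ref, kmp_int32 gtid) {
  if (__kmpc_master(loc_ref, gtid)) {
    // ... body executed by the master thread only ...
    __kmpc_end_master(loc_ref, gtid); // only the thread that got 1 calls this
  }
}
#endif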
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_itt_ordered_prep(gtid);

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_TRACE
  if (ompt_enabled) {
    th->th.ompt_thread_info.wait_id = (uint64_t)loc;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
      ompt_callbacks.ompt_callback(ompt_event_wait_ordered)(
          th->th.ompt_thread_info.wait_id);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_TRACE
  if (ompt_enabled) {
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
      ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)(
          th->th.ompt_thread_info.wait_id);
    }
  }
#endif

  __kmp_itt_ordered_start(gtid);
}

void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

  __kmp_itt_ordered_end(gtid);

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_BLAME
  if (ompt_enabled &&
      ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
    ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
        th->th.ompt_thread_info.wait_id);
  }
#endif
}
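// Illustrative sketch (not part of the runtime): "#pragma omp ordered" inside
// an ordered worksharing loop brackets its body with __kmpc_ordered /
// __kmpc_end_ordered so iterations enter the block in sequential order.
// Hypothetical names, kept under "#if 0".
#if 0
void lowered_ordered_block(ident_t *loc_ref, kmp_int32 gtid) {
  __kmpc_ordered(loc_ref, gtid);
  // ... body executed in sequential iteration order ...
  __kmpc_end_ordered(loc_ref, gtid);
}
#endif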
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
  __kmp_itt_critical_creating(ilk->lock, loc);
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
    __kmp_itt_critical_destroyed(ilk->lock);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}
// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    if (l->lk.poll != KMP_LOCK_FREE(tas) ||                                    \
        !KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas),        \
                                     KMP_LOCK_BUSY(gtid + 1, tas))) {          \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      if (TCR_4(__kmp_nth) >                                                   \
          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {               \
        KMP_YIELD(TRUE);                                                       \
      } else {                                                                 \
        KMP_YIELD_SPIN(spins);                                                 \
      }                                                                        \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      while (l->lk.poll != KMP_LOCK_FREE(tas) ||                               \
             !KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas),   \
                                          KMP_LOCK_BUSY(gtid + 1, tas))) {     \
        __kmp_spin_backoff(&backoff);                                          \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
      }                                                                        \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    rc = l->lk.poll == KMP_LOCK_FREE(tas) &&                                   \
         KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas),        \
                                     KMP_LOCK_BUSY(gtid + 1, tas));            \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {    \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_YIELD(TCR_4(__kmp_nth) >                                               \
              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));            \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load does not need to be
  // volatile.
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
    __kmp_itt_critical_creating(lck);

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) { // Deallocate the lock and reload the value.
      __kmp_itt_critical_destroyed(lck);
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}
#endif // KMP_USE_DYNAMIC_LOCK

void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_TIME_PARTITIONED_BLOCK(OMP_critical_wait);
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  } else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_itt_critical_acquiring(lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

  __kmp_itt_critical_acquired(lck);

  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation.
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic.
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock.
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended.
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation.
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // Use HLE for speculation.
  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
}
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uintptr_t hint) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow
  // the normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
    __kmp_itt_critical_acquiring(lck);
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
    __kmp_itt_critical_acquiring(lck);
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

  __kmp_itt_critical_acquired(lck);

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK

void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
    __kmp_itt_critical_releasing(lck);
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
    __kmp_itt_critical_releasing(lck);
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  } else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_itt_critical_releasing(lck);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#if OMPT_SUPPORT && OMPT_BLAME
  if (ompt_enabled &&
      ompt_callbacks.ompt_callback(ompt_event_release_critical)) {
    ompt_callbacks.ompt_callback(ompt_event_release_critical)((uint64_t)lck);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;

  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

  __kmp_threads[global_tid]->th.th_ident = loc;
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);

  return (status != 0) ? 0 : 1;
}

void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));

  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;

  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

  __kmp_threads[global_tid]->th.th_ident = loc;
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);

  ret = __kmpc_master(loc, global_tid);

  if (__kmp_env_consistency_check) {
    // There is no __kmpc_end_master call, so the actions of __kmpc_end_master
    // are done here.
    if (global_tid < 0) {
      KMP_WARNING(ThreadIdentInvalid);
    }
    if (ret) {
      // Only one thread should do the pop since only one did the push (see
      // __kmpc_master()).
      __kmp_pop_sync(global_tid, ct_master, loc);
    }
  }

  return ret;
}
kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    // We are going to execute the single statement, so we should count it.
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_TRACE
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled) {
    if (rc) {
      if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) {
        ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)(
            team->t.ompt_team_info.parallel_id,
            team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
            team->t.ompt_team_info.microtask);
      }
    } else {
      if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) {
        ompt_callbacks.ompt_callback(ompt_event_single_others_begin)(
            team->t.ompt_team_info.parallel_id,
            team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
      }
      this_thr->th.ompt_thread_info.state = ompt_state_wait_single;
    }
  }
#endif

  return rc;
}

void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_TRACE
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled &&
      ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) {
    ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)(
        team->t.ompt_team_info.parallel_id,
        team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
  }
#endif
}
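// Illustrative sketch (not part of the runtime): lowering of
// "#pragma omp single". Only the thread for which __kmpc_single returns 1
// runs the body and calls __kmpc_end_single; the implicit barrier at the end
// of the construct is a separate __kmpc_barrier call unless nowait is given.
// Hypothetical names, kept under "#if 0".
#if 0
void lowered_single_region(ident_t *loc_ref, kmp_int32 gtid) {
  if (__kmpc_single(loc_ref, gtid)) {
    // ... body executed by exactly one thread ...
    __kmpc_end_single(loc_ref, gtid);
  }
  __kmpc_barrier(loc_ref, gtid); // omitted when the nowait clause is present
}
#endif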
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_TRACE
  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
    ompt_callbacks.ompt_callback(ompt_event_loop_end)(team_info->parallel_id,
                                                      task_info->task_id);
  }
#endif

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
}
void ompc_set_num_threads(int arg) {
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
}

void ompc_set_dynamic(int flag) {
  kmp_info_t *thread;

  // For the thread-private implementation of the internal controls.
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? TRUE : FALSE);
}

void ompc_set_nested(int flag) {
  kmp_info_t *thread;

  // For the thread-private implementation of the internal controls.
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__nested(thread, flag ? TRUE : FALSE);
}

void ompc_set_max_active_levels(int max_active_levels) {
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
}

void kmpc_set_stacksize(int arg) {
  // __kmp_aux_set_stacksize initializes the library if needed.
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_stacksize_s(size_t arg) {
  // __kmp_aux_set_stacksize initializes the library if needed.
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_blocktime(int arg) {
  int gtid, tid;
  kmp_info_t *thread;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);
}

void kmpc_set_library(int arg) {
  // __kmp_user_set_library initializes the library if needed.
  __kmp_user_set_library((enum library_type)arg);
}

void kmpc_set_defaults(char const *str) {
  // __kmp_aux_set_defaults initializes the library if needed.
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
}

void kmpc_set_disp_num_buffers(int arg) {
  // Ignore after initialization because some teams have already allocated
  // dispatch buffers.
  if (__kmp_init_serial == 0 && arg > 0)
    __kmp_dispatch_num_buffers = arg;
}

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  void **data_ptr;

  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  if (didit)
    *data_ptr = cpy_data;

  // This barrier is not a barrier region boundary.
  __kmp_threads[gtid]->th.th_ident = loc;
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  if (!didit)
    (*cpy_func)(cpy_data, *data_ptr);

  // Consider the next barrier a user-visible barrier for barrier region
  // boundaries.
  __kmp_threads[gtid]->th.th_ident = loc;
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
}
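// Illustrative sketch (not part of the runtime): how "single copyprivate(x)"
// might use __kmpc_copyprivate. Every thread passes the address of its own x
// plus a copy helper; the thread that executed the single block passes
// didit == 1 and its value is broadcast to the others between the two
// barriers inside __kmpc_copyprivate. Hypothetical names, kept under "#if 0".
#if 0
static void copy_x(void *dst, void *src) { *(int *)dst = *(int *)src; }

void lowered_single_copyprivate(ident_t *loc_ref, kmp_int32 gtid, int *x) {
  kmp_int32 didit = 0;
  if (__kmpc_single(loc_ref, gtid)) {
    *x = 42; // value produced by the thread that ran the single block
    didit = 1;
    __kmpc_end_single(loc_ref, gtid);
  }
  __kmpc_copyprivate(loc_ref, gtid, sizeof(int), x, copy_x, didit);
}
#endif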
#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED                                              \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks

#if KMP_USE_DYNAMIC_LOCK

// internal lock initializer
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
  } else {
    KMP_INIT_I_LOCK(lock, seq);
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
  }
}

// internal nest lock initializer
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
  // Don't have a nested lock implementation for speculative locks.
  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;
  switch (seq) {
  case lockseq_tas:
    seq = lockseq_nested_tas;
    break;
  case lockseq_futex:
    seq = lockseq_nested_futex;
    break;
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
    break;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    break;
  case lockseq_drdpa:
    seq = lockseq_nested_drdpa;
    break;
  default:
    seq = lockseq_nested_queuing;
  }
  KMP_INIT_I_LOCK(lock, seq);
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
}
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  }

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
}

void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
}
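// Illustrative sketch (not part of the runtime): omp_init_lock_with_hint()
// ends up here, and the hint steers __kmp_map_hint_to_lock's choice of lock
// implementation. Hypothetical wrapper, kept under "#if 0".
#if 0
void init_speculative_lock(ident_t *loc_ref, kmp_int32 gtid, void **lock_mem) {
  // omp_lock_hint_speculative prefers an HLE lock when TSX is available,
  // otherwise the default user lock sequence is used (see
  // __kmp_map_hint_to_lock above).
  __kmpc_init_lock_with_hint(loc_ref, gtid, lock_mem,
                             omp_lock_hint_speculative);
}
#endif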
#endif // KMP_USE_DYNAMIC_LOCK

/* initialize the lock */
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }
  INIT_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_TRACE
  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_init_lock)) {
    ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t)lck);
  }
#endif

  __kmp_itt_lock_creating(lck);

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_lock

/* initialize the lock */
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_TRACE
  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) {
    ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t)lck);
  }
#endif

  __kmp_itt_lock_creating(lck);
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock

void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
#else
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_TRACE
  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) {
    ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t)lck);
  }
#endif

  __kmp_itt_lock_destroyed(lck);

  DESTROY_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  } else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock

/* destroy the lock */
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_TRACE
  if (ompt_enabled &&
      ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) {
    ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t)lck);
  }
#endif

  __kmp_itt_lock_destroyed(lck);

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    ;
  } else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock

void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }

  __kmp_itt_lock_acquiring(lck);

  ACQUIRE_LOCK(lck, gtid);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_TRACE
  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) {
    ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t)lck);
  }
#endif
#endif // KMP_USE_DYNAMIC_LOCK
}

void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_TRACE
#endif

#else // KMP_USE_DYNAMIC_LOCK

  int acquire_status;
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
  }

  __kmp_itt_lock_acquiring(lck);

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_TRACE
  if (ompt_enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first))
        ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)(
            (uint64_t)lck);
    } else {
      if (ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next))
        ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)(
            (uint64_t)lck);
    }
  }
#endif
#endif // KMP_USE_DYNAMIC_LOCK
}

void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  // Can't use serial interval since not block structured; release the lock.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
    KMP_MB();
    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

  __kmp_itt_lock_releasing(lck);

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_BLAME
  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_release_lock)) {
    ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t)lck);
  }
#endif
#endif // KMP_USE_DYNAMIC_LOCK
}

/* release the lock */
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  // Can't use serial interval since not block structured.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
    }
    KMP_MB();
    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

  __kmp_itt_lock_releasing(lck);

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_BLAME
  if (ompt_enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) {
        ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)(
            (uint64_t)lck);
      }
    } else if (ompt_callbacks.ompt_callback(
                   ompt_event_release_nest_lock_prev)) {
      ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)(
          (uint64_t)lck);
    }
  }
#endif
#endif // KMP_USE_DYNAMIC_LOCK
}

/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
    return FTN_TRUE;
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

  __kmp_itt_lock_acquiring(lck);

  rc = TEST_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
  return (rc ? FTN_TRUE : FTN_FALSE);
#endif // KMP_USE_DYNAMIC_LOCK
}

/* try to acquire the lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

  __kmp_itt_lock_acquiring(lck);

  rc = TEST_NESTED_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
  return rc;

#endif // KMP_USE_DYNAMIC_LOCK
}

// Interface to fast scalable reduce methods routines.

#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)

// used in a critical section reduce block
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow
  // the normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, we have to
  // use a pointer.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);
#endif // KMP_USE_DYNAMIC_LOCK
}

// used in a critical section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, we have to
  // use a pointer.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_end_critical_section_reduce_block

kmp_int32
__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                     size_t reduce_size, void *reduce_data,
                     void (*reduce_func)(void *lhs_data, void *rhs_data),
                     kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_team_t *team;
  kmp_info_t *th;
  int teams_swapped = 0, task_state;
#endif
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));

  // Reduction clause cannot be used as a stand-alone directive, but
  // initialization is kept here for safety.
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  if (th->th.th_teams_microtask) { // AC: check if we are inside teams construct
    team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // this is reduction at teams construct
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
      // Let's swap teams temporarily for the reduction barrier.
      teams_swapped = 1;
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      task_state = th->th.th_task_state;
      th->th.th_task_state = 0;
    }
  }
#endif // OMP_40_ENABLED

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // This barrier should be invisible to a customer and to the threading
    // profile tool (it's used for an internal purpose).
    __kmp_threads[global_tid]->th.th_ident = loc;
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);

    // all other workers except master should do this pop here
    // (none of the other workers will get to __kmpc_end_reduce_nowait())
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    // Restore thread structure.
    th->th.th_info.ds.ds_tid = 0;
    th->th.th_team = team;
    th->th.th_team_nproc = team->t.t_nproc;
    th->th.th_task_team = team->t.t_task_team[task_state];
    th->th.th_task_state = task_state;
  }
#endif
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required

  } else if (packed_reduction_method == atomic_reduce_block) {

    // neither master nor other workers should get here
    // (code gen does not generate this call in the atomic reduce case)

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master gets here

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
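// Illustrative sketch (not part of the runtime): a typical code-generation
// pattern around __kmpc_reduce_nowait. The return value selects between the
// tree/critical path (1), the atomic path (2), and "no work for this thread"
// (0). Hypothetical names and a single int reduction variable, kept under
// "#if 0".
#if 0
static kmp_critical_name reduce_lock = {0};

static void reduce_sum(void *lhs, void *rhs) { // combines two private copies
  *(int *)lhs += *(int *)rhs;
}

void lowered_reduction(ident_t *loc_ref, kmp_int32 gtid, int *shared_sum,
                       int private_sum) {
  switch (__kmpc_reduce_nowait(loc_ref, gtid, 1, sizeof(int), &private_sum,
                               reduce_sum, &reduce_lock)) {
  case 1: // one thread combines the (already tree-reduced) data
    *shared_sum += private_sum;
    __kmpc_end_reduce_nowait(loc_ref, gtid, &reduce_lock);
    break;
  case 2: // every thread updates the shared copy atomically
    __sync_fetch_and_add(shared_sum, private_sum);
    break;
  default: // 0: nothing to do for this thread
    break;
  }
}
#endif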
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));

  // Reduction clause cannot be used as a stand-alone directive, but
  // initialization is kept here for safety.
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // This barrier should be visible to a customer and to the threading
    // profile tool (it's a terminating barrier on constructs if NOWAIT is
    // not specified).
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);

    // all other workers except master should do this pop here
    // (none of the other workers except master will enter __kmpc_end_reduce())
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));

  return retval;
}
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // This barrier should be visible to a customer and to the threading profile
  // tool (it's a terminating barrier on constructs if NOWAIT not specified).

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

    // TODO: implicit barrier: should be exposed
    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required

    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);

  } else if (packed_reduction_method == atomic_reduce_block) {

    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master executes here (master releases all other workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

// end of interface to fast scalable reduce routines

kmp_uint64 __kmpc_get_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);

} // __kmpc_get_parent_taskid
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          struct kmp_dim *dims) {
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if the team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // shared buffer index for the next loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into the allocated private buffer.
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is the number of dimensions
  // Save also the address of num_done in order to access it later without
  // knowing the buffer index.
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64 range_length; // keep ranges of all dimensions but the first
    if (dims[j].st == 1) { // most common case
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute the total trip count, starting with the range of dims[0] which we
  // don't need to keep in the buffer.
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check if the shared buffer is not occupied by another loop.
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to become free.
    __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                       __kmp_eq_4, NULL);
  }
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer otherwise.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (kmp_int64 *)&sh_buf->doacross_flags, NULL, (kmp_int64)1);
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    kmp_int64 size = trace_count / 8 + 8; // in bytes, one bit per iteration
    sh_buf->doacross_flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
  } else if ((kmp_int64)flags == 1) {
    // initialization is still in progress, need to wait
    while ((volatile kmp_int64)sh_buf->doacross_flags == 1) {
      KMP_YIELD(TRUE);
    }
  }
  KMP_DEBUG_ASSERT((kmp_int64)sh_buf->doacross_flags > 1); // check the pointer
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save a private copy to avoid touching the
  // shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
void __kmpc_doacross_wait(ident_t *loc, int gtid, long long *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate the sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5;       // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
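/* A self-contained sketch of the flag-array indexing used by the wait/post
   code above: one bit per linearized (non-negative) iteration number, packed
   into 32-bit words. The names below are illustrative only and are not part
   of the runtime API. */
#include <stdint.h>

static int doacross_iter_is_posted(const uint32_t *flags, int64_t iter) {
  uint32_t bit = 1u << (unsigned)(iter & 31); // bit position within the word
  int64_t word = iter >> 5;                   // word index: iter / 32
  return (flags[word] & bit) != 0;
}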
void __kmpc_doacross_post(ident_t *loc, int gtid, long long *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate the sequential iteration number (same as in "wait" above,
  // but without the out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5;       // divided by 32
  flag = 1 << shft;
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  kmp_int64 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done = KMP_TEST_THEN_INC64((kmp_int64 *)pr_buf->th_doacross_info[1]) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == (kmp_int64)sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (the buffer index is kept forever)
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}