Intel® OpenMP* Runtime Library
kmp_csupport.c
1 /*
2  * kmp_csupport.c -- kfront linkage support for OpenMP.
3  */
4 
5 /* <copyright>
6  Copyright (c) 1997-2015 Intel Corporation. All Rights Reserved.
7 
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions
10  are met:
11 
12  * Redistributions of source code must retain the above copyright
13  notice, this list of conditions and the following disclaimer.
14  * Redistributions in binary form must reproduce the above copyright
15  notice, this list of conditions and the following disclaimer in the
16  documentation and/or other materials provided with the distribution.
17  * Neither the name of Intel Corporation nor the names of its
18  contributors may be used to endorse or promote products derived
19  from this software without specific prior written permission.
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 </copyright> */
34 
35 #include "omp.h" /* extern "C" declarations of user-visible routines */
36 #include "kmp.h"
37 #include "kmp_i18n.h"
38 #include "kmp_itt.h"
39 #include "kmp_error.h"
40 #include "kmp_stats.h"
41 
42 #define MAX_MESSAGE 512
43 
44 /* ------------------------------------------------------------------------ */
45 /* ------------------------------------------------------------------------ */
46 
47 /* flags will be used in future, e.g., to implement */
48 /* openmp_strict library restrictions */
49 
59 void
60 __kmpc_begin(ident_t *loc, kmp_int32 flags)
61 {
62  // By default __kmp_ignore_mppbeg() returns TRUE.
63  if (__kmp_ignore_mppbeg() == FALSE) {
64  __kmp_internal_begin();
65 
66  KC_TRACE( 10, ("__kmpc_begin: called\n" ) );
67  }
68 }
69 
 77 void
 78 __kmpc_end(ident_t *loc)
 79 {
 80  // By default, __kmp_ignore_mppend() returns TRUE, which makes the __kmpc_end() call a no-op.
 81  // However, this can be overridden with the KMP_IGNORE_MPPEND environment variable.
 82  // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end()
 83  // will unregister this root (which can cause library shutdown).
84  if (__kmp_ignore_mppend() == FALSE) {
85  KC_TRACE( 10, ("__kmpc_end: called\n" ) );
86  KA_TRACE( 30, ("__kmpc_end\n" ));
87 
88  __kmp_internal_end_thread( -1 );
89  }
90 }
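/*
 * For illustration, a sketch of how a front end that uses these entry points
 * may bracket program execution. The ident_t initializer, the KMP_IDENT_KMPC
 * flag usage and the name loc_main are assumptions for this sketch, not the
 * output of any particular compiler; by default both calls are no-ops unless
 * KMP_IGNORE_MPPBEG / KMP_IGNORE_MPPEND are set to 0.
 *
 *     static ident_t loc_main = { 0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;" };
 *
 *     int main(void)
 *     {
 *         __kmpc_begin( &loc_main, 0 );   // flags are currently unused
 *         // ... user code, possibly containing parallel regions ...
 *         __kmpc_end( &loc_main );        // may shut the library down if KMP_IGNORE_MPPEND=0
 *         return 0;
 *     }
 */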
91 
111 kmp_int32
112 __kmpc_global_thread_num(ident_t *loc)
113 {
114  kmp_int32 gtid = __kmp_entry_gtid();
115 
116  KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) );
117 
118  return gtid;
119 }
120 
134 kmp_int32
135 __kmpc_global_num_threads(ident_t *loc)
136 {
137  KC_TRACE( 10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_nth ) );
138 
139  return TCR_4(__kmp_nth);
140 }
141 
148 kmp_int32
149 __kmpc_bound_thread_num(ident_t *loc)
150 {
151  KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) );
152  return __kmp_tid_from_gtid( __kmp_entry_gtid() );
153 }
154 
160 kmp_int32
161 __kmpc_bound_num_threads(ident_t *loc)
162 {
163  KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) );
164 
165  return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
166 }
167 
174 kmp_int32
175 __kmpc_ok_to_fork(ident_t *loc)
176 {
177 #ifndef KMP_DEBUG
178 
179  return TRUE;
180 
181 #else
182 
183  const char *semi2;
184  const char *semi3;
185  int line_no;
186 
187  if (__kmp_par_range == 0) {
188  return TRUE;
189  }
190  semi2 = loc->psource;
191  if (semi2 == NULL) {
192  return TRUE;
193  }
194  semi2 = strchr(semi2, ';');
195  if (semi2 == NULL) {
196  return TRUE;
197  }
198  semi2 = strchr(semi2 + 1, ';');
199  if (semi2 == NULL) {
200  return TRUE;
201  }
202  if (__kmp_par_range_filename[0]) {
203  const char *name = semi2 - 1;
204  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
205  name--;
206  }
207  if ((*name == '/') || (*name == ';')) {
208  name++;
209  }
210  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
211  return __kmp_par_range < 0;
212  }
213  }
214  semi3 = strchr(semi2 + 1, ';');
215  if (__kmp_par_range_routine[0]) {
216  if ((semi3 != NULL) && (semi3 > semi2)
217  && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
218  return __kmp_par_range < 0;
219  }
220  }
221  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
222  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
223  return __kmp_par_range > 0;
224  }
225  return __kmp_par_range < 0;
226  }
227  return TRUE;
228 
229 #endif /* KMP_DEBUG */
230 
231 }
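/*
 * A worked example of the filtering above, assuming a hypothetical source
 * location string in the kmpc layout ";file;routine;line;column;;":
 *
 *     loc->psource == ";/home/user/src/foo.c;compute_kernel;42;1;;"
 *
 * semi2 ends up pointing at the ';' that terminates the file name, so the
 * file-name comparison sees "foo.c" (everything after the last '/'); semi3
 * points at the ';' that terminates "compute_kernel"; and KMP_SSCANF reads
 * 42 into line_no, which is then checked against __kmp_par_range_lb/_ub.
 */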
232 
238 kmp_int32
239 __kmpc_in_parallel( ident_t *loc )
240 {
241  return __kmp_entry_thread() -> th.th_root -> r.r_active;
242 }
243 
253 void
254 __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads )
255 {
256  KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
257  global_tid, num_threads ) );
258 
259  __kmp_push_num_threads( loc, global_tid, num_threads );
260 }
261 
262 void
263 __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid )
264 {
265  KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );
266 
267  /* the num_threads are automatically popped */
268 }
269 
270 
271 #if OMP_40_ENABLED
272 
273 void
274 __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
275 {
276  KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
277  global_tid, proc_bind ) );
278 
279  __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
280 }
281 
282 #endif /* OMP_40_ENABLED */
283 
284 
294 void
295 __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
296 {
297  KMP_STOP_EXPLICIT_TIMER(OMP_serial);
298  KMP_COUNT_BLOCK(OMP_PARALLEL);
299  int gtid = __kmp_entry_gtid();
300  // maybe saving thr_state is enough here
301  {
302  va_list ap;
303  va_start( ap, microtask );
304 
305 #if INCLUDE_SSC_MARKS
306  SSC_MARK_FORKING();
307 #endif
308  __kmp_fork_call( loc, gtid, fork_context_intel,
309  argc,
310  VOLATILE_CAST(microtask_t) microtask,
311  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
312 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
313 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
314  &ap
315 #else
316  ap
317 #endif
318  );
319 #if INCLUDE_SSC_MARKS
320  SSC_MARK_JOINING();
321 #endif
322  __kmp_join_call( loc, gtid );
323 
324  va_end( ap );
325  }
326  KMP_START_EXPLICIT_TIMER(OMP_serial);
327 }
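/*
 * A minimal sketch of how a compiler might lower a parallel region onto
 * __kmpc_fork_call. The outlined routine my_microtask, the ident loc_par and
 * the way the shared variable n is passed are assumptions for this sketch,
 * not the output of any specific compiler:
 *
 *     static void my_microtask( kmp_int32 *gtid, kmp_int32 *tid, int *n )
 *     {
 *         // body of "#pragma omp parallel shared(n)"
 *     }
 *
 *     // The construct itself becomes a single runtime call; the trailing
 *     // argc arguments are forwarded to the microtask:
 *     __kmpc_fork_call( &loc_par, 1, (kmpc_micro)my_microtask, &n );
 */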
328 
329 #if OMP_40_ENABLED
330 
341 void
342 __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads )
343 {
344  KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
345  global_tid, num_teams, num_threads ) );
346 
347  __kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
348 }
349 
359 void
360 __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
361 {
362  int gtid = __kmp_entry_gtid();
363  kmp_info_t *this_thr = __kmp_threads[ gtid ];
364  va_list ap;
365  va_start( ap, microtask );
366 
367  // remember teams entry point and nesting level
368  this_thr->th.th_teams_microtask = microtask;
369  this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host
370 
371  // check if __kmpc_push_num_teams was called; if not, set the default number of teams
372  if ( this_thr->th.th_teams_size.nteams == 0 ) {
373  __kmp_push_num_teams( loc, gtid, 0, 0 );
374  }
375  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
376  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
377  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
378 
379  __kmp_fork_call( loc, gtid, fork_context_intel,
380  argc,
381  VOLATILE_CAST(microtask_t) __kmp_teams_master,
382  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
383 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
384  &ap
385 #else
386  ap
387 #endif
388  );
389  __kmp_join_call( loc, gtid );
390  this_thr->th.th_teams_microtask = NULL;
391  this_thr->th.th_teams_level = 0;
392  *(kmp_int64*)(&this_thr->th.th_teams_size) = 0L;
393  va_end( ap );
394 }
395 #endif /* OMP_40_ENABLED */
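/*
 * A sketch of the calls a compiler may emit for
 * "#pragma omp teams num_teams(4) thread_limit(8)". The outlined routine
 * teams_outlined_fn and the ident loc_teams are assumptions for this sketch;
 * the clause values are pushed first, then the construct is forked much like
 * a parallel region:
 *
 *     __kmpc_push_num_teams( &loc_teams, gtid, 4, 8 );
 *     __kmpc_fork_teams( &loc_teams, 0, (kmpc_micro)teams_outlined_fn );
 */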
396 
397 
398 //
399 // I don't think this function should ever have been exported.
400 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
401 // openmp code ever called it, but it's been exported from the RTL for so
402 // long that I'm afraid to remove the definition.
403 //
404 int
405 __kmpc_invoke_task_func( int gtid )
406 {
407  return __kmp_invoke_task_func( gtid );
408 }
409 
422 void
423 __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
424 {
425  __kmp_serialized_parallel(loc, global_tid); /* The implementation is now in kmp_runtime.c so that it can share static functions with
426  * kmp_fork_call since the tasks to be done are similar in each case.
427  */
428 }
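/*
 * When a parallel region is known to run serialized (for example, an if()
 * clause that is statically false), generated code brackets the body with the
 * pair below instead of forking a team. A sketch with an assumed ident:
 *
 *     __kmpc_serialized_parallel( &loc_par, global_tid );
 *     // ... region body, executed by the encountering thread only ...
 *     __kmpc_end_serialized_parallel( &loc_par, global_tid );
 */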
429 
437 void
438 __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
439 {
440  kmp_internal_control_t *top;
441  kmp_info_t *this_thr;
442  kmp_team_t *serial_team;
443 
444  KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );
445 
446  /* skip all this code for autopar serialized loops since it results in
447  unacceptable overhead */
448  if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
449  return;
450 
451  // Not autopar code
452  if( ! TCR_4( __kmp_init_parallel ) )
453  __kmp_parallel_initialize();
454 
455  this_thr = __kmp_threads[ global_tid ];
456  serial_team = this_thr->th.th_serial_team;
457 
458  #if OMP_41_ENABLED
459  kmp_task_team_t * task_team = this_thr->th.th_task_team;
460 
461  // we need to wait for the proxy tasks before finishing the thread
462  if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks )
463  __kmp_task_team_wait(this_thr, serial_team, NULL ); // is an ITT object needed here?
464  #endif
465 
466  KMP_MB();
467  KMP_DEBUG_ASSERT( serial_team );
468  KMP_ASSERT( serial_team -> t.t_serialized );
469  KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
470  KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
471  KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
472  KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );
473 
474  /* If necessary, pop the internal control stack values and replace the team values */
475  top = serial_team -> t.t_control_stack_top;
476  if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
477  copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top );
478  serial_team -> t.t_control_stack_top = top -> next;
479  __kmp_free(top);
480  }
481 
482  //if( serial_team -> t.t_serialized > 1 )
483  serial_team -> t.t_level--;
484 
485  /* pop dispatch buffers stack */
486  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
487  {
488  dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
489  serial_team->t.t_dispatch->th_disp_buffer =
490  serial_team->t.t_dispatch->th_disp_buffer->next;
491  __kmp_free( disp_buffer );
492  }
493 
494  -- serial_team -> t.t_serialized;
495  if ( serial_team -> t.t_serialized == 0 ) {
496 
497  /* return to the parallel section */
498 
499 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
500  if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
501  __kmp_clear_x87_fpu_status_word();
502  __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
503  __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
504  }
505 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
506 
507  this_thr -> th.th_team = serial_team -> t.t_parent;
508  this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;
509 
510  /* restore values cached in the thread */
511  this_thr -> th.th_team_nproc = serial_team -> t.t_parent -> t.t_nproc; /* JPH */
512  this_thr -> th.th_team_master = serial_team -> t.t_parent -> t.t_threads[0]; /* JPH */
513  this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;
514 
515  /* TODO the below shouldn't need to be adjusted for serialized teams */
516  this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
517  t.t_dispatch[ serial_team -> t.t_master_tid ];
518 
519  __kmp_pop_current_task_from_thread( this_thr );
520 
521  KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
522  this_thr -> th.th_current_task -> td_flags.executing = 1;
523 
524  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
525  // Copy the task team from the new child / old parent team to the thread.
526  this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
527  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
528  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
529  }
530  } else {
531  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
532  KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
533  global_tid, serial_team, serial_team -> t.t_serialized ) );
534  }
535  }
536 
537 #if USE_ITT_BUILD
538  kmp_uint64 cur_time = 0;
539 #if USE_ITT_NOTIFY
540  if ( __itt_get_timestamp_ptr ) {
541  cur_time = __itt_get_timestamp();
542  }
543 #endif /* USE_ITT_NOTIFY */
544  if ( this_thr->th.th_team->t.t_level == 0
545 #if OMP_40_ENABLED
546  && this_thr->th.th_teams_microtask == NULL
547 #endif
548  ) {
549  // Report the barrier
550  this_thr->th.th_ident = loc;
551  if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
552  ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
553  {
554  __kmp_itt_frame_submit( global_tid, this_thr->th.th_frame_time_serialized,
555  cur_time, 0, loc, this_thr->th.th_team_nproc, 0 );
556  if ( __kmp_forkjoin_frames_mode == 3 )
557  // Since the barrier frame for a serialized region coincides with the region itself, use the same begin timestamp as for the barrier.
558  __kmp_itt_frame_submit( global_tid, serial_team->t.t_region_time,
559  cur_time, 0, loc, this_thr->th.th_team_nproc, 2 );
560  } else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
561  ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
562  // Mark the end of the "parallel" region for VTune. Only one frame notification scheme is used at the moment.
563  __kmp_itt_region_joined( global_tid, 1 );
564  }
565 #endif /* USE_ITT_BUILD */
566 
567  if ( __kmp_env_consistency_check )
568  __kmp_pop_parallel( global_tid, NULL );
569 }
570 
579 void
580 __kmpc_flush(ident_t *loc)
581 {
582  KC_TRACE( 10, ("__kmpc_flush: called\n" ) );
583 
584  /* need an explicit __mf() here since the library uses volatile instead */
585  KMP_MB(); /* Flush all pending memory write invalidates. */
586 
587  #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
588  #if KMP_MIC
589  // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
590  // We shouldn't need it, though, since the ABI rules require that
591  // * If the compiler generates NGO stores it also generates the fence
592  // * If users hand-code NGO stores they should insert the fence
593  // therefore no incomplete unordered stores should be visible.
594  #else
595  // C74404
596  // This is to address non-temporal store instructions (sfence needed).
597  // The clflush instruction is also addressed (mfence needed).
598  // Probably the non-temporal load instruction movntdqa should also be addressed.
599  // mfence is an SSE2 instruction. Do not execute it if the CPU does not support SSE2.
600  if ( ! __kmp_cpuinfo.initialized ) {
601  __kmp_query_cpuid( & __kmp_cpuinfo );
602  }; // if
603  if ( ! __kmp_cpuinfo.sse2 ) {
604  // CPU cannot execute SSE2 instructions.
605  } else {
606  #if KMP_COMPILER_ICC || KMP_COMPILER_MSVC
607  _mm_mfence();
608  #else
609  __sync_synchronize();
610  #endif // KMP_COMPILER_ICC
611  }; // if
612  #endif // KMP_MIC
613  #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64)
614  // Nothing to see here, move along.
615  #elif KMP_ARCH_PPC64
616  // Nothing needed here (we have a real MB above).
617  #if KMP_OS_CNK
618  // The flushing thread needs to yield here; this prevents a
619  // busy-waiting thread from saturating the pipeline. flush is
620  // often used in loops like this:
621  // while (!flag) {
622  // #pragma omp flush(flag)
623  // }
624  // and adding the yield here is good for at least a 10x speedup
625  // when running >2 threads per core (on the NAS LU benchmark).
626  __kmp_yield(TRUE);
627  #endif
628  #else
629  #error Unknown or unsupported architecture
630  #endif
631 
632 }
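/*
 * For reference, a flush directive in user code such as
 *
 *     #pragma omp flush(flag)
 *
 * is lowered to a call of __kmpc_flush with the source location; the flush
 * set does not need to be passed, because the implementation above issues a
 * full memory fence (or relies on the architecture's ordering guarantees)
 * regardless of which variables were named.
 */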
633 
634 /* -------------------------------------------------------------------------- */
635 
636 /* -------------------------------------------------------------------------- */
637 
645 void
646 __kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
647 {
648  KMP_COUNT_BLOCK(OMP_BARRIER);
649  KMP_TIME_BLOCK(OMP_barrier);
650  int explicit_barrier_flag;
651  KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );
652 
653  if (! TCR_4(__kmp_init_parallel))
654  __kmp_parallel_initialize();
655 
656  if ( __kmp_env_consistency_check ) {
657  if ( loc == 0 ) {
658  KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
659  }; // if
660 
661  __kmp_check_barrier( global_tid, ct_barrier, loc );
662  }
663 
664  __kmp_threads[ global_tid ]->th.th_ident = loc;
665  // TODO: explicit barrier_wait_id:
666  // this function is called when a 'barrier' directive is present or
667  // at the implicit barrier at the end of a worksharing construct.
668  // 1) better to add a per-thread barrier counter to a thread data structure
669  // 2) set to 0 when a new team is created
670  // 3) no sync is required
671 
672  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
673 }
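/*
 * A sketch of the lowering of an explicit barrier, with an assumed ident and
 * a cached global thread number:
 *
 *     // #pragma omp barrier
 *     kmp_int32 gtid = __kmpc_global_thread_num( &loc_bar );
 *     __kmpc_barrier( &loc_bar, gtid );
 */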
674 
675 /* The BARRIER for a MASTER section is always explicit */
682 kmp_int32
683 __kmpc_master(ident_t *loc, kmp_int32 global_tid)
684 {
685  KMP_COUNT_BLOCK(OMP_MASTER);
686  int status = 0;
687 
688  KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );
689 
690  if( ! TCR_4( __kmp_init_parallel ) )
691  __kmp_parallel_initialize();
692 
693  if( KMP_MASTER_GTID( global_tid ))
694  status = 1;
695 
696  if ( __kmp_env_consistency_check ) {
697 #if KMP_USE_DYNAMIC_LOCK
698  if (status)
699  __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 );
700  else
701  __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 );
702 #else
703  if (status)
704  __kmp_push_sync( global_tid, ct_master, loc, NULL );
705  else
706  __kmp_check_sync( global_tid, ct_master, loc, NULL );
707 #endif
708  }
709 
710  return status;
711 }
712 
721 void
722 __kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
723 {
724  KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );
725 
726  KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
727 
728  if ( __kmp_env_consistency_check ) {
729  if( global_tid < 0 )
730  KMP_WARNING( ThreadIdentInvalid );
731 
732  if( KMP_MASTER_GTID( global_tid ))
733  __kmp_pop_sync( global_tid, ct_master, loc );
734  }
735 }
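/*
 * A sketch of how the pair above is used for a master construct; only the
 * thread for which __kmpc_master() returns 1 executes the body and the
 * matching __kmpc_end_master() (ident and body are assumed):
 *
 *     if ( __kmpc_master( &loc_master, global_tid ) ) {
 *         // ... body of "#pragma omp master" ...
 *         __kmpc_end_master( &loc_master, global_tid );
 *     }
 */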
736 
744 void
745 __kmpc_ordered( ident_t * loc, kmp_int32 gtid )
746 {
747  int cid = 0;
748  kmp_info_t *th;
749  KMP_DEBUG_ASSERT( __kmp_init_serial );
750 
751  KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));
752 
753  if (! TCR_4(__kmp_init_parallel))
754  __kmp_parallel_initialize();
755 
756 #if USE_ITT_BUILD
757  __kmp_itt_ordered_prep( gtid );
758  // TODO: ordered_wait_id
759 #endif /* USE_ITT_BUILD */
760 
761  th = __kmp_threads[ gtid ];
762 
763  if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
764  (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
765  else
766  __kmp_parallel_deo( & gtid, & cid, loc );
767 
768 #if USE_ITT_BUILD
769  __kmp_itt_ordered_start( gtid );
770 #endif /* USE_ITT_BUILD */
771 }
772 
780 void
781 __kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
782 {
783  int cid = 0;
784  kmp_info_t *th;
785 
786  KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );
787 
788 #if USE_ITT_BUILD
789  __kmp_itt_ordered_end( gtid );
790  // TODO: ordered_wait_id
791 #endif /* USE_ITT_BUILD */
792 
793  th = __kmp_threads[ gtid ];
794 
795  if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
796  (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
797  else
798  __kmp_parallel_dxo( & gtid, & cid, loc );
799 }
800 
801 #if KMP_USE_DYNAMIC_LOCK
802 
803 static __forceinline kmp_indirect_lock_t *
804 __kmp_get_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_dyna_lockseq_t seq)
805 {
806  // Code from __kmp_get_critical_section_ptr
807  // This function returns an indirect lock object instead of a user lock.
808  kmp_indirect_lock_t **lck, *ret;
809  lck = (kmp_indirect_lock_t **)crit;
810  ret = (kmp_indirect_lock_t *)TCR_PTR(*lck);
811  if (ret == NULL) {
812  void *idx;
813  kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq);
814  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
815  ret = ilk;
816  DYNA_I_LOCK_FUNC(ilk, init)(ilk->lock);
817  DYNA_SET_I_LOCK_LOCATION(ilk, loc);
818  DYNA_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
819  KA_TRACE(20, ("__kmp_get_indirect_csptr: initialized indirect lock #%d\n", tag));
820 #if USE_ITT_BUILD
821  __kmp_itt_critical_creating(ilk->lock, loc);
822 #endif
823  int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk);
824  if (status == 0) {
825 #if USE_ITT_BUILD
826  __kmp_itt_critical_destroyed(ilk->lock);
827 #endif
828  // Postponing destroy, to avoid costly dispatch here.
829  //DYNA_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
830  ret = (kmp_indirect_lock_t *)TCR_PTR(*lck);
831  KMP_DEBUG_ASSERT(ret != NULL);
832  }
833  }
834  return ret;
835 }
836 
837 // Fast-path acquire tas lock
838 #define DYNA_ACQUIRE_TAS_LOCK(lock, gtid) { \
839  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
840  if (l->lk.poll != DYNA_LOCK_FREE(tas) || \
841  ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas))) { \
842  kmp_uint32 spins; \
843  KMP_FSYNC_PREPARE(l); \
844  KMP_INIT_YIELD(spins); \
845  if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
846  KMP_YIELD(TRUE); \
847  } else { \
848  KMP_YIELD_SPIN(spins); \
849  } \
850  while (l->lk.poll != DYNA_LOCK_FREE(tas) || \
851  ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas))) { \
852  if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
853  KMP_YIELD(TRUE); \
854  } else { \
855  KMP_YIELD_SPIN(spins); \
856  } \
857  } \
858  } \
859  KMP_FSYNC_ACQUIRED(l); \
860 }
861 
862 // Fast-path test tas lock
863 #define DYNA_TEST_TAS_LOCK(lock, gtid, rc) { \
864  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
865  rc = l->lk.poll == DYNA_LOCK_FREE(tas) && \
866  KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas)); \
867 }
868 
869 // Fast-path release tas lock
870 #define DYNA_RELEASE_TAS_LOCK(lock, gtid) { \
871  TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, DYNA_LOCK_FREE(tas)); \
872  KMP_MB(); \
873 }
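/*
 * The three macros above implement a test-and-test-and-set protocol: read the
 * lock word first and attempt the atomic compare-and-swap only when the lock
 * looks free, yielding or spinning between attempts. A minimal standalone
 * sketch of the same idea using GCC __sync builtins (toy code, not part of
 * this runtime; it omits the yield heuristics and the DYNA_LOCK_BUSY tag
 * encoding):
 *
 *     typedef struct { volatile kmp_int32 poll; } toy_tas_lock_t;
 *
 *     static void toy_tas_acquire( toy_tas_lock_t *l, kmp_int32 gtid )
 *     {
 *         while ( l->poll != 0 ||
 *                 ! __sync_bool_compare_and_swap( &l->poll, 0, gtid + 1 ) ) {
 *             // spin; a real implementation yields, as KMP_YIELD/KMP_YIELD_SPIN do
 *         }
 *     }
 *
 *     static void toy_tas_release( toy_tas_lock_t *l )
 *     {
 *         __sync_synchronize();   // full fence, like KMP_MB() above
 *         l->poll = 0;
 *     }
 */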
874 
875 #if DYNA_HAS_FUTEX
876 
877 # include <unistd.h>
878 # include <sys/syscall.h>
879 # ifndef FUTEX_WAIT
880 # define FUTEX_WAIT 0
881 # endif
882 # ifndef FUTEX_WAKE
883 # define FUTEX_WAKE 1
884 # endif
885 
886 // Fast-path acquire futex lock
887 #define DYNA_ACQUIRE_FUTEX_LOCK(lock, gtid) { \
888  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
889  kmp_int32 gtid_code = (gtid+1) << 1; \
890  KMP_MB(); \
891  KMP_FSYNC_PREPARE(ftx); \
892  kmp_int32 poll_val; \
893  while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex), \
894  DYNA_LOCK_BUSY(gtid_code, futex))) != DYNA_LOCK_FREE(futex)) { \
895  kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1; \
896  if (!cond) { \
897  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex))) { \
898  continue; \
899  } \
900  poll_val |= DYNA_LOCK_BUSY(1, futex); \
901  } \
902  kmp_int32 rc; \
903  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { \
904  continue; \
905  } \
906  gtid_code |= 1; \
907  } \
908  KMP_FSYNC_ACQUIRED(ftx); \
909 }
910 
911 // Fast-path test futex lock
912 #define DYNA_TEST_FUTEX_LOCK(lock, gtid, rc) { \
913  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
914  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1)) { \
915  KMP_FSYNC_ACQUIRED(ftx); \
916  rc = TRUE; \
917  } else { \
918  rc = FALSE; \
919  } \
920 }
921 
922 // Fast-path release futex lock
923 #define DYNA_RELEASE_FUTEX_LOCK(lock, gtid) { \
924  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
925  KMP_MB(); \
926  KMP_FSYNC_RELEASING(ftx); \
927  kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex)); \
928  if (DYNA_LOCK_STRIP(poll_val) & 1) { \
929  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0); \
930  } \
931  KMP_MB(); \
932  KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
933 }
934 
935 #endif // DYNA_HAS_FUTEX
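/*
 * The futex fast paths above keep the whole lock state in one 32-bit word:
 * the owner tag lives in the upper bits and bit 0 means "there may be
 * waiters", so the release path only enters the kernel when that bit is set.
 * A minimal sketch of the raw wait/wake calls involved (Linux-only toy
 * helpers, not part of this runtime):
 *
 *     #include <linux/futex.h>
 *     #include <sys/syscall.h>
 *     #include <unistd.h>
 *
 *     static long toy_futex_wait( int *addr, int expected )
 *     {
 *         // sleeps in the kernel only while *addr still equals expected
 *         return syscall( SYS_futex, addr, FUTEX_WAIT, expected, NULL, NULL, 0 );
 *     }
 *
 *     static long toy_futex_wake( int *addr, int nwaiters )
 *     {
 *         return syscall( SYS_futex, addr, FUTEX_WAKE, nwaiters, NULL, NULL, 0 );
 *     }
 */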
936 
937 #else // KMP_USE_DYNAMIC_LOCK
938 
939 static kmp_user_lock_p
940 __kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
941 {
942  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
943 
944  //
945  // Because of the double-check, the following load
946  // doesn't need to be volatile.
947  //
948  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
949 
950  if ( lck == NULL ) {
951  void * idx;
952 
953  // Allocate & initialize the lock.
954  // Remember allocated locks in table in order to free them in __kmp_cleanup()
955  lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
956  __kmp_init_user_lock_with_checks( lck );
957  __kmp_set_user_lock_location( lck, loc );
958 #if USE_ITT_BUILD
959  __kmp_itt_critical_creating( lck );
960  // __kmp_itt_critical_creating() should be called *before* the first use of the underlying
961  // lock. This is the only place where we can guarantee it. There is a chance the lock will be
962  // destroyed without ever being used, but that is not a problem: this is not a real event seen
963  // by the user but rather the setting of a name for the object (lock). See kmp_itt.h for details.
964 #endif /* USE_ITT_BUILD */
965 
966  //
967  // Use a cmpxchg instruction to slam the start of the critical
968  // section with the lock pointer. If another thread beat us
969  // to it, deallocate the lock, and use the lock that the other
970  // thread allocated.
971  //
972  int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );
973 
974  if ( status == 0 ) {
975  // Deallocate the lock and reload the value.
976 #if USE_ITT_BUILD
977  __kmp_itt_critical_destroyed( lck );
978  // Let ITT know the lock is destroyed and the same memory location may be reused for
979  // another purpose.
980 #endif /* USE_ITT_BUILD */
981  __kmp_destroy_user_lock_with_checks( lck );
982  __kmp_user_lock_free( &idx, gtid, lck );
983  lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
984  KMP_DEBUG_ASSERT( lck != NULL );
985  }
986  }
987  return lck;
988 }
989 
990 #endif // KMP_USE_DYNAMIC_LOCK
991 
1002 void
1003 __kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
1004  KMP_COUNT_BLOCK(OMP_CRITICAL);
1005  kmp_user_lock_p lck;
1006 
1007  KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
1008 
1009 #if KMP_USE_DYNAMIC_LOCK
1010  // Assumption: all direct locks fit in OMP_CRITICAL_SIZE.
1011  // The global sequence __kmp_user_lock_seq is used unless the compiler pushes a value.
1012  if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
1013  lck = (kmp_user_lock_p)crit;
1014  // The thread that reaches here first needs to tag the lock word.
1015  if (*((kmp_dyna_lock_t *)lck) == 0) {
1016  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)lck, 0, DYNA_GET_D_TAG(__kmp_user_lock_seq));
1017  }
1018  if (__kmp_env_consistency_check) {
1019  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
1020  }
1021 # if USE_ITT_BUILD
1022  __kmp_itt_critical_acquiring(lck);
1023 # endif
1024 # if DYNA_USE_FAST_TAS
1025  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1026  DYNA_ACQUIRE_TAS_LOCK(lck, global_tid);
1027  } else
1028 # elif DYNA_USE_FAST_FUTEX
1029  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1030  DYNA_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1031  } else
1032 # endif
1033  {
1034  DYNA_D_LOCK_FUNC(lck, set)((kmp_dyna_lock_t *)lck, global_tid);
1035  }
1036  } else {
1037  kmp_indirect_lock_t *ilk = __kmp_get_indirect_csptr(crit, loc, global_tid, __kmp_user_lock_seq);
1038  lck = ilk->lock;
1039  if (__kmp_env_consistency_check) {
1040  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
1041  }
1042 # if USE_ITT_BUILD
1043  __kmp_itt_critical_acquiring(lck);
1044 # endif
1045  DYNA_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1046  }
1047 
1048 #else // KMP_USE_DYNAMIC_LOCK
1049 
1050  //TODO: add THR_OVHD_STATE
1051 
1052  KMP_CHECK_USER_LOCK_INIT();
1053 
1054  if ( ( __kmp_user_lock_kind == lk_tas )
1055  && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1056  lck = (kmp_user_lock_p)crit;
1057  }
1058 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1059  else if ( ( __kmp_user_lock_kind == lk_futex )
1060  && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1061  lck = (kmp_user_lock_p)crit;
1062  }
1063 #endif
1064  else { // ticket, queuing or drdpa
1065  lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
1066  }
1067 
1068  if ( __kmp_env_consistency_check )
1069  __kmp_push_sync( global_tid, ct_critical, loc, lck );
1070 
1071  /* since the critical directive binds to all threads, not just
1072  * the current team we have to check this even if we are in a
1073  * serialized team */
1074  /* also, even if we are the uber thread, we still have to acquire the lock,
1075  * as we have to contend with sibling threads */
1076 
1077 #if USE_ITT_BUILD
1078  __kmp_itt_critical_acquiring( lck );
1079 #endif /* USE_ITT_BUILD */
1080  // The value of 'crit' should be usable as the critical_id of the critical section directive.
1081  __kmp_acquire_user_lock_with_checks( lck, global_tid );
1082 
1083 #endif // KMP_USE_DYNAMIC_LOCK
1084 
1085 #if USE_ITT_BUILD
1086  __kmp_itt_critical_acquired( lck );
1087 #endif /* USE_ITT_BUILD */
1088 
1089  KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
1090 } // __kmpc_critical
1091 
1101 void
1102 __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
1103 {
1104  kmp_user_lock_p lck;
1105 
1106  KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));
1107 
1108 #if KMP_USE_DYNAMIC_LOCK
1109  if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
1110  lck = (kmp_user_lock_p)crit;
1111  KMP_ASSERT(lck != NULL);
1112  if (__kmp_env_consistency_check) {
1113  __kmp_pop_sync(global_tid, ct_critical, loc);
1114  }
1115 # if USE_ITT_BUILD
1116  __kmp_itt_critical_releasing( lck );
1117 # endif
1118 # if DYNA_USE_FAST_TAS
1119  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1120  DYNA_RELEASE_TAS_LOCK(lck, global_tid);
1121  } else
1122 # elif DYNA_USE_FAST_FUTEX
1123  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1124  DYNA_RELEASE_FUTEX_LOCK(lck, global_tid);
1125  } else
1126 # endif
1127  {
1128  DYNA_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1129  }
1130  } else {
1131  kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1132  KMP_ASSERT(ilk != NULL);
1133  lck = ilk->lock;
1134  if (__kmp_env_consistency_check) {
1135  __kmp_pop_sync(global_tid, ct_critical, loc);
1136  }
1137 # if USE_ITT_BUILD
1138  __kmp_itt_critical_releasing( lck );
1139 # endif
1140  DYNA_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1141  }
1142 
1143 #else // KMP_USE_DYNAMIC_LOCK
1144 
1145  if ( ( __kmp_user_lock_kind == lk_tas )
1146  && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1147  lck = (kmp_user_lock_p)crit;
1148  }
1149 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1150  else if ( ( __kmp_user_lock_kind == lk_futex )
1151  && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1152  lck = (kmp_user_lock_p)crit;
1153  }
1154 #endif
1155  else { // ticket, queuing or drdpa
1156  lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
1157  }
1158 
1159  KMP_ASSERT(lck != NULL);
1160 
1161  if ( __kmp_env_consistency_check )
1162  __kmp_pop_sync( global_tid, ct_critical, loc );
1163 
1164 #if USE_ITT_BUILD
1165  __kmp_itt_critical_releasing( lck );
1166 #endif /* USE_ITT_BUILD */
1167  // The value of 'crit' should be usable as the critical_id of the critical section directive.
1168  __kmp_release_user_lock_with_checks( lck, global_tid );
1169 
1170 #endif // KMP_USE_DYNAMIC_LOCK
1171 
1172  KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
1173 }
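/*
 * A sketch of the code a compiler may emit for a named critical section. The
 * lock word is a zero-initialized kmp_critical_name; critical sections that
 * share a name must share this word (variable names here are assumed):
 *
 *     static kmp_critical_name crit_name_lock;   // zero-initialized
 *
 *     // #pragma omp critical (name)
 *     __kmpc_critical( &loc_crit, global_tid, &crit_name_lock );
 *     // ... body ...
 *     __kmpc_end_critical( &loc_crit, global_tid, &crit_name_lock );
 */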
1174 
1183 kmp_int32
1184 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
1185 {
1186  int status;
1187 
1188  KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );
1189 
1190  if (! TCR_4(__kmp_init_parallel))
1191  __kmp_parallel_initialize();
1192 
1193  if ( __kmp_env_consistency_check )
1194  __kmp_check_barrier( global_tid, ct_barrier, loc );
1195 
1196 #if USE_ITT_NOTIFY
1197  __kmp_threads[global_tid]->th.th_ident = loc;
1198 #endif
1199  status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );
1200 
1201  return (status != 0) ? 0 : 1;
1202 }
1203 
1213 void
1214 __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
1215 {
1216  KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid ));
1217 
1218  __kmp_end_split_barrier ( bs_plain_barrier, global_tid );
1219 }
1220 
1231 kmp_int32
1232 __kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid )
1233 {
1234  kmp_int32 ret;
1235 
1236  KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));
1237 
1238  if (! TCR_4(__kmp_init_parallel))
1239  __kmp_parallel_initialize();
1240 
1241  if ( __kmp_env_consistency_check ) {
1242  if ( loc == 0 ) {
1243  KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
1244  }
1245  __kmp_check_barrier( global_tid, ct_barrier, loc );
1246  }
1247 
1248 #if USE_ITT_NOTIFY
1249  __kmp_threads[global_tid]->th.th_ident = loc;
1250 #endif
1251  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
1252 
1253  ret = __kmpc_master (loc, global_tid);
1254 
1255  if ( __kmp_env_consistency_check ) {
1256  /* there's no __kmpc_end_master called; so the (stats) */
1257  /* actions of __kmpc_end_master are done here */
1258 
1259  if ( global_tid < 0 ) {
1260  KMP_WARNING( ThreadIdentInvalid );
1261  }
1262  if (ret) {
1263  /* only one thread should do the pop since only */
1264  /* one did the push (see __kmpc_master()) */
1265 
1266  __kmp_pop_sync( global_tid, ct_master, loc );
1267  }
1268  }
1269 
1270  return (ret);
1271 }
1272 
1273 /* The BARRIER for a SINGLE process section is always explicit */
1285 kmp_int32
1286 __kmpc_single(ident_t *loc, kmp_int32 global_tid)
1287 {
1288  KMP_COUNT_BLOCK(OMP_SINGLE);
1289  kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
1290  return rc;
1291 }
1292 
1302 void
1303 __kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
1304 {
1305  __kmp_exit_single( global_tid );
1306 }
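/*
 * A sketch of the lowering of a single construct (ident and body assumed).
 * __kmpc_single() returns 1 only for the thread that executes the block; the
 * implicit barrier at the end, unless nowait was specified, is a separate
 * __kmpc_barrier call:
 *
 *     if ( __kmpc_single( &loc_single, global_tid ) ) {
 *         // ... body of "#pragma omp single" ...
 *         __kmpc_end_single( &loc_single, global_tid );
 *     }
 *     __kmpc_barrier( &loc_single, global_tid );   // omitted for "nowait"
 */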
1307 
1315 void
1316 __kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid )
1317 {
1318  KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1319 
1320  if ( __kmp_env_consistency_check )
1321  __kmp_pop_workshare( global_tid, ct_pdo, loc );
1322 }
1323 
1324 /*
1325  * User routines which take C-style arguments (call by value)
1326  * different from the Fortran equivalent routines
1327  */
1328 
1329 void
1330 ompc_set_num_threads( int arg )
1331 {
1332 // !!!!! TODO: check the per-task binding
1333  __kmp_set_num_threads( arg, __kmp_entry_gtid() );
1334 }
1335 
1336 void
1337 ompc_set_dynamic( int flag )
1338 {
1339  kmp_info_t *thread;
1340 
1341  /* For the thread-private implementation of the internal controls */
1342  thread = __kmp_entry_thread();
1343 
1344  __kmp_save_internal_controls( thread );
1345 
1346  set__dynamic( thread, flag ? TRUE : FALSE );
1347 }
1348 
1349 void
1350 ompc_set_nested( int flag )
1351 {
1352  kmp_info_t *thread;
1353 
1354  /* For the thread-private internal controls implementation */
1355  thread = __kmp_entry_thread();
1356 
1357  __kmp_save_internal_controls( thread );
1358 
1359  set__nested( thread, flag ? TRUE : FALSE );
1360 }
1361 
1362 void
1363 ompc_set_max_active_levels( int max_active_levels )
1364 {
1365  /* TO DO */
1366  /* we want per-task implementation of this internal control */
1367 
1368  /* For the per-thread internal controls implementation */
1369  __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels );
1370 }
1371 
1372 void
1373 ompc_set_schedule( omp_sched_t kind, int modifier )
1374 {
1375 // !!!!! TODO: check the per-task binding
1376  __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );
1377 }
1378 
1379 int
1380 ompc_get_ancestor_thread_num( int level )
1381 {
1382  return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level );
1383 }
1384 
1385 int
1386 ompc_get_team_size( int level )
1387 {
1388  return __kmp_get_team_size( __kmp_entry_gtid(), level );
1389 }
1390 
1391 void
1392 kmpc_set_stacksize( int arg )
1393 {
1394  // __kmp_aux_set_stacksize initializes the library if needed
1395  __kmp_aux_set_stacksize( arg );
1396 }
1397 
1398 void
1399 kmpc_set_stacksize_s( size_t arg )
1400 {
1401  // __kmp_aux_set_stacksize initializes the library if needed
1402  __kmp_aux_set_stacksize( arg );
1403 }
1404 
1405 void
1406 kmpc_set_blocktime( int arg )
1407 {
1408  int gtid, tid;
1409  kmp_info_t *thread;
1410 
1411  gtid = __kmp_entry_gtid();
1412  tid = __kmp_tid_from_gtid(gtid);
1413  thread = __kmp_thread_from_gtid(gtid);
1414 
1415  __kmp_aux_set_blocktime( arg, thread, tid );
1416 }
1417 
1418 void
1419 kmpc_set_library( int arg )
1420 {
1421  // __kmp_user_set_library initializes the library if needed
1422  __kmp_user_set_library( (enum library_type)arg );
1423 }
1424 
1425 void
1426 kmpc_set_defaults( char const * str )
1427 {
1428  // __kmp_aux_set_defaults initializes the library if needed
1429  __kmp_aux_set_defaults( str, KMP_STRLEN( str ) );
1430 }
1431 
1432 int
1433 kmpc_set_affinity_mask_proc( int proc, void **mask )
1434 {
1435 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1436  return -1;
1437 #else
1438  if ( ! TCR_4(__kmp_init_middle) ) {
1439  __kmp_middle_initialize();
1440  }
1441  return __kmp_aux_set_affinity_mask_proc( proc, mask );
1442 #endif
1443 }
1444 
1445 int
1446 kmpc_unset_affinity_mask_proc( int proc, void **mask )
1447 {
1448 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1449  return -1;
1450 #else
1451  if ( ! TCR_4(__kmp_init_middle) ) {
1452  __kmp_middle_initialize();
1453  }
1454  return __kmp_aux_unset_affinity_mask_proc( proc, mask );
1455 #endif
1456 }
1457 
1458 int
1459 kmpc_get_affinity_mask_proc( int proc, void **mask )
1460 {
1461 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1462  return -1;
1463 #else
1464  if ( ! TCR_4(__kmp_init_middle) ) {
1465  __kmp_middle_initialize();
1466  }
1467  return __kmp_aux_get_affinity_mask_proc( proc, mask );
1468 #endif
1469 }
1470 
1471 
1472 /* -------------------------------------------------------------------------- */
1513 void
1514 __kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit )
1515 {
1516  void **data_ptr;
1517 
1518  KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid ));
1519 
1520  KMP_MB();
1521 
1522  data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;
1523 
1524  if ( __kmp_env_consistency_check ) {
1525  if ( loc == 0 ) {
1526  KMP_WARNING( ConstructIdentInvalid );
1527  }
1528  }
1529 
1530  /* ToDo: Optimize the following two barriers into some kind of split barrier */
1531 
1532  if (didit) *data_ptr = cpy_data;
1533 
1534  /* This barrier is not a barrier region boundary */
1535 #if USE_ITT_NOTIFY
1536  __kmp_threads[gtid]->th.th_ident = loc;
1537 #endif
1538  __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1539 
1540  if (! didit) (*cpy_func)( cpy_data, *data_ptr );
1541 
1542  /* Consider next barrier the user-visible barrier for barrier region boundaries */
1543  /* Nesting checks are already handled by the single construct checks */
1544 
1545 #if USE_ITT_NOTIFY
1546  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. tasks can overwrite the location)
1547 #endif
1548  __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1549 }
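/*
 * A sketch of a copyprivate lowering built on the routine above. The thread
 * that executed the single block passes didit=1 so the address of its private
 * copy is broadcast; every other thread then runs cpy_func to copy the data
 * into its own private variable (copy_int, priv_x and loc_cp are assumed
 * names for this sketch):
 *
 *     static void copy_int( void *dst, void *src ) { *(int *)dst = *(int *)src; }
 *
 *     kmp_int32 didit = __kmpc_single( &loc_cp, gtid );
 *     if ( didit ) {
 *         priv_x = 42;                         // body of the single block
 *         __kmpc_end_single( &loc_cp, gtid );
 *     }
 *     __kmpc_copyprivate( &loc_cp, gtid, sizeof(int), &priv_x, copy_int, didit );
 */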
1550 
1551 /* -------------------------------------------------------------------------- */
1552 
1553 #define INIT_LOCK __kmp_init_user_lock_with_checks
1554 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
1555 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
1556 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
1557 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
1558 #define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed
1559 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
1560 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
1561 #define TEST_LOCK __kmp_test_user_lock_with_checks
1562 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
1563 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
1564 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
1565 
1566 /*
1567  * TODO: Make check abort messages use location info & pass it
1568  * into with_checks routines
1569  */
1570 
1571 /* initialize the lock */
1572 void
1573 __kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1574 #if KMP_USE_DYNAMIC_LOCK
1575  KMP_DEBUG_ASSERT(__kmp_init_serial);
1576  if (__kmp_env_consistency_check && user_lock == NULL) {
1577  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
1578  }
1579  if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
1580  DYNA_INIT_D_LOCK(user_lock, __kmp_user_lock_seq);
1581 # if USE_ITT_BUILD
1582  __kmp_itt_lock_creating((kmp_user_lock_p)user_lock, NULL);
1583 # endif
1584  } else {
1585  DYNA_INIT_I_LOCK(user_lock, __kmp_user_lock_seq);
1586  kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
1587  DYNA_SET_I_LOCK_LOCATION(ilk, loc);
1588 # if USE_ITT_BUILD
1589  __kmp_itt_lock_creating(ilk->lock, loc);
1590 # endif
1591  }
1592 
1593 #else // KMP_USE_DYNAMIC_LOCK
1594 
1595  static char const * const func = "omp_init_lock";
1596  kmp_user_lock_p lck;
1597  KMP_DEBUG_ASSERT( __kmp_init_serial );
1598 
1599  if ( __kmp_env_consistency_check ) {
1600  if ( user_lock == NULL ) {
1601  KMP_FATAL( LockIsUninitialized, func );
1602  }
1603  }
1604 
1605  KMP_CHECK_USER_LOCK_INIT();
1606 
1607  if ( ( __kmp_user_lock_kind == lk_tas )
1608  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1609  lck = (kmp_user_lock_p)user_lock;
1610  }
1611 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1612  else if ( ( __kmp_user_lock_kind == lk_futex )
1613  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1614  lck = (kmp_user_lock_p)user_lock;
1615  }
1616 #endif
1617  else {
1618  lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
1619  }
1620  INIT_LOCK( lck );
1621  __kmp_set_user_lock_location( lck, loc );
1622 
1623 #if USE_ITT_BUILD
1624  __kmp_itt_lock_creating( lck );
1625 #endif /* USE_ITT_BUILD */
1626 
1627 #endif // KMP_USE_DYNAMIC_LOCK
1628 } // __kmpc_init_lock
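/*
 * A sketch of how user-level OpenMP lock calls can reach these entry points
 * when a compiler routes them through the kmpc interface (whether that
 * routing happens is compiler-dependent; loc_lk and gtid handling are
 * simplified assumptions for this sketch):
 *
 *     omp_lock_t lk;
 *     // omp_init_lock(&lk)    ->  __kmpc_init_lock   ( &loc_lk, gtid, (void **)&lk );
 *     // omp_set_lock(&lk)     ->  __kmpc_set_lock    ( &loc_lk, gtid, (void **)&lk );
 *     // omp_unset_lock(&lk)   ->  __kmpc_unset_lock  ( &loc_lk, gtid, (void **)&lk );
 *     // omp_destroy_lock(&lk) ->  __kmpc_destroy_lock( &loc_lk, gtid, (void **)&lk );
 */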
1629 
1630 /* initialize the lock */
1631 void
1632 __kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1633 #if KMP_USE_DYNAMIC_LOCK
1634 
1635  KMP_DEBUG_ASSERT(__kmp_init_serial);
1636  if (__kmp_env_consistency_check && user_lock == NULL) {
1637  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
1638  }
1639  // Invoke init function after converting to nested version.
1640  kmp_dyna_lockseq_t nested_seq;
1641  switch (__kmp_user_lock_seq) {
1642  case lockseq_tas: nested_seq = lockseq_nested_tas; break;
1643 #if DYNA_HAS_FUTEX
1644  case lockseq_futex: nested_seq = lockseq_nested_futex; break;
1645 #endif
1646  case lockseq_ticket: nested_seq = lockseq_nested_ticket; break;
1647  case lockseq_queuing: nested_seq = lockseq_nested_queuing; break;
1648  case lockseq_drdpa: nested_seq = lockseq_nested_drdpa; break;
1649  default: nested_seq = lockseq_nested_queuing; break;
1650  // Use nested queuing lock for lock kinds without "nested" implementation.
1651  }
1652  DYNA_INIT_I_LOCK(user_lock, nested_seq);
1653  // All nested locks are indirect locks.
1654  kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
1655  DYNA_SET_I_LOCK_LOCATION(ilk, loc);
1656 # if USE_ITT_BUILD
1657  __kmp_itt_lock_creating(ilk->lock, loc);
1658 # endif
1659 
1660 #else // KMP_USE_DYNAMIC_LOCK
1661 
1662  static char const * const func = "omp_init_nest_lock";
1663  kmp_user_lock_p lck;
1664  KMP_DEBUG_ASSERT( __kmp_init_serial );
1665 
1666  if ( __kmp_env_consistency_check ) {
1667  if ( user_lock == NULL ) {
1668  KMP_FATAL( LockIsUninitialized, func );
1669  }
1670  }
1671 
1672  KMP_CHECK_USER_LOCK_INIT();
1673 
1674  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1675  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1676  lck = (kmp_user_lock_p)user_lock;
1677  }
1678 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1679  else if ( ( __kmp_user_lock_kind == lk_futex )
1680  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1681  <= OMP_NEST_LOCK_T_SIZE ) ) {
1682  lck = (kmp_user_lock_p)user_lock;
1683  }
1684 #endif
1685  else {
1686  lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
1687  }
1688 
1689  INIT_NESTED_LOCK( lck );
1690  __kmp_set_user_lock_location( lck, loc );
1691 
1692 #if USE_ITT_BUILD
1693  __kmp_itt_lock_creating( lck );
1694 #endif /* USE_ITT_BUILD */
1695 
1696 #endif // KMP_USE_DYNAMIC_LOCK
1697 } // __kmpc_init_nest_lock
1698 
1699 void
1700 __kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1701 #if KMP_USE_DYNAMIC_LOCK
1702 
1703 # if USE_ITT_BUILD
1704  kmp_user_lock_p lck;
1705  if (DYNA_EXTRACT_D_TAG(user_lock) == 0) {
1706  lck = ((kmp_indirect_lock_t *)DYNA_LOOKUP_I_LOCK(user_lock))->lock;
1707  } else {
1708  lck = (kmp_user_lock_p)user_lock;
1709  }
1710  __kmp_itt_lock_destroyed(lck);
1711 # endif
1712  DYNA_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
1713 #else
1714  kmp_user_lock_p lck;
1715 
1716  if ( ( __kmp_user_lock_kind == lk_tas )
1717  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1718  lck = (kmp_user_lock_p)user_lock;
1719  }
1720 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1721  else if ( ( __kmp_user_lock_kind == lk_futex )
1722  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1723  lck = (kmp_user_lock_p)user_lock;
1724  }
1725 #endif
1726  else {
1727  lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );
1728  }
1729 
1730 #if USE_ITT_BUILD
1731  __kmp_itt_lock_destroyed( lck );
1732 #endif /* USE_ITT_BUILD */
1733  DESTROY_LOCK( lck );
1734 
1735  if ( ( __kmp_user_lock_kind == lk_tas )
1736  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1737  ;
1738  }
1739 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1740  else if ( ( __kmp_user_lock_kind == lk_futex )
1741  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1742  ;
1743  }
1744 #endif
1745  else {
1746  __kmp_user_lock_free( user_lock, gtid, lck );
1747  }
1748 #endif // KMP_USE_DYNAMIC_LOCK
1749 } // __kmpc_destroy_lock
1750 
1751 /* destroy the lock */
1752 void
1753 __kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1754 #if KMP_USE_DYNAMIC_LOCK
1755 
1756 # if USE_ITT_BUILD
1757  kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
1758  __kmp_itt_lock_destroyed(ilk->lock);
1759 # endif
1760  DYNA_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
1761 
1762 #else // KMP_USE_DYNAMIC_LOCK
1763 
1764  kmp_user_lock_p lck;
1765 
1766  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1767  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1768  lck = (kmp_user_lock_p)user_lock;
1769  }
1770 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1771  else if ( ( __kmp_user_lock_kind == lk_futex )
1772  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1773  <= OMP_NEST_LOCK_T_SIZE ) ) {
1774  lck = (kmp_user_lock_p)user_lock;
1775  }
1776 #endif
1777  else {
1778  lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );
1779  }
1780 
1781 #if USE_ITT_BUILD
1782  __kmp_itt_lock_destroyed( lck );
1783 #endif /* USE_ITT_BUILD */
1784 
1785  DESTROY_NESTED_LOCK( lck );
1786 
1787  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1788  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1789  ;
1790  }
1791 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1792  else if ( ( __kmp_user_lock_kind == lk_futex )
1793  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1794  <= OMP_NEST_LOCK_T_SIZE ) ) {
1795  ;
1796  }
1797 #endif
1798  else {
1799  __kmp_user_lock_free( user_lock, gtid, lck );
1800  }
1801 #endif // KMP_USE_DYNAMIC_LOCK
1802 } // __kmpc_destroy_nest_lock
1803 
1804 void
1805 __kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1806  KMP_COUNT_BLOCK(OMP_set_lock);
1807 #if KMP_USE_DYNAMIC_LOCK
1808  int tag = DYNA_EXTRACT_D_TAG(user_lock);
1809 # if USE_ITT_BUILD
1810  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); // itt function will get to the right lock object.
1811 # endif
1812 # if DYNA_USE_FAST_TAS
1813  if (tag == locktag_tas && !__kmp_env_consistency_check) {
1814  DYNA_ACQUIRE_TAS_LOCK(user_lock, gtid);
1815  } else
1816 # elif DYNA_USE_FAST_FUTEX
1817  if (tag == locktag_futex && !__kmp_env_consistency_check) {
1818  DYNA_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
1819  } else
1820 # endif
1821  {
1822  __kmp_direct_set_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
1823  }
1824 # if USE_ITT_BUILD
1825  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
1826 # endif
1827 
1828 #else // KMP_USE_DYNAMIC_LOCK
1829 
1830  kmp_user_lock_p lck;
1831 
1832  if ( ( __kmp_user_lock_kind == lk_tas )
1833  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1834  lck = (kmp_user_lock_p)user_lock;
1835  }
1836 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1837  else if ( ( __kmp_user_lock_kind == lk_futex )
1838  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1839  lck = (kmp_user_lock_p)user_lock;
1840  }
1841 #endif
1842  else {
1843  lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
1844  }
1845 
1846 #if USE_ITT_BUILD
1847  __kmp_itt_lock_acquiring( lck );
1848 #endif /* USE_ITT_BUILD */
1849 
1850  ACQUIRE_LOCK( lck, gtid );
1851 
1852 #if USE_ITT_BUILD
1853  __kmp_itt_lock_acquired( lck );
1854 #endif /* USE_ITT_BUILD */
1855 
1856 #endif // KMP_USE_DYNAMIC_LOCK
1857 }
1858 
1859 void
1860 __kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1861 #if KMP_USE_DYNAMIC_LOCK
1862 
1863 # if USE_ITT_BUILD
1864  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
1865 # endif
1866  DYNA_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
1867 # if USE_ITT_BUILD
1868  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
1869 #endif
1870 
1871 #else // KMP_USE_DYNAMIC_LOCK
1872  kmp_user_lock_p lck;
1873 
1874  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1875  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1876  lck = (kmp_user_lock_p)user_lock;
1877  }
1878 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1879  else if ( ( __kmp_user_lock_kind == lk_futex )
1880  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1881  <= OMP_NEST_LOCK_T_SIZE ) ) {
1882  lck = (kmp_user_lock_p)user_lock;
1883  }
1884 #endif
1885  else {
1886  lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );
1887  }
1888 
1889 #if USE_ITT_BUILD
1890  __kmp_itt_lock_acquiring( lck );
1891 #endif /* USE_ITT_BUILD */
1892 
1893  ACQUIRE_NESTED_LOCK( lck, gtid );
1894 
1895 #if USE_ITT_BUILD
1896  __kmp_itt_lock_acquired( lck );
1897 #endif /* USE_ITT_BUILD */
1898 #endif // KMP_USE_DYNAMIC_LOCK
1899 }
1900 
1901 void
1902 __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
1903 {
1904 #if KMP_USE_DYNAMIC_LOCK
1905 
1906  int tag = DYNA_EXTRACT_D_TAG(user_lock);
1907 # if USE_ITT_BUILD
1908  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
1909 # endif
1910 # if DYNA_USE_FAST_TAS
1911  if (tag == locktag_tas && !__kmp_env_consistency_check) {
1912  DYNA_RELEASE_TAS_LOCK(user_lock, gtid);
1913  } else
1914 # elif DYNA_USE_FAST_FUTEX
1915  if (tag == locktag_futex && !__kmp_env_consistency_check) {
1916  DYNA_RELEASE_FUTEX_LOCK(user_lock, gtid);
1917  } else
1918 # endif
1919  {
1920  __kmp_direct_unset_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
1921  }
1922 
1923 #else // KMP_USE_DYNAMIC_LOCK
1924 
1925  kmp_user_lock_p lck;
1926 
1927  /* Can't use serial interval since not block structured */
1928  /* release the lock */
1929 
1930  if ( ( __kmp_user_lock_kind == lk_tas )
1931  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1932 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1933  // "fast" path implemented to fix customer performance issue
1934 #if USE_ITT_BUILD
1935  __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
1936 #endif /* USE_ITT_BUILD */
1937  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
1938  KMP_MB();
1939  return;
1940 #else
1941  lck = (kmp_user_lock_p)user_lock;
1942 #endif
1943  }
1944 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1945  else if ( ( __kmp_user_lock_kind == lk_futex )
1946  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1947  lck = (kmp_user_lock_p)user_lock;
1948  }
1949 #endif
1950  else {
1951  lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );
1952  }
1953 
1954 #if USE_ITT_BUILD
1955  __kmp_itt_lock_releasing( lck );
1956 #endif /* USE_ITT_BUILD */
1957 
1958  RELEASE_LOCK( lck, gtid );
1959 
1960 #endif // KMP_USE_DYNAMIC_LOCK
1961 }
1962 
1963 /* release the lock */
1964 void
1965 __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
1966 {
1967 #if KMP_USE_DYNAMIC_LOCK
1968 
1969 # if USE_ITT_BUILD
1970  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
1971 # endif
1972  DYNA_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
1973 
1974 #else // KMP_USE_DYNAMIC_LOCK
1975 
1976  kmp_user_lock_p lck;
1977 
1978  /* Can't use serial interval since not block structured */
1979 
1980  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1981  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1982 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1983  // "fast" path implemented to fix customer performance issue
1984  kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
1985 #if USE_ITT_BUILD
1986  __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
1987 #endif /* USE_ITT_BUILD */
1988  if ( --(tl->lk.depth_locked) == 0 ) {
1989  TCW_4(tl->lk.poll, 0);
1990  }
1991  KMP_MB();
1992  return;
1993 #else
1994  lck = (kmp_user_lock_p)user_lock;
1995 #endif
1996  }
1997 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1998  else if ( ( __kmp_user_lock_kind == lk_futex )
1999  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2000  <= OMP_NEST_LOCK_T_SIZE ) ) {
2001  lck = (kmp_user_lock_p)user_lock;
2002  }
2003 #endif
2004  else {
2005  lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );
2006  }
2007 
2008 #if USE_ITT_BUILD
2009  __kmp_itt_lock_releasing( lck );
2010 #endif /* USE_ITT_BUILD */
2011 
2012  RELEASE_NESTED_LOCK( lck, gtid );
2013 
2014 #endif // KMP_USE_DYNAMIC_LOCK
2015 }
2016 
2017 /* try to acquire the lock */
2018 int
2019 __kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2020 {
2021  KMP_COUNT_BLOCK(OMP_test_lock);
2022  KMP_TIME_BLOCK(OMP_test_lock);
2023 
2024 #if KMP_USE_DYNAMIC_LOCK
2025  int rc;
2026  int tag = DYNA_EXTRACT_D_TAG(user_lock);
2027 # if USE_ITT_BUILD
2028  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2029 # endif
2030 # if DYNA_USE_FAST_TAS
2031  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2032  DYNA_TEST_TAS_LOCK(user_lock, gtid, rc);
2033  } else
2034 # elif DYNA_USE_FAST_FUTEX
2035  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2036  DYNA_TEST_FUTEX_LOCK(user_lock, gtid, rc);
2037  } else
2038 # endif
2039  {
2040  rc = __kmp_direct_test_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2041  }
2042  if (rc) {
2043 # if USE_ITT_BUILD
2044  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2045 # endif
2046  return FTN_TRUE;
2047  } else {
2048 # if USE_ITT_BUILD
2049  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2050 # endif
2051  return FTN_FALSE;
2052  }
2053 
2054 #else // KMP_USE_DYNAMIC_LOCK
2055 
2056  kmp_user_lock_p lck;
2057  int rc;
2058 
2059  if ( ( __kmp_user_lock_kind == lk_tas )
2060  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2061  lck = (kmp_user_lock_p)user_lock;
2062  }
2063 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2064  else if ( ( __kmp_user_lock_kind == lk_futex )
2065  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
2066  lck = (kmp_user_lock_p)user_lock;
2067  }
2068 #endif
2069  else {
2070  lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );
2071  }
2072 
2073 #if USE_ITT_BUILD
2074  __kmp_itt_lock_acquiring( lck );
2075 #endif /* USE_ITT_BUILD */
2076 
2077  rc = TEST_LOCK( lck, gtid );
2078 #if USE_ITT_BUILD
2079  if ( rc ) {
2080  __kmp_itt_lock_acquired( lck );
2081  } else {
2082  __kmp_itt_lock_cancelled( lck );
2083  }
2084 #endif /* USE_ITT_BUILD */
2085  return ( rc ? FTN_TRUE : FTN_FALSE );
2086 
2087  /* Can't use serial interval since not block structured */
2088 
2089 #endif // KMP_USE_DYNAMIC_LOCK
2090 }
2091 
2092 /* try to acquire the lock */
2093 int
2094 __kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
2095 {
2096 #if KMP_USE_DYNAMIC_LOCK
2097  int rc;
2098 # if USE_ITT_BUILD
2099  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2100 # endif
2101  rc = DYNA_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
2102 # if USE_ITT_BUILD
2103  if (rc) {
2104  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2105  } else {
2106  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2107  }
2108 # endif
2109  return rc;
2110 
2111 #else // KMP_USE_DYNAMIC_LOCK
2112 
2113  kmp_user_lock_p lck;
2114  int rc;
2115 
2116  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
2117  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
2118  lck = (kmp_user_lock_p)user_lock;
2119  }
2120 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2121  else if ( ( __kmp_user_lock_kind == lk_futex )
2122  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
2123  <= OMP_NEST_LOCK_T_SIZE ) ) {
2124  lck = (kmp_user_lock_p)user_lock;
2125  }
2126 #endif
2127  else {
2128  lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );
2129  }
2130 
2131 #if USE_ITT_BUILD
2132  __kmp_itt_lock_acquiring( lck );
2133 #endif /* USE_ITT_BUILD */
2134 
2135  rc = TEST_NESTED_LOCK( lck, gtid );
2136 #if USE_ITT_BUILD
2137  if ( rc ) {
2138  __kmp_itt_lock_acquired( lck );
2139  } else {
2140  __kmp_itt_lock_cancelled( lck );
2141  }
2142 #endif /* USE_ITT_BUILD */
2143  return rc;
2144 
2145  /* Can't use serial interval since not block structured */
2146 
2147 #endif // KMP_USE_DYNAMIC_LOCK
2148 }
2149 
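/*
 * Illustrative sketch (not part of kmp_csupport.c): the user-level OpenMP lock
 * API that the __kmpc_set_lock / __kmpc_unset_lock / __kmpc_test_lock family of
 * entry points above is assumed to back when the compiler lowers
 * omp_set_lock / omp_unset_lock / omp_test_lock; the exact lowering is
 * compiler-specific, and this example is illustrative only.
 */
#if 0   /* example only -- not compiled as part of the runtime */
#include <omp.h>
#include <stdio.h>

static omp_lock_t counter_lock;
static int counter = 0;

void example_locks( void )
{
    omp_init_lock( &counter_lock );
    #pragma omp parallel
    {
        /* blocking acquire/release of a simple lock */
        omp_set_lock( &counter_lock );
        ++counter;
        omp_unset_lock( &counter_lock );

        /* non-blocking attempt; nonzero means the lock was acquired
           (compare the FTN_TRUE / FTN_FALSE returns of __kmpc_test_lock) */
        if ( omp_test_lock( &counter_lock ) ) {
            printf( "T#%d acquired the lock without waiting\n", omp_get_thread_num() );
            omp_unset_lock( &counter_lock );
        }
    }
    omp_destroy_lock( &counter_lock );
}
#endif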
2150 
2151 /*--------------------------------------------------------------------------------------------------------------------*/
2152 
2153 /*
2154  * Interface to the fast scalable reduce routines
2155  */
2156 
2157 // Keep the selected method in a thread-local structure for cross-function usage: it will be read back in the __kmpc_end_reduce* functions.
2158 // An alternative would be to re-determine the method in the __kmpc_end_reduce* functions (a new prototype would be required then).
2159 // AT: which solution is better?
2160 #define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
2161  ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) )
2162 
2163 #define __KMP_GET_REDUCTION_METHOD(gtid) \
2164  ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method )
2165 
2166 // description of the packed_reduction_method variable: look at the macros in kmp.h
2167 
2168 
2169 // used in a critical section reduce block
2170 static __forceinline void
2171 __kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2172 
2173  // this lock was visible to a customer and to the Intel(R) Thread Profiler as a serial overhead span
2174  // (although it is used for an internal purpose only)
2175  // why was it visible in the previous implementation?
2176  // should we keep it visible in the new reduce block?
2177  kmp_user_lock_p lck;
2178 
2179 #if KMP_USE_DYNAMIC_LOCK
2180 
2181  if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
2182  lck = (kmp_user_lock_p)crit;
2183  if (*((kmp_dyna_lock_t *)lck) == 0) {
2184  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)lck, 0, DYNA_GET_D_TAG(__kmp_user_lock_seq));
2185  }
2186  KMP_DEBUG_ASSERT(lck != NULL);
2187  if (__kmp_env_consistency_check) {
2188  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
2189  }
2190  DYNA_D_LOCK_FUNC(lck, set)((kmp_dyna_lock_t *)lck, global_tid);
2191  } else {
2192  kmp_indirect_lock_t *ilk = __kmp_get_indirect_csptr(crit, loc, global_tid, __kmp_user_lock_seq);
2193  KMP_DEBUG_ASSERT(ilk != NULL);
2194  if (__kmp_env_consistency_check) {
2195  __kmp_push_sync(global_tid, ct_critical, loc, ilk->lock, __kmp_user_lock_seq);
2196  }
2197  DYNA_I_LOCK_FUNC(ilk, set)(ilk->lock, global_tid);
2198  }
2199 
2200 #else // KMP_USE_DYNAMIC_LOCK
2201 
2202  // We know that the fast reduction code is only emitted by Intel compilers
2203  // with 32 byte critical sections. If there isn't enough space, then we
2204  // have to use a pointer.
2205  if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
2206  lck = (kmp_user_lock_p)crit;
2207  }
2208  else {
2209  lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
2210  }
2211  KMP_DEBUG_ASSERT( lck != NULL );
2212 
2213  if ( __kmp_env_consistency_check )
2214  __kmp_push_sync( global_tid, ct_critical, loc, lck );
2215 
2216  __kmp_acquire_user_lock_with_checks( lck, global_tid );
2217 
2218 #endif // KMP_USE_DYNAMIC_LOCK
2219 }
2220 
2221 // used in a critical section reduce block
2222 static __forceinline void
2223 __kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
2224 
2225  kmp_user_lock_p lck;
2226 
2227 #if KMP_USE_DYNAMIC_LOCK
2228 
2229  if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
2230  lck = (kmp_user_lock_p)crit;
2231  if (__kmp_env_consistency_check)
2232  __kmp_pop_sync(global_tid, ct_critical, loc);
2233  DYNA_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
2234  } else {
2235  kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
2236  if (__kmp_env_consistency_check)
2237  __kmp_pop_sync(global_tid, ct_critical, loc);
2238  DYNA_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
2239  }
2240 
2241 #else // KMP_USE_DYNAMIC_LOCK
2242 
2243  // We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical
2244  // sections. If there isn't enough space, then we have to use a pointer.
2245  if ( __kmp_base_user_lock_size > 32 ) {
2246  lck = *( (kmp_user_lock_p *) crit );
2247  KMP_ASSERT( lck != NULL );
2248  } else {
2249  lck = (kmp_user_lock_p) crit;
2250  }
2251 
2252  if ( __kmp_env_consistency_check )
2253  __kmp_pop_sync( global_tid, ct_critical, loc );
2254 
2255  __kmp_release_user_lock_with_checks( lck, global_tid );
2256 
2257 #endif // KMP_USE_DYNAMIC_LOCK
2258 } // __kmp_end_critical_section_reduce_block
2259 
2260 
2261 /* 2.a.i. Reduce Block without a terminating barrier */
2275 kmp_int32
2276 __kmpc_reduce_nowait(
2277  ident_t *loc, kmp_int32 global_tid,
2278  kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
2279  kmp_critical_name *lck ) {
2280 
2281  KMP_COUNT_BLOCK(REDUCE_nowait);
2282  int retval;
2283  PACKED_REDUCTION_METHOD_T packed_reduction_method;
2284 #if OMP_40_ENABLED
2285  kmp_team_t *team;
2286  kmp_info_t *th;
2287  int teams_swapped = 0, task_state;
2288 #endif
2289  KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );
2290 
2291  // why do we need this initialization here at all?
2292  // The reduction clause cannot be used as a stand-alone directive.
2293 
2294  // do not call __kmp_serial_initialize(); it will be called by __kmp_parallel_initialize() if needed
2295  // possible detection of a false-positive race by the threadchecker ???
2296  if( ! TCR_4( __kmp_init_parallel ) )
2297  __kmp_parallel_initialize();
2298 
2299  // check correctness of reduce block nesting
2300 #if KMP_USE_DYNAMIC_LOCK
2301  if ( __kmp_env_consistency_check )
2302  __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2303 #else
2304  if ( __kmp_env_consistency_check )
2305  __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
2306 #endif
2307 
2308 #if OMP_40_ENABLED
2309  th = __kmp_thread_from_gtid(global_tid);
2310  if( th->th.th_teams_microtask ) { // AC: check if we are inside the teams construct?
2311  team = th->th.th_team;
2312  if( team->t.t_level == th->th.th_teams_level ) {
2313  // this is a reduction at the teams construct
2314  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
2315  // Let's swap teams temporarily for the reduction barrier
2316  teams_swapped = 1;
2317  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2318  th->th.th_team = team->t.t_parent;
2319  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
2320  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
2321  task_state = th->th.th_task_state;
2322  th->th.th_task_state = 0;
2323  }
2324  }
2325 #endif // OMP_40_ENABLED
2326 
2327  // The packed_reduction_method value will be reused by the __kmp_end_reduce* function, so it should be kept in a variable.
2328  // That variable should be either a construct-specific or a thread-specific property, not a team-specific one
2329  // (a thread can reach the next reduce block on the next construct, and the reduce method may differ on the next construct).
2330  // An ident_t "loc" parameter could be used as a construct-specific property (but what if loc == 0?).
2331  // (If a construct-specific or team-specific variable were shared, unnecessary extra syncs would be needed.)
2332  // A thread-specific variable is better with regard to the two issues above (next construct and extra syncs);
2333  // a thread-specific "th_local.reduction_method" variable is used currently.
2334  // Each thread executes the 'determine' and 'set' lines (there is no need to restrict this to one thread; this avoids unnecessary extra syncs).
2336  packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2337  __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2338 
2339  if( packed_reduction_method == critical_reduce_block ) {
2340 
2341  __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2342  retval = 1;
2343 
2344  } else if( packed_reduction_method == empty_reduce_block ) {
2345 
2346  // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2347  retval = 1;
2348 
2349  } else if( packed_reduction_method == atomic_reduce_block ) {
2350 
2351  retval = 2;
2352 
2353  // all threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen)
2354  // (this is not ideal: the checking block has already been closed by this 'pop', although the atomic operation
2355  // has not been executed yet; it will be executed slightly later, literally on the next instruction)
2356  if ( __kmp_env_consistency_check )
2357  __kmp_pop_sync( global_tid, ct_reduce, loc );
2358 
2359  } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2360 
2361  //AT: performance issue: a real barrier here
2362  //AT: (if the master runs slowly, the other threads are blocked here waiting for the master to come and release them)
2363  //AT: (this is not what a customer might expect when specifying the NOWAIT clause)
2364  //AT: (specifying NOWAIT won't result in a performance improvement here, which will be confusing to a customer)
2365  //AT: another implementation of *barrier_gather*nowait() (or some other design) might go faster
2366  // and be more in line with the sense of NOWAIT
2367  //AT: TO DO: run the EPCC test and compare times
2368 
2369  // this barrier should be invisible to a customer and to the Intel(R) Thread Profiler
2370  // (it's neither a terminating barrier nor customer's code, it's used for an internal purpose)
2371 #if USE_ITT_NOTIFY
2372  __kmp_threads[global_tid]->th.th_ident = loc;
2373 #endif
2374  retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
2375  retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2376 
2377  // all workers except the master should do this pop here
2378  // ( none of the other workers will get to __kmpc_end_reduce_nowait() )
2379  if ( __kmp_env_consistency_check ) {
2380  if( retval == 0 ) {
2381  __kmp_pop_sync( global_tid, ct_reduce, loc );
2382  }
2383  }
2384 
2385  } else {
2386 
2387  // should never reach this block
2388  KMP_ASSERT( 0 ); // "unexpected method"
2389 
2390  }
2391 #if OMP_40_ENABLED
2392  if( teams_swapped ) {
2393  // Restore thread structure
2394  th->th.th_info.ds.ds_tid = 0;
2395  th->th.th_team = team;
2396  th->th.th_team_nproc = team->t.t_nproc;
2397  th->th.th_task_team = team->t.t_task_team[task_state];
2398  th->th.th_task_state = task_state;
2399  }
2400 #endif
2401  KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2402 
2403  return retval;
2404 }
2405 
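/*
 * Illustrative sketch (not part of kmp_csupport.c): the dispatch a compiler is
 * assumed to emit around __kmpc_reduce_nowait(), based on the return values
 * produced above: 1 -- combine the private copy into the shared variable
 * (critical/empty/tree-master path) and call __kmpc_end_reduce_nowait();
 * 2 -- combine with an atomic update and do not call the end function;
 * 0 -- this thread's data was already combined by the tree barrier.
 * The names example_reduce_nowait_codegen, reduce_sum, shared_sum and my_sum
 * are hypothetical; real code generation is compiler-specific.
 */
#if 0   /* example only -- not compiled as part of the runtime */
static void reduce_sum( void * lhs_data, void * rhs_data )
{
    *(int *)lhs_data += *(int *)rhs_data;
}

void example_reduce_nowait_codegen( ident_t *loc, kmp_int32 gtid, int *shared_sum,
                                    int my_sum, kmp_critical_name *crit )
{
    switch ( __kmpc_reduce_nowait( loc, gtid, 1, sizeof( int ), &my_sum, reduce_sum, crit ) ) {
        case 1:
            /* this thread finalizes the reduction; for the tree method, my_sum
               already holds the value combined across the team by reduce_sum */
            *shared_sum += my_sum;
            __kmpc_end_reduce_nowait( loc, gtid, crit );
            break;
        case 2:
            /* atomic method: each thread combines its own copy atomically
               (real code generation would use the runtime's atomic entry points) */
            #pragma omp atomic
            *shared_sum += my_sum;
            break;
        default:
            /* 0: nothing left to do on this thread */
            break;
    }
}
#endif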
2414 void
2415 __kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2416 
2417  PACKED_REDUCTION_METHOD_T packed_reduction_method;
2418 
2419  KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );
2420 
2421  packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2422 
2423  if( packed_reduction_method == critical_reduce_block ) {
2424 
2425  __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2426 
2427  } else if( packed_reduction_method == empty_reduce_block ) {
2428 
2429  // usage: if team size == 1, no synchronization is required ( on Intel platforms only )
2430 
2431  } else if( packed_reduction_method == atomic_reduce_block ) {
2432 
2433  // neither the master nor the other workers should get here
2434  // (the code gen does not generate this call in case 2: atomic reduce block)
2435  // actually it would be better to remove this else-if entirely;
2436  // after removal this value would be caught by the 'else' branch and would assert
2437 
2438  } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2439 
2440  // only master gets here
2441 
2442  } else {
2443 
2444  // should never reach this block
2445  KMP_ASSERT( 0 ); // "unexpected method"
2446 
2447  }
2448 
2449  if ( __kmp_env_consistency_check )
2450  __kmp_pop_sync( global_tid, ct_reduce, loc );
2451 
2452  KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2453 
2454  return;
2455 }
2456 
2457 /* 2.a.ii. Reduce Block with a terminating barrier */
2458 
2472 kmp_int32
2473 __kmpc_reduce(
2474  ident_t *loc, kmp_int32 global_tid,
2475  kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2476  void (*reduce_func)(void *lhs_data, void *rhs_data),
2477  kmp_critical_name *lck )
2478 {
2479  KMP_COUNT_BLOCK(REDUCE_wait);
2480  int retval;
2481  PACKED_REDUCTION_METHOD_T packed_reduction_method;
2482 
2483  KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) );
2484 
2485  // why do we need this initialization here at all?
2486  // The reduction clause cannot be used as a stand-alone directive.
2487 
2488  // do not call __kmp_serial_initialize(); it will be called by __kmp_parallel_initialize() if needed
2489  // possible detection of a false-positive race by the threadchecker ???
2490  if( ! TCR_4( __kmp_init_parallel ) )
2491  __kmp_parallel_initialize();
2492 
2493  // check correctness of reduce block nesting
2494 #if KMP_USE_DYNAMIC_LOCK
2495  if ( __kmp_env_consistency_check )
2496  __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
2497 #else
2498  if ( __kmp_env_consistency_check )
2499  __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
2500 #endif
2501 
2502  packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2503  __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2504 
2505  if( packed_reduction_method == critical_reduce_block ) {
2506 
2507  __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2508  retval = 1;
2509 
2510  } else if( packed_reduction_method == empty_reduce_block ) {
2511 
2512  // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2513  retval = 1;
2514 
2515  } else if( packed_reduction_method == atomic_reduce_block ) {
2516 
2517  retval = 2;
2518 
2519  } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2520 
2521  //case tree_reduce_block:
2522  // this barrier should be visible to a customer and to the Intel(R) Thread Profiler
2523  // (it's a terminating barrier on constructs if NOWAIT not specified)
2524 #if USE_ITT_NOTIFY
2525  __kmp_threads[global_tid]->th.th_ident = loc; // needed for correct notification of frames
2526 #endif
2527  retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
2528  retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2529 
2530  // all workers except the master should do this pop here
2531  // ( no worker other than the master will enter __kmpc_end_reduce() )
2532  if ( __kmp_env_consistency_check ) {
2533  if( retval == 0 ) { // 0: all other workers; 1: master
2534  __kmp_pop_sync( global_tid, ct_reduce, loc );
2535  }
2536  }
2537 
2538  } else {
2539 
2540  // should never reach this block
2541  KMP_ASSERT( 0 ); // "unexpected method"
2542 
2543  }
2544 
2545  KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2546 
2547  return retval;
2548 }
2549 
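/*
 * Illustrative sketch (not part of kmp_csupport.c): the blocking counterpart of
 * the sketch shown after __kmpc_reduce_nowait(). The dispatch on the return
 * value is the same; the difference is that __kmpc_end_reduce() also provides
 * the terminating barrier of the construct. Whether the compiler calls
 * __kmpc_end_reduce() on the atomic ('2') path as well is an assumption here;
 * the barrier in its atomic branch suggests so, but the exact lowering is
 * compiler-specific. reduce_sum is the hypothetical callback from the earlier
 * sketch.
 */
#if 0   /* example only -- not compiled as part of the runtime */
void example_reduce_codegen( ident_t *loc, kmp_int32 gtid, int *shared_sum,
                             int my_sum, kmp_critical_name *crit )
{
    kmp_int32 rc = __kmpc_reduce( loc, gtid, 1, sizeof( int ), &my_sum, reduce_sum, crit );
    if ( rc == 1 ) {
        *shared_sum += my_sum;                 /* combine under the runtime-provided protection */
        __kmpc_end_reduce( loc, gtid, crit );  /* releases the lock / performs the terminating barrier */
    } else if ( rc == 2 ) {
        #pragma omp atomic
        *shared_sum += my_sum;
        __kmpc_end_reduce( loc, gtid, crit );  /* assumed: provides the terminating barrier */
    }
    /* rc == 0: tree path; this thread is released from the reduction barrier
       once the master calls __kmpc_end_reduce() */
}
#endif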
2559 void
2560 __kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2561 
2562  PACKED_REDUCTION_METHOD_T packed_reduction_method;
2563 
2564  KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );
2565 
2566  packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2567 
2568  // this barrier should be visible to a customer and to the Intel(R) Thread Profiler
2569  // (it's a terminating barrier on constructs if NOWAIT not specified)
2570 
2571  if( packed_reduction_method == critical_reduce_block ) {
2572 
2573  __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2574 
2575  // TODO: implicit barrier: should be exposed
2576 #if USE_ITT_NOTIFY
2577  __kmp_threads[global_tid]->th.th_ident = loc;
2578 #endif
2579  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2580 
2581  } else if( packed_reduction_method == empty_reduce_block ) {
2582 
2583  // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2584 
2585  // TODO: implicit barrier: should be exposed
2586 #if USE_ITT_NOTIFY
2587  __kmp_threads[global_tid]->th.th_ident = loc;
2588 #endif
2589  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2590 
2591  } else if( packed_reduction_method == atomic_reduce_block ) {
2592 
2593  // TODO: implicit barrier: should be exposed
2594 #if USE_ITT_NOTIFY
2595  __kmp_threads[global_tid]->th.th_ident = loc;
2596 #endif
2597  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2598 
2599  } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2600 
2601  // only master executes here (master releases all other workers)
2602  __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );
2603 
2604  } else {
2605 
2606  // should never reach this block
2607  KMP_ASSERT( 0 ); // "unexpected method"
2608 
2609  }
2610 
2611  if ( __kmp_env_consistency_check )
2612  __kmp_pop_sync( global_tid, ct_reduce, loc );
2613 
2614  KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2615 
2616  return;
2617 }
2618 
2619 #undef __KMP_GET_REDUCTION_METHOD
2620 #undef __KMP_SET_REDUCTION_METHOD
2621 
2622 /*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/
2623 
2624 kmp_uint64
2625 __kmpc_get_taskid() {
2626 
2627  kmp_int32 gtid;
2628  kmp_info_t * thread;
2629 
2630  gtid = __kmp_get_gtid();
2631  if ( gtid < 0 ) {
2632  return 0;
2633  }; // if
2634  thread = __kmp_thread_from_gtid( gtid );
2635  return thread->th.th_current_task->td_task_id;
2636 
2637 } // __kmpc_get_taskid
2638 
2639 
2640 kmp_uint64
2641 __kmpc_get_parent_taskid() {
2642 
2643  kmp_int32 gtid;
2644  kmp_info_t * thread;
2645  kmp_taskdata_t * parent_task;
2646 
2647  gtid = __kmp_get_gtid();
2648  if ( gtid < 0 ) {
2649  return 0;
2650  }; // if
2651  thread = __kmp_thread_from_gtid( gtid );
2652  parent_task = thread->th.th_current_task->td_parent;
2653  return ( parent_task == NULL ? 0 : parent_task->td_task_id );
2654 
2655 } // __kmpc_get_parent_taskid
2656 
2657 void __kmpc_place_threads(int nC, int nT, int nO)
2658 {
2659  if ( ! __kmp_init_serial ) {
2660  __kmp_serial_initialize();
2661  }
2662  __kmp_place_num_cores = nC;
2663  __kmp_place_num_threads_per_core = nT;
2664  __kmp_place_core_offset = nO;
2665 }
2666 
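/*
 * Illustrative sketch (not part of kmp_csupport.c): direct use of
 * __kmpc_place_threads(). The arguments map onto the globals assigned above:
 * number of cores to use, number of threads per core, and a starting core
 * offset. These globals are normally driven by the KMP_PLACE_THREADS
 * environment variable; calling the entry point directly before the first
 * parallel region, as sketched here, is assumed to have the same effect.
 */
#if 0   /* example only -- not compiled as part of the runtime */
void example_place_threads( void )
{
    /* use 4 cores with 2 threads per core, starting at core offset 0 */
    __kmpc_place_threads( 4, 2, 0 );
}
#endif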
2667 // end of file //
2668 
Definition: kmp.h:223