#include "kmp_error.h"
#include "kmp_stats.h"
#if KMP_OS_WINDOWS && KMP_ARCH_X86
#include <float.h>
#endif
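/* i_maxmin< T > provides the largest and smallest representable value for each
   supported loop-index type; __kmp_dist_get_bounds() below uses it to clamp a
   per-team upper bound when the chunk arithmetic would otherwise overflow. */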
template< typename T >
struct i_maxmin {
    static const T mx;
    static const T mn;
};
template<>
struct i_maxmin< int > {
    static const int mx = 0x7fffffff;
    static const int mn = 0x80000000;
};
template<>
struct i_maxmin< unsigned int > {
    static const unsigned int mx = 0xffffffff;
    static const unsigned int mn = 0x00000000;
};
template<>
struct i_maxmin< long long > {
    static const long long mx = 0x7fffffffffffffffLL;
    static const long long mn = 0x8000000000000000LL;
};
template<>
struct i_maxmin< unsigned long long > {
    static const unsigned long long mx = 0xffffffffffffffffLL;
    static const unsigned long long mn = 0x0000000000000000LL;
};
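/* When KMP_STATIC_STEAL_ENABLED is defined, the per-thread private descriptor
   carries the extra bookkeeping needed by the kmp_sch_static_steal schedule
   (static_steal_counter plus a count/ub pair that __kmp_dispatch_next() updates
   as one packed 64-bit word); otherwise a plain descriptor without the stealing
   fields is used. */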
#ifdef KMP_STATIC_STEAL_ENABLED

// replaces dispatch_private_info{32,64} structures and dispatch_private_info{32,64}_t types
template< typename T >
struct dispatch_private_infoXX_template {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    // ... (count, ub, lb, st, tc fields elided)
    T  static_steal_counter;    // for static_steal only; bumped when this thread may be stolen from
    // ...
    struct KMP_ALIGN( 32 ) {    // parm1-4 kept together in one 32-byte aligned block (one cache line)
        T  parm1;
        T  parm2;
        T  parm3;
        T  parm4;
    };
    // ... (ordered bounds and remaining fields elided)
};

#else /* KMP_STATIC_STEAL_ENABLED */

// replaces dispatch_private_info{32,64} structures and dispatch_private_info{32,64}_t types
template< typename T >
struct dispatch_private_infoXX_template {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    // ... (fields elided)
};

#endif /* KMP_STATIC_STEAL_ENABLED */
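/* The templated descriptors below must stay layout-compatible with the untyped
   dispatch_private_info / dispatch_shared_info structures declared in kmp.h;
   __kmp_dispatch_init() asserts with KMP_BUILD_ASSERT that the sizes match
   before casting between them. */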
template< typename T >
struct KMP_ALIGN_CACHE dispatch_private_info_template {
    // duplicate alignment here, because unioned structures could be aligned differently
    union KMP_ALIGN_CACHE private_info_tmpl {
        dispatch_private_infoXX_template< T > p;
        dispatch_private_info64_t             p64;
    } u;
    enum sched_type schedule;       /* scheduling algorithm */
    kmp_uint32      ordered;        /* ordered clause specified */
    kmp_uint32      ordered_bumped;
    kmp_int32       ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size
    dispatch_private_info * next;   /* stack of buffers for nest of serial regions */
    kmp_uint32      nomerge;        /* don't merge iters if serialized */
    kmp_uint32      type_size;
    enum cons_type  pushed_ws;
};
// replaces dispatch_shared_info{32,64} structures and dispatch_shared_info{32,64}_t types
template< typename UT >
struct dispatch_shared_infoXX_template {
    /* chunk index under dynamic, number of idle threads under static-steal;
       iteration index otherwise */
    volatile UT     iteration;
    volatile UT     num_done;
    volatile UT     ordered_iteration;
    UT   ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size
};

// replaces dispatch_shared_info structure and dispatch_shared_info_t type
template< typename UT >
struct dispatch_shared_info_template {
    // we need union here to keep the structure size
    union shared_info_tmpl {
        dispatch_shared_infoXX_template< UT > s;
        dispatch_shared_info64_t              s64;
    } u;
    volatile kmp_uint32     buffer_index;
};
#undef USE_TEST_LOCKS
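/* test_then_add(), test_then_inc_acq(), test_then_inc() and compare_and_swap()
   wrap the KMP_TEST_THEN_* / KMP_COMPARE_AND_STORE_* primitives in typed,
   forceinlined helpers so the templated dispatch code below can pick the 32-bit
   or 64-bit atomic from the loop-index type.  The unspecialized templates trap
   with KMP_ASSERT(0), so a missing specialization is caught immediately. */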
// test_then_add template (general template should NOT be used)
template< typename T >
static __forceinline T
test_then_add( volatile T *p, T d ) { KMP_ASSERT(0); };

template<>
__forceinline kmp_int32
test_then_add< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 d )
{
    kmp_int32 r;
    r = KMP_TEST_THEN_ADD32( p, d );
    return r;
}

template<>
__forceinline kmp_int64
test_then_add< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 d )
{
    kmp_int64 r;
    r = KMP_TEST_THEN_ADD64( p, d );
    return r;
}

// test_then_inc_acq template (general template should NOT be used)
template< typename T >
static __forceinline T
test_then_inc_acq( volatile T *p ) { KMP_ASSERT(0); };

template<>
__forceinline kmp_int32
test_then_inc_acq< kmp_int32 >( volatile kmp_int32 *p )
{
    kmp_int32 r;
    r = KMP_TEST_THEN_INC_ACQ32( p );
    return r;
}

template<>
__forceinline kmp_int64
test_then_inc_acq< kmp_int64 >( volatile kmp_int64 *p )
{
    kmp_int64 r;
    r = KMP_TEST_THEN_INC_ACQ64( p );
    return r;
}

// test_then_inc template (general template should NOT be used)
template< typename T >
static __forceinline T
test_then_inc( volatile T *p ) { KMP_ASSERT(0); };

template<>
__forceinline kmp_int32
test_then_inc< kmp_int32 >( volatile kmp_int32 *p )
{
    kmp_int32 r;
    r = KMP_TEST_THEN_INC32( p );
    return r;
}

template<>
__forceinline kmp_int64
test_then_inc< kmp_int64 >( volatile kmp_int64 *p )
{
    kmp_int64 r;
    r = KMP_TEST_THEN_INC64( p );
    return r;
}

// compare_and_swap template (general template should NOT be used)
template< typename T >
static __forceinline kmp_int32
compare_and_swap( volatile T *p, T c, T s ) { KMP_ASSERT(0); };

template<>
__forceinline kmp_int32
compare_and_swap< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 c, kmp_int32 s )
{
    return KMP_COMPARE_AND_STORE_REL32( p, c, s );
}

template<>
__forceinline kmp_int32
compare_and_swap< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 c, kmp_int64 s )
{
    return KMP_COMPARE_AND_STORE_REL64( p, c, s );
}
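/* __kmp_wait_yield() spins until pred( *spinner, checker ) returns non-zero,
   pausing and yielding (KMP_YIELD / KMP_YIELD_SPIN) when the machine is
   oversubscribed so the waiter does not starve the thread it is waiting on.
   It never blocks in the kernel and returns the value that satisfied the
   predicate. */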
template< typename UT >
// ToDo: make inline function (move to header file for icl)
static UT  // unsigned 4- or 8-byte type
__kmp_wait_yield( volatile UT * spinner,
                  UT            checker,
                  kmp_uint32 (* pred)( UT, UT )
                  USE_ITT_BUILD_ARG( void * obj )   // Higher-level synchronization object, or NULL.
                  )
{
    // note: we may not belong to a team at this point
    register volatile UT         * spin          = spinner;
    register          UT           check         = checker;
    register          kmp_uint32   spins;
    register          kmp_uint32 (*f) ( UT, UT ) = pred;
    register          UT           r;

    KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
    KMP_INIT_YIELD( spins );
    // main wait spin loop
    while(!f(r = *spin, check))
    {
        KMP_FSYNC_SPIN_PREPARE( obj );
        // If oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield.
        // Pause is in the following code.
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
    KMP_FSYNC_SPIN_ACQUIRED( obj );
    return r;
}
template< typename UT >
static kmp_uint32 __kmp_eq( UT value, UT checker) {
    return value == checker;
}

template< typename UT >
static kmp_uint32 __kmp_neq( UT value, UT checker) {
    return value != checker;
}

template< typename UT >
static kmp_uint32 __kmp_lt( UT value, UT checker) {
    return value < checker;
}

template< typename UT >
static kmp_uint32 __kmp_ge( UT value, UT checker) {
    return value >= checker;
}

template< typename UT >
static kmp_uint32 __kmp_le( UT value, UT checker) {
    return value <= checker;
}
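/* __kmp_dispatch_deo() / __kmp_dispatch_dxo() are installed as th_deo_fcn /
   th_dxo_fcn when the loop has an ordered clause: deo blocks a thread until
   sh->u.s.ordered_iteration reaches its chunk's lower bound, dxo bumps the
   shared counter when the thread leaves the ordered region.  The *_error
   variants are installed for unordered loops and only perform the consistency
   checks. */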
static void
__kmp_dispatch_deo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    kmp_info_t *th;

    KMP_DEBUG_ASSERT( gtid_ref );

    if ( __kmp_env_consistency_check ) {
        th = __kmp_threads[*gtid_ref];
        if ( th -> th.th_root -> r.r_active
          && ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) ) {
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL );
#endif
        }
    }
}
template< typename UT >
static void
__kmp_dispatch_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    typedef typename traits_t< UT >::signed_t    ST;
    dispatch_private_info_template< UT > * pr;

    int gtid = *gtid_ref;
    kmp_info_t *th = __kmp_threads[ gtid ];
    KMP_DEBUG_ASSERT( th -> th.th_dispatch );

    KD_TRACE(100, ("__kmp_dispatch_deo: T#%d called\n", gtid ) );
    if ( __kmp_env_consistency_check ) {
        pr = reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th -> th.th_dispatch -> th_dispatch_pr_current );
        if ( pr -> pushed_ws != ct_none ) {
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL );
#endif
        }
    }

    if ( ! th -> th.th_team -> t.t_serialized ) {
        dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
            ( th -> th.th_dispatch -> th_dispatch_sh_current );
        UT lower;

        if ( ! __kmp_env_consistency_check ) {
            pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_pr_current );
        }
        lower = pr->u.p.ordered_lower;

#if ! defined( KMP_GOMP_COMPAT )
        if ( __kmp_env_consistency_check ) {
            if ( pr->ordered_bumped ) {
                struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
                __kmp_error_construct2(
                    kmp_i18n_msg_CnsMultipleNesting,
                    ct_ordered_in_pdo, loc_ref,
                    & p->stack_data[ p->w_top ]
                );
            }
        }
#endif /* !defined(KMP_GOMP_COMPAT) */

        KMP_MB();
#ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmp_dispatch_deo: T#%%d before wait: ordered_iter:%%%s lower:%%%s\n",
                traits_t< UT >::spec, traits_t< UT >::spec );
            KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
            __kmp_str_free( &buff );
        }
#endif

        __kmp_wait_yield< UT >( &sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                USE_ITT_BUILD_ARG( NULL )
                                );
        KMP_MB();  /* is this necessary? */
#ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmp_dispatch_deo: T#%%d after wait: ordered_iter:%%%s lower:%%%s\n",
                traits_t< UT >::spec, traits_t< UT >::spec );
            KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
            __kmp_str_free( &buff );
        }
#endif
    }
    KD_TRACE(100, ("__kmp_dispatch_deo: T#%d returned\n", gtid ) );
}
static void
__kmp_dispatch_dxo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    kmp_info_t *th;

    if ( __kmp_env_consistency_check ) {
        th = __kmp_threads[*gtid_ref];
        if ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) {
            __kmp_pop_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref );
        }
    }
}
template< typename UT >
static void
__kmp_dispatch_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    typedef typename traits_t< UT >::signed_t    ST;
    dispatch_private_info_template< UT > * pr;

    int gtid = *gtid_ref;
    kmp_info_t *th = __kmp_threads[ gtid ];
    KMP_DEBUG_ASSERT( th -> th.th_dispatch );

    KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d called\n", gtid ) );
    if ( __kmp_env_consistency_check ) {
        pr = reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th -> th.th_dispatch -> th_dispatch_pr_current );
        if ( pr -> pushed_ws != ct_none ) {
            __kmp_pop_sync( gtid, ct_ordered_in_pdo, loc_ref );
        }
    }

    if ( ! th -> th.th_team -> t.t_serialized ) {
        dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
            ( th -> th.th_dispatch -> th_dispatch_sh_current );

        if ( ! __kmp_env_consistency_check ) {
            pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_pr_current );
        }

        KMP_FSYNC_RELEASING( & sh->u.s.ordered_iteration );
#if ! defined( KMP_GOMP_COMPAT )
        if ( __kmp_env_consistency_check ) {
            if ( pr->ordered_bumped != 0 ) {
                struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
                __kmp_error_construct2(
                    kmp_i18n_msg_CnsMultipleNesting,
                    ct_ordered_in_pdo, loc_ref,
                    & p->stack_data[ p->w_top ]
                );
            }
        }
#endif /* !defined(KMP_GOMP_COMPAT) */

        KMP_MB();       /* Flush all pending memory write invalidates.  */

        pr->ordered_bumped += 1;

        KD_TRACE(1000, ("__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n",
                        gtid, pr->ordered_bumped ) );

        KMP_MB();       /* Flush all pending memory write invalidates.  */

        /* TODO use general release procedure? */
        test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );

        KMP_MB();       /* Flush all pending memory write invalidates.  */
    }
    KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d returned\n", gtid ) );
}
/* Computes and returns x to the power of y, where y must be a non-negative integer */
template< typename UT >
static __forceinline long double
__kmp_pow(long double x, UT y) {
    long double s = 1.0L;

    KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0);
    // exponentiation by squaring
    while(y) {
        if ( y & 1 )
            s *= x;
        x *= x;
        y >>= 1;
    }
    return s;
}

/* Computes and returns the number of unassigned iterations after idx chunks have
   been assigned (the total number of unassigned iterations in chunks with index
   greater than or equal to idx). */
template< typename T >
static __inline typename traits_t< T >::unsigned_t
__kmp_dispatch_guided_remaining(
    T                                  tc,
    typename traits_t< T >::floating_t base,
    typename traits_t< T >::unsigned_t idx
) {
    typedef typename traits_t< T >::unsigned_t  UT;

    long double x = tc * __kmp_pow< UT >(base, idx);
    UT r = (UT) x;
    if ( x == r )
        return r;
    return r + 1;
}
static int guided_int_param = 2;
static double guided_flt_param = 0.5;
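/* __kmp_dispatch_init() is the templated worker behind __kmpc_dispatch_init_{4,4u,8,8u}
   and __kmpc_dist_dispatch_init_*: it normalizes the requested schedule (resolving
   kmp_sch_runtime / kmp_sch_auto and defaulted chunk sizes), computes the trip count,
   fills the thread's dispatch_private_info_template buffer with the per-schedule
   parameters (parm1..parm4), and claims a shared dispatch buffer for the team.
   guided_int_param / guided_flt_param above feed the guided-iterative heuristic
   (parm2 = guided_int_param * nproc * (chunk + 1), parm3 = guided_flt_param / nproc). */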
// UT - unsigned flavor of T, ST - signed flavor of T,
// DBL - double if sizeof(T)==4, or long double if sizeof(T)==8
template< typename T >
static void
__kmp_dispatch_init(
    ident_t                        * loc,
    int                              gtid,
    enum sched_type                  schedule,
    T                                lb,
    T                                ub,
    typename traits_t< T >::signed_t st,
    typename traits_t< T >::signed_t chunk,
    int                              push_ws
) {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    typedef typename traits_t< T >::floating_t  DBL;
    static const int ___kmp_size_type = sizeof( UT );

    int                                            active;
    T                                              tc;
    kmp_info_t *                                   th;
    kmp_team_t *                                   team;
    kmp_uint32                                     my_buffer_index;
    dispatch_private_info_template< T >          * pr;
    dispatch_shared_info_template< UT > volatile * sh;

    KMP_BUILD_ASSERT( sizeof( dispatch_private_info_template< T > ) == sizeof( dispatch_private_info ) );
    KMP_BUILD_ASSERT( sizeof( dispatch_shared_info_template< UT > ) == sizeof( dispatch_shared_info ) );

    if ( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

#if INCLUDE_SSC_MARKS
    SSC_MARK_DISPATCH_INIT();
#endif
#ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_init: T#%%d called: schedule:%%d chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
            traits_t< ST >::spec, traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
        KD_TRACE(10, ( buff, gtid, schedule, chunk, lb, ub, st ) );
        __kmp_str_free( &buff );
    }
#endif
    /* setup data */
    th     = __kmp_threads[ gtid ];
    team   = th -> th.th_team;
    active = ! team -> t.t_serialized;
    th->th.th_ident = loc;

#if USE_ITT_BUILD
    kmp_uint64 cur_chunk = chunk;
    int itt_need_metadata_reporting = __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level == 1;
#endif
    if ( ! active ) {
        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( th -> th.th_dispatch -> th_disp_buffer ); /* top of the stack */
    } else {
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        my_buffer_index = th->th.th_dispatch->th_disp_index ++;

        /* What happens when number of threads changes, need to resize buffer? */
        pr = reinterpret_cast< dispatch_private_info_template< T >  * >
            ( &th -> th.th_dispatch -> th_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );
        sh = reinterpret_cast< dispatch_shared_info_template< UT > volatile * >
            ( &team -> t.t_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );
    }

    // ... (nomerge flag extracted from the schedule kind into pr->nomerge)
    pr->type_size = ___kmp_size_type; // remember the size of variables
    // ... (ordered flag extracted from the schedule kind into pr->ordered)
    if ( schedule == kmp_sch_static ) {
        schedule = __kmp_static;
    } else {
        if ( schedule == kmp_sch_runtime ) {
            // Use the schedule specified by OMP_SCHEDULE (or default if not specified)
            schedule = team -> t.t_sched.r_sched_type;
            // Detail the schedule if needed
            if ( schedule == kmp_sch_guided_chunked ) {
                schedule = __kmp_guided;
            } else if ( schedule == kmp_sch_static ) {
                schedule = __kmp_static;
            }
            // Use the chunk size specified by OMP_SCHEDULE (or default if not specified)
            chunk = team -> t.t_sched.chunk;

#ifdef KMP_DEBUG
            {
                const char * buff;
                // create format specifiers before the debug output
                buff = __kmp_str_format(
                    "__kmp_dispatch_init: T#%%d new: schedule:%%d chunk:%%%s\n",
                    traits_t< ST >::spec );
                KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
                __kmp_str_free( &buff );
            }
#endif
        } else {
            if ( schedule == kmp_sch_guided_chunked ) {
                schedule = __kmp_guided;
            }
            if ( chunk <= 0 ) {
                chunk = KMP_DEFAULT_CHUNK;
            }
        }

        if ( schedule == kmp_sch_auto ) {
            // mapping and differentiation: in the __kmp_do_serial_initialize()
            schedule = __kmp_auto;
#ifdef KMP_DEBUG
            {
                const char * buff;
                // create format specifiers before the debug output
                buff = __kmp_str_format(
                    "__kmp_dispatch_init: kmp_sch_auto: T#%%d new: schedule:%%d chunk:%%%s\n",
                    traits_t< ST >::spec );
                KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
                __kmp_str_free( &buff );
            }
#endif
        }

        /* guided analytical not safe for too many threads */
        if ( team->t.t_nproc > 1<<20 && schedule == kmp_sch_guided_analytical_chunked ) {
            schedule = kmp_sch_guided_iterative_chunked;
            KMP_WARNING( DispatchManyThreads );
        }
        pr->u.p.parm1 = chunk;
    }
    KMP_ASSERT2( (kmp_sch_lower < schedule && schedule < kmp_sch_upper),
                 "unknown scheduling type" );
    if ( __kmp_env_consistency_check ) {
        if ( st == 0 ) {
            __kmp_error_construct(
                kmp_i18n_msg_CnsLoopIncrZeroProhibited,
                ( pr->ordered ? ct_pdo_ordered : ct_pdo ), loc
            );
        }
    }

    /* compute trip count */
    if ( st == 1 ) {            // most common case
        tc = ( ub - lb + st );
        // ...
    } else if ( ub < lb ) {
        tc = 0;                 // zero-trip loop
        // ... (negative and general strides elided)
    }
    // ...

#if KMP_OS_WINDOWS
    pr->u.p.last_upper = ub + st;
#endif /* KMP_OS_WINDOWS */
    /* Install the ordered-entry/exit callbacks that match this loop */
    if ( pr->ordered == 0 ) {
        th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo_error;
        th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo_error;
    } else {
        pr->ordered_bumped = 0;

        pr->u.p.ordered_lower = 1;
        pr->u.p.ordered_upper = 0;

        th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo< UT >;
        th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo< UT >;
    }

    if ( __kmp_env_consistency_check ) {
        enum cons_type ws = pr->ordered ? ct_pdo_ordered : ct_pdo;
        if ( push_ws ) {
            __kmp_push_workshare( gtid, ws, loc );
            pr->pushed_ws = ws;
        } else {
            __kmp_check_workshare( gtid, ws, loc );
            pr->pushed_ws = ct_none;
        }
    }
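/* Per-schedule setup: each case below encodes its bookkeeping in parm1..parm4 of
   the private buffer (chunk size, switch-over thresholds, trapezoid geometry, ...)
   so that __kmp_dispatch_next() can carve out chunks without re-deriving them. */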
    switch ( schedule ) {
#if  ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
    case kmp_sch_static_steal:
        {
            T nproc = team->t.t_nproc;
            T ntc, init;

            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_steal case\n", gtid ) );

            ntc = (tc % chunk ? 1 : 0) + tc / chunk;
            if ( nproc > 1 && ntc >= nproc ) {
                T id = __kmp_tid_from_gtid(gtid);
                T small_chunk, extras;

                small_chunk = ntc / nproc;
                extras = ntc % nproc;

                init = id * small_chunk + ( id < extras ? id : extras );
                pr->u.p.count = init;
                pr->u.p.ub = init + small_chunk + ( id < extras ? 1 : 0 );

                pr->u.p.parm2 = lb;
                //pr->u.p.parm3 = 0; // it's not used in static_steal
                pr->u.p.parm4 = id;
                pr->u.p.st = st;
                break;
            } else {
                KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_balanced\n",
                               gtid ) );
                schedule = kmp_sch_static_balanced;
                /* too few chunks: fall through to kmp_sch_static_balanced */
            } // if
            /* FALL-THROUGH to static balanced */
        } // case
#endif
    case kmp_sch_static_balanced:
        {
            T nproc = team->t.t_nproc;
            T init, limit;

            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_balanced case\n",
                            gtid ) );

            if ( nproc > 1 ) {
                T id = __kmp_tid_from_gtid(gtid);

                if ( tc < nproc ) {
                    if ( id < tc ) {
                        init = id;
                        limit = id;
                        pr->u.p.parm1 = (id == tc - 1);  /* parm1 stores *plastiter */
                    } else {
                        pr->u.p.count = 1;  /* means no more chunks to execute */
                        pr->u.p.parm1 = FALSE;
                        break;
                    }
                } else {
                    T small_chunk = tc / nproc;
                    T extras = tc % nproc;
                    init = id * small_chunk + (id < extras ? id : extras);
                    limit = init + small_chunk - (id < extras ? 0 : 1);
                    pr->u.p.parm1 = (id == nproc - 1);
                }
            } else {
                if ( tc > 0 ) {
                    init = 0;
                    limit = tc - 1;
                    pr->u.p.parm1 = TRUE;
                } else {
                    // zero trip count
                    pr->u.p.count = 1;  /* means no more chunks to execute */
                    pr->u.p.parm1 = FALSE;
                    break;
                }
            }
#if USE_ITT_BUILD
            // Calculate chunk for metadata report
            if ( itt_need_metadata_reporting )
                cur_chunk = limit - init + 1;
#endif
            if ( st == 1 ) {
                pr->u.p.lb = lb + init;
                pr->u.p.ub = lb + limit;
            } else {
                T ub_tmp = lb + limit * st;   // calculated upper bound, "ub" is user-defined upper bound
                pr->u.p.lb = lb + init * st;
                // adjust upper bound to "ub" if needed, so that MS lastprivate will match it exactly
                if ( st > 0 ) {
                    pr->u.p.ub = ( ub_tmp + st > ub ? ub : ub_tmp );
                } else {
                    pr->u.p.ub = ( ub_tmp + st < ub ? ub : ub_tmp );
                }
            }
            if ( pr->ordered ) {
                pr->u.p.ordered_lower = init;
                pr->u.p.ordered_upper = limit;
            }
        } // case
        break;
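    /* Both guided variants hand out chunks proportional to the remaining
       iterations divided by the number of threads.  The iterative form
       recomputes the chunk on every request from sh->u.s.iteration; the
       analytical form precomputes a cross-over chunk index (parm2) after which
       it degenerates into plain dynamic scheduling with the minimum chunk. */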
    case kmp_sch_guided_iterative_chunked :
        {
            T nproc = team->t.t_nproc;
            KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked case\n",gtid));

            if ( nproc > 1 ) {
                if ( (2L * chunk + 1 ) * nproc >= tc ) {
                    /* chunk size too large, switch to dynamic */
                    schedule = kmp_sch_dynamic_chunked;
                } else {
                    // when remaining iters become less than parm2 - switch to dynamic
                    pr->u.p.parm2 = guided_int_param * nproc * ( chunk + 1 );
                    *(double*)&pr->u.p.parm3 = guided_flt_param / nproc;   // may occupy parm3 and parm4
                }
            } else {
                KD_TRACE(100,("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",gtid));
                schedule = kmp_sch_static_greedy;
                /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
                KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
                pr->u.p.parm1 = tc;
            } // if
        } // case
        break;
    case kmp_sch_guided_analytical_chunked:
        {
            T nproc = team->t.t_nproc;
            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_analytical_chunked case\n", gtid));

            if ( nproc > 1 ) {
                if ( (2L * chunk + 1 ) * nproc >= tc ) {
                    /* chunk size too large, switch to dynamic */
                    schedule = kmp_sch_dynamic_chunked;
                } else {
                    DBL x;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                    /* Windows* OS on IA-32 defaults to 53-bit FP precision; the solver
                       below needs 64-bit, so save and reset the FP control word. */
                    unsigned int oldFpcw = _control87(0,0);
                    _control87(_PC_64,_MCW_PC);
#endif
                    /* value used for comparison in solver for cross-over point */
                    long double target = ((long double)chunk * 2 + 1) * nproc / tc;

                    /* cross-over point: chunk indexes at or beyond it use dynamic-style scheduling */
                    UT cross;

                    /* commonly used term: (2 nproc - 1)/(2 nproc) */
                    x = (long double)1.0 - (long double)0.5 / nproc;

#ifdef KMP_DEBUG
                    { // verify natural alignment of parm3, which is accessed as a DBL
                        struct _test_a { char a; union { char b; DBL d; }; } t;
                        ptrdiff_t natural_alignment = (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
                        KMP_DEBUG_ASSERT( ( ( (ptrdiff_t)&pr->u.p.parm3 ) & ( natural_alignment ) ) == 0 );
                    }
#endif // KMP_DEBUG

                    /* save the term in the thread-private dispatch structure */
                    *(DBL*)&pr->u.p.parm3 = x;

                    /* solve for the cross-over point: exponential search for an
                       upper bound, then bisection */
                    {
                        UT          left, right, mid;
                        long double p;

                        right = 229;
                        p = __kmp_pow< UT >(x,right);
                        if ( p > target ) {
                            do {
                                p *= p;
                                right <<= 1;
                            } while(p>target && right < (1<<27));
                            left = right >> 1;
                        } else {
                            left = 0;
                        }

                        /* bisection root-finding method */
                        while ( left + 1 < right ) {
                            mid = (left + right) / 2;
                            if ( __kmp_pow< UT >(x,mid) > target ) {
                                left = mid;
                            } else {
                                right = mid;
                            }
                        }
                        cross = right;
                    }
                    /* assert sanity of computed cross-over point */
                    KMP_ASSERT(cross && __kmp_pow< UT >(x, cross - 1) > target && __kmp_pow< UT >(x, cross) <= target);

                    /* save the cross-over point in the thread-private dispatch structure */
                    pr->u.p.parm2 = cross;

#if ( ( KMP_OS_LINUX || KMP_OS_WINDOWS ) && KMP_ARCH_X86 ) && ( ! defined( KMP_I8 ) )
    #define GUIDED_ANALYTICAL_WORKAROUND (*( DBL * )&pr->u.p.parm3)
#else
    #define GUIDED_ANALYTICAL_WORKAROUND (x)
#endif
                    /* dynamic-style scheduling offset */
                    pr->u.p.count = tc - __kmp_dispatch_guided_remaining(tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - cross * chunk;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                    _control87(oldFpcw,_MCW_PC);   /* restore FPCW */
#endif
                } // if
            } else {
                KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",
                               gtid ) );
                schedule = kmp_sch_static_greedy;
                /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
                pr->u.p.parm1 = tc;
            } // if
        } // case
        break;
    case kmp_sch_static_greedy:
        KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
        pr->u.p.parm1 = ( team -> t.t_nproc > 1 ) ?
            ( tc + team->t.t_nproc - 1 ) / team->t.t_nproc :
            tc;
        break;
    case kmp_sch_static_chunked :
    case kmp_sch_dynamic_chunked :
        KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid));
        break;
    case kmp_sch_trapezoidal :
        {
            /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */

            T parm1, parm2, parm3, parm4;
            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_trapezoidal case\n", gtid ) );

            parm1 = chunk;

            /* F : size of the first cycle */
            parm2 = ( tc / (2 * team->t.t_nproc) );

            if ( parm2 < 1 ) {
                parm2 = 1;
            }

            /* L : size of the last cycle.  Make sure the last cycle is not larger
                   than the first cycle. */
            if ( parm1 < 1 ) {
                parm1 = 1;
            } else if ( parm1 > parm2 ) {
                parm1 = parm2;
            }

            /* N : number of cycles */
            parm3 = ( parm2 + parm1 );
            parm3 = ( 2 * tc + parm3 - 1) / parm3;

            if ( parm3 < 2 ) {
                parm3 = 2;
            }

            /* sigma : decreasing incr of the trapezoid */
            parm4 = ( parm3 - 1 );
            parm4 = ( parm2 - parm1 ) / parm4;

            pr->u.p.parm1 = parm1;
            pr->u.p.parm2 = parm2;
            pr->u.p.parm3 = parm3;
            pr->u.p.parm4 = parm4;
        } // case
        break;

    default:
        {
            __kmp_msg(
                kmp_ms_fatal,                        // Severity
                KMP_MSG( UnknownSchedTypeDetected ), // Primary message
                KMP_HNT( GetNewerLibrary ),          // Hint
                __kmp_msg_null                       // Variadic argument list terminator
            );
        }
        break;
    } // switch
    pr->schedule = schedule;

    if ( active ) {
        /* The name of this buffer should be my_buffer_index when it's free to use it */

        KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d sh->buffer_index:%d\n",
                        gtid, my_buffer_index, sh->buffer_index) );
        __kmp_wait_yield< kmp_uint32 >( & sh->buffer_index, my_buffer_index, __kmp_eq< kmp_uint32 >
                                        USE_ITT_BUILD_ARG( NULL )
                                        );
        // Note: buffer index and my_buffer_index are *always* 32-bit integers.
        KMP_MB();  /* is this necessary? */
        KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d sh->buffer_index:%d\n",
                        gtid, my_buffer_index, sh->buffer_index) );

        th -> th.th_dispatch -> th_dispatch_pr_current = (dispatch_private_info_t*) pr;
        th -> th.th_dispatch -> th_dispatch_sh_current = (dispatch_shared_info_t*)  sh;
    } // if (active)

#if USE_ITT_BUILD
    if ( pr->ordered ) {
        __kmp_itt_ordered_init( gtid );
    }
    // Report loop metadata (master of an active level-1 team only)
    if ( itt_need_metadata_reporting ) {
        kmp_uint64 schedtype = 0;
        switch ( schedule ) {
        case kmp_sch_static_chunked:
        case kmp_sch_static_balanced:   // chunk is calculated in the switch above
            break;
        case kmp_sch_static_greedy:
            cur_chunk = pr->u.p.parm1;
            break;
        case kmp_sch_dynamic_chunked:
            schedtype = 1;
            break;
        case kmp_sch_guided_iterative_chunked:
        case kmp_sch_guided_analytical_chunked:
            schedtype = 2;
            break;
        default:
            schedtype = 3;
            break;
        }
        __kmp_itt_metadata_loop(loc, schedtype, tc, cur_chunk);
    }
#endif /* USE_ITT_BUILD */
#ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s lb:%%%s ub:%%%s" \
            " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s" \
            " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
            traits_t< UT >::spec, traits_t< T >::spec, traits_t< T >::spec,
            traits_t< ST >::spec, traits_t< UT >::spec, traits_t< UT >::spec,
            traits_t< UT >::spec, traits_t< UT >::spec, traits_t< T >::spec,
            traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec );
        KD_TRACE(10, ( buff,
            gtid, pr->schedule, pr->ordered, pr->u.p.lb, pr->u.p.ub,
            pr->u.p.st, pr->u.p.tc, pr->u.p.count,
            pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,
            pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4 ) );
        __kmp_str_free( &buff );
    }
#endif
#if ( KMP_STATIC_STEAL_ENABLED )
    if ( ___kmp_size_type < 8 ) {
        // A dedicated per-loop flag is needed: other threads inspect
        // static_steal_counter when searching for a victim, and bumping it here
        // marks this thread as one that may be stolen from.
        if ( schedule == kmp_sch_static_steal ) {
            volatile T * p = &pr->u.p.static_steal_counter;
            *p = *p + 1;
        }
    }
#endif // ( KMP_STATIC_STEAL_ENABLED && USE_STEALING )
}
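/* For ordered loops, __kmp_dispatch_finish() makes sure the thread's last chunk
   has passed through the ordered machinery before the loop is torn down: if the
   thread never entered the ordered region for that chunk it waits for its turn
   and bumps sh->u.s.ordered_iteration itself.  __kmp_dispatch_finish_chunk()
   (GOMP compatibility) does the same per chunk, accounting for partially bumped
   chunks. */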
template< typename UT >
static void
__kmp_dispatch_finish( int gtid, ident_t *loc )
{
    typedef typename traits_t< UT >::signed_t ST;
    kmp_info_t *th = __kmp_threads[ gtid ];

    KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid ) );
    if ( ! th -> th.th_team -> t.t_serialized ) {

        dispatch_private_info_template< UT > * pr =
            reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th->th.th_dispatch->th_dispatch_pr_current );
        dispatch_shared_info_template< UT > volatile * sh =
            reinterpret_cast< dispatch_shared_info_template< UT >volatile* >
            ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( pr );
        KMP_DEBUG_ASSERT( sh );
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        if ( pr->ordered_bumped ) {
            KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
                            gtid ) );
            pr->ordered_bumped = 0;
        } else {
            UT lower = pr->u.p.ordered_lower;

#ifdef KMP_DEBUG
            {
                const char * buff;
                // create format specifiers before the debug output
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish: T#%%d before wait: ordered_iteration:%%%s lower:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
                __kmp_str_free( &buff );
            }
#endif

            __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                   USE_ITT_BUILD_ARG(NULL)
                                   );
            KMP_MB();  /* is this necessary? */
#ifdef KMP_DEBUG
            {
                const char * buff;
                // create format specifiers before the debug output
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish: T#%%d after wait: ordered_iteration:%%%s lower:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
                __kmp_str_free( &buff );
            }
#endif

            test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );
        } // if
    } // if
    KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid ) );
}
#ifdef KMP_GOMP_COMPAT

template< typename UT >
static void
__kmp_dispatch_finish_chunk( int gtid, ident_t *loc )
{
    typedef typename traits_t< UT >::signed_t ST;
    kmp_info_t *th = __kmp_threads[ gtid ];

    KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid ) );
    if ( ! th -> th.th_team -> t.t_serialized ) {
        dispatch_private_info_template< UT > * pr =
            reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th->th.th_dispatch->th_dispatch_pr_current );
        dispatch_shared_info_template< UT > volatile * sh =
            reinterpret_cast< dispatch_shared_info_template< UT >volatile* >
            ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( pr );
        KMP_DEBUG_ASSERT( sh );
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        UT lower = pr->u.p.ordered_lower;
        UT upper = pr->u.p.ordered_upper;
        UT inc = upper - lower + 1;

        if ( pr->ordered_bumped == inc ) {
            KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
                            gtid ) );
            pr->ordered_bumped = 0;
        } else {
            inc -= pr->ordered_bumped;

#ifdef KMP_DEBUG
            {
                const char * buff;
                // create format specifiers before the debug output
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish_chunk: T#%%d before wait: " \
                    "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower, upper ) );
                __kmp_str_free( &buff );
            }
#endif

            __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                   USE_ITT_BUILD_ARG(NULL)
                                   );

            KMP_MB();  /* is this necessary? */
            KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting ordered_bumped to zero\n",
                            gtid ) );
            pr->ordered_bumped = 0;

#ifdef KMP_DEBUG
            {
                const char * buff;
                // create format specifiers before the debug output
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish_chunk: T#%%d after wait: " \
                    "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper ) );
                __kmp_str_free( &buff );
            }
#endif

            test_then_add< ST >( (volatile ST *) & sh->u.s.ordered_iteration, inc);
        }
    }
    KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid ) );
}

#endif /* KMP_GOMP_COMPAT */
template< typename T >
static int
__kmp_dispatch_next(
    ident_t *loc, int gtid, kmp_int32 *p_last, T *p_lb, T *p_ub, typename traits_t< T >::signed_t *p_st
) {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    typedef typename traits_t< T >::floating_t  DBL;
    static const int ___kmp_size_type = sizeof( UT );

    int                                   status;
    dispatch_private_info_template< T > * pr;
    kmp_info_t                          * th   = __kmp_threads[ gtid ];
    kmp_team_t                          * team = th -> th.th_team;

    KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st ); // AC: these cannot be NULL
#ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_next: T#%%d called p_lb:%%%s p_ub:%%%s p_st:%%%s p_last: %%p\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
        KD_TRACE(1000, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last ) );
        __kmp_str_free( &buff );
    }
#endif
    if ( team -> t.t_serialized ) {
        /* NOTE: serialize this dispatch because we are not at the active level */
        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( th -> th.th_dispatch -> th_disp_buffer ); /* top of the stack */
        KMP_DEBUG_ASSERT( pr );

        if ( (status = (pr->u.p.tc != 0)) == 0 ) {
            *p_lb = 0;
            *p_ub = 0;
            if ( p_st != NULL )
                *p_st = 0;
            if ( __kmp_env_consistency_check ) {
                if ( pr->pushed_ws != ct_none ) {
                    pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
                }
            }
        } else if ( pr->nomerge ) {
            kmp_int32 last;
            T         start;
            UT        limit, trip, init;
            ST        incr;
            T         chunk = pr->u.p.parm1;

            KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", gtid ) );

            init = chunk * pr->u.p.count++;
            trip = pr->u.p.tc - 1;

            if ( (status = (init <= trip)) == 0 ) {
                *p_lb = 0;
                *p_ub = 0;
                if ( p_st != NULL )
                    *p_st = 0;
                if ( __kmp_env_consistency_check ) {
                    if ( pr->pushed_ws != ct_none ) {
                        pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
                    }
                }
            } else {
                start = pr->u.p.lb;
                limit = chunk + init - 1;
                incr  = pr->u.p.st;

                if ( (last = (limit >= trip)) != 0 ) {
                    limit = trip;
#if KMP_OS_WINDOWS
                    pr->u.p.last_upper = pr->u.p.ub;
#endif /* KMP_OS_WINDOWS */
                }
                if ( p_last != NULL )
                    *p_last = last;
                if ( p_st != NULL )
                    *p_st = incr;
                if ( incr == 1 ) {
                    *p_lb = start + init;
                    *p_ub = start + limit;
                } else {
                    *p_lb = start + init * incr;
                    *p_ub = start + limit * incr;
                }

                if ( pr->ordered ) {
                    pr->u.p.ordered_lower = init;
                    pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                    {
                        const char * buff;
                        // create format specifiers before the debug output
                        buff = __kmp_str_format(
                            "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                            traits_t< UT >::spec, traits_t< UT >::spec );
                        KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                        __kmp_str_free( &buff );
                    }
#endif
                } // if
            } // if
        } else {
            pr->u.p.tc = 0;
            *p_lb = pr->u.p.lb;
            *p_ub = pr->u.p.ub;
#if KMP_OS_WINDOWS
            pr->u.p.last_upper = *p_ub;
#endif /* KMP_OS_WINDOWS */
            if ( p_last != NULL )
                *p_last = TRUE;
            if ( p_st != NULL )
                *p_st = pr->u.p.st;
        } // if
#ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s " \
                "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, *p_st, p_last, *p_last, status) );
            __kmp_str_free( &buff );
        }
#endif
#if INCLUDE_SSC_MARKS
        SSC_MARK_DISPATCH_NEXT();
#endif
        return status;
    } else {
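        /* Active (non-serialized) team: chunks are taken from the shared dispatch
           buffer according to pr->schedule.  Under kmp_sch_static_steal each thread
           first drains its own pre-assigned range by atomically updating the
           (count, ub) pair packed into one 64-bit word, then tries to steal part of
           a victim's range; the other schedules use the shared iteration counter. */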
        kmp_int32 last = 0;
        dispatch_shared_info_template< UT > *sh;
        T         start;
        ST        incr;
        UT        limit, trip, init;

        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( th->th.th_dispatch->th_dispatch_pr_current );
        KMP_DEBUG_ASSERT( pr );
        sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
            ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( sh );

        if ( pr->u.p.tc == 0 ) {
            // zero trip count: nothing to do
            status = 0;
        } else {
        switch (pr->schedule) {
#if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
        case kmp_sch_static_steal:
            {
                T chunk = pr->u.p.parm1;

                KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_steal case\n", gtid) );

                trip = pr->u.p.tc - 1;

                if ( ___kmp_size_type > 4 ) {
                    // Other threads do not look into the data of this thread,
                    // so it is not necessary to make volatile casting.
                    init   = ( pr->u.p.count )++;
                    status = ( init < (UT)pr->u.p.ub );
                } else {
                    typedef union {
                        struct {
                            UT count;
                            T  ub;
                        } p;
                        kmp_int64 b;
                    } union_i4;
                    // All operations on 'count' or 'ub' must be combined atomically together;
                    // stealing is implemented only for 4-byte indexes.
                    {
                        union_i4 vold, vnew;
                        vold.b = *( volatile kmp_int64 * )(&pr->u.p.count);
                        vnew = vold;
                        vnew.p.count++;
                        while( ! KMP_COMPARE_AND_STORE_ACQ64(
                                    ( volatile kmp_int64* )&pr->u.p.count,
                                    *VOLATILE_CAST(kmp_int64 *)&vold.b,
                                    *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
                            KMP_CPU_PAUSE();
                            vold.b = *( volatile kmp_int64 * )(&pr->u.p.count);
                            vnew = vold;
                            vnew.p.count++;
                        }
                        vnew = vold;
                        init   = vnew.p.count;
                        status = ( init < (UT)vnew.p.ub ) ;
                    }

                    if ( !status ) {
                        // no own chunks left: try to steal from another thread
                        kmp_info_t **other_threads = team->t.t_threads;
                        int          while_limit = 10;
                        int          while_index = 0;

                        while ( ( !status ) && ( while_limit != ++while_index ) ) {
                            union_i4  vold, vnew;
                            kmp_int32 remaining;
                            T         victimIdx    = pr->u.p.parm4;
                            T         oldVictimIdx = victimIdx;
                            dispatch_private_info_template< T > * victim;

                            do {
                                if ( !victimIdx ) {
                                    victimIdx = team->t.t_nproc - 1;
                                } else {
                                    --victimIdx;
                                }
                                victim = reinterpret_cast< dispatch_private_info_template< T >* >
                                    ( other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current );
                            } while ( (victim == NULL || victim == pr) && oldVictimIdx != victimIdx );
                            if ( ( !victim ) ||
                               ( (*( volatile T * )&victim->u.p.static_steal_counter) !=
                                 (*( volatile T * )&pr->u.p.static_steal_counter) ) ) {
                                // the victim is not ready yet to participate in stealing
                                // (it is still in __kmp_dispatch_init)
                                continue;
                            }
                            if ( oldVictimIdx == victimIdx ) {
                                break;
                            }
                            pr->u.p.parm4 = victimIdx;   // new victim found
                            while( 1 ) {                 // CAS loop while victim has chunks to steal
                                vold.b = *( volatile kmp_int64 * )( &victim->u.p.count );
                                vnew = vold;

                                KMP_DEBUG_ASSERT( (vnew.p.ub - 1) * (UT)chunk <= trip );
                                if ( vnew.p.count >= (UT)vnew.p.ub || (remaining = vnew.p.ub - vnew.p.count) < 4 ) {
                                    break;               // not enough chunks to steal, go to next victim
                                }
                                vnew.p.ub -= (remaining >> 2);   // try to steal 1/4 of the remaining chunks
                                KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
#pragma warning( push )
                                // disable warning on pointless comparison of unsigned with 0
#pragma warning( disable: 186 )
                                KMP_DEBUG_ASSERT(vnew.p.ub >= 0);
#pragma warning( pop )
                                if ( KMP_COMPARE_AND_STORE_ACQ64(
                                        ( volatile kmp_int64 * )&victim->u.p.count,
                                        *VOLATILE_CAST(kmp_int64 *)&vold.b,
                                        *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
                                    // stealing succeeded; now update own count and ub
                                    status = 1;
                                    while_index = 0;
#if KMP_ARCH_X86
                                    // atomic 64-bit write unavailable on IA-32, do it in steps
                                    init = vold.p.count;
                                    pr->u.p.ub = 0;
                                    pr->u.p.count = init + 1;
                                    pr->u.p.ub = vnew.p.count;
#else
                                    init = vnew.p.ub;
                                    vold.p.count = init + 1;
                                    *( volatile kmp_int64 * )(&pr->u.p.count) = vold.b;
#endif // KMP_ARCH_X86
                                    break;
                                } else {
                                    KMP_CPU_PAUSE();     // CAS failed, repeat attempt
                                }
                            } // while (steal from a particular victim)
                        } // while (search for victim)
                    } // if (try to find victim and steal)
                } // if (4-byte index)
                if ( !status ) {
                    *p_lb = 0;
                    *p_ub = 0;
                    if ( p_st != NULL ) *p_st = 0;
                } else {
                    start = pr->u.p.parm2;
                    init *= chunk;
                    limit = chunk + init - 1;
                    incr  = pr->u.p.st;

                    KMP_DEBUG_ASSERT(init <= trip);
                    if ( (last = (limit >= trip)) != 0 )
                        limit = trip;
                    if ( p_st != NULL ) *p_st = incr;

                    if ( incr == 1 ) {
                        *p_lb = start + init;
                        *p_ub = start + limit;
                    } else {
                        *p_lb = start + init * incr;
                        *p_ub = start + limit * incr;
                    }

                    if ( pr->ordered ) {
                        pr->u.p.ordered_lower = init;
                        pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                        {
                            const char * buff;
                            // create format specifiers before the debug output
                            buff = __kmp_str_format(
                                "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                traits_t< UT >::spec, traits_t< UT >::spec );
                            KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                            __kmp_str_free( &buff );
                        }
#endif
                    } // if
                } // if
                break;
            } // case
#endif // ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
        case kmp_sch_static_balanced:
            {
                KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_balanced case\n", gtid) );
                if ( (status = !pr->u.p.count) != 0 ) {  /* check if thread has only one chunk */
                    pr->u.p.count = 1;
                    *p_lb = pr->u.p.lb;
                    *p_ub = pr->u.p.ub;
                    last = pr->u.p.parm1;
                    if ( p_st != NULL )
                        *p_st = pr->u.p.st;
                } else {  /* no iterations to do */
                    pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
                }
                if ( pr->ordered ) {
#ifdef KMP_DEBUG
                    {
                        const char * buff;
                        // create format specifiers before the debug output
                        buff = __kmp_str_format(
                            "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                            traits_t< UT >::spec, traits_t< UT >::spec );
                        KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                        __kmp_str_free( &buff );
                    }
#endif
                } // if
            } // case
            break;
        case kmp_sch_static_greedy:
        case kmp_sch_static_chunked:
            {
                T parm1;

                KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_[affinity|chunked] case\n",
                               gtid ) );
                parm1 = pr->u.p.parm1;

                trip  = pr->u.p.tc - 1;
                init  = parm1 * (pr->u.p.count + __kmp_tid_from_gtid(gtid));

                if ( (status = (init <= trip)) != 0 ) {
                    start = pr->u.p.lb;
                    incr  = pr->u.p.st;
                    limit = parm1 + init - 1;

                    if ( (last = (limit >= trip)) != 0 )
                        limit = trip;

                    if ( p_st != NULL ) *p_st = incr;

                    pr->u.p.count += team->t.t_nproc;

                    if ( incr == 1 ) {
                        *p_lb = start + init;
                        *p_ub = start + limit;
                    } else {
                        *p_lb = start + init * incr;
                        *p_ub = start + limit * incr;
                    }

                    if ( pr->ordered ) {
                        pr->u.p.ordered_lower = init;
                        pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                        {
                            const char * buff;
                            // create format specifiers before the debug output
                            buff = __kmp_str_format(
                                "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                traits_t< UT >::spec, traits_t< UT >::spec );
                            KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                            __kmp_str_free( &buff );
                        }
#endif
                    } // if
                } // if
            } // case
            break;
        case kmp_sch_dynamic_chunked:
            {
                T chunk = pr->u.p.parm1;

                KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
                               gtid ) );

                init = chunk * test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
                trip = pr->u.p.tc - 1;

                if ( (status = (init <= trip)) == 0 ) {
                    *p_lb = 0;
                    *p_ub = 0;
                    if ( p_st != NULL ) *p_st = 0;
                } else {
                    start = pr->u.p.lb;
                    limit = chunk + init - 1;
                    incr  = pr->u.p.st;

                    if ( (last = (limit >= trip)) != 0 )
                        limit = trip;

                    if ( p_st != NULL ) *p_st = incr;

                    if ( incr == 1 ) {
                        *p_lb = start + init;
                        *p_ub = start + limit;
                    } else {
                        *p_lb = start + init * incr;
                        *p_ub = start + limit * incr;
                    }

                    if ( pr->ordered ) {
                        pr->u.p.ordered_lower = init;
                        pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                        {
                            const char * buff;
                            // create format specifiers before the debug output
                            buff = __kmp_str_format(
                                "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                traits_t< UT >::spec, traits_t< UT >::spec );
                            KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                            __kmp_str_free( &buff );
                        }
#endif
                    } // if
                } // if
            } // case
            break;
        case kmp_sch_guided_iterative_chunked:
            {
                T chunkspec = pr->u.p.parm1;
                KD_TRACE(100,
                    ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked iterative case\n",gtid));
                trip = pr->u.p.tc;
                // Start atomic part of calculations
                while(1) {
                    ST remaining;               // signed, because can be < 0
                    init = sh->u.s.iteration;   // shared value
                    remaining = trip - init;
                    if ( remaining <= 0 ) {
                        // nothing to do, don't try atomic op
                        status = 0;
                        break;
                    }
                    if ( (T)remaining < pr->u.p.parm2 ) { // compare with K*nproc*(chunk+1), K=2 by default
                        // use dynamic-style schedule: atomically increment iterations, get old value
                        init = test_then_add<ST>( (ST*)&sh->u.s.iteration, (ST)chunkspec );
                        remaining = trip - init;
                        if (remaining <= 0) {
                            status = 0;    // all iterations got by other threads
                        } else {
                            status = 1;    // got some iterations to work on
                            if ( (T)remaining > chunkspec ) {
                                limit = init + chunkspec - 1;
                            } else {
                                last = 1;  // the last chunk
                                limit = init + remaining - 1;
                            } // if
                        } // if
                        break;
                    } // if
                    limit = init + (UT)( remaining * *(double*)&pr->u.p.parm3 ); // divide by K*nproc
                    if ( compare_and_swap<ST>( (ST*)&sh->u.s.iteration, (ST)init, (ST)limit ) ) {
                        // CAS was successful, chunk obtained
                        status = 1;
                        --limit;
                        break;
                    } // if
                } // while
                if ( status != 0 ) {
                    start = pr->u.p.lb;
                    incr  = pr->u.p.st;
                    if ( p_st != NULL )
                        *p_st = incr;
                    *p_lb = start + init * incr;
                    *p_ub = start + limit * incr;
                    if ( pr->ordered ) {
                        pr->u.p.ordered_lower = init;
                        pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                        {
                            const char * buff;
                            // create format specifiers before the debug output
                            buff = __kmp_str_format(
                                "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                traits_t< UT >::spec, traits_t< UT >::spec );
                            KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                            __kmp_str_free( &buff );
                        }
#endif
                    } // if
                } else {
                    *p_lb = 0;
                    *p_ub = 0;
                    if ( p_st != NULL ) *p_st = 0;
                } // if
            } // case
            break;
        case kmp_sch_guided_analytical_chunked:
            {
                T   chunkspec = pr->u.p.parm1;
                UT  chunkIdx;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                /* for storing original FPCW value for Windows* OS on IA-32 architecture */
                unsigned int oldFpcw;
                unsigned int fpcwSet = 0;
#endif
                KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked analytical case\n",
                               gtid ) );

                trip = pr->u.p.tc;

                KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
                KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)team->t.t_nproc < trip);

                while(1) { /* this while loop is a safeguard against unexpected zero chunk sizes */
                    chunkIdx = test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
                    if ( chunkIdx >= (UT)pr->u.p.parm2 ) {
                        --trip;
                        /* use dynamic-style scheduling */
                        init = chunkIdx * chunkspec + pr->u.p.count;
                        /* need to verify init > 0 in case of overflow in the above calculation */
                        if ( (status = (init > 0 && init <= trip)) != 0 ) {
                            limit = init + chunkspec -1;

                            if ( (last = (limit >= trip)) != 0 )
                                limit = trip;
                        }
                        break;
                    } else {
                        /* use exponential-style scheduling */
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                        /* save original FPCW and set precision to 64-bit, as
                           Windows* OS on IA-32 defaults to 53-bit */
                        if ( !fpcwSet ) {
                            oldFpcw = _control87(0,0);
                            _control87(_PC_64,_MCW_PC);
                            fpcwSet = 0x30000;
                        }
#endif
                        if ( chunkIdx ) {
                            init = __kmp_dispatch_guided_remaining< T >(
                                       trip, *( DBL * )&pr->u.p.parm3, chunkIdx );
                            KMP_DEBUG_ASSERT(init);
                            init = trip - init;
                        } else
                            init = 0;
                        limit = trip - __kmp_dispatch_guided_remaining< T >(
                                           trip, *( DBL * )&pr->u.p.parm3, chunkIdx + 1 );
                        KMP_ASSERT(init <= limit);
                        if ( init < limit ) {
                            KMP_DEBUG_ASSERT(limit <= trip);
                            if ( (last = (limit == trip)) != 0 )
                                --limit;
                            status = 1;
                            break;
                        } // if
                    } // if
                } // while (1)
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                /* restore FPCW if necessary; check fpcwSet first because
                   oldFpcw can be uninitialized here */
                if ( fpcwSet && ( oldFpcw & fpcwSet ) )
                    _control87(oldFpcw,_MCW_PC);
#endif
                if ( status != 0 ) {
                    start = pr->u.p.lb;
                    incr  = pr->u.p.st;
                    if ( p_st != NULL )
                        *p_st = incr;
                    *p_lb = start + init * incr;
                    *p_ub = start + limit * incr;
                    if ( pr->ordered ) {
                        pr->u.p.ordered_lower = init;
                        pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                        {
                            const char * buff;
                            // create format specifiers before the debug output
                            buff = __kmp_str_format(
                                "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                traits_t< UT >::spec, traits_t< UT >::spec );
                            KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                            __kmp_str_free( &buff );
                        }
#endif
                    } // if
                } else {
                    *p_lb = 0;
                    *p_ub = 0;
                    if ( p_st != NULL ) *p_st = 0;
                } // if
            } // case
            break;
        case kmp_sch_trapezoidal:
            {
                UT   index;
                T    parm2 = pr->u.p.parm2;
                T    parm3 = pr->u.p.parm3;
                T    parm4 = pr->u.p.parm4;
                KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_trapezoidal case\n",
                               gtid ) );

                index = test_then_inc< ST >( (volatile ST *) & sh->u.s.iteration );

                init = ( index * ( (2*parm2) - (index-1)*parm4 ) ) / 2;
                trip = pr->u.p.tc - 1;

                if ( (status = ((T)index < parm3 && init <= trip)) == 0 ) {
                    *p_lb = 0;
                    *p_ub = 0;
                    if ( p_st != NULL ) *p_st = 0;
                } else {
                    start = pr->u.p.lb;
                    limit = ( (index+1) * ( 2*parm2 - index*parm4 ) ) / 2 - 1;
                    incr  = pr->u.p.st;

                    if ( (last = (limit >= trip)) != 0 )
                        limit = trip;

                    if ( p_st != NULL ) *p_st = incr;

                    if ( incr == 1 ) {
                        *p_lb = start + init;
                        *p_ub = start + limit;
                    } else {
                        *p_lb = start + init * incr;
                        *p_ub = start + limit * incr;
                    }

                    if ( pr->ordered ) {
                        pr->u.p.ordered_lower = init;
                        pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                        {
                            const char * buff;
                            // create format specifiers before the debug output
                            buff = __kmp_str_format(
                                "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                traits_t< UT >::spec, traits_t< UT >::spec );
                            KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                            __kmp_str_free( &buff );
                        }
#endif
                    } // if
                } // if
            } // case
            break;
        default:
            {
                status = 0; // to avoid complaints on uninitialized variable use
                __kmp_msg(
                    kmp_ms_fatal,                        // Severity
                    KMP_MSG( UnknownSchedTypeDetected ), // Primary message
                    KMP_HNT( GetNewerLibrary ),          // Hint
                    __kmp_msg_null                       // Variadic argument list terminator
                );
            }
            break;
        } // switch
        } // else (tc != 0)
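        /* When the loop is exhausted (status == 0) the last thread to notice
           recycles the shared buffer: num_done counts finishing threads, and the
           thread that brings it to nproc-1 resets the shared counters and advances
           sh->buffer_index so __kmp_dispatch_init() can hand the buffer to the
           next loop. */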
        if ( status == 0 ) {
            UT num_done;

            num_done = test_then_inc< ST >( (volatile ST *) & sh->u.s.num_done );
#ifdef KMP_DEBUG
            {
                const char * buff;
                // create format specifiers before the debug output
                buff = __kmp_str_format(
                    "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
                    traits_t< UT >::spec );
                KD_TRACE(100, ( buff, gtid, sh->u.s.num_done ) );
                __kmp_str_free( &buff );
            }
#endif

            if ( (ST)num_done == team->t.t_nproc-1 ) {
                /* NOTE: release this buffer to be reused */

                KMP_MB();       /* Flush all pending memory write invalidates. */

                sh->u.s.num_done = 0;
                sh->u.s.iteration = 0;

                /* TODO replace with general release procedure? */
                if ( pr->ordered ) {
                    sh->u.s.ordered_iteration = 0;
                }

                KMP_MB();       /* Flush all pending memory write invalidates. */

                sh -> buffer_index += KMP_MAX_DISP_BUF;
                KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",
                                gtid, sh->buffer_index) );

                KMP_MB();       /* Flush all pending memory write invalidates. */
            } // if

            if ( __kmp_env_consistency_check ) {
                if ( pr->pushed_ws != ct_none ) {
                    pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
                }
            }

            th -> th.th_dispatch -> th_deo_fcn = NULL;
            th -> th.th_dispatch -> th_dxo_fcn = NULL;
            th -> th.th_dispatch -> th_dispatch_sh_current = NULL;
            th -> th.th_dispatch -> th_dispatch_pr_current = NULL;
        } // if (status == 0)
#if KMP_OS_WINDOWS
        else if ( last ) {
            pr->u.p.last_upper = pr->u.p.ub;
        }
#endif /* KMP_OS_WINDOWS */
        if ( p_last != NULL && status != 0 )
            *p_last = last;
    } // if

#ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_next: T#%%d normal case: " \
            "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
        KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last, status ) );
        __kmp_str_free( &buff );
    }
#endif
#if INCLUDE_SSC_MARKS
    SSC_MARK_DISPATCH_NEXT();
#endif
    return status;
}
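/* __kmp_dist_get_bounds() computes the per-team bounds for a distribute loop
   executed by a league of teams: the global iteration space is split across
   nteams (balanced or greedy, matching __kmp_static), *plower / *pupper are
   narrowed to this team's slice, and *plastiter is set for the team that owns
   the last iteration. */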
template< typename T >
static void
__kmp_dist_get_bounds(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    typename traits_t< T >::signed_t  incr
) {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    register kmp_uint32  team_id;
    register kmp_uint32  nteams;
    register UT          trip_count;
    register kmp_team_t *team;
    kmp_info_t * th;

    KMP_DEBUG_ASSERT( plastiter && plower && pupper );
    KE_TRACE( 10, ("__kmpc_dist_get_bounds called (%d)\n", gtid));
#ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmpc_dist_get_bounds: T#%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, *plastiter, *plower, *pupper, incr ) );
        __kmp_str_free( &buff );
    }
#endif

    if( __kmp_env_consistency_check ) {
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
            // The loop is illegal: some zero-trip loops are maintained by the
            // compiler, but loops with a sign mismatch between bounds and
            // increment are not checked there, so report them here.
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    team = th->th.th_team;
#if OMP_40_ENABLED
    nteams = th->th.th_teams_size.nteams;
#endif
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute global trip count
    if( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if(incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else {
        trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
    }
    if( trip_count <= nteams ) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy || \
            __kmp_static == kmp_sch_static_balanced
        ); // Unknown static scheduling type.
        // only some teams get single iteration, others get nothing
        if( team_id < trip_count ) {
            *pupper = *plower = *plower + team_id * incr;
        } else {
            *plower = *pupper + incr; // zero-trip loop
        }
        if( plastiter != NULL )
            *plastiter = ( team_id == trip_count - 1 );
    } else {
        if( __kmp_static == kmp_sch_static_balanced ) {
            register UT chunk = trip_count / nteams;
            register UT extras = trip_count % nteams;
            *plower += incr * ( team_id * chunk + ( team_id < extras ? team_id : extras ) );
            *pupper = *plower + chunk * incr - ( team_id < extras ? 0 : incr );
            if( plastiter != NULL )
                *plastiter = ( team_id == nteams - 1 );
        } else {
            register T chunk_inc_count =
                ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
            register T upper = *pupper;
            KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
            *plower += team_id * chunk_inc_count;
            *pupper = *plower + chunk_inc_count - incr;
            // Check/correct bounds if needed
            if( incr > 0 ) {
                if( *pupper < *plower )
                    *pupper = i_maxmin< T >::mx;
                if( plastiter != NULL )
                    *plastiter = *plower <= upper && *pupper > upper - incr;
                if( *pupper > upper )
                    *pupper = upper;
            } else {
                if( *pupper > *plower )
                    *pupper = i_maxmin< T >::mn;
                if( plastiter != NULL )
                    *plastiter = *plower >= upper && *pupper < upper - incr;
                if( *pupper < upper )
                    *pupper = upper;
            }
        }
    }
}
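/* The __kmpc_dispatch_init_{4,4u,8,8u} entry points below simply instantiate
   __kmp_dispatch_init() for the four loop-index types; the
   __kmpc_dist_dispatch_init_* variants first narrow the bounds to the current
   team with __kmp_dist_get_bounds(), and __kmpc_dispatch_next_* /
   __kmpc_dispatch_fini_* forward to __kmp_dispatch_next() and
   __kmp_dispatch_finish() in the same way. */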
void
__kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                        kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                         kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                        kmp_int64 lb, kmp_int64 ub,
                        kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                         kmp_uint64 lb, kmp_uint64 ub,
                         kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
void
__kmpc_dist_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
    kmp_int32 *p_last, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_int32 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dist_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
    kmp_int32 *p_last, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_uint32 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dist_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
    kmp_int32 *p_last, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_int64 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dist_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
    kmp_int32 *p_last, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_uint64 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
int
__kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st )
{
    return __kmp_dispatch_next< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}

int
__kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st )
{
    return __kmp_dispatch_next< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}

int
__kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st )
{
    return __kmp_dispatch_next< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}

int
__kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
    kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st )
{
    return __kmp_dispatch_next< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}
/* Mark the end of a dynamic loop (one entry point per index width). */
void __kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid )  { __kmp_dispatch_finish< kmp_uint32 >( gtid, loc ); }
void __kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid )  { __kmp_dispatch_finish< kmp_uint64 >( gtid, loc ); }
void __kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid ) { __kmp_dispatch_finish< kmp_uint32 >( gtid, loc ); }
void __kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid ) { __kmp_dispatch_finish< kmp_uint64 >( gtid, loc ); }
kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker) {
    return value == checker;
}

kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker) {
    return value != checker;
}

kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker) {
    return value < checker;
}

kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker) {
    return value >= checker;
}

kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker) {
    return value <= checker;
}

kmp_uint32 __kmp_eq_8( kmp_uint64 value, kmp_uint64 checker) {
    return value == checker;
}

kmp_uint32 __kmp_neq_8( kmp_uint64 value, kmp_uint64 checker) {
    return value != checker;
}

kmp_uint32 __kmp_lt_8( kmp_uint64 value, kmp_uint64 checker) {
    return value < checker;
}

kmp_uint32 __kmp_ge_8( kmp_uint64 value, kmp_uint64 checker) {
    return value >= checker;
}

kmp_uint32 __kmp_le_8( kmp_uint64 value, kmp_uint64 checker) {
    return value <= checker;
}
kmp_uint32
__kmp_wait_yield_4(volatile kmp_uint32 * spinner,
                   kmp_uint32            checker,
                   kmp_uint32 (* pred)( kmp_uint32, kmp_uint32 )
                   USE_ITT_BUILD_ARG( void * obj )   // Higher-level synchronization object, or NULL.
                   )
{
    // note: we may not belong to a team at this point
    register volatile kmp_uint32         * spin          = spinner;
    register          kmp_uint32           check         = checker;
    register          kmp_uint32   spins;
    register          kmp_uint32 (*f) ( kmp_uint32, kmp_uint32 ) = pred;
    register          kmp_uint32           r;

    KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
    KMP_INIT_YIELD( spins );
    // main wait spin loop
    while(!f(r = TCR_4(*spin), check)) {
        KMP_FSYNC_SPIN_PREPARE( obj );
        /* if we have waited a bit, or are oversubscribed, yield;
           pause is in the following code */
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
    KMP_FSYNC_SPIN_ACQUIRED( obj );
    return r;
}

void
__kmp_wait_yield_8( volatile kmp_uint64 * spinner,
                    kmp_uint64            checker,
                    kmp_uint32 (* pred)( kmp_uint64, kmp_uint64 )
                    USE_ITT_BUILD_ARG( void * obj )   // Higher-level synchronization object, or NULL.
                    )
{
    // note: we may not belong to a team at this point
    register volatile kmp_uint64         * spin          = spinner;
    register          kmp_uint64           check         = checker;
    register          kmp_uint32   spins;
    register          kmp_uint32 (*f) ( kmp_uint64, kmp_uint64 ) = pred;
    register          kmp_uint64           r;

    KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
    KMP_INIT_YIELD( spins );
    // main wait spin loop
    while(!f(r = *spin, check))
    {
        KMP_FSYNC_SPIN_PREPARE( obj );
        /* if we have waited a bit, or are oversubscribed, yield;
           pause is in the following code */
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
    KMP_FSYNC_SPIN_ACQUIRED( obj );
}
#ifdef KMP_GOMP_COMPAT

void
__kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                           kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
                           kmp_int32 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk,
                                      push_ws );
}

void
__kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                            kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
                            kmp_int32 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk,
                                       push_ws );
}

void
__kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                           kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
                           kmp_int64 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk,
                                      push_ws );
}

void
__kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                            kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
                            kmp_int64 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk,
                                       push_ws );
}

void
__kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
}

void
__kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
}

void
__kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
}

void
__kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
}

#endif /* KMP_GOMP_COMPAT */