#include "kmp_error.h"

#if KMP_OS_WINDOWS && KMP_ARCH_X86
    #include <float.h>   // assumed: needed for the _control87() calls used by the guided schedules below
#endif

#ifdef KMP_STATIC_STEAL_ENABLED
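// Templated counterparts of dispatch_private_info{32,64}: per-thread bookkeeping for one
// dynamically scheduled loop.  The steal-enabled variant below carries static_steal_counter and
// groups the fields that the stealing code updates together (see the single 64-bit
// compare-and-swap on &pr->u.p.count in __kmp_dispatch_next).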
template< typename T >
struct dispatch_private_infoXX_template {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    T static_steal_counter;   // used only by the static_steal schedule

    struct KMP_ALIGN( 32 ) {  // fields the stealing code updates as one aligned block
#else /* KMP_STATIC_STEAL_ENABLED */

template< typename T >
struct dispatch_private_infoXX_template {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
#endif /* KMP_STATIC_STEAL_ENABLED */

template< typename T >
struct KMP_ALIGN_CACHE dispatch_private_info_template {
    union KMP_ALIGN_CACHE private_info_tmpl {
        dispatch_private_infoXX_template< T > p;
        dispatch_private_info64_t             p64;
    } u;
    enum sched_type schedule;        /* scheduling algorithm (referenced below as pr->schedule) */
    kmp_uint32      ordered;         /* ordered clause specified (pr->ordered) */
    kmp_uint32      ordered_bumped;
    kmp_int32       ordered_dummy[KMP_MAX_ORDERED-3]; // padding to retain the structure size
    dispatch_private_info * next;    /* stack of buffers for nest of serial regions */
    kmp_uint32      nomerge;         /* don't merge iterations if serialized (pr->nomerge) */
    kmp_uint32      type_size;
    enum cons_type  pushed_ws;
};
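// Templated counterparts of dispatch_shared_info{32,64}: state shared by the whole team for one
// dispatch buffer -- the iteration counter handed out to workers, the count of threads that have
// finished, and the iteration counter used to serialize 'ordered' sections.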
template< typename UT >
struct dispatch_shared_infoXX_template {
    volatile UT     iteration;
    volatile UT     num_done;
    volatile UT     ordered_iteration;
    UT              ordered_dummy[KMP_MAX_ORDERED-1];  // padding to retain the structure size
template< typename UT >
struct dispatch_shared_info_template {
    union shared_info_tmpl {
        dispatch_shared_infoXX_template< UT > s;
        dispatch_shared_info64_t              s64;
    } u;
    volatile kmp_uint32     buffer_index;
};
static void
__kmp_static_delay( int arg )
{
    /* no-op delay helper; only validates the argument */
#if KMP_ARCH_X86_64 && KMP_OS_LINUX
    KMP_ASSERT( arg >= 0 );
#endif
}

static void
__kmp_static_yield( int arg )
{
    __kmp_yield( arg );   // assumption: the elided body simply forwards to __kmp_yield()
}

#undef USE_TEST_LOCKS
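/*
 * The templates below wrap the runtime's atomic primitives (KMP_TEST_THEN_ADD32/64,
 * KMP_TEST_THEN_INC{_ACQ}32/64, KMP_COMPARE_AND_STORE_REL32/64) so that the dispatch code can be
 * written once over a signed type ST and still pick the right-width primitive at compile time.
 * The unspecialized templates deliberately assert: only the kmp_int32 / kmp_int64 specializations
 * are meant to be instantiated.
 *
 * Illustrative use (as it appears later in this file):
 *     init = chunk * test_then_inc_acq< ST >( (volatile ST *) &sh->u.s.iteration );
 * atomically claims the next chunk index from the shared iteration counter.
 */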
// test_then_add template (general template should NOT be used)
template< typename T >
static __forceinline T
test_then_add( volatile T *p, T d ) { KMP_ASSERT(0); }

template<>
__forceinline kmp_int32
test_then_add< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 d )
{
    kmp_int32 r;
    r = KMP_TEST_THEN_ADD32( p, d );
    return r;
}

template<>
__forceinline kmp_int64
test_then_add< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 d )
{
    kmp_int64 r;
    r = KMP_TEST_THEN_ADD64( p, d );
    return r;
}
// test_then_inc_acq template (general template should NOT be used)
template< typename T >
static __forceinline T
test_then_inc_acq( volatile T *p ) { KMP_ASSERT(0); }

template<>
__forceinline kmp_int32
test_then_inc_acq< kmp_int32 >( volatile kmp_int32 *p )
{
    kmp_int32 r;
    r = KMP_TEST_THEN_INC_ACQ32( p );
    return r;
}

template<>
__forceinline kmp_int64
test_then_inc_acq< kmp_int64 >( volatile kmp_int64 *p )
{
    kmp_int64 r;
    r = KMP_TEST_THEN_INC_ACQ64( p );
    return r;
}
// test_then_inc template (general template should NOT be used)
template< typename T >
static __forceinline T
test_then_inc( volatile T *p ) { KMP_ASSERT(0); }

template<>
__forceinline kmp_int32
test_then_inc< kmp_int32 >( volatile kmp_int32 *p )
{
    kmp_int32 r;
    r = KMP_TEST_THEN_INC32( p );
    return r;
}

template<>
__forceinline kmp_int64
test_then_inc< kmp_int64 >( volatile kmp_int64 *p )
{
    kmp_int64 r;
    r = KMP_TEST_THEN_INC64( p );
    return r;
}
// compare_and_swap template (general template should NOT be used)
template< typename T >
static __forceinline kmp_int32
compare_and_swap( volatile T *p, T c, T s ) { KMP_ASSERT(0); }

template<>
__forceinline kmp_int32
compare_and_swap< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 c, kmp_int32 s )
{
    return KMP_COMPARE_AND_STORE_REL32( p, c, s );
}

template<>
__forceinline kmp_int32
compare_and_swap< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 c, kmp_int64 s )
{
    return KMP_COMPARE_AND_STORE_REL64( p, c, s );
}
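/*
 * __kmp_wait_yield: spin until pred( *spinner, checker ) becomes true, yielding when the machine
 * is oversubscribed (more active threads than available processors) or after a number of spins.
 * The USE_ITT_BUILD_ARG object is only used to annotate the spin for ITT/VTune analysis.
 */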
template< typename UT >
static UT   // unsigned 4- or 8-byte type
__kmp_wait_yield( volatile UT * spinner,
                  UT            checker,
                  kmp_uint32 (* pred)( UT, UT )
                  USE_ITT_BUILD_ARG( void * obj )   // higher-level synchronization object, or NULL
                )
{
    // note: we may not belong to a team at this point
    register volatile UT         * spin  = spinner;
    register          UT           check = checker;
    register          kmp_uint32   spins;
    register          kmp_uint32 (*f) ( UT, UT ) = pred;
    register          UT           r;

    KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
    KMP_INIT_YIELD( spins );
    // main wait spin loop
    while ( !f( r = *spin, check ) )
    {
        KMP_FSYNC_SPIN_PREPARE( obj );

        __kmp_static_delay( TRUE );

        // if oversubscribed, or we have waited a bit (and KMP_LIBRARY=throughput), then yield
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
    KMP_FSYNC_SPIN_ACQUIRED( obj );
    return r;
}
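/*
 * Relational predicates passed to __kmp_wait_yield<>.  For example, later in this file the
 * dispatch code waits for its buffer to become current with
 *     __kmp_wait_yield< kmp_uint32 >( &sh->buffer_index, my_buffer_index, __kmp_eq< kmp_uint32 >
 *                                     USE_ITT_BUILD_ARG( NULL ) );
 * and ordered sections wait with __kmp_ge< UT > on sh->u.s.ordered_iteration.
 */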
template< typename UT >
static kmp_uint32 __kmp_eq( UT value, UT checker) {
    return value == checker;
}

template< typename UT >
static kmp_uint32 __kmp_neq( UT value, UT checker) {
    return value != checker;
}

template< typename UT >
static kmp_uint32 __kmp_lt( UT value, UT checker) {
    return value < checker;
}

template< typename UT >
static kmp_uint32 __kmp_ge( UT value, UT checker) {
    return value >= checker;
}

template< typename UT >
static kmp_uint32 __kmp_le( UT value, UT checker) {
    return value <= checker;
}
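/*
 * th_deo_fcn / th_dxo_fcn callbacks.  __kmp_dispatch_deo_error / __kmp_dispatch_dxo_error are the
 * variants installed when the loop has no ordered clause: they only maintain the consistency-check
 * stack.  The templated __kmp_dispatch_deo / __kmp_dispatch_dxo implement entry to and exit from
 * an 'ordered' section by waiting on, and then bumping, sh->u.s.ordered_iteration.
 */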
static void
__kmp_dispatch_deo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    kmp_info_t *th;

    KMP_DEBUG_ASSERT( gtid_ref );

    if ( __kmp_env_consistency_check ) {
        th = __kmp_threads[*gtid_ref];
        if ( th -> th.th_root -> r.r_active
          && ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) ) {
            __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL );
        }
    }
}
template< typename UT >
static void
__kmp_dispatch_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    typedef typename traits_t< UT >::signed_t ST;
    dispatch_private_info_template< UT > * pr;

    int gtid = *gtid_ref;
    kmp_info_t *th = __kmp_threads[ gtid ];
    KMP_DEBUG_ASSERT( th -> th.th_dispatch );

    KD_TRACE(100, ("__kmp_dispatch_deo: T#%d called\n", gtid ) );
    if ( __kmp_env_consistency_check ) {
        pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_pr_current );
        if ( pr -> pushed_ws != ct_none ) {
            __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL );
        }
    }

    if ( ! th -> th.th_team -> t.t_serialized ) {
        dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_sh_current );
        UT lower;
        const char * buff;   // for the KD_TRACE debug output below

        if ( ! __kmp_env_consistency_check ) {
            pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                    ( th -> th.th_dispatch -> th_dispatch_pr_current );
        }
        lower = pr->u.p.ordered_lower;

        #if ! defined( KMP_GOMP_COMPAT )
        if ( __kmp_env_consistency_check ) {
            if ( pr->ordered_bumped ) {
                struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
                __kmp_error_construct2(
                    kmp_i18n_msg_CnsMultipleNesting,
                    ct_ordered_in_pdo, loc_ref,
                    & p->stack_data[ p->w_top ]
                );
            }
        }
        #endif /* !defined(KMP_GOMP_COMPAT) */

        // debug trace; format specifiers are composed at run time from traits_t<>::spec
        buff = __kmp_str_format(
            "__kmp_dispatch_deo: T#%%d before wait: ordered_iter:%%%s lower:%%%s\n",
            traits_t< UT >::spec, traits_t< UT >::spec );
        KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
        __kmp_str_free( &buff );

        __kmp_wait_yield< UT >( &sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                USE_ITT_BUILD_ARG( NULL )
                              );

        buff = __kmp_str_format(
            "__kmp_dispatch_deo: T#%%d after wait: ordered_iter:%%%s lower:%%%s\n",
            traits_t< UT >::spec, traits_t< UT >::spec );
        KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
        __kmp_str_free( &buff );
    }
    KD_TRACE(100, ("__kmp_dispatch_deo: T#%d returned\n", gtid ) );
}
static void
__kmp_dispatch_dxo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    kmp_info_t *th;

    if ( __kmp_env_consistency_check ) {
        th = __kmp_threads[*gtid_ref];
        if ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) {
            __kmp_pop_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref );
        }
    }
}
template< typename UT >
static void
__kmp_dispatch_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    typedef typename traits_t< UT >::signed_t ST;
    dispatch_private_info_template< UT > * pr;

    int gtid = *gtid_ref;
    kmp_info_t *th = __kmp_threads[ gtid ];
    KMP_DEBUG_ASSERT( th -> th.th_dispatch );

    KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d called\n", gtid ) );
    if ( __kmp_env_consistency_check ) {
        pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_pr_current );
        if ( pr -> pushed_ws != ct_none ) {
            __kmp_pop_sync( gtid, ct_ordered_in_pdo, loc_ref );
        }
    }

    if ( ! th -> th.th_team -> t.t_serialized ) {
        dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_sh_current );

        if ( ! __kmp_env_consistency_check ) {
            pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                    ( th -> th.th_dispatch -> th_dispatch_pr_current );
        }

        KMP_FSYNC_RELEASING( & sh->u.s.ordered_iteration );
        #if ! defined( KMP_GOMP_COMPAT )
        if ( __kmp_env_consistency_check ) {
            if ( pr->ordered_bumped != 0 ) {
                struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
                __kmp_error_construct2(
                    kmp_i18n_msg_CnsMultipleNesting,
                    ct_ordered_in_pdo, loc_ref,
                    & p->stack_data[ p->w_top ]
                );
            }
        }
        #endif /* !defined(KMP_GOMP_COMPAT) */

        pr->ordered_bumped += 1;

        KD_TRACE(1000, ("__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n",
                        gtid, pr->ordered_bumped ) );

        test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );
    }
    KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d returned\n", gtid ) );
}
template< typename UT >
static __forceinline long double
__kmp_pow( long double x, UT y ) {
    long double s = 1.0L;
    KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0);
    // assumed body: exponentiation by squaring
    while ( y ) {
        if ( y & 1 ) s *= x;
        x *= x; y >>= 1;
    }
    return s;
}
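/*
 * __kmp_dispatch_guided_remaining: returns the number of iterations still unassigned after 'idx'
 * chunks of the guided-analytical schedule have been handed out, i.e. ceil( tc * base^idx ), where
 * base = 1 - 0.5/nproc is set up in __kmp_dispatch_init.
 */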
template< typename T >
static __inline typename traits_t< T >::unsigned_t
__kmp_dispatch_guided_remaining(
    T                                  tc,
    typename traits_t< T >::floating_t base,
    typename traits_t< T >::unsigned_t idx
) {
    typedef typename traits_t< T >::unsigned_t UT;

    long double x = tc * __kmp_pow< UT >(base, idx);
    UT r = (UT) x;          // assumed remainder of the body: round x up to the next integer
    if ( x == r )
        return r;
    return r + 1;
}
// Tuning constants for the guided-iterative schedule (used below to set pr->u.p.parm2 and parm3).
static int    guided_int_param = 2;
static double guided_flt_param = 0.5;   // = 1.0 / guided_int_param
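/*
 * __kmp_dispatch_init: called when a thread encounters a dynamically scheduled loop (directly or
 * through the __kmpc_dispatch_init_* entry points at the bottom of this file).  It resolves
 * runtime/auto schedules to a concrete one, computes the trip count, claims a dispatch buffer
 * (waiting until the buffer index becomes current), fills in the per-schedule parameters
 * parm1..parm4, and installs the ordered entry/exit callbacks.
 */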
template< typename T >
static void
__kmp_dispatch_init(
    // assumed parameter list, matching the __kmp_dispatch_init< T >( loc, gtid, schedule,
    // lb, ub, st, chunk, push_ws ) calls made by the wrappers below
    ident_t                          * loc,
    int                                gtid,
    enum sched_type                    schedule,
    T                                  lb,
    T                                  ub,
    typename traits_t< T >::signed_t   st,
    typename traits_t< T >::signed_t   chunk,
    int                                push_ws
) {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    typedef typename traits_t< T >::floating_t  DBL;
    static const int ___kmp_size_type = sizeof( UT );
    int            active;
    T              tc;
    kmp_info_t *   th;
    kmp_team_t *   team;
    kmp_uint32     my_buffer_index;
    dispatch_private_info_template< T >          * pr;
    dispatch_shared_info_template< UT > volatile * sh;

    KMP_BUILD_ASSERT( sizeof( dispatch_private_info_template< T > ) == sizeof( dispatch_private_info ) );
    KMP_BUILD_ASSERT( sizeof( dispatch_shared_info_template< UT > ) == sizeof( dispatch_shared_info ) );

    if ( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    // debug trace; format specifiers are built at run time from traits_t<>::spec
    const char * buff;
    buff = __kmp_str_format(
        "__kmp_dispatch_init: T#%%d called: schedule:%%d chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
        traits_t< ST >::spec, traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
    KD_TRACE(10, ( buff, gtid, schedule, chunk, lb, ub, st ) );
    __kmp_str_free( &buff );
    th     = __kmp_threads[ gtid ];
    team   = th -> th.th_team;
    active = ! team -> t.t_serialized;
    th->th.th_ident = loc;

    if ( ! active ) {
        // serialized region: use the thread-private buffer
        pr = reinterpret_cast< dispatch_private_info_template< T >* >
                ( th -> th.th_dispatch -> th_disp_buffer );
    } else {
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        my_buffer_index = th->th.th_dispatch->th_disp_index ++;

        // claim this thread's slot in the rotating set of KMP_MAX_DISP_BUF dispatch buffers
        pr = reinterpret_cast< dispatch_private_info_template< T >* >
                ( &th -> th.th_dispatch -> th_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );
        sh = reinterpret_cast< dispatch_shared_info_template< UT > volatile * >
                ( &team -> t.t_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );
    }

    pr->type_size = ___kmp_size_type;   // remember the size of variables
    if ( schedule == kmp_sch_static ) {
        schedule = __kmp_static;
    } else {
        if ( schedule == kmp_sch_runtime ) {
            // use the schedule and chunk from OMP_SCHEDULE / the global run-time default
            #if OMP_30_ENABLED
            schedule = team -> t.t_sched.r_sched_type;
            if ( schedule == kmp_sch_guided_chunked ) {
                schedule = __kmp_guided;
            } else if ( schedule == kmp_sch_static ) {
                schedule = __kmp_static;
            }
            chunk = team -> t.t_sched.chunk;
            #else
            kmp_r_sched_t r_sched = __kmp_get_schedule_global();
            schedule = r_sched.r_sched_type;
            chunk = r_sched.chunk;
            #endif

            buff = __kmp_str_format(
                "__kmp_dispatch_init: T#%%d new: schedule:%%d chunk:%%%s\n",
                traits_t< ST >::spec );
            KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
            __kmp_str_free( &buff );
        } else {
            if ( schedule == kmp_sch_guided_chunked ) {
                schedule = __kmp_guided;
            }
            if ( chunk <= 0 ) {
                chunk = KMP_DEFAULT_CHUNK;
            }
        }
        #if OMP_30_ENABLED
        if ( schedule == kmp_sch_auto ) {
            // 'auto' was mapped to a concrete schedule at library initialization
            schedule = __kmp_auto;

            buff = __kmp_str_format(
                "__kmp_dispatch_init: kmp_sch_auto: T#%%d new: schedule:%%d chunk:%%%s\n",
                traits_t< ST >::spec );
            KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
            __kmp_str_free( &buff );
        }
        #endif // OMP_30_ENABLED
    }
719 if ( team->t.t_nproc > 1<<20 && schedule == kmp_sch_guided_analytical_chunked ) {
720 schedule = kmp_sch_guided_iterative_chunked;
721 KMP_WARNING( DispatchManyThreads );
723 pr->u.p.parm1 = chunk;
726 "unknown scheduling type" );
730 if ( __kmp_env_consistency_check ) {
732 __kmp_error_construct(
733 kmp_i18n_msg_CnsLoopIncrZeroProhibited,
734 ( pr->ordered ? ct_pdo_ordered : ct_pdo ), loc
    // trip-count computation (only fragments of the st > 0 / st < 0 cascade survive in this excerpt)
    tc = ( ub - lb + st );
    // ...
    } else if ( ub < lb ) {
        // ...
    }

    pr->u.p.last_upper = ub + st;
770 if ( pr->ordered == 0 ) {
771 th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo_error;
772 th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo_error;
774 pr->ordered_bumped = 0;
776 pr->u.p.ordered_lower = 1;
777 pr->u.p.ordered_upper = 0;
779 th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo< UT >;
780 th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo< UT >;
784 if ( __kmp_env_consistency_check ) {
785 enum cons_type ws = pr->ordered ? ct_pdo_ordered : ct_pdo;
787 __kmp_push_workshare( gtid, ws, loc );
790 __kmp_check_workshare( gtid, ws, loc );
791 pr->pushed_ws = ct_none;
    /* per-schedule setup of pr->u.p.* */
    switch ( schedule ) {
    #if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
    case kmp_sch_static_steal:
        {
            T nproc = team->t.t_nproc;
            T ntc, init;

            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_steal case\n", gtid ) );

            ntc = (tc % chunk ? 1 : 0) + tc / chunk;
            if ( nproc > 1 && ntc >= nproc ) {
                T id = __kmp_tid_from_gtid(gtid);
                T small_chunk, extras;

                small_chunk = ntc / nproc;
                extras = ntc % nproc;

                init = id * small_chunk + ( id < extras ? id : extras );
                pr->u.p.count = init;
                pr->u.p.ub = init + small_chunk + ( id < extras ? 1 : 0 );
                // ...
                break;
            } else {
                KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_balanced\n",
                               gtid ) );
                schedule = kmp_sch_static_balanced;
                /* too few chunks: fall through to kmp_sch_static_balanced */
            }
        } // case
    #endif
    case kmp_sch_static_balanced:
        {
            T nproc = team->t.t_nproc;
            T init, limit;

            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_balanced case\n",
                           gtid ) );
            if ( nproc > 1 ) {
                T id = __kmp_tid_from_gtid(gtid);

                if ( tc < nproc ) {                       // assumed: fewer iterations than threads
                    if ( id < tc ) {
                        init  = id;
                        limit = id;
                        pr->u.p.parm1 = (id == tc - 1);   /* parm1 stores *plastiter */
                    } else {
                        pr->u.p.count = 1;                /* no chunk for this thread */
                        pr->u.p.parm1 = FALSE;
                        break;
                    }
                } else {
                    T small_chunk = tc / nproc;
                    T extras = tc % nproc;
                    init  = id * small_chunk + ( id < extras ? id : extras );
                    limit = init + small_chunk - ( id < extras ? 0 : 1 );
                    pr->u.p.parm1 = ( id == nproc - 1 );
                }
            } else {
                if ( tc > 0 ) {                           // assumed: single thread gets everything
                    init  = 0;
                    limit = tc - 1;
                    pr->u.p.parm1 = TRUE;
                } else {
                    pr->u.p.count = 1;                    /* zero-trip loop: no chunks to execute */
                    pr->u.p.parm1 = FALSE;
                    break;
                }
            }
            if ( st == 1 ) {
                pr->u.p.lb = lb + init;
                pr->u.p.ub = lb + limit;
            } else {
                T ub_tmp = lb + limit * st;   // calculated upper bound; "ub" is the user-supplied one
                pr->u.p.lb = lb + init * st;
                // clamp to the user bound so that lastprivate sees the exact value
                if ( st > 0 ) {
                    pr->u.p.ub = ( ub_tmp + st > ub ? ub : ub_tmp );
                } else {
                    pr->u.p.ub = ( ub_tmp + st < ub ? ub : ub_tmp );
                }
            }
            if ( pr->ordered ) {
                pr->u.p.ordered_lower = init;
                pr->u.p.ordered_upper = limit;
            }
            break;
        }
    case kmp_sch_guided_iterative_chunked :
        {
            T nproc = team->t.t_nproc;
            KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked case\n",gtid));

            if ( nproc > 1 ) {
                if ( (2L * chunk + 1 ) * nproc >= tc ) {
                    /* chunk size too large: switch to dynamic */
                    schedule = kmp_sch_dynamic_chunked;
                } else {
                    // when remaining iterations drop below parm2, switch to dynamic
                    pr->u.p.parm2 = guided_int_param * nproc * ( chunk + 1 );
                    *(double*)&pr->u.p.parm3 = guided_flt_param / nproc;
                }
            } else {
                KD_TRACE(100,("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",gtid));
                schedule = kmp_sch_static_greedy;
                KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
            }
        }
        break;
    case kmp_sch_guided_analytical_chunked:
        {
            T nproc = team->t.t_nproc;
            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_analytical_chunked case\n", gtid));

            if ( nproc > 1 ) {
                if ( (2L * chunk + 1 ) * nproc >= tc ) {
                    /* chunk size too large: switch to dynamic */
                    schedule = kmp_sch_dynamic_chunked;
                } else {
                    DBL x;                    /* commonly used term: 1 - 0.5/nproc */
                    UT  cross, left, right, mid;
                    long double p;
                    #if KMP_OS_WINDOWS && KMP_ARCH_X86
                    /* use a 64-bit significand for the long double computations below */
                    unsigned int oldFpcw = _control87(0,0x30000);
                    #endif
                    /* comparison value used by the cross-over solver */
                    long double target = ((long double)chunk * 2 + 1) * nproc / tc;

                    x = (long double)1.0 - (long double)0.5 / nproc;

                    {   /* check natural alignment of parm3, which is accessed through a DBL* cast */
                        struct { char a; union { char b; DBL d; }; } t;   // assumed probe struct
                        ptrdiff_t natural_alignment = (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
                        KMP_DEBUG_ASSERT( ( ( (ptrdiff_t)&pr->u.p.parm3 ) & ( natural_alignment ) ) == 0 );
                    }
                    /* save the term in the thread-private dispatch structure */
                    *(DBL*)&pr->u.p.parm3 = x;

                    /* grow 'right' geometrically until base^right <= target ... */
                    right = 229;   // assumed initial guess; any positive value works
                    p = __kmp_pow< UT >(x,right);
                    if ( p > target ) {
                        do {
                            p *= p;
                            right <<= 1;
                        } while ( p > target && right < (1<<27) );
                        left = right >> 1;
                    } else {
                        left = 0;
                    }
                    /* ... then bisect for the cross-over chunk index */
                    while ( left + 1 < right ) {
                        mid = (left + right) / 2;
                        if ( __kmp_pow< UT >(x,mid) > target ) {
                            left = mid;
                        } else {
                            right = mid;
                        }
                    }
                    cross = right;
                    KMP_ASSERT(cross && __kmp_pow< UT >(x, cross - 1) > target && __kmp_pow< UT >(x, cross) <= target);

                    pr->u.p.parm2 = cross;

                    #if ( ( KMP_OS_LINUX || KMP_OS_WINDOWS ) && KMP_ARCH_X86 ) && ( ! defined( KMP_I8 ) )
                        #define GUIDED_ANALYTICAL_WORKAROUND (*( DBL * )&pr->u.p.parm3)
                    #else
                        #define GUIDED_ANALYTICAL_WORKAROUND (x)
                    #endif
                    /* dynamic-style scheduling offset */
                    pr->u.p.count = tc - __kmp_dispatch_guided_remaining(tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - cross * chunk;
                    #if KMP_OS_WINDOWS && KMP_ARCH_X86
                    _control87(oldFpcw,0x30000);   // restore the FP control word
                    #endif
                }
            } else {
                KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",
                               gtid ) );
                schedule = kmp_sch_static_greedy;
            }
        }
        break;
    case kmp_sch_static_greedy:
        KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
        pr->u.p.parm1 = ( team -> t.t_nproc > 1 ) ?
            ( tc + team->t.t_nproc - 1 ) / team->t.t_nproc :
            tc;   // assumed: with one thread the whole trip count is one chunk
        break;
    case kmp_sch_static_chunked :
    case kmp_sch_dynamic_chunked :
        KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid));
        break;
    case kmp_sch_trapezoidal :
        {
            /* TSS: trapezoid self-scheduling, minimum chunk size = parm1 */
            T parm1, parm2, parm3, parm4;
            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_trapezoidal case\n", gtid ) );

            parm1 = chunk;                              // assumed: last (minimum) chunk size starts at the requested chunk
            parm2 = ( tc / (2 * team->t.t_nproc) );     // size of the first chunk
            if ( parm1 < 1 ) {                          // assumed clamp around the surviving else-branch
                parm1 = 1;
            } else if ( parm1 > parm2 ) {
                parm1 = parm2;
            }
            parm3 = ( parm2 + parm1 );                  // number of chunks: 2*tc / (first + last), rounded up
            parm3 = ( 2 * tc + parm3 - 1) / parm3;
            parm4 = ( parm3 - 1 );                      // per-chunk decrement of the chunk size
            parm4 = ( parm2 - parm1 ) / parm4;

            pr->u.p.parm1 = parm1;
            pr->u.p.parm2 = parm2;
            pr->u.p.parm3 = parm3;
            pr->u.p.parm4 = parm4;
        }
        break;
    default:
        {
            __kmp_msg(
                kmp_ms_fatal,                          // assumed severity for an unknown schedule
                KMP_MSG( UnknownSchedTypeDetected ),
                KMP_HNT( GetNewerLibrary ),
                __kmp_msg_null
            );
        }
        break;
    } // switch

    pr->schedule = schedule;

    if ( active ) {
        KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d sh->buffer_index:%d\n",
                        gtid, my_buffer_index, sh->buffer_index) );
        __kmp_wait_yield< kmp_uint32 >( & sh->buffer_index, my_buffer_index, __kmp_eq< kmp_uint32 >
                                        USE_ITT_BUILD_ARG( NULL )
                                      );

        KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d sh->buffer_index:%d\n",
                        gtid, my_buffer_index, sh->buffer_index) );

        th -> th.th_dispatch -> th_dispatch_pr_current = (dispatch_private_info_t*) pr;
        th -> th.th_dispatch -> th_dispatch_sh_current = (dispatch_shared_info_t*) sh;
1117 if ( pr->ordered ) {
1118 __kmp_itt_ordered_init( gtid );
1126 buff = __kmp_str_format(
1127 "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s lb:%%%s ub:%%%s" \
1128 " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s" \
1129 " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
1130 traits_t< UT >::spec, traits_t< T >::spec, traits_t< T >::spec,
1131 traits_t< ST >::spec, traits_t< UT >::spec, traits_t< UT >::spec,
1132 traits_t< UT >::spec, traits_t< UT >::spec, traits_t< T >::spec,
1133 traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec );
1134 KD_TRACE(10, ( buff,
1135 gtid, pr->schedule, pr->ordered, pr->u.p.lb, pr->u.p.ub,
1136 pr->u.p.st, pr->u.p.tc, pr->u.p.count,
1137 pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,
1138 pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4 ) );
1139 __kmp_str_free( &buff );
1142 #if ( KMP_STATIC_STEAL_ENABLED )
1143 if ( ___kmp_size_type < 8 ) {
1152 volatile T * p = &pr->u.p.static_steal_counter;
1156 #endif // ( KMP_STATIC_STEAL_ENABLED && USE_STEALING )
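/*
 * __kmp_dispatch_finish: called at the end of an ordered loop chunk (see __kmpc_dispatch_fini_*
 * below).  If the thread already bumped the shared ordered counter from inside its ordered section
 * it just resets its flag; otherwise it waits for its turn (ordered_lower) and performs the
 * increment itself so the next thread can enter its ordered section.
 */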
template< typename UT >
static void
__kmp_dispatch_finish( int gtid, ident_t *loc )
{
    typedef typename traits_t< UT >::signed_t ST;
    kmp_info_t *th = __kmp_threads[ gtid ];

    KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid ) );
    if ( ! th -> th.th_team -> t.t_serialized ) {

        dispatch_private_info_template< UT > * pr =
            reinterpret_cast< dispatch_private_info_template< UT >* >
                ( th->th.th_dispatch->th_dispatch_pr_current );
        dispatch_shared_info_template< UT > volatile * sh =
            reinterpret_cast< dispatch_shared_info_template< UT >volatile* >
                ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( pr );
        KMP_DEBUG_ASSERT( sh );
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        if ( pr->ordered_bumped ) {
            KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
                            gtid ) );
            pr->ordered_bumped = 0;
        } else {
            UT lower = pr->u.p.ordered_lower;
            const char * buff;   // for the KD_TRACE debug output below

            buff = __kmp_str_format(
                "__kmp_dispatch_finish: T#%%d before wait: ordered_iteration:%%%s lower:%%%s\n",
                traits_t< UT >::spec, traits_t< UT >::spec );
            KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
            __kmp_str_free( &buff );

            __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                   USE_ITT_BUILD_ARG(NULL)
                                  );

            buff = __kmp_str_format(
                "__kmp_dispatch_finish: T#%%d after wait: ordered_iteration:%%%s lower:%%%s\n",
                traits_t< UT >::spec, traits_t< UT >::spec );
            KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
            __kmp_str_free( &buff );

            test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );
        }
    }
    KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid ) );
}
#ifdef KMP_GOMP_COMPAT

template< typename UT >
static void
__kmp_dispatch_finish_chunk( int gtid, ident_t *loc )
{
    typedef typename traits_t< UT >::signed_t ST;
    kmp_info_t *th = __kmp_threads[ gtid ];

    KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid ) );
    if ( ! th -> th.th_team -> t.t_serialized ) {

        dispatch_private_info_template< UT > * pr =
            reinterpret_cast< dispatch_private_info_template< UT >* >
                ( th->th.th_dispatch->th_dispatch_pr_current );
        dispatch_shared_info_template< UT > volatile * sh =
            reinterpret_cast< dispatch_shared_info_template< UT >volatile* >
                ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( pr );
        KMP_DEBUG_ASSERT( sh );
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        UT lower = pr->u.p.ordered_lower;
        UT upper = pr->u.p.ordered_upper;
        UT inc = upper - lower + 1;

        if ( pr->ordered_bumped == inc ) {
            KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
                            gtid ) );
            pr->ordered_bumped = 0;
        } else {
            inc -= pr->ordered_bumped;

            const char * buff;   // for the KD_TRACE debug output below
            buff = __kmp_str_format(
                "__kmp_dispatch_finish_chunk: T#%%d before wait: " \
                "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
                traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
            KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower, upper ) );
            __kmp_str_free( &buff );

            __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                   USE_ITT_BUILD_ARG(NULL)
                                  );

            KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting ordered_bumped to zero\n",
                            gtid ) );
            pr->ordered_bumped = 0;

            buff = __kmp_str_format(
                "__kmp_dispatch_finish_chunk: T#%%d after wait: " \
                "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
                traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
            KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper ) );
            __kmp_str_free( &buff );

            test_then_add< ST >( (volatile ST *) & sh->u.s.ordered_iteration, inc);
        }
    }
    KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid ) );
}

#endif /* KMP_GOMP_COMPAT */
template< typename T >
static int
__kmp_dispatch_next(
    ident_t *loc, int gtid, kmp_int32 *p_last, T *p_lb, T *p_ub,
    typename traits_t< T >::signed_t *p_st
) {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    typedef typename traits_t< T >::floating_t  DBL;
    static const int ___kmp_size_type = sizeof( UT );

    int status;
    dispatch_private_info_template< T > * pr;
    kmp_info_t                          * th   = __kmp_threads[ gtid ];
    kmp_team_t                          * team = th -> th.th_team;

    // debug trace
    const char * buff;
    buff = __kmp_str_format(
        "__kmp_dispatch_next: T#%%d called p_lb:%%%s p_ub:%%%s p_st:%%%s p_last: %%p\n",
        traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
    KD_TRACE(1000, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last ) );
    __kmp_str_free( &buff );
    if ( team -> t.t_serialized ) {
        /* NOTE: serialize this dispatch because we are not at the active level */
        pr = reinterpret_cast< dispatch_private_info_template< T >* >
                ( th -> th.th_dispatch -> th_disp_buffer ); /* top of the stack */
        KMP_DEBUG_ASSERT( pr );

        if ( (status = (pr->u.p.tc != 0)) == 0 ) {
            // no iterations left
            if ( __kmp_env_consistency_check ) {
                if ( pr->pushed_ws != ct_none ) {
                    pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
                }
            }
        } else if ( pr->nomerge ) {
            kmp_int32 last;
            T         start;
            UT        limit, trip, init;
            ST        incr;
            T         chunk = pr->u.p.parm1;

            KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", gtid ) );

            init = chunk * pr->u.p.count++;
            trip = pr->u.p.tc - 1;
            start = pr->u.p.lb;      // assumed: start/incr are picked up from the private buffer
            incr  = pr->u.p.st;

            if ( (status = (init <= trip)) == 0 ) {
                // this chunk is past the end of the loop
                if ( p_st != 0 ) *p_st = 0;
                if ( __kmp_env_consistency_check ) {
                    if ( pr->pushed_ws != ct_none ) {
                        pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
                    }
                }
            } else {
                limit = chunk + init - 1;
                if ( (last = (limit >= trip)) != 0 ) {
                    limit = trip;
                    pr->u.p.last_upper = pr->u.p.ub;
                }
                if ( p_st != 0 ) *p_st = incr;
                if ( incr == 1 ) {
                    *p_lb = start + init;
                    *p_ub = start + limit;
                } else {
                    *p_lb = start + init * incr;
                    *p_ub = start + limit * incr;
                }

                if ( pr->ordered ) {
                    pr->u.p.ordered_lower = init;
                    pr->u.p.ordered_upper = limit;

                    buff = __kmp_str_format(
                        "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                        traits_t< UT >::spec, traits_t< UT >::spec );
                    KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                    __kmp_str_free( &buff );
                }
            }
        } else {
            // single merged chunk: the whole remaining range was handed out (assignments of
            // *p_lb / *p_ub from the private buffer are elided in this excerpt)
            pr->u.p.last_upper = *p_ub;
        }

        // serialized-case trace
        buff = __kmp_str_format(
            "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s " \
            "p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
        KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, *p_st, p_last, status) );
        __kmp_str_free( &buff );
        return status;
    }
    else {
        kmp_int32 last = 0;
        dispatch_shared_info_template< UT > *sh;
        T         start;
        ST        incr;
        UT        limit, trip, init;

        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        pr = reinterpret_cast< dispatch_private_info_template< T >* >
                ( th->th.th_dispatch->th_dispatch_pr_current );
        KMP_DEBUG_ASSERT( pr );
        sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
                ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( sh );

        if ( pr->u.p.tc == 0 ) {
            // zero trip count: no work to hand out
            status = 0;
        } else {
1464 switch (pr->schedule) {
1465 #if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
1468 T chunk = pr->u.p.parm1;
1470 KD_TRACE(100, (
"__kmp_dispatch_next: T#%d kmp_sch_static_steal case\n", gtid) );
1472 trip = pr->u.p.tc - 1;
1474 if ( ___kmp_size_type > 4 ) {
1477 init = ( pr->u.p.count )++;
1478 status = ( init < (UT)pr->u.p.ub );
1490 union_i4 vold, vnew;
1491 vold.b = *(
volatile kmp_int64 * )(&pr->u.p.count);
1494 while( ! KMP_COMPARE_AND_STORE_ACQ64(
1495 (
volatile kmp_int64* )&pr->u.p.count,
1496 *VOLATILE_CAST(kmp_int64 *)&vold.b,
1497 *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
1499 vold.b = *(
volatile kmp_int64 * )(&pr->u.p.count);
1504 init = vnew.p.count;
1505 status = ( init < (UT)vnew.p.ub ) ;
1509 kmp_info_t **other_threads = team->t.t_threads;
1510 int while_limit = 10;
1511 int while_index = 0;
1515 while ( ( !status ) && ( while_limit != ++while_index ) ) {
1516 union_i4 vold, vnew;
1517 kmp_int32 remaining;
1518 T victimIdx = pr->u.p.parm4;
1519 T oldVictimIdx = victimIdx;
1520 dispatch_private_info_template< T > * victim;
1524 victimIdx = team->t.t_nproc - 1;
1528 victim =
reinterpret_cast< dispatch_private_info_template< T >*
>
1529 ( other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current );
1530 }
while ( (victim == NULL || victim == pr) && oldVictimIdx != victimIdx );
1533 ( (*(
volatile T * )&victim->u.p.static_steal_counter) !=
1534 (*(
volatile T * )&pr->u.p.static_steal_counter) ) ) {
1540 if ( oldVictimIdx == victimIdx ) {
1543 pr->u.p.parm4 = victimIdx;
1546 vold.b = *(
volatile kmp_int64 * )( &victim->u.p.count );
1549 KMP_DEBUG_ASSERT( (vnew.p.ub - 1) * (UT)chunk <= trip );
1550 if ( vnew.p.count >= (UT)vnew.p.ub || (remaining = vnew.p.ub - vnew.p.count) < 4 ) {
1553 vnew.p.ub -= (remaining >> 2);
1554 KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
1555 #pragma warning( push )
1557 #pragma warning( disable: 186 )
1558 KMP_DEBUG_ASSERT(vnew.p.ub >= 0);
1559 #pragma warning( pop )
1561 if ( KMP_COMPARE_AND_STORE_ACQ64(
1562 (
volatile kmp_int64 * )&victim->u.p.count,
1563 *VOLATILE_CAST(kmp_int64 *)&vold.b,
1564 *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
1573 init = vold.p.count;
1575 pr->u.p.count = init + 1;
1576 pr->u.p.ub = vnew.p.count;
1579 vold.p.count = init + 1;
1581 *(
volatile kmp_int64 * )(&pr->u.p.count) = vold.b;
1582 #endif // KMP_ARCH_X86
1593 if ( p_st != 0 ) *p_st = 0;
1595 start = pr->u.p.parm2;
1597 limit = chunk + init - 1;
1600 KMP_DEBUG_ASSERT(init <= trip);
1601 if ( (last = (limit >= trip)) != 0 )
1606 if ( p_st != 0 ) *p_st = incr;
1609 *p_lb = start + init;
1610 *p_ub = start + limit;
1612 *p_lb = start + init * incr;
1613 *p_ub = start + limit * incr;
1616 if ( pr->ordered ) {
1617 pr->u.p.ordered_lower = init;
1618 pr->u.p.ordered_upper = limit;
1623 buff = __kmp_str_format(
1624 "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
1625 traits_t< UT >::spec, traits_t< UT >::spec );
1626 KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
1627 __kmp_str_free( &buff );
1634 #endif // ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
            case kmp_sch_static_balanced:
                {
                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_balanced case\n", gtid) );
                    if ( (status = !pr->u.p.count) != 0 ) {   /* check if the thread still has a chunk to do */
                        pr->u.p.count = 1;
                        *p_lb = pr->u.p.lb;
                        *p_ub = pr->u.p.ub;
                        last  = pr->u.p.parm1;
                        if ( p_st != 0 )
                            *p_st = pr->u.p.st;
                    } else {   /* no iterations to do */
                        pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
                    }
                    if ( pr->ordered ) {
                        buff = __kmp_str_format(
                            "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                            traits_t< UT >::spec, traits_t< UT >::spec );
                        KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                        __kmp_str_free( &buff );
                    }
                }
                break;
            case kmp_sch_static_greedy:
            case kmp_sch_static_chunked:
                {
                    T parm1;

                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_[affinity|chunked] case\n",
                                   gtid ) );
                    parm1 = pr->u.p.parm1;

                    trip = pr->u.p.tc - 1;
                    init = parm1 * (pr->u.p.count + __kmp_tid_from_gtid(gtid));

                    if ( (status = (init <= trip)) != 0 ) {
                        start = pr->u.p.lb;
                        incr  = pr->u.p.st;
                        limit = parm1 + init - 1;

                        if ( (last = (limit >= trip)) != 0 )
                            limit = trip;

                        if ( p_st != 0 ) *p_st = incr;

                        pr->u.p.count += team->t.t_nproc;

                        if ( incr == 1 ) {
                            *p_lb = start + init;
                            *p_ub = start + limit;
                        } else {
                            *p_lb = start + init * incr;
                            *p_ub = start + limit * incr;
                        }

                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;

                            buff = __kmp_str_format(
                                "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                traits_t< UT >::spec, traits_t< UT >::spec );
                            KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                            __kmp_str_free( &buff );
                        }
                    }
                }
                break;
            case kmp_sch_dynamic_chunked:
                {
                    T chunk = pr->u.p.parm1;

                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
                                   gtid ) );

                    init = chunk * test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
                    trip = pr->u.p.tc - 1;

                    if ( (status = (init <= trip)) == 0 ) {
                        if ( p_st != 0 ) *p_st = 0;
                    } else {
                        start = pr->u.p.lb;
                        limit = chunk + init - 1;
                        incr  = pr->u.p.st;

                        if ( (last = (limit >= trip)) != 0 )
                            limit = trip;

                        if ( p_st != 0 ) *p_st = incr;

                        if ( incr == 1 ) {
                            *p_lb = start + init;
                            *p_ub = start + limit;
                        } else {
                            *p_lb = start + init * incr;
                            *p_ub = start + limit * incr;
                        }

                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;

                            buff = __kmp_str_format(
                                "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                traits_t< UT >::spec, traits_t< UT >::spec );
                            KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                            __kmp_str_free( &buff );
                        }
                    }
                }
                break;
            case kmp_sch_guided_iterative_chunked:
                {
                    T chunkspec = pr->u.p.parm1;
                    KD_TRACE(100,
                        ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked iterative case\n",gtid));
                    trip = pr->u.p.tc;
                    // atomic part: grab the next block of the shared iteration counter
                    while ( 1 ) {
                        ST remaining;              // signed, because it can become < 0
                        init = sh->u.s.iteration;  // shared value
                        remaining = trip - init;
                        if ( remaining <= 0 ) {    // nothing left, don't try an atomic op
                            status = 0;
                            break;
                        }
                        if ( (T)remaining < pr->u.p.parm2 ) {
                            // tail of the loop: switch to dynamic-style chunks of size chunkspec
                            init = test_then_add<ST>( (ST*)&sh->u.s.iteration, (ST)chunkspec );
                            remaining = trip - init;
                            if (remaining <= 0) {
                                status = 0;        // all iterations were taken by other threads
                            } else {
                                status = 1;
                                if ( (T)remaining > chunkspec ) {
                                    limit = init + chunkspec - 1;
                                } else {
                                    last  = 1;     // the last chunk
                                    limit = init + remaining - 1;
                                }
                            }
                            break;
                        }
                        limit = init + (UT)( remaining * *(double*)&pr->u.p.parm3 );
                        if ( compare_and_swap<ST>( (ST*)&sh->u.s.iteration, (ST)init, (ST)limit ) ) {
                            // CAS succeeded: the chunk [init, limit) is ours
                            status = 1;
                            --limit;
                            break;
                        }
                    }
                    if ( status != 0 ) {
                        start = pr->u.p.lb;
                        incr  = pr->u.p.st;
                        if ( p_last != NULL )
                            *p_last = last;
                        *p_lb = start + init * incr;
                        *p_ub = start + limit * incr;
                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;

                            buff = __kmp_str_format(
                                "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                traits_t< UT >::spec, traits_t< UT >::spec );
                            KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                            __kmp_str_free( &buff );
                        }
                    }
                }
                break;
            case kmp_sch_guided_analytical_chunked:
                {
                    T  chunkspec = pr->u.p.parm1;
                    UT chunkIdx;
                    #if KMP_OS_WINDOWS && KMP_ARCH_X86
                    /* saved FP control word, restored after the long double computations */
                    unsigned int oldFpcw;
                    unsigned int fpcwSet = 0;
                    #endif
                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked analytical case\n",
                                   gtid ) );

                    trip = pr->u.p.tc;

                    KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
                    KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)team->t.t_nproc < trip);

                    while ( 1 ) {   /* guards against unexpected zero chunk sizes */
                        chunkIdx = test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
                        if ( chunkIdx >= (UT)pr->u.p.parm2 ) {
                            --trip;
                            /* beyond the cross-over point: use dynamic-style scheduling */
                            init = chunkIdx * chunkspec + pr->u.p.count;
                            /* need to verify init > 0 in case of overflow in the above calculation */
                            if ( (status = (init > 0 && init <= trip)) != 0 ) {
                                limit = init + chunkspec - 1;

                                if ( (last = (limit >= trip)) != 0 )
                                    limit = trip;
                            }
                            break;
                        } else {
                            /* guided part: chunk boundaries come from the analytical remaining() formula */
                            #if KMP_OS_WINDOWS && KMP_ARCH_X86
                            if ( !fpcwSet ) {
                                oldFpcw = _control87(0,0x30000);
                                fpcwSet = 0x30000;
                            }
                            #endif
                            if ( chunkIdx ) {
                                init = __kmp_dispatch_guided_remaining< T >(
                                           trip, *( DBL * )&pr->u.p.parm3, chunkIdx );
                                KMP_DEBUG_ASSERT(init);
                                init = trip - init;
                            } else {
                                init = 0;
                            }
                            limit = trip - __kmp_dispatch_guided_remaining< T >(
                                               trip, *( DBL * )&pr->u.p.parm3, chunkIdx + 1 );
                            KMP_ASSERT(init <= limit);
                            if ( init < limit ) {
                                KMP_DEBUG_ASSERT(limit <= trip);
                                --limit;
                                status = 1;
                                break;
                            }
                        }
                    }
                    #if KMP_OS_WINDOWS && KMP_ARCH_X86
                    /* restore the FPCW; note the parentheses -- '!=' binds tighter than '&' */
                    if ( ( oldFpcw & fpcwSet ) != 0 )
                        _control87(oldFpcw,0x30000);
                    #endif
                    if ( status != 0 ) {
                        start = pr->u.p.lb;
                        incr  = pr->u.p.st;
                        if ( p_last != NULL )
                            *p_last = last;
                        *p_lb = start + init * incr;
                        *p_ub = start + limit * incr;
                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;

                            buff = __kmp_str_format(
                                "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                traits_t< UT >::spec, traits_t< UT >::spec );
                            KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                            __kmp_str_free( &buff );
                        }
                    }
                }
                break;
            case kmp_sch_trapezoidal:
                {
                    UT index;
                    T  parm2 = pr->u.p.parm2;
                    T  parm3 = pr->u.p.parm3;
                    T  parm4 = pr->u.p.parm4;
                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_trapezoidal case\n",
                                   gtid ) );

                    index = test_then_inc< ST >( (volatile ST *) & sh->u.s.iteration );

                    init = ( index * ( (2*parm2) - (index-1)*parm4 ) ) / 2;
                    trip = pr->u.p.tc - 1;

                    if ( (status = ((T)index < parm3 && init <= trip)) == 0 ) {
                        if ( p_st != 0 ) *p_st = 0;
                    } else {
                        start = pr->u.p.lb;
                        limit = ( (index+1) * ( 2*parm2 - index*parm4 ) ) / 2 - 1;
                        incr  = pr->u.p.st;

                        if ( (last = (limit >= trip)) != 0 )
                            limit = trip;

                        if ( p_last != 0 ) {
                            *p_last = last;
                        }
                        if ( p_st != 0 ) *p_st = incr;

                        if ( incr == 1 ) {
                            *p_lb = start + init;
                            *p_ub = start + limit;
                        } else {
                            *p_lb = start + init * incr;
                            *p_ub = start + limit * incr;
                        }

                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;

                            buff = __kmp_str_format(
                                "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                traits_t< UT >::spec, traits_t< UT >::spec );
                            KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                            __kmp_str_free( &buff );
                        }
                    }
                }
                break;
            } // switch
            if ( status == 0 ) {
                // no more work for this thread: count it as done and, if it is the last one,
                // reset and release the dispatch buffer for reuse
                UT num_done;

                num_done = test_then_inc< ST >( (volatile ST *) & sh->u.s.num_done );

                buff = __kmp_str_format(
                    "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
                    traits_t< UT >::spec );
                KD_TRACE(100, ( buff, gtid, sh->u.s.num_done ) );
                __kmp_str_free( &buff );

                if ( num_done == team->t.t_nproc-1 ) {
                    /* NOTE: release this buffer to be reused */

                    sh->u.s.num_done = 0;
                    sh->u.s.iteration = 0;

                    if ( pr->ordered ) {
                        sh->u.s.ordered_iteration = 0;
                    }

                    sh -> buffer_index += KMP_MAX_DISP_BUF;
                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",
                                   gtid, sh->buffer_index) );
                }

                if ( __kmp_env_consistency_check ) {
                    if ( pr->pushed_ws != ct_none ) {
                        pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
                    }
                }

                th -> th.th_dispatch -> th_deo_fcn = NULL;
                th -> th.th_dispatch -> th_dxo_fcn = NULL;
                th -> th.th_dispatch -> th_dispatch_sh_current = NULL;
                th -> th.th_dispatch -> th_dispatch_pr_current = NULL;
            } else if ( last ) {
                pr->u.p.last_upper = pr->u.p.ub;
            }
        } // if ( team -> t.t_serialized ) ... else

        // normal-case trace
        buff = __kmp_str_format(
            "__kmp_dispatch_next: T#%%d normal case: " \
            "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
        KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last, status ) );
        __kmp_str_free( &buff );

        return status;
    }
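/*
 * extern "C" entry points.  The compiler lowers a dynamically scheduled loop to a
 * __kmpc_dispatch_init_{4,4u,8,8u} call followed by a chunk loop over __kmpc_dispatch_next_*,
 * e.g. (illustrative sketch, not generated code; 'n', 'chunk' and 'body' are placeholders):
 *
 *     kmp_int32 lb, ub, st, last;
 *     __kmpc_dispatch_init_4( &loc, gtid, kmp_sch_dynamic_chunked, 0, n-1, 1, chunk );
 *     while ( __kmpc_dispatch_next_4( &loc, gtid, &last, &lb, &ub, &st ) ) {
 *         for ( kmp_int32 i = lb; i <= ub; i += st )
 *             body( i );
 *     }
 *
 * Each wrapper simply forwards to the templated __kmp_dispatch_init / __kmp_dispatch_next /
 * __kmp_dispatch_finish above with the matching integer type.
 */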
void __kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                             kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
void __kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                              kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
void __kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                             kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
void __kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                              kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
int __kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st )
    { return __kmp_dispatch_next< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st ); }
int __kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                             kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st )
    { return __kmp_dispatch_next< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st ); }
int __kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st )
    { return __kmp_dispatch_next< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st ); }
int __kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                             kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st )
    { return __kmp_dispatch_next< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st ); }
void __kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid )
    { __kmp_dispatch_finish< kmp_uint32 >( gtid, loc ); }
void __kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid )
    { __kmp_dispatch_finish< kmp_uint64 >( gtid, loc ); }
void __kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid )
    { __kmp_dispatch_finish< kmp_uint32 >( gtid, loc ); }
void __kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid )
    { __kmp_dispatch_finish< kmp_uint64 >( gtid, loc ); }
kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker) {
    return value == checker;
}
kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker) {
    return value != checker;
}
kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker) {
    return value < checker;
}
kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker) {
    return value >= checker;
}
kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker) {
    return value <= checker;
}
kmp_uint32 __kmp_eq_8( kmp_uint64 value, kmp_uint64 checker) {
    return value == checker;
}
kmp_uint32 __kmp_neq_8( kmp_uint64 value, kmp_uint64 checker) {
    return value != checker;
}
kmp_uint32 __kmp_lt_8( kmp_uint64 value, kmp_uint64 checker) {
    return value < checker;
}
kmp_uint32 __kmp_ge_8( kmp_uint64 value, kmp_uint64 checker) {
    return value >= checker;
}
kmp_uint32 __kmp_le_8( kmp_uint64 value, kmp_uint64 checker) {
    return value <= checker;
}
kmp_uint32   // assumed return type, mirroring the templated __kmp_wait_yield
__kmp_wait_yield_4( volatile kmp_uint32 * spinner,
                    kmp_uint32            checker,
                    kmp_uint32 (* pred)( kmp_uint32, kmp_uint32 )
                    USE_ITT_BUILD_ARG( void * obj )
                  )
{
    // note: we may not belong to a team at this point
    register volatile kmp_uint32         * spin  = spinner;
    register          kmp_uint32           check = checker;
    register          kmp_uint32   spins;
    register          kmp_uint32 (*f) ( kmp_uint32, kmp_uint32 ) = pred;
    register          kmp_uint32   r;

    KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
    KMP_INIT_YIELD( spins );
    // main wait spin loop
    while(!f(r = TCR_4(*spin), check)) {
        KMP_FSYNC_SPIN_PREPARE( obj );

        __kmp_static_delay(TRUE);

        // if oversubscribed, or we have waited a bit (and KMP_LIBRARY=throughput), then yield
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
    KMP_FSYNC_SPIN_ACQUIRED( obj );
    return r;
}
kmp_uint64   // assumed return type, mirroring the templated __kmp_wait_yield
__kmp_wait_yield_8( volatile kmp_uint64 * spinner,
                    kmp_uint64            checker,
                    kmp_uint32 (* pred)( kmp_uint64, kmp_uint64 )
                    USE_ITT_BUILD_ARG( void * obj )
                  )
{
    // note: we may not belong to a team at this point
    register volatile kmp_uint64         * spin  = spinner;
    register          kmp_uint64           check = checker;
    register          kmp_uint32   spins;
    register          kmp_uint32 (*f) ( kmp_uint64, kmp_uint64 ) = pred;
    register          kmp_uint64   r;

    KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
    KMP_INIT_YIELD( spins );
    // main wait spin loop
    while(!f(r = *spin, check))
    {
        KMP_FSYNC_SPIN_PREPARE( obj );

        __kmp_static_delay(TRUE);

        // if oversubscribed, or we have waited a bit (and KMP_LIBRARY=throughput), then yield
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
    KMP_FSYNC_SPIN_ACQUIRED( obj );
    return r;
}
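/*
 * KMP_GOMP_COMPAT auxiliary entry points: same as the __kmpc_dispatch_* wrappers above, but they
 * pass the caller-supplied push_ws flag through to __kmp_dispatch_init and expose the
 * "finish chunk" variant used by the GOMP-compatible ordered implementation.
 */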
#ifdef KMP_GOMP_COMPAT

void
__kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                           kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
                           kmp_int32 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk,
                                      push_ws );
}

void
__kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                            kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
                            kmp_int32 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk,
                                       push_ws );
}

void
__kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                           kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
                           kmp_int64 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk,
                                      push_ws );
}

void
__kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                            kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
                            kmp_int64 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk,
                                       push_ws );
}

void
__kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
}

void
__kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
}

void
__kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
}

void
__kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
}