#include "kmp_error.h"
#if KMP_OS_WINDOWS && KMP_ARCH_X86
#ifdef KMP_STATIC_STEAL_ENABLED
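// Editor's note: per-thread (private) dispatch bookkeeping, templated on the loop
// index type T. The static-steal variant carries an extra static_steal_counter,
// and the inner KMP_ALIGN(32) block appears to keep the per-schedule parameters
// (parm1..parm4, used throughout the code below) together.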
template< typename T >
struct dispatch_private_infoXX_template {
    typedef typename traits_t< T >::unsigned_t UT;
    typedef typename traits_t< T >::signed_t   ST;
    T static_steal_counter;
    struct KMP_ALIGN( 32 ) {

template< typename T >
struct dispatch_private_infoXX_template {
    typedef typename traits_t< T >::unsigned_t UT;
    typedef typename traits_t< T >::signed_t   ST;

template< typename T >
struct KMP_ALIGN_CACHE dispatch_private_info_template {
    union KMP_ALIGN_CACHE private_info_tmpl {
        dispatch_private_infoXX_template< T > p;
        dispatch_private_info64_t             p64;
    kmp_uint32     ordered_bumped;
    kmp_int32      ordered_dummy[KMP_MAX_ORDERED-3];
    dispatch_private_info * next;
    kmp_uint32     type_size;
    enum cons_type pushed_ws;
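// Editor's note: dispatch buffer shared by the whole team — the shared chunk /
// iteration counter (iteration), the count of threads that have finished the
// loop (num_done), the current ordered iteration, plus padding so the template
// keeps the size of the plain dispatch_shared_info structure.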
template< typename UT >
struct dispatch_shared_infoXX_template {
    volatile UT iteration;
    volatile UT num_done;
    volatile UT ordered_iteration;
    UT ordered_dummy[KMP_MAX_ORDERED-1];

template< typename UT >
struct dispatch_shared_info_template {
    union shared_info_tmpl {
        dispatch_shared_infoXX_template< UT > s;
        dispatch_shared_info64_t              s64;
    volatile kmp_uint32 buffer_index;

__kmp_static_delay( int arg )
#if KMP_ARCH_X86_64 && KMP_OS_LINUX
    KMP_ASSERT( arg >= 0 );

__kmp_static_yield( int arg )

#undef USE_TEST_LOCKS
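// Editor's note: the primary templates below deliberately trap (KMP_ASSERT(0));
// only the kmp_int32 / kmp_int64 specializations are usable, and they forward
// to the KMP_TEST_THEN_* / KMP_COMPARE_AND_STORE_REL* atomic macros.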
template< typename T >
static __forceinline T
test_then_add( volatile T *p, T d ) { KMP_ASSERT(0); };

__forceinline kmp_int32
test_then_add< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 d )
    r = KMP_TEST_THEN_ADD32( p, d );

__forceinline kmp_int64
test_then_add< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 d )
    r = KMP_TEST_THEN_ADD64( p, d );

template< typename T >
static __forceinline T
test_then_inc_acq( volatile T *p ) { KMP_ASSERT(0); };

__forceinline kmp_int32
test_then_inc_acq< kmp_int32 >( volatile kmp_int32 *p )
    r = KMP_TEST_THEN_INC_ACQ32( p );

__forceinline kmp_int64
test_then_inc_acq< kmp_int64 >( volatile kmp_int64 *p )
    r = KMP_TEST_THEN_INC_ACQ64( p );

template< typename T >
static __forceinline T
test_then_inc( volatile T *p ) { KMP_ASSERT(0); };

__forceinline kmp_int32
test_then_inc< kmp_int32 >( volatile kmp_int32 *p )
    r = KMP_TEST_THEN_INC32( p );

__forceinline kmp_int64
test_then_inc< kmp_int64 >( volatile kmp_int64 *p )
    r = KMP_TEST_THEN_INC64( p );

template< typename T >
static __forceinline kmp_int32
compare_and_swap( volatile T *p, T c, T s ) { KMP_ASSERT(0); };

__forceinline kmp_int32
compare_and_swap< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 c, kmp_int32 s )
    return KMP_COMPARE_AND_STORE_REL32( p, c, s );

__forceinline kmp_int32
compare_and_swap< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 c, kmp_int64 s )
    return KMP_COMPARE_AND_STORE_REL64( p, c, s );
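// Editor's note: spin-wait on *spinner until pred( *spinner, checker ) becomes
// true, yielding according to the library's heuristics; this templated helper
// mirrors the non-template __kmp_wait_yield_4 / __kmp_wait_yield_8 defined near
// the end of this file.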
template< typename UT >
__kmp_wait_yield( volatile UT * spinner,
                  kmp_uint32 (* pred)( UT, UT )
                  USE_ITT_BUILD_ARG( void * obj )
    register volatile UT * spin  = spinner;
    register UT            check = checker;
    register kmp_uint32    spins;
    register kmp_uint32 (*f) ( UT, UT ) = pred;

    KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
    KMP_INIT_YIELD( spins );
    while(!f(r = *spin, check))
        KMP_FSYNC_SPIN_PREPARE( obj );
        __kmp_static_delay(TRUE);
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    KMP_FSYNC_SPIN_ACQUIRED( obj );
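// Editor's note: comparison predicates passed to __kmp_wait_yield.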
template< typename UT >
static kmp_uint32 __kmp_eq( UT value, UT checker) {
    return value == checker;

template< typename UT >
static kmp_uint32 __kmp_neq( UT value, UT checker) {
    return value != checker;

template< typename UT >
static kmp_uint32 __kmp_lt( UT value, UT checker) {
    return value < checker;

template< typename UT >
static kmp_uint32 __kmp_ge( UT value, UT checker) {
    return value >= checker;

template< typename UT >
static kmp_uint32 __kmp_le( UT value, UT checker) {
    return value <= checker;
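// Editor's note: error stubs installed as th_deo_fcn / th_dxo_fcn when the loop
// is not ordered (see __kmp_dispatch_init); they only perform the consistency
// checking push/pop for misuse of 'ordered'.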
__kmp_dispatch_deo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
    KMP_DEBUG_ASSERT( gtid_ref );
    if ( __kmp_env_consistency_check ) {
        th = __kmp_threads[*gtid_ref];
        if ( th -> th.th_root -> r.r_active
            && ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) ) {
            __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL );
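// Editor's note: ordered-entry hook for dynamically scheduled loops — block in
// __kmp_wait_yield until the shared ordered_iteration reaches this thread's
// ordered_lower bound.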
template< typename UT >
__kmp_dispatch_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
    typedef typename traits_t< UT >::signed_t ST;
    dispatch_private_info_template< UT > * pr;

    int gtid = *gtid_ref;
    kmp_info_t *th = __kmp_threads[ gtid ];
    KMP_DEBUG_ASSERT( th -> th.th_dispatch );

    KD_TRACE(100, ("__kmp_dispatch_deo: T#%d called\n", gtid ) );
    if ( __kmp_env_consistency_check ) {
        pr = reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th -> th.th_dispatch -> th_dispatch_pr_current );
        if ( pr -> pushed_ws != ct_none ) {
            __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL );

    if ( ! th -> th.th_team -> t.t_serialized ) {
        dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
            ( th -> th.th_dispatch -> th_dispatch_sh_current );

        if ( ! __kmp_env_consistency_check ) {
            pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_pr_current );

        lower = pr->u.p.ordered_lower;

#if ! defined( KMP_GOMP_COMPAT )
        if ( __kmp_env_consistency_check ) {
            if ( pr->ordered_bumped ) {
                struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
                __kmp_error_construct2(
                    kmp_i18n_msg_CnsMultipleNesting,
                    ct_ordered_in_pdo, loc_ref,
                    & p->stack_data[ p->w_top ]

        buff = __kmp_str_format(
            "__kmp_dispatch_deo: T#%%d before wait: ordered_iter:%%%s lower:%%%s\n",
            traits_t< UT >::spec, traits_t< UT >::spec );
        KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
        __kmp_str_free( &buff );

        __kmp_wait_yield< UT >( &sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                USE_ITT_BUILD_ARG( NULL )

        buff = __kmp_str_format(
            "__kmp_dispatch_deo: T#%%d after wait: ordered_iter:%%%s lower:%%%s\n",
            traits_t< UT >::spec, traits_t< UT >::spec );
        KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
        __kmp_str_free( &buff );

    KD_TRACE(100, ("__kmp_dispatch_deo: T#%d returned\n", gtid ) );
__kmp_dispatch_dxo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
    if ( __kmp_env_consistency_check ) {
        th = __kmp_threads[*gtid_ref];
        if ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) {
            __kmp_pop_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref );

template< typename UT >
__kmp_dispatch_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
    typedef typename traits_t< UT >::signed_t ST;
    dispatch_private_info_template< UT > * pr;

    int gtid = *gtid_ref;
    kmp_info_t *th = __kmp_threads[ gtid ];
    KMP_DEBUG_ASSERT( th -> th.th_dispatch );

    KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d called\n", gtid ) );
    if ( __kmp_env_consistency_check ) {
        pr = reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th -> th.th_dispatch -> th_dispatch_pr_current );
        if ( pr -> pushed_ws != ct_none ) {
            __kmp_pop_sync( gtid, ct_ordered_in_pdo, loc_ref );

    if ( ! th -> th.th_team -> t.t_serialized ) {
        dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
            ( th -> th.th_dispatch -> th_dispatch_sh_current );

        if ( ! __kmp_env_consistency_check ) {
            pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_pr_current );

        KMP_FSYNC_RELEASING( & sh->u.s.ordered_iteration );
#if ! defined( KMP_GOMP_COMPAT )
        if ( __kmp_env_consistency_check ) {
            if ( pr->ordered_bumped != 0 ) {
                struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
                __kmp_error_construct2(
                    kmp_i18n_msg_CnsMultipleNesting,
                    ct_ordered_in_pdo, loc_ref,
                    & p->stack_data[ p->w_top ]

        pr->ordered_bumped += 1;

        KD_TRACE(1000, ("__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n",
                        gtid, pr->ordered_bumped ) );

        test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );

    KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d returned\n", gtid ) );
template< typename UT >
static __forceinline long double
__kmp_pow( long double x, UT y) {
    KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0);
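// Editor's note: __kmp_dispatch_guided_remaining estimates how many iterations
// are still unassigned after idx chunks of the analytical guided schedule,
// computed as tc * base^idx in long double and then converted back to the
// unsigned index type.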
template< typename T >
static __inline typename traits_t< T >::unsigned_t
__kmp_dispatch_guided_remaining(
    typename traits_t< T >::floating_t base,
    typename traits_t< T >::unsigned_t idx
    typedef typename traits_t< T >::unsigned_t UT;
    long double x = tc * __kmp_pow< UT >(base, idx);

static int    guided_int_param = 2;
static double guided_flt_param = 0.5;
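// Editor's note: __kmp_dispatch_init sets up a dynamically scheduled loop for one
// thread. It resolves runtime/auto schedules, computes the trip count, fills the
// per-thread dispatch_private_info buffer for the chosen schedule, and, when the
// team is not serialized, claims the next shared dispatch buffer, waiting until
// earlier loops have released it.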
template< typename T >
    typename traits_t< T >::signed_t st,
    typename traits_t< T >::signed_t chunk,
    typedef typename traits_t< T >::unsigned_t UT;
    typedef typename traits_t< T >::signed_t   ST;
    typedef typename traits_t< T >::floating_t DBL;
    static const int ___kmp_size_type = sizeof( UT );

    kmp_uint32 my_buffer_index;
    dispatch_private_info_template< T > * pr;
    dispatch_shared_info_template< UT > volatile * sh;

    KMP_BUILD_ASSERT( sizeof( dispatch_private_info_template< T > ) == sizeof( dispatch_private_info ) );
    KMP_BUILD_ASSERT( sizeof( dispatch_shared_info_template< UT > ) == sizeof( dispatch_shared_info ) );

    if ( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    buff = __kmp_str_format(
        "__kmp_dispatch_init: T#%%d called: schedule:%%d chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
        traits_t< ST >::spec, traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
    KD_TRACE(10, ( buff, gtid, schedule, chunk, lb, ub, st ) );
    __kmp_str_free( &buff );

    th     = __kmp_threads[ gtid ];
    team   = th -> th.th_team;
    active = ! team -> t.t_serialized;
    th->th.th_ident = loc;

        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( th -> th.th_dispatch -> th_disp_buffer );

        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                          &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        my_buffer_index = th->th.th_dispatch->th_disp_index ++;

        pr = reinterpret_cast< dispatch_private_info_template< T > * >
            ( &th -> th.th_dispatch -> th_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );
        sh = reinterpret_cast< dispatch_shared_info_template< UT > volatile * >
            ( &team -> t.t_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );

    pr->type_size = ___kmp_size_type;

        schedule = __kmp_static;
    if ( schedule == kmp_sch_runtime ) {
            schedule = team -> t.t_sched.r_sched_type;
                schedule = __kmp_guided;
                schedule = __kmp_static;
            chunk = team -> t.t_sched.chunk;
            kmp_r_sched_t r_sched = __kmp_get_schedule_global();
            schedule = r_sched.r_sched_type;
            chunk = r_sched.chunk;

            buff = __kmp_str_format(
                "__kmp_dispatch_init: T#%%d new: schedule:%%d chunk:%%%s\n",
                traits_t< ST >::spec );
            KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
            __kmp_str_free( &buff );

            schedule = __kmp_guided;
            chunk = KMP_DEFAULT_CHUNK;

            schedule = __kmp_auto;

            buff = __kmp_str_format(
                "__kmp_dispatch_init: kmp_sch_auto: T#%%d new: schedule:%%d chunk:%%%s\n",
                traits_t< ST >::spec );
            KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
            __kmp_str_free( &buff );
#endif // OMP_30_ENABLED

        if ( team->t.t_nproc > 1<<20 && schedule == kmp_sch_guided_analytical_chunked ) {
            schedule = kmp_sch_guided_iterative_chunked;
            KMP_WARNING( DispatchManyThreads );
        pr->u.p.parm1 = chunk;
                 "unknown scheduling type" );

    if ( __kmp_env_consistency_check ) {
            __kmp_error_construct(
                kmp_i18n_msg_CnsLoopIncrZeroProhibited,
                ( pr->ordered ? ct_pdo_ordered : ct_pdo ), loc

            tc = ( ub - lb + st );
    } else if ( ub < lb ) {
        pr->u.p.last_upper = ub + st;

    if ( pr->ordered == 0 ) {
        th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo_error;
        th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo_error;
        pr->ordered_bumped = 0;
        pr->u.p.ordered_lower = 1;
        pr->u.p.ordered_upper = 0;
        th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo< UT >;
        th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo< UT >;

    if ( __kmp_env_consistency_check ) {
        enum cons_type ws = pr->ordered ? ct_pdo_ordered : ct_pdo;
            __kmp_push_workshare( gtid, ws, loc );
            __kmp_check_workshare( gtid, ws, loc );
            pr->pushed_ws = ct_none;

    switch ( schedule ) {
#if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
            T nproc = team->t.t_nproc;
            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_steal case\n", gtid ) );

            ntc = (tc % chunk ? 1 : 0) + tc / chunk;
            if ( nproc > 1 && ntc >= nproc ) {
                T id = __kmp_tid_from_gtid(gtid);
                T small_chunk, extras;

                small_chunk = ntc / nproc;
                extras = ntc % nproc;

                init = id * small_chunk + ( id < extras ? id : extras );
                pr->u.p.count = init;
                pr->u.p.ub = init + small_chunk + ( id < extras ? 1 : 0 );

                KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_balanced\n",
                schedule = kmp_sch_static_balanced;

        case kmp_sch_static_balanced:
            T nproc = team->t.t_nproc;
            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_balanced case\n",
                T id = __kmp_tid_from_gtid(gtid);
                    pr->u.p.parm1 = ( id == tc - 1 );
                    pr->u.p.parm1 = FALSE;
                    T small_chunk = tc / nproc;
                    T extras = tc % nproc;
                    init  = id * small_chunk + (id < extras ? id : extras);
                    limit = init + small_chunk - (id < extras ? 0 : 1);
                    pr->u.p.parm1 = (id == nproc - 1);
                    pr->u.p.parm1 = TRUE;
                pr->u.p.parm1 = FALSE;
            pr->u.p.lb = lb + init;
            pr->u.p.ub = lb + limit;
                T ub_tmp = lb + limit * st;
                pr->u.p.lb = lb + init * st;
                    pr->u.p.ub = ( ub_tmp + st > ub ? ub : ub_tmp );
                    pr->u.p.ub = ( ub_tmp + st < ub ? ub : ub_tmp );
                pr->u.p.ordered_lower = init;
                pr->u.p.ordered_upper = limit;
        case kmp_sch_guided_iterative_chunked :
            T nproc = team->t.t_nproc;
            KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked case\n",gtid));

                if ( (2L * chunk + 1 ) * nproc >= tc ) {
                    schedule = kmp_sch_dynamic_chunked;
                    pr->u.p.parm2 = guided_int_param * nproc * ( chunk + 1 );
                    *(double*)&pr->u.p.parm3 = guided_flt_param / nproc;
                KD_TRACE(100,("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",gtid));
                schedule = kmp_sch_static_greedy;
                KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));

        case kmp_sch_guided_analytical_chunked:
            T nproc = team->t.t_nproc;
            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_analytical_chunked case\n", gtid));

                if ( (2L * chunk + 1 ) * nproc >= tc ) {
                    schedule = kmp_sch_dynamic_chunked;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                    unsigned int oldFpcw = _control87(0,0);
                    _control87(_PC_64,_MCW_PC);

                    long double target = ((long double)chunk * 2 + 1) * nproc / tc;

                    x = (long double)1.0 - (long double)0.5 / nproc;

                        ptrdiff_t natural_alignment = (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
                        KMP_DEBUG_ASSERT( ( ( (ptrdiff_t)&pr->u.p.parm3 ) & ( natural_alignment ) ) == 0 );

                    *(DBL*)&pr->u.p.parm3 = x;

                        p = __kmp_pow< UT >(x,right);
                    } while(p>target && right < (1<<27));

                    while ( left + 1 < right ) {
                        mid = (left + right) / 2;
                        if ( __kmp_pow< UT >(x,mid) > target ) {

                    KMP_ASSERT(cross && __kmp_pow< UT >(x, cross - 1) > target && __kmp_pow< UT >(x, cross) <= target);

                    pr->u.p.parm2 = cross;

#if ( ( KMP_OS_LINUX || KMP_OS_WINDOWS ) && KMP_ARCH_X86 ) && ( ! defined( KMP_I8 ) )
    #define GUIDED_ANALYTICAL_WORKAROUND (*( DBL * )&pr->u.p.parm3)
    #define GUIDED_ANALYTICAL_WORKAROUND (x)
                    pr->u.p.count = tc - __kmp_dispatch_guided_remaining(tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - cross * chunk;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                    _control87(oldFpcw,_MCW_PC);

                KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",
                schedule = kmp_sch_static_greedy;
        case kmp_sch_static_greedy:
            KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
            pr->u.p.parm1 = ( team -> t.t_nproc > 1 ) ?
                ( tc + team->t.t_nproc - 1 ) / team->t.t_nproc :

        case kmp_sch_static_chunked :
        case kmp_sch_dynamic_chunked :
            KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid));

        case kmp_sch_trapezoidal :
            T parm1, parm2, parm3, parm4;
            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_trapezoidal case\n", gtid ) );

            parm2 = ( tc / (2 * team->t.t_nproc) );

            } else if ( parm1 > parm2 ) {

            parm3 = ( parm2 + parm1 );
            parm3 = ( 2 * tc + parm3 - 1) / parm3;

            parm4 = ( parm3 - 1 );
            parm4 = ( parm2 - parm1 ) / parm4;

            pr->u.p.parm1 = parm1;
            pr->u.p.parm2 = parm2;
            pr->u.p.parm3 = parm3;
            pr->u.p.parm4 = parm4;

            KMP_MSG( UnknownSchedTypeDetected ),
            KMP_HNT( GetNewerLibrary ),
    pr->schedule = schedule;

        KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d sh->buffer_index:%d\n",
                       gtid, my_buffer_index, sh->buffer_index) );
        __kmp_wait_yield< kmp_uint32 >( & sh->buffer_index, my_buffer_index, __kmp_eq< kmp_uint32 >
                                        USE_ITT_BUILD_ARG( NULL )

        KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d sh->buffer_index:%d\n",
                       gtid, my_buffer_index, sh->buffer_index) );

        th -> th.th_dispatch -> th_dispatch_pr_current = (dispatch_private_info_t*) pr;
        th -> th.th_dispatch -> th_dispatch_sh_current = (dispatch_shared_info_t*) sh;

        if ( pr->ordered ) {
            __kmp_itt_ordered_init( gtid );

        buff = __kmp_str_format(
            "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s lb:%%%s ub:%%%s" \
            " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s" \
            " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
            traits_t< UT >::spec, traits_t< T >::spec, traits_t< T >::spec,
            traits_t< ST >::spec, traits_t< UT >::spec, traits_t< UT >::spec,
            traits_t< UT >::spec, traits_t< UT >::spec, traits_t< T >::spec,
            traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec );
        KD_TRACE(10, ( buff,
                       gtid, pr->schedule, pr->ordered, pr->u.p.lb, pr->u.p.ub,
                       pr->u.p.st, pr->u.p.tc, pr->u.p.count,
                       pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,
                       pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4 ) );
        __kmp_str_free( &buff );

#if ( KMP_STATIC_STEAL_ENABLED )
        if ( ___kmp_size_type < 8 ) {
            volatile T * p = &pr->u.p.static_steal_counter;
#endif // ( KMP_STATIC_STEAL_ENABLED && USE_STEALING )
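// Editor's note: __kmp_dispatch_finish is called at the end of an ordered chunk.
// If this thread has not already bumped the shared ordered_iteration
// (ordered_bumped == 0), it waits for its turn and then increments it so the
// next iteration can enter its ordered region.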
template< typename UT >
__kmp_dispatch_finish( int gtid, ident_t *loc )
    typedef typename traits_t< UT >::signed_t ST;
    kmp_info_t *th = __kmp_threads[ gtid ];

    KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid ) );
    if ( ! th -> th.th_team -> t.t_serialized ) {

        dispatch_private_info_template< UT > * pr =
            reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th->th.th_dispatch->th_dispatch_pr_current );
        dispatch_shared_info_template< UT > volatile * sh =
            reinterpret_cast< dispatch_shared_info_template< UT > volatile* >
            ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( pr );
        KMP_DEBUG_ASSERT( sh );
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                          &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        if ( pr->ordered_bumped ) {
            KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
            pr->ordered_bumped = 0;

            UT lower = pr->u.p.ordered_lower;

            buff = __kmp_str_format(
                "__kmp_dispatch_finish: T#%%d before wait: ordered_iteration:%%%s lower:%%%s\n",
                traits_t< UT >::spec, traits_t< UT >::spec );
            KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
            __kmp_str_free( &buff );

            __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                   USE_ITT_BUILD_ARG(NULL)

            buff = __kmp_str_format(
                "__kmp_dispatch_finish: T#%%d after wait: ordered_iteration:%%%s lower:%%%s\n",
                traits_t< UT >::spec, traits_t< UT >::spec );
            KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
            __kmp_str_free( &buff );

            test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );

    KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid ) );

#ifdef KMP_GOMP_COMPAT
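// Editor's note: GOMP-compatibility variant of the finish routine. A chunk may
// contain several ordered iterations, so the shared ordered_iteration is
// advanced by the part of the chunk that has not already been bumped
// (inc = chunk size minus ordered_bumped).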
template< typename UT >
__kmp_dispatch_finish_chunk( int gtid, ident_t *loc )
    typedef typename traits_t< UT >::signed_t ST;
    kmp_info_t *th = __kmp_threads[ gtid ];

    KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid ) );
    if ( ! th -> th.th_team -> t.t_serialized ) {

        dispatch_private_info_template< UT > * pr =
            reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th->th.th_dispatch->th_dispatch_pr_current );
        dispatch_shared_info_template< UT > volatile * sh =
            reinterpret_cast< dispatch_shared_info_template< UT > volatile* >
            ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( pr );
        KMP_DEBUG_ASSERT( sh );
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                          &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        UT lower = pr->u.p.ordered_lower;
        UT upper = pr->u.p.ordered_upper;
        UT inc = upper - lower + 1;

        if ( pr->ordered_bumped == inc ) {
            KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
            pr->ordered_bumped = 0;
            inc -= pr->ordered_bumped;

            buff = __kmp_str_format(
                "__kmp_dispatch_finish_chunk: T#%%d before wait: " \
                "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
                traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
            KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower, upper ) );
            __kmp_str_free( &buff );

            __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                   USE_ITT_BUILD_ARG(NULL)

            KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting ordered_bumped to zero\n",
            pr->ordered_bumped = 0;

            buff = __kmp_str_format(
                "__kmp_dispatch_finish_chunk: T#%%d after wait: " \
                "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
                traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
            KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper ) );
            __kmp_str_free( &buff );

            test_then_add< ST >( (volatile ST *) & sh->u.s.ordered_iteration, inc);

    KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid ) );
template< typename T >
__kmp_dispatch_next(
    ident_t *loc, int gtid, kmp_int32 *p_last, T *p_lb, T *p_ub,
    typename traits_t< T >::signed_t *p_st
    typedef typename traits_t< T >::unsigned_t UT;
    typedef typename traits_t< T >::signed_t   ST;
    typedef typename traits_t< T >::floating_t DBL;
    static const int ___kmp_size_type = sizeof( UT );

    dispatch_private_info_template< T > * pr;
    kmp_info_t * th   = __kmp_threads[ gtid ];
    kmp_team_t * team = th -> th.th_team;

    buff = __kmp_str_format(
        "__kmp_dispatch_next: T#%%d called p_lb:%%%s p_ub:%%%s p_st:%%%s p_last: %%p\n",
        traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
    KD_TRACE(1000, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last ) );
    __kmp_str_free( &buff );

    if ( team -> t.t_serialized ) {
        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( th -> th.th_dispatch -> th_disp_buffer );
        KMP_DEBUG_ASSERT( pr );

        if ( (status = (pr->u.p.tc != 0)) == 0 ) {
            if ( __kmp_env_consistency_check ) {
                if ( pr->pushed_ws != ct_none ) {
                    pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
        } else if ( pr->nomerge ) {
            UT limit, trip, init;
            T  chunk = pr->u.p.parm1;

            KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", gtid ) );

            init = chunk * pr->u.p.count++;
            trip = pr->u.p.tc - 1;

            if ( (status = (init <= trip)) == 0 ) {
                if ( p_st != 0 ) *p_st = 0;
                if ( __kmp_env_consistency_check ) {
                    if ( pr->pushed_ws != ct_none ) {
                        pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
                limit = chunk + init - 1;
                if ( (last = (limit >= trip)) != 0 ) {
                    pr->u.p.last_upper = pr->u.p.ub;
                    *p_lb = start + init;
                    *p_ub = start + limit;
                    *p_lb = start + init * incr;
                    *p_ub = start + limit * incr;

                if ( pr->ordered ) {
                    pr->u.p.ordered_lower = init;
                    pr->u.p.ordered_upper = limit;

                    buff = __kmp_str_format(
                        "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                        traits_t< UT >::spec, traits_t< UT >::spec );
                    KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                    __kmp_str_free( &buff );

            pr->u.p.last_upper = *p_ub;

        buff = __kmp_str_format(
            "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s " \
            "p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
        KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, *p_st, p_last, status) );
        __kmp_str_free( &buff );

        dispatch_shared_info_template< UT > *sh;
        UT limit, trip, init;

        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                          &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( th->th.th_dispatch->th_dispatch_pr_current );
        KMP_DEBUG_ASSERT( pr );
        sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
            ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( sh );

        if ( pr->u.p.tc == 0 ) {
        switch (pr->schedule) {
#if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
            T chunk = pr->u.p.parm1;

            KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_steal case\n", gtid) );

            trip = pr->u.p.tc - 1;

            if ( ___kmp_size_type > 4 ) {
                init = ( pr->u.p.count )++;
                status = ( init < (UT)pr->u.p.ub );
                union_i4 vold, vnew;
                vold.b = *( volatile kmp_int64 * )(&pr->u.p.count);

                while( ! KMP_COMPARE_AND_STORE_ACQ64(
                            ( volatile kmp_int64* )&pr->u.p.count,
                            *VOLATILE_CAST(kmp_int64 *)&vold.b,
                            *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
                    vold.b = *( volatile kmp_int64 * )(&pr->u.p.count);

                init   = vnew.p.count;
                status = ( init < (UT)vnew.p.ub ) ;

                kmp_info_t **other_threads = team->t.t_threads;
                int while_limit = 10;
                int while_index = 0;

                while ( ( !status ) && ( while_limit != ++while_index ) ) {
                    union_i4 vold, vnew;
                    kmp_int32 remaining;
                    T victimIdx    = pr->u.p.parm4;
                    T oldVictimIdx = victimIdx;
                    dispatch_private_info_template< T > * victim;

                            victimIdx = team->t.t_nproc - 1;
                        victim = reinterpret_cast< dispatch_private_info_template< T >* >
                            ( other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current );
                    } while ( (victim == NULL || victim == pr) && oldVictimIdx != victimIdx );

                         ( (*( volatile T * )&victim->u.p.static_steal_counter) !=
                           (*( volatile T * )&pr->u.p.static_steal_counter) ) ) {

                        if ( oldVictimIdx == victimIdx ) {
                    pr->u.p.parm4 = victimIdx;

                        vold.b = *( volatile kmp_int64 * )( &victim->u.p.count );

                        KMP_DEBUG_ASSERT( (vnew.p.ub - 1) * (UT)chunk <= trip );
                        if ( vnew.p.count >= (UT)vnew.p.ub || (remaining = vnew.p.ub - vnew.p.count) < 4 ) {
                        vnew.p.ub -= (remaining >> 2);
                        KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
                        #pragma warning( push )
                        #pragma warning( disable: 186 )
                        KMP_DEBUG_ASSERT(vnew.p.ub >= 0);
                        #pragma warning( pop )

                        if ( KMP_COMPARE_AND_STORE_ACQ64(
                                ( volatile kmp_int64 * )&victim->u.p.count,
                                *VOLATILE_CAST(kmp_int64 *)&vold.b,
                                *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {

                            init = vold.p.count;
                            pr->u.p.count = init + 1;
                            pr->u.p.ub    = vnew.p.count;
                            vold.p.count = init + 1;
                            *( volatile kmp_int64 * )(&pr->u.p.count) = vold.b;
#endif // KMP_ARCH_X86

                if ( p_st != 0 ) *p_st = 0;
                start = pr->u.p.parm2;
                limit = chunk + init - 1;
                KMP_DEBUG_ASSERT(init <= trip);
                if ( (last = (limit >= trip)) != 0 )
                if ( p_st != 0 ) *p_st = incr;

                    *p_lb = start + init;
                    *p_ub = start + limit;
                    *p_lb = start + init * incr;
                    *p_ub = start + limit * incr;

                if ( pr->ordered ) {
                    pr->u.p.ordered_lower = init;
                    pr->u.p.ordered_upper = limit;

                        buff = __kmp_str_format(
                            "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                            traits_t< UT >::spec, traits_t< UT >::spec );
                        KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                        __kmp_str_free( &buff );
#endif // ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
        case kmp_sch_static_balanced:
            KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_balanced case\n", gtid) );
            if ( (status = !pr->u.p.count) != 0 ) {
                last = pr->u.p.parm1;
                pr->u.p.lb = pr->u.p.ub + pr->u.p.st;

            if ( pr->ordered ) {

                buff = __kmp_str_format(
                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                __kmp_str_free( &buff );

        case kmp_sch_static_greedy:
        case kmp_sch_static_chunked:
            KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_[affinity|chunked] case\n",
            parm1 = pr->u.p.parm1;

            trip = pr->u.p.tc - 1;
            init = parm1 * (pr->u.p.count + __kmp_tid_from_gtid(gtid));

            if ( (status = (init <= trip)) != 0 ) {
                limit = parm1 + init - 1;

                if ( (last = (limit >= trip)) != 0 )
                if ( p_st != 0 ) *p_st = incr;

                pr->u.p.count += team->t.t_nproc;

                    *p_lb = start + init;
                    *p_ub = start + limit;
                    *p_lb = start + init * incr;
                    *p_ub = start + limit * incr;

                if ( pr->ordered ) {
                    pr->u.p.ordered_lower = init;
                    pr->u.p.ordered_upper = limit;

                        buff = __kmp_str_format(
                            "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                            traits_t< UT >::spec, traits_t< UT >::spec );
                        KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                        __kmp_str_free( &buff );

        case kmp_sch_dynamic_chunked:
            T chunk = pr->u.p.parm1;

            KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",

            init = chunk * test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
            trip = pr->u.p.tc - 1;

            if ( (status = (init <= trip)) == 0 ) {
                if ( p_st != 0 ) *p_st = 0;
                limit = chunk + init - 1;

                if ( (last = (limit >= trip)) != 0 )
                if ( p_st != 0 ) *p_st = incr;

                    *p_lb = start + init;
                    *p_ub = start + limit;
                    *p_lb = start + init * incr;
                    *p_ub = start + limit * incr;

                if ( pr->ordered ) {
                    pr->u.p.ordered_lower = init;
                    pr->u.p.ordered_upper = limit;

                        buff = __kmp_str_format(
                            "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                            traits_t< UT >::spec, traits_t< UT >::spec );
                        KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                        __kmp_str_free( &buff );
        case kmp_sch_guided_iterative_chunked:
            T chunkspec = pr->u.p.parm1;
            ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked iterative case\n",gtid));

                init = sh->u.s.iteration;
                remaining = trip - init;
                if ( remaining <= 0 ) {
                if ( (T)remaining < pr->u.p.parm2 ) {
                    init = test_then_add<ST>( (ST*)&sh->u.s.iteration, (ST)chunkspec );
                    remaining = trip - init;
                    if (remaining <= 0) {
                        if ( (T)remaining > chunkspec ) {
                            limit = init + chunkspec - 1;
                            limit = init + remaining - 1;
                limit = init + (UT)( remaining * *(double*)&pr->u.p.parm3 );
                if ( compare_and_swap<ST>( (ST*)&sh->u.s.iteration, (ST)init, (ST)limit ) ) {

            if ( status != 0 ) {
                if ( p_last != NULL )
                *p_lb = start + init * incr;
                *p_ub = start + limit * incr;
                if ( pr->ordered ) {
                    pr->u.p.ordered_lower = init;
                    pr->u.p.ordered_upper = limit;

                        buff = __kmp_str_format(
                            "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                            traits_t< UT >::spec, traits_t< UT >::spec );
                        KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                        __kmp_str_free( &buff );

        case kmp_sch_guided_analytical_chunked:
            T chunkspec = pr->u.p.parm1;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
            unsigned int oldFpcw;
            unsigned int fpcwSet = 0;
            KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked analytical case\n",

            KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
            KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)team->t.t_nproc < trip);

                chunkIdx = test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
                if ( chunkIdx >= (UT)pr->u.p.parm2 ) {
                    init = chunkIdx * chunkspec + pr->u.p.count;
                    if ( (status = (init > 0 && init <= trip)) != 0 ) {
                        limit = init + chunkspec -1;
                        if ( (last = (limit >= trip)) != 0 )
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                        oldFpcw = _control87(0,0);
                        _control87(_PC_64,_MCW_PC);

                    init = __kmp_dispatch_guided_remaining< T >(
                        trip, *( DBL * )&pr->u.p.parm3, chunkIdx );
                    KMP_DEBUG_ASSERT(init);
                    limit = trip - __kmp_dispatch_guided_remaining< T >(
                        trip, *( DBL * )&pr->u.p.parm3, chunkIdx + 1 );
                    KMP_ASSERT(init <= limit);
                    if ( init < limit ) {
                        KMP_DEBUG_ASSERT(limit <= trip);
#if KMP_OS_WINDOWS && KMP_ARCH_X86
            if ( fpcwSet && ( oldFpcw & fpcwSet ) )
                _control87(oldFpcw,_MCW_PC);

            if ( status != 0 ) {
                if ( p_last != NULL )
                *p_lb = start + init * incr;
                *p_ub = start + limit * incr;
                if ( pr->ordered ) {
                    pr->u.p.ordered_lower = init;
                    pr->u.p.ordered_upper = limit;

                        buff = __kmp_str_format(
                            "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                            traits_t< UT >::spec, traits_t< UT >::spec );
                        KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                        __kmp_str_free( &buff );
        case kmp_sch_trapezoidal:
            T parm2 = pr->u.p.parm2;
            T parm3 = pr->u.p.parm3;
            T parm4 = pr->u.p.parm4;
            KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_trapezoidal case\n",

            index = test_then_inc< ST >( (volatile ST *) & sh->u.s.iteration );

            init = ( index * ( (2*parm2) - (index-1)*parm4 ) ) / 2;
            trip = pr->u.p.tc - 1;

            if ( (status = ((T)index < parm3 && init <= trip)) == 0 ) {
                if ( p_st != 0 ) *p_st = 0;
                limit = ( (index+1) * ( 2*parm2 - index*parm4 ) ) / 2 - 1;

                if ( (last = (limit >= trip)) != 0 )
                if ( p_last != 0 ) {
                if ( p_st != 0 ) *p_st = incr;

                    *p_lb = start + init;
                    *p_ub = start + limit;
                    *p_lb = start + init * incr;
                    *p_ub = start + limit * incr;

                if ( pr->ordered ) {
                    pr->u.p.ordered_lower = init;
                    pr->u.p.ordered_upper = limit;

                        buff = __kmp_str_format(
                            "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                            traits_t< UT >::spec, traits_t< UT >::spec );
                        KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                        __kmp_str_free( &buff );
        if ( status == 0 ) {
            num_done = test_then_inc< ST >( (volatile ST *) & sh->u.s.num_done );

            buff = __kmp_str_format(
                "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
                traits_t< UT >::spec );
            KD_TRACE(100, ( buff, gtid, sh->u.s.num_done ) );
            __kmp_str_free( &buff );

            if ( num_done == team->t.t_nproc-1 ) {
                sh->u.s.num_done = 0;
                sh->u.s.iteration = 0;

                if ( pr->ordered ) {
                    sh->u.s.ordered_iteration = 0;

                sh -> buffer_index += KMP_MAX_DISP_BUF;
                KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",
                               gtid, sh->buffer_index) );

            if ( __kmp_env_consistency_check ) {
                if ( pr->pushed_ws != ct_none ) {
                    pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );

            th -> th.th_dispatch -> th_deo_fcn = NULL;
            th -> th.th_dispatch -> th_dxo_fcn = NULL;
            th -> th.th_dispatch -> th_dispatch_sh_current = NULL;
            th -> th.th_dispatch -> th_dispatch_pr_current = NULL;

            pr->u.p.last_upper = pr->u.p.ub;

    buff = __kmp_str_format(
        "__kmp_dispatch_next: T#%%d normal case: " \
        "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n",
        traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
    KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last, status ) );
    __kmp_str_free( &buff );
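// Editor's note: external C entry points. Each __kmpc_dispatch_init_* /
// __kmpc_dispatch_next_* / __kmpc_dispatch_fini_* routine is a thin wrapper
// that instantiates the templated helpers for the corresponding signed or
// unsigned 32- or 64-bit index type.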
    kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );

    kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );

    kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk )
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );

    kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk )
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );

    kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st )
    return __kmp_dispatch_next< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st );

    kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st )
    return __kmp_dispatch_next< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st );

    kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st )
    return __kmp_dispatch_next< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st );

    kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st )
    return __kmp_dispatch_next< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st );

    __kmp_dispatch_finish< kmp_uint32 >( gtid, loc );
    __kmp_dispatch_finish< kmp_uint64 >( gtid, loc );
    __kmp_dispatch_finish< kmp_uint32 >( gtid, loc );
    __kmp_dispatch_finish< kmp_uint64 >( gtid, loc );
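// Editor's note: non-templated 4- and 8-byte variants of the comparison
// predicates and of the spin-wait loop; presumably kept as plain functions for
// callers outside this translation unit.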
kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker) {
    return value == checker;

kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker) {
    return value != checker;

kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker) {
    return value < checker;

kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker) {
    return value >= checker;

kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker) {
    return value <= checker;

kmp_uint32 __kmp_eq_8( kmp_uint64 value, kmp_uint64 checker) {
    return value == checker;

kmp_uint32 __kmp_neq_8( kmp_uint64 value, kmp_uint64 checker) {
    return value != checker;

kmp_uint32 __kmp_lt_8( kmp_uint64 value, kmp_uint64 checker) {
    return value < checker;

kmp_uint32 __kmp_ge_8( kmp_uint64 value, kmp_uint64 checker) {
    return value >= checker;

kmp_uint32 __kmp_le_8( kmp_uint64 value, kmp_uint64 checker) {
    return value <= checker;

__kmp_wait_yield_4( volatile kmp_uint32 * spinner,
                    kmp_uint32 (* pred)( kmp_uint32, kmp_uint32 )
    register volatile kmp_uint32 * spin  = spinner;
    register kmp_uint32            check = checker;
    register kmp_uint32            spins;
    register kmp_uint32 (*f) ( kmp_uint32, kmp_uint32 ) = pred;
    register kmp_uint32            r;

    KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
    KMP_INIT_YIELD( spins );
    while(!f(r = TCR_4(*spin), check)) {
        KMP_FSYNC_SPIN_PREPARE( obj );
        __kmp_static_delay(TRUE);
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    KMP_FSYNC_SPIN_ACQUIRED( obj );

__kmp_wait_yield_8( volatile kmp_uint64 * spinner,
                    kmp_uint32 (* pred)( kmp_uint64, kmp_uint64 )
    register volatile kmp_uint64 * spin  = spinner;
    register kmp_uint64            check = checker;
    register kmp_uint32            spins;
    register kmp_uint32 (*f) ( kmp_uint64, kmp_uint64 ) = pred;
    register kmp_uint64            r;

    KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
    KMP_INIT_YIELD( spins );
    while(!f(r = *spin, check))
        KMP_FSYNC_SPIN_PREPARE( obj );
        __kmp_static_delay(TRUE);
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    KMP_FSYNC_SPIN_ACQUIRED( obj );

#ifdef KMP_GOMP_COMPAT
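// Editor's note: GOMP-compatibility shims — like the __kmpc_dispatch_* entry
// points above, but with an extra push_ws flag that controls whether the
// workshare consistency-check push is performed.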
__kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                           kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
                           kmp_int32 chunk, int push_ws )
    __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk,

__kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                            kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
                            kmp_int32 chunk, int push_ws )
    __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk,

__kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                           kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
                           kmp_int64 chunk, int push_ws )
    __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk,

__kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                            kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
                            kmp_int64 chunk, int push_ws )
    __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk,

__kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid )
    __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );

__kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid )
    __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );

__kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid )
    __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );

__kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid )
    __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk);
void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk);
void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk);
void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk);
int  __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st);
int  __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st);
int  __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st);
int  __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st);
void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid);
void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid);
void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid);
void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid);