44 #ifndef KOKKOS_EXECPOLICY_HPP 45 #define KOKKOS_EXECPOLICY_HPP 47 #include <Kokkos_Core_fwd.hpp> 48 #include <impl/Kokkos_Traits.hpp> 49 #include <impl/Kokkos_StaticAssert.hpp> 50 #include <impl/Kokkos_Tags.hpp> 77 template<
class Arg0 = void ,
class Arg1 = void ,
class Arg2 = void
81 typename std::conditional
82 < Impl::is_execution_space< Arg0 >::value , Arg0
83 , Kokkos::DefaultExecutionSpace >::type
// Defaults for RangePolicy. DefaultIntValue is the final fallback in the
// Granularity selection further down; DefaultIntType is presumably the matching
// fallback for the index type (that fallback line is not visible in this listing).
89 typedef int DefaultIntType ;
90 enum { DefaultIntValue = 8 };
// ArgN_Void: template argument N is `void` (which is its declared default).
92 enum { Arg0_Void = Impl::is_same< Arg0 , void >::value };
93 enum { Arg1_Void = Impl::is_same< Arg1 , void >::value };
94 enum { Arg2_Void = Impl::is_same< Arg2 , void >::value };
// Only Arg0 is tested for being an execution space.
96 enum { Arg0_ExecSpace = Impl::is_execution_space< Arg0 >::value };
// ArgN_IntConst: argument N satisfies Impl::is_integral_constant.
98 enum { Arg0_IntConst = Impl::is_integral_constant< Arg0 >::value };
99 enum { Arg1_IntConst = Impl::is_integral_constant< Arg1 >::value };
100 enum { Arg2_IntConst = Impl::is_integral_constant< Arg2 >::value };
// ArgN_IntType: argument N satisfies Impl::is_integral.
102 enum { Arg0_IntType = Impl::is_integral< Arg0 >::value };
103 enum { Arg1_IntType = Impl::is_integral< Arg1 >::value };
104 enum { Arg2_IntType = Impl::is_integral< Arg2 >::value };
// Arg0 is treated as a work tag when it is none of: execution space,
// integral constant, integral type, void.
106 enum { Arg0_WorkTag = ! Arg0_ExecSpace && ! Arg0_IntConst && ! Arg0_IntType && ! Arg0_Void };
// Arg1 is treated as a work tag only when Arg0 is an execution space and Arg1
// is none of: integral constant, integral type, void.
107 enum { Arg1_WorkTag = Arg0_ExecSpace && ! Arg1_IntConst && ! Arg1_IntType && ! Arg1_Void };
// Enumerates the accepted template-argument combinations, one per line below:
//   ExecSpace, WorkTag, IntConst-or-IntType
//   ExecSpace, WorkTag
//   ExecSpace, IntConst-or-IntType
//   ExecSpace
//   WorkTag, IntConst-or-IntType
//   WorkTag
//   IntConst-or-IntType
//   (no arguments)
// NOTE(review): Impl::StaticAssert is defined elsewhere; presumably it fails to
// compile when none of these combinations holds. The closing of this expression
// is missing from this listing.
109 enum { ArgOption_OK = Impl::StaticAssert< (
110 ( Arg0_ExecSpace && Arg1_WorkTag && ( Arg2_IntConst || Arg2_IntType ) ) ||
111 ( Arg0_ExecSpace && Arg1_WorkTag && Arg2_Void ) ||
112 ( Arg0_ExecSpace && ( Arg1_IntConst || Arg1_IntType ) && Arg2_Void ) ||
113 ( Arg0_ExecSpace && Arg1_Void && Arg2_Void ) ||
114 ( Arg0_WorkTag && ( Arg1_IntConst || Arg1_IntType ) && Arg2_Void ) ||
115 ( Arg0_WorkTag && Arg1_Void && Arg2_Void ) ||
116 ( ( Arg0_IntConst || Arg0_IntType ) && Arg1_Void && Arg2_Void ) ||
117 ( Arg0_Void && Arg1_Void && Arg2_Void )
// Work tag selection: Arg0 if it is a work tag, otherwise Arg1 if it is a work
// tag, otherwise void. The tail of this typedef is missing from this listing;
// the name WorkTag is inferred from the later `typedef WorkTag work_tag`.
//
// Granularity (the enum that follows on the same listing line as `void`):
// the integral_value of the first argument (Arg0, Arg1, Arg2 in that order)
// that is an integral constant, otherwise DefaultIntValue. Each candidate is
// converted to unsigned.
121 typedef typename std::conditional< Arg0_WorkTag , Arg0 ,
122 typename std::conditional< Arg1_WorkTag , Arg1 ,
void 126 enum { Granularity = Arg0_IntConst ? unsigned(Impl::is_integral_constant<Arg0>::integral_value) : (
127 Arg1_IntConst ? unsigned(Impl::is_integral_constant<Arg1>::integral_value) : (
128 Arg2_IntConst ? unsigned(Impl::is_integral_constant<Arg2>::integral_value) : (
129 unsigned(DefaultIntValue) ))) };
// Compile-time check that Granularity is a power of two. The mask arithmetic
// built on GranularityMask (Granularity - 1) relies on this.
132 static_assert( Impl::is_integral_power_of_two( Granularity )
133 ,
"RangePolicy blocking granularity must be power of two" );
// Index type selection, in priority order:
//   1. the first of Arg0/Arg1/Arg2 that is itself an integral type;
//   2. otherwise the integral_type of the first of Arg0/Arg1/Arg2 that is an
//      integral constant.
// NOTE(review): the innermost fallback and the typedef's name are missing from
// this listing; the name is presumably IntType (used by GranularityMask and
// member_type) and the fallback presumably DefaultIntType — confirm.
135 typedef typename std::conditional< Arg0_IntType , Arg0 ,
136 typename std::conditional< Arg1_IntType , Arg1 ,
137 typename std::conditional< Arg2_IntType , Arg2 ,
138 typename std::conditional< Arg0_IntConst , typename Impl::is_integral_constant<Arg0>::integral_type ,
139 typename std::conditional< Arg1_IntConst , typename Impl::is_integral_constant<Arg1>::integral_type ,
140 typename std::conditional< Arg2_IntConst , typename Impl::is_integral_constant<Arg2>::integral_type ,
142 >::type >::type >::type
143 >::type >::type >::type
// Granularity - 1, computed in IntType. WorkRange adds this mask and then clears
// the masked bits to round a length up to a multiple of Granularity.
146 enum { GranularityMask = IntType(Granularity) - 1 };
// Public aliases for the selected work tag and index type.
157 typedef WorkTag work_tag ;
158 typedef IntType member_type ;
// Returns a const reference to the stored execution space instance (m_space).
160 KOKKOS_INLINE_FUNCTION
const execution_space & space()
const {
return m_space ; }
// Returns the stored lower bound of the range (m_begin).
161 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin ; }
// Returns the stored upper bound of the range (m_end).
162 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end ; }
// Default constructor: default-constructs m_space and sets both bounds to 0.
164 inline RangePolicy() : m_space(), m_begin(0), m_end(0) {}
// Total range. Fragment of RangePolicy(const member_type work_begin,
// const member_type work_end); the opening of the signature and the m_space
// initializer are missing from this listing.
// The bounds are stored only when work_begin < work_end; otherwise both
// m_begin and m_end are set to 0.
169 ,
const member_type work_end
172 , m_begin( work_begin < work_end ? work_begin : 0 )
173 , m_end( work_begin < work_end ? work_end : 0 )
// Total range. Fragment of RangePolicy(const execution_space &work_space,
// const member_type work_begin, const member_type work_end); the opening of the
// signature is missing from this listing.
// m_space is initialized from work_space. The bounds are stored only when
// work_begin < work_end; otherwise both m_begin and m_end are set to 0.
179 ,
const member_type work_begin
180 ,
const member_type work_end
182 : m_space( work_space )
183 , m_begin( work_begin < work_end ? work_begin : 0 )
184 , m_end( work_begin < work_end ? work_end : 0 )
// Re-export RangePolicy's work tag and index type inside this nested scope
// (presumably the WorkRange struct, whose header is missing from this listing).
192 typedef typename RangePolicy::work_tag work_tag ;
193 typedef typename RangePolicy::member_type member_type ;
// Returns the lower bound of this subrange (m_begin).
195 KOKKOS_INLINE_FUNCTION member_type begin()
const {
return m_begin ; }
// Returns the upper bound of this subrange (m_end).
196 KOKKOS_INLINE_FUNCTION member_type end()
const {
return m_end ; }
// Subrange for a partition's rank and size.
// Fragment of WorkRange(const RangePolicy &range, const int part_rank,
// const int part_size).
// NOTE(review): the first signature line, the braces and any guard around the
// body are missing from this listing.
202 KOKKOS_INLINE_FUNCTION
204 ,
const int part_rank
205 ,
const int part_size
// Start from an empty subrange.
207 : m_begin(0), m_end(0)
// Per-partition length: the range length divided by part_size (the
// "+ ( part_size - 1 )" term rounds the division up), then rounded up to a
// multiple of GranularityMask + 1 by adding the mask and clearing its bits.
212 const member_type work_part =
213 ( ( ( ( range.end() - range.begin() ) + ( part_size - 1 ) ) / part_size )
214 + GranularityMask ) & ~member_type(GranularityMask);
// Bounds of partition part_rank before clamping.
216 m_begin = range.begin() + work_part * part_rank ;
217 m_end = m_begin + work_part ;
// Clamp both bounds so that neither exceeds range.end().
219 if ( range.end() < m_begin ) m_begin = range.end() ;
220 if ( range.end() < m_end ) m_end = range.end() ;
224 member_type m_begin ;
// Fragment of a class templated on MemorySpace that stores a per-team size
// (m_per_team) and a per-thread size (m_per_thread).
// NOTE(review): presumably the "scratch memory request accepting per team and
// per thread value" described in this listing; the class header, the
// constructor signature and the bodies of per_team()/per_thread() are missing.
247 template<
class MemorySpace >
// Constructor initializer list: copies the two requested sizes into the members.
254 m_per_team(per_team_), m_per_thread(per_thread_) {
257 size_t per_team()
const {
260 size_t per_thread()
const {
// Total size for a team of team_size threads:
// per-team amount plus per-thread amount times team_size.
263 size_t total(
const size_t team_size)
const {
264 return m_per_team + m_per_thread * team_size;
// Template header and argument traits of a policy class — presumably
// TeamPolicy, whose constructors are declared below. The class header, the
// second template parameter and the typedef keyword/name around the
// std::conditional are missing from this listing.
290 template<
class Arg0 = void
// Execution space selection: Arg0 when it is an execution space, otherwise
// Kokkos::DefaultExecutionSpace.
295 typename std::conditional
296 < Impl::is_execution_space< Arg0 >::value , Arg0
297 , Kokkos::DefaultExecutionSpace >::type
302 enum { Arg0_ExecSpace = Impl::is_execution_space< Arg0 >::value };
303 enum { Arg1_Void = Impl::is_same< Arg1 , void >::value };
// Accepted forms: Arg0 is an execution space (Arg1 may then be anything), or
// Arg1 is void. NOTE(review): Impl::StaticAssert is defined elsewhere;
// presumably it rejects other combinations at compile time.
304 enum { ArgOption_OK = Impl::StaticAssert< ( Arg0_ExecSpace || Arg1_Void ) >::value };
// Work tag: Arg1 when Arg0 is the execution space, otherwise Arg0.
306 typedef typename std::conditional< Arg0_ExecSpace , Arg1 , Arg0 >::type WorkTag ;
313 typedef WorkTag work_tag ;
// Static query taking a functor by const reference and returning an int.
// NOTE(review): presumably the largest team size usable with that functor;
// the definition is not visible here — confirm.
322 template<
class FunctorType >
323 static int team_size_max(
const FunctorType & );
// Static query taking a functor by const reference and returning an int.
// NOTE(review): presumably a suggested team size for that functor;
// the definition is not visible here — confirm.
331 template<
class FunctorType >
332 static int team_size_recommended(
const FunctorType & );
// Overload of team_size_recommended with an additional `const int&` argument.
// NOTE(review): the meaning of the second argument is not shown in this
// listing (possibly the vector length) — confirm before relying on it.
334 template<
class FunctorType >
335 static int team_size_recommended(
const FunctorType & ,
const int&);
// Constructs a policy from an execution space instance, a requested league
// size and a requested team size. vector_length_request defaults to 1.
338 TeamPolicy(
const execution_space & ,
int league_size_request ,
int team_size_request ,
int vector_length_request = 1 );
// As the (execution_space, league, team, vector) constructor, but takes a
// Kokkos::AUTO_t tag in place of an explicit team size request.
// NOTE(review): presumably the implementation then picks the team size — confirm.
340 TeamPolicy(
const execution_space & ,
int league_size_request ,
const Kokkos::AUTO_t & ,
int vector_length_request = 1 );
// Constructs a policy from a requested league size and team size without an
// explicit execution space instance. vector_length_request defaults to 1.
343 TeamPolicy(
int league_size_request ,
int team_size_request ,
int vector_length_request = 1 );
// As the (league, team, vector) constructor, but takes a Kokkos::AUTO_t tag in
// place of an explicit team size request.
// NOTE(review): presumably the implementation then picks the team size — confirm.
345 TeamPolicy(
int league_size_request ,
const Kokkos::AUTO_t & ,
int vector_length_request = 1 );
347 template<
class MemorySpace>
350 template<
class MemorySpace>
// Policy-level queries, declared only; definitions are not in this listing.
// NOTE(review): presumably these report the league size and team size the
// policy was configured with — confirm.
358 KOKKOS_INLINE_FUNCTION
int league_size()
const ;
365 KOKKOS_INLINE_FUNCTION
int team_size()
const ;
// Declarations only; definitions are not in this listing.
// NOTE(review): league_size() and team_size() are declared a second time here,
// so these declarations presumably belong to a different (nested) class than
// the preceding pair — likely the per-thread team member handle. The enclosing
// class header is missing from this listing; confirm.

// Returns an object of the execution space's scratch_memory_space type.
373 KOKKOS_INLINE_FUNCTION
374 typename execution_space::scratch_memory_space team_shmem()
const ;
// Integer queries; by name, the rank/size of the league and of the team.
377 KOKKOS_INLINE_FUNCTION
int league_rank()
const ;
380 KOKKOS_INLINE_FUNCTION
int league_size()
const ;
383 KOKKOS_INLINE_FUNCTION
int team_rank()
const ;
386 KOKKOS_INLINE_FUNCTION
int team_size()
const ;
// NOTE(review): presumably a barrier across the threads of one team — confirm.
389 KOKKOS_INLINE_FUNCTION
void team_barrier()
const ;
// Declaration only. Takes a value of JoinOp::value_type and a JoinOp by const
// reference, and returns a JoinOp::value_type.
// NOTE(review): presumably combines the values contributed by the threads of a
// team using the join operation — semantics are not shown here; confirm.
392 template<
class JoinOp >
393 KOKKOS_INLINE_FUNCTION
394 typename JoinOp::value_type team_reduce(
const typename JoinOp::value_type
395 ,
const JoinOp & )
const ;
// Declaration only. Takes a value by const reference and returns a Type.
// NOTE(review): presumably a scan (prefix operation) over the values supplied
// by the threads of a team; inclusive/exclusive behaviour is not shown here — confirm.
402 template<
typename Type >
403 KOKKOS_INLINE_FUNCTION Type team_scan(
const Type & value )
const ;
// Overload of team_scan with an extra `Type * const global_accum` argument.
// NOTE(review): presumably the pointed-to value takes part in (and is updated
// by) the scan as a cross-team accumulator — not shown in this listing; confirm.
414 template<
typename Type >
415 KOKKOS_INLINE_FUNCTION Type team_scan(
const Type & value , Type *
const global_accum )
const ;
// Holds the index sub-range assigned to one thread of a team, together with a
// reference to that thread's team member object.
// NOTE(review): braces, the closing parentheses of ibegin/iend and the
// declarations of the `start` and `end` members are missing from this listing.
425 template<
typename iType,
class TeamMemberType>
426 struct TeamThreadRangeBoundariesStruct {
// First index for rank arg_rank when [arg_begin, arg_end) is split over
// arg_size ranks in equal chunks.
429 KOKKOS_INLINE_FUNCTION
static 430 iType ibegin(
const iType & arg_begin
431 ,
const iType & arg_end
432 ,
const iType & arg_rank
433 ,
const iType & arg_size
// chunk = (length + arg_size - 1) / arg_size; result = arg_begin + chunk * rank.
// Unlike iend, this value is not clamped to arg_end.
436 return arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * arg_rank ;
// One-past-last index for rank arg_rank, using the same chunk size as ibegin.
439 KOKKOS_INLINE_FUNCTION
static 440 iType iend(
const iType & arg_begin
441 ,
const iType & arg_end
442 ,
const iType & arg_rank
443 ,
const iType & arg_size
// arg_begin + chunk * (rank + 1), clamped so it never exceeds arg_end.
446 const iType end_ = arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * ( arg_rank + 1 );
447 return end_ < arg_end ? end_ : arg_end ;
452 typedef iType index_type;
// Step between consecutive indices of the sub-range.
455 enum {increment = 1};
// Stored by reference: the referenced team member must outlive this object.
456 const TeamMemberType& thread;
// Range [0, arg_end): bounds are computed from the thread's team_rank() and
// team_size().
458 KOKKOS_INLINE_FUNCTION
459 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread
460 ,
const iType& arg_end
462 : start( ibegin( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
463 , end( iend( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
464 , thread( arg_thread )
// Range [arg_begin, arg_end): same computation with an explicit lower bound.
467 KOKKOS_INLINE_FUNCTION
468 TeamThreadRangeBoundariesStruct(
const TeamMemberType& arg_thread
469 ,
const iType& arg_begin
470 ,
const iType& arg_end
472 : start( ibegin( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
473 , end( iend( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) )
474 , thread( arg_thread )
// Range descriptor returned by ThreadVectorRange.
// NOTE(review): the data members and the constructor's initializer list and
// body are missing from this listing, so how `count` is stored cannot be
// stated from here.
478 template<
typename iType,
class TeamMemberType>
479 struct ThreadVectorRangeBoundariesStruct {
480 typedef iType index_type;
// Step between consecutive indices of the range.
483 enum {increment = 1};
// Constructed from a team member and an element count.
485 KOKKOS_INLINE_FUNCTION
486 ThreadVectorRangeBoundariesStruct (
const TeamMemberType& thread,
const iType& count):
// Thin wrapper holding a const reference to a team member object.
// The reference is not owning: the team member must outlive this wrapper.
// NOTE(review): the struct's closing brace is missing from this listing.
491 template<
class TeamMemberType>
492 struct ThreadSingleStruct {
493 const TeamMemberType& team_member;
// Binds team_member to the given team member object.
494 KOKKOS_INLINE_FUNCTION
495 ThreadSingleStruct(
const TeamMemberType& team_member_):team_member(team_member_){}
// Thin wrapper holding a const reference to a team member object; same layout
// as ThreadSingleStruct but a distinct type.
// The reference is not owning: the team member must outlive this wrapper.
// NOTE(review): the struct's closing brace is missing from this listing.
498 template<
class TeamMemberType>
499 struct VectorSingleStruct {
500 const TeamMemberType& team_member;
// Binds team_member to the given team member object.
501 KOKKOS_INLINE_FUNCTION
502 VectorSingleStruct(
const TeamMemberType& team_member_):team_member(team_member_){}
// Execution policy for parallel work over the threads within a team.
// Declaration only: takes a team member and an element count, and returns an
// Impl::TeamThreadRangeBoundariesStruct for that team member type.
512 template<
typename iType,
class TeamMemberType>
513 KOKKOS_INLINE_FUNCTION
514 Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType>
TeamThreadRange(
const TeamMemberType&,
const iType& count);
// Overload of TeamThreadRange taking explicit begin and end bounds instead of
// a count. Declaration only; returns an Impl::TeamThreadRangeBoundariesStruct.
522 template<
typename iType,
class TeamMemberType>
523 KOKKOS_INLINE_FUNCTION
524 Impl::TeamThreadRangeBoundariesStruct<iType,TeamMemberType>
TeamThreadRange(
const TeamMemberType&,
const iType& begin,
const iType& end);
// Execution policy for a vector parallel loop.
// Declaration only: takes a team member and an element count, and returns an
// Impl::ThreadVectorRangeBoundariesStruct for that team member type.
532 template<
typename iType,
class TeamMemberType>
533 KOKKOS_INLINE_FUNCTION
534 Impl::ThreadVectorRangeBoundariesStruct<iType,TeamMemberType>
ThreadVectorRange(
const TeamMemberType&,
const iType& count);
TeamPolicy execution_policy
Tag this class as an execution policy.
KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< iType, TeamMemberType > TeamThreadRange(const TeamMemberType &, const iType &count)
Execution policy for parallel work over the threads within a team.
Scratch memory request accepting per-team and per-thread values.
RangePolicy(const execution_space &work_space, const member_type work_begin, const member_type work_end)
Total range.
Parallel execution of a functor calls the functor once with each member of the execution policy...
RangePolicy(const member_type work_begin, const member_type work_end)
Total range.
KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< iType, TeamMemberType > ThreadVectorRange(const TeamMemberType &, const iType &count)
Execution policy for a vector parallel loop.
Execution policy for work over a range of an integral type.
Subrange for a partition's rank and size.
Execution policy for parallel work over a league of teams of threads.
KOKKOS_INLINE_FUNCTION WorkRange(const RangePolicy &range, const int part_rank, const int part_size)
Subrange for a partition's rank and size.
ExecSpace execution_space
Tag this class as an execution policy.