47 #ifndef KOKKOS_SERIAL_HPP 48 #define KOKKOS_SERIAL_HPP 54 #include <Kokkos_HostSpace.hpp> 55 #include <Kokkos_ScratchSpace.hpp> 56 #include <Kokkos_MemoryTraits.hpp> 57 #include <impl/Kokkos_Tags.hpp> 58 #include <impl/Kokkos_FunctorAdapter.hpp> 60 #if defined( KOKKOS_HAVE_SERIAL ) 82 typedef Serial execution_space ;
84 typedef HostSpace::size_type size_type ;
86 typedef HostSpace memory_space ;
91 typedef LayoutRight array_layout ;
94 typedef ScratchMemorySpace< Kokkos::Serial > scratch_memory_space ;
/// Always reports "not inside a parallel region": returns false, which is 0
/// after conversion to the declared int return type.
104 inline static int in_parallel() {
return false ; }
/// No-op: the body is empty, so the call returns immediately.
132 static void fence() {}
134 static void initialize(
unsigned threads_count = 1 ,
135 unsigned use_numa_count = 0 ,
136 unsigned use_cores_per_numa = 0 ,
137 bool allow_asynchronous_threadpool =
false) {
138 (void) threads_count;
139 (void) use_numa_count;
140 (void) use_cores_per_numa;
141 (void) allow_asynchronous_threadpool;
144 Impl::init_lock_array_host_space();
/// Unconditionally returns 1; no state is consulted.
148 static int is_initialized() {
return 1 ; }
/// Empty implementation: takes an output stream and an optional bool
/// (default false), both unnamed and unused, and writes nothing.
154 static void print_configuration( std::ostream & ,
const bool =
false ) {}
/// Returns 1 regardless of the (unnamed, default 0) int argument.
158 inline static int thread_pool_size(
int = 0 ) {
return 1 ; }
/// Always returns 0.
159 KOKKOS_INLINE_FUNCTION
static int thread_pool_rank() {
return 0 ; }
/// Forwards to thread_pool_rank(); its int result is implicitly converted
/// to the unsigned return type.
163 KOKKOS_INLINE_FUNCTION
static unsigned hardware_thread_id() {
return thread_pool_rank(); }
/// Forwards to thread_pool_size(0); its int result is implicitly converted
/// to the unsigned return type.
164 inline static unsigned max_hardware_threads() {
return thread_pool_size(0); }
/// Declaration only; the definition is not part of this listing.
/// Callers visible in this file pass the reduction value size
/// (ValueTraits::value_size) as reduce_size and the team scratch size as
/// shared_size, and the reduce drivers cast the returned pointer to their
/// pointer_type for use as reduction storage.
/// NOTE(review): allocation/lifetime semantics of the returned pointer are
/// not visible here -- confirm against the implementation.
168 static void * scratch_memory_resize(
unsigned reduce_size ,
unsigned shared_size );
182 struct VerifyExecutionCanAccessMemorySpace
183 <
Kokkos::Serial::memory_space
184 , Kokkos::Serial::scratch_memory_space
187 enum { value =
true };
/// No-argument overload with an empty body: performs no check.
188 inline static void verify(
void ) { }
/// Pointer overload with an empty body: the (unnamed) pointer is ignored
/// and no check is performed.
189 inline static void verify(
const void * ) { }
192 namespace SerialImpl {
197 unsigned m_reduce_end ;
198 unsigned m_shared_end ;
202 static Sentinel & singleton();
206 unsigned align(
unsigned n );
217 class SerialTeamMember {
220 const scratch_memory_space m_space ;
221 const int m_league_rank ;
222 const int m_league_size ;
224 SerialTeamMember & operator = (
const SerialTeamMember & );
/// Returns a const reference to this member's scratch memory space object
/// (m_space).
228 KOKKOS_INLINE_FUNCTION
229 const scratch_memory_space & team_shmem()
const {
return m_space ; }
/// Returns the stored league rank (m_league_rank).
231 KOKKOS_INLINE_FUNCTION
int league_rank()
const {
return m_league_rank ; }
/// Returns the stored league size (m_league_size).
232 KOKKOS_INLINE_FUNCTION
int league_size()
const {
return m_league_size ; }
/// Always returns 0.
233 KOKKOS_INLINE_FUNCTION
int team_rank()
const {
return 0 ; }
/// Always returns 1.
234 KOKKOS_INLINE_FUNCTION
int team_size()
const {
return 1 ; }
/// No-op: the body is empty.
236 KOKKOS_INLINE_FUNCTION
void team_barrier()
const {}
/// No-op: both arguments (a value reference and an int reference) are
/// unnamed and unused, and the body is empty.
238 template<
class ValueType>
239 KOKKOS_INLINE_FUNCTION
240 void team_broadcast(
const ValueType& ,
const int& )
const {}
242 template<
class ValueType,
class JoinOp >
243 KOKKOS_INLINE_FUNCTION
244 ValueType team_reduce(
const ValueType & value ,
const JoinOp & )
const 258 template<
typename Type >
259 KOKKOS_INLINE_FUNCTION Type team_scan(
const Type & value , Type *
const global_accum )
const 261 const Type tmp = global_accum ? *global_accum : Type(0) ;
262 if ( global_accum ) { *global_accum += value ; }
271 template<
typename Type >
272 KOKKOS_INLINE_FUNCTION Type team_scan(
const Type & )
const 278 SerialTeamMember(
int arg_league_rank
279 ,
int arg_league_size
280 ,
int arg_shared_size
292 template<
class Arg0 ,
class Arg1 >
293 class TeamPolicy< Arg0 , Arg1 ,
Kokkos::Serial >
297 const int m_league_size ;
298 const int m_scratch_size ;
303 typedef TeamPolicy execution_policy ;
306 typedef Kokkos::Serial execution_space ;
309 Impl::if_c< ! Impl::is_same< Kokkos::Serial , Arg0 >::value , Arg0 , Arg1 >::type
/// Returns 1 for any functor; the functor argument is unnamed and unused.
314 template<
class FunctorType >
316 int team_size_max(
const FunctorType & ) {
return 1 ; }
/// Returns 1 for any functor; the functor argument is unnamed and unused.
318 template<
class FunctorType >
320 int team_size_recommended(
const FunctorType & ) {
return 1 ; }
/// Two-argument overload: returns 1; both the functor and the int argument
/// are unnamed and unused.
322 template<
class FunctorType >
324 int team_size_recommended(
const FunctorType & ,
const int& ) {
return 1 ; }
/// Always returns 1.
328 inline int team_size()
const {
return 1 ; }
/// Returns the league size stored at construction (m_league_size).
329 inline int league_size()
const {
return m_league_size ; }
/// Returns the stored scratch size (m_scratch_size).
/// The member is declared as const int, so the value is implicitly
/// converted from int to size_t on return.
330 inline size_t scratch_size()
const {
return m_scratch_size ; }
333 TeamPolicy( execution_space &
334 ,
int league_size_request
337 : m_league_size( league_size_request )
338 , m_scratch_size ( 0 )
341 TeamPolicy( execution_space &
342 ,
int league_size_request
343 ,
const Kokkos::AUTO_t &
345 : m_league_size( league_size_request )
346 , m_scratch_size ( 0 )
349 TeamPolicy(
int league_size_request
352 : m_league_size( league_size_request )
353 , m_scratch_size ( 0 )
356 TeamPolicy(
int league_size_request
357 ,
const Kokkos::AUTO_t &
359 : m_league_size( league_size_request )
360 , m_scratch_size ( 0 )
363 template<
class MemorySpace>
364 TeamPolicy(
int league_size_request
366 ,
const Experimental::TeamScratchRequest<MemorySpace> & scratch_request )
367 : m_league_size(league_size_request)
368 , m_scratch_size(scratch_request.total(1))
372 template<
class MemorySpace>
373 TeamPolicy(
int league_size_request
374 ,
const Kokkos::AUTO_t &
375 ,
const Experimental::TeamScratchRequest<MemorySpace> & scratch_request )
376 : m_league_size(league_size_request)
377 , m_scratch_size(scratch_request.total(1))
380 typedef Impl::SerialTeamMember member_type ;
395 template<
class FunctorType ,
class Arg0 ,
class Arg1 ,
class Arg2 >
396 class ParallelFor< FunctorType
397 ,
Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial >
404 const FunctorType m_functor ;
405 const Policy m_policy ;
407 template<
class TagType >
408 typename std::enable_if< std::is_same< TagType , void >::value >::type
411 const typename Policy::member_type e = m_policy.end();
412 for (
typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) {
417 template<
class TagType >
418 typename std::enable_if< ! std::is_same< TagType , void >::value >::type
422 const typename Policy::member_type e = m_policy.end();
423 for (
typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) {
432 { this->
template exec< typename Policy::work_tag >(); }
435 ParallelFor(
const FunctorType & arg_functor
436 ,
const Policy & arg_policy )
437 : m_functor( arg_functor )
438 , m_policy( arg_policy )
444 template<
class FunctorType ,
class Arg0 ,
class Arg1 ,
class Arg2 >
445 class ParallelReduce< FunctorType
452 typedef typename Policy::work_tag WorkTag ;
453 typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ;
454 typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ;
456 typedef typename ValueTraits::pointer_type pointer_type ;
457 typedef typename ValueTraits::reference_type reference_type ;
459 const FunctorType m_functor ;
460 const Policy m_policy ;
461 const pointer_type m_result_ptr ;
464 template<
class TagType >
466 typename std::enable_if< std::is_same< TagType , void >::value >::type
467 exec( pointer_type ptr )
const 469 reference_type update = ValueInit::init( m_functor , ptr );
471 const typename Policy::member_type e = m_policy.end();
472 for (
typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) {
473 m_functor( i , update );
476 Kokkos::Impl::FunctorFinal< FunctorType , TagType >::
477 final( m_functor , ptr );
480 template<
class TagType >
482 typename std::enable_if< ! std::is_same< TagType , void >::value >::type
483 exec( pointer_type ptr )
const 486 reference_type update = ValueInit::init( m_functor , ptr );
488 const typename Policy::member_type e = m_policy.end();
489 for (
typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) {
490 m_functor( t , i , update );
493 Kokkos::Impl::FunctorFinal< FunctorType , TagType >::
494 final( m_functor , ptr );
502 pointer_type ptr = (pointer_type) Kokkos::Serial::scratch_memory_resize
503 ( ValueTraits::value_size( m_functor ) , 0 );
505 this->
template exec< WorkTag >( m_result_ptr ? m_result_ptr : ptr );
508 template<
class ViewType >
509 ParallelReduce(
const FunctorType & arg_functor
510 ,
const Policy & arg_policy
511 ,
const ViewType & arg_result )
512 : m_functor( arg_functor )
513 , m_policy( arg_policy )
514 , m_result_ptr( arg_result.ptr_on_device() )
516 static_assert( Kokkos::is_view< ViewType >::value
517 ,
"Reduction result on Kokkos::Serial must be a Kokkos::View" );
519 static_assert( std::is_same<
typename ViewType::memory_space
521 ,
"Reduction result on Kokkos::Serial must be a Kokkos::View in HostSpace" );
527 template<
class FunctorType ,
class Arg0 ,
class Arg1 ,
class Arg2 >
528 class ParallelScan< FunctorType
535 typedef typename Policy::work_tag WorkTag ;
536 typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ;
537 typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ;
539 typedef typename ValueTraits::pointer_type pointer_type ;
540 typedef typename ValueTraits::reference_type reference_type ;
542 const FunctorType m_functor ;
543 const Policy m_policy ;
545 template<
class TagType >
547 typename std::enable_if< std::is_same< TagType , void >::value >::type
548 exec( pointer_type ptr )
const 550 reference_type update = ValueInit::init( m_functor , ptr );
552 const typename Policy::member_type e = m_policy.end();
553 for (
typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) {
554 m_functor( i , update ,
true );
558 template<
class TagType >
560 typename std::enable_if< ! std::is_same< TagType , void >::value >::type
561 exec( pointer_type ptr )
const 564 reference_type update = ValueInit::init( m_functor , ptr );
566 const typename Policy::member_type e = m_policy.end();
567 for (
typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) {
568 m_functor( t , i , update ,
true );
577 pointer_type ptr = (pointer_type)
578 Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( m_functor ) , 0 );
579 this->
template exec< WorkTag >( ptr );
583 ParallelScan(
const FunctorType & arg_functor
584 ,
const Policy & arg_policy
586 : m_functor( arg_functor )
587 , m_policy( arg_policy )
601 template<
class FunctorType ,
class Arg0 ,
class Arg1 >
602 class ParallelFor< FunctorType
609 typedef typename Policy::member_type Member ;
611 const FunctorType m_functor ;
615 template<
class TagType >
617 typename std::enable_if< std::is_same< TagType , void >::value >::type
620 for (
int ileague = 0 ; ileague < m_league ; ++ileague ) {
621 m_functor( Member(ileague,m_league,m_shared) );
625 template<
class TagType >
627 typename std::enable_if< ! std::is_same< TagType , void >::value >::type
631 for (
int ileague = 0 ; ileague < m_league ; ++ileague ) {
632 m_functor( t , Member(ileague,m_league,m_shared) );
641 Kokkos::Serial::scratch_memory_resize( 0 , m_shared );
642 this->
template exec< typename Policy::work_tag >();
645 ParallelFor(
const FunctorType & arg_functor
646 ,
const Policy & arg_policy )
647 : m_functor( arg_functor )
648 , m_league( arg_policy.league_size() )
649 , m_shared( arg_policy.scratch_size() + FunctorTeamShmemSize< FunctorType >::value( arg_functor , 1 ) )
655 template<
class FunctorType ,
class Arg0 ,
class Arg1 >
656 class ParallelReduce< FunctorType
663 typedef typename Policy::member_type Member ;
664 typedef typename Policy::work_tag WorkTag ;
665 typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ;
666 typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ;
668 typedef typename ValueTraits::pointer_type pointer_type ;
669 typedef typename ValueTraits::reference_type reference_type ;
671 const FunctorType m_functor ;
674 pointer_type m_result_ptr ;
676 template<
class TagType >
678 typename std::enable_if< std::is_same< TagType , void >::value >::type
679 exec( pointer_type ptr )
const 681 reference_type update = ValueInit::init( m_functor , ptr );
683 for (
int ileague = 0 ; ileague < m_league ; ++ileague ) {
684 m_functor( Member(ileague,m_league,m_shared) , update );
687 Kokkos::Impl::FunctorFinal< FunctorType , TagType >::
688 final( m_functor , ptr );
691 template<
class TagType >
693 typename std::enable_if< ! std::is_same< TagType , void >::value >::type
694 exec( pointer_type ptr )
const 698 reference_type update = ValueInit::init( m_functor , ptr );
700 for (
int ileague = 0 ; ileague < m_league ; ++ileague ) {
701 m_functor( t , Member(ileague,m_league,m_shared) , update );
704 Kokkos::Impl::FunctorFinal< FunctorType , TagType >::
705 final( m_functor , ptr );
713 pointer_type ptr = (pointer_type) Kokkos::Serial::scratch_memory_resize
714 ( ValueTraits::value_size( m_functor ) , m_shared );
716 this->
template exec< WorkTag >( m_result_ptr ? m_result_ptr : ptr );
719 template<
class ViewType >
720 ParallelReduce(
const FunctorType & arg_functor
721 ,
const Policy & arg_policy
722 ,
const ViewType & arg_result
724 : m_functor( arg_functor )
725 , m_league( arg_policy.league_size() )
726 , m_shared( arg_policy.scratch_size() + FunctorTeamShmemSize< FunctorType >::value( m_functor , 1 ) )
727 , m_result_ptr( arg_result.ptr_on_device() )
729 static_assert( Kokkos::is_view< ViewType >::value
730 ,
"Reduction result on Kokkos::Serial must be a Kokkos::View" );
732 static_assert( std::is_same<
typename ViewType::memory_space
734 ,
"Reduction result on Kokkos::Serial must be a Kokkos::View in HostSpace" );
749 template<
typename iType>
750 struct TeamThreadRangeBoundariesStruct<iType,SerialTeamMember> {
751 typedef iType index_type;
754 enum {increment = 1};
755 const SerialTeamMember& thread;
757 KOKKOS_INLINE_FUNCTION
758 TeamThreadRangeBoundariesStruct (
const SerialTeamMember& arg_thread,
const iType& arg_count)
764 KOKKOS_INLINE_FUNCTION
765 TeamThreadRangeBoundariesStruct (
const SerialTeamMember& arg_thread,
const iType& arg_begin,
const iType & arg_end )
768 , thread( arg_thread )
772 template<
typename iType>
773 struct ThreadVectorRangeBoundariesStruct<iType,SerialTeamMember> {
774 typedef iType index_type;
777 enum {increment = 1};
779 KOKKOS_INLINE_FUNCTION
780 ThreadVectorRangeBoundariesStruct (
const SerialTeamMember& thread,
const iType& count):
787 template<
typename iType>
788 KOKKOS_INLINE_FUNCTION
789 Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>
790 TeamThreadRange(
const Impl::SerialTeamMember& thread,
const iType & count )
792 return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>(thread,count);
795 template<
typename iType>
796 KOKKOS_INLINE_FUNCTION
797 Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>
798 TeamThreadRange(
const Impl::SerialTeamMember& thread,
const iType & begin ,
const iType & end )
800 return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>(thread,begin,end);
803 template<
typename iType>
804 KOKKOS_INLINE_FUNCTION
805 Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >
807 return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >(thread,count);
810 KOKKOS_INLINE_FUNCTION
811 Impl::ThreadSingleStruct<Impl::SerialTeamMember> PerTeam(
const Impl::SerialTeamMember& thread) {
812 return Impl::ThreadSingleStruct<Impl::SerialTeamMember>(thread);
815 KOKKOS_INLINE_FUNCTION
816 Impl::VectorSingleStruct<Impl::SerialTeamMember> PerThread(
const Impl::SerialTeamMember& thread) {
817 return Impl::VectorSingleStruct<Impl::SerialTeamMember>(thread);
828 template<
typename iType,
class Lambda>
829 KOKKOS_INLINE_FUNCTION
830 void parallel_for(
const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries,
const Lambda& lambda) {
831 for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment)
839 template<
typename iType,
class Lambda,
typename ValueType >
840 KOKKOS_INLINE_FUNCTION
841 void parallel_reduce(
const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries,
842 const Lambda & lambda, ValueType& result) {
844 result = ValueType();
846 for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment) {
847 ValueType tmp = ValueType();
852 result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>());
862 template<
typename iType,
class Lambda,
typename ValueType,
class JoinType >
863 KOKKOS_INLINE_FUNCTION
864 void parallel_reduce(
const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries,
865 const Lambda & lambda,
const JoinType& join, ValueType& init_result) {
867 ValueType result = init_result;
869 for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment) {
870 ValueType tmp = ValueType();
875 init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join));
885 template<
typename iType,
class Lambda>
886 KOKKOS_INLINE_FUNCTION
887 void parallel_for(
const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
888 loop_boundaries,
const Lambda& lambda) {
889 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 892 for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
900 template<
typename iType,
class Lambda,
typename ValueType >
901 KOKKOS_INLINE_FUNCTION
902 void parallel_reduce(
const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
903 loop_boundaries,
const Lambda & lambda, ValueType& result) {
904 result = ValueType();
905 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 908 for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
909 ValueType tmp = ValueType();
922 template<
typename iType,
class Lambda,
typename ValueType,
class JoinType >
923 KOKKOS_INLINE_FUNCTION
924 void parallel_reduce(
const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
925 loop_boundaries,
const Lambda & lambda,
const JoinType& join, ValueType& init_result) {
927 ValueType result = init_result;
928 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 931 for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
932 ValueType tmp = ValueType();
936 init_result = result;
949 template<
typename iType,
class FunctorType >
950 KOKKOS_INLINE_FUNCTION
951 void parallel_scan(
const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
952 loop_boundaries,
const FunctorType & lambda) {
954 typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
955 typedef typename ValueTraits::value_type value_type ;
957 value_type scan_val = value_type();
959 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 962 for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
963 lambda(i,scan_val,
true);
971 template<
class FunctorType>
972 KOKKOS_INLINE_FUNCTION
973 void single(
const Impl::VectorSingleStruct<Impl::SerialTeamMember>& ,
const FunctorType& lambda) {
977 template<
class FunctorType>
978 KOKKOS_INLINE_FUNCTION
979 void single(
const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& ,
const FunctorType& lambda) {
983 template<
class FunctorType,
class ValueType>
984 KOKKOS_INLINE_FUNCTION
985 void single(
const Impl::VectorSingleStruct<Impl::SerialTeamMember>& ,
const FunctorType& lambda, ValueType& val) {
989 template<
class FunctorType,
class ValueType>
990 KOKKOS_INLINE_FUNCTION
991 void single(
const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& ,
const FunctorType& lambda, ValueType& val) {
996 #endif // defined( KOKKOS_HAVE_SERIAL ) Scratch memory space associated with an execution space.
KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< iType, TeamMemberType > TeamThreadRange(const TeamMemberType &, const iType &count)
Execution policy for parallel work over a threads within a team.
void parallel_reduce(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< !Impl::is_integral< ExecPolicy >::value >::type *=0)
Parallel reduction.
Memory space for main process and CPU execution spaces.
Memory management for host memory.
Declaration of various MemoryLayout options.
Declaration of parallel operators.
void finalize()
Finalize the spaces that were initialized via Kokkos::initialize.
KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< iType, TeamMemberType > ThreadVectorRange(const TeamMemberType &, const iType &count)
Execution policy for a vector parallel loop.
Execution policy for work over a range of an integral type.
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< !Impl::is_integral< ExecPolicy >::value >::type *=0)
Execute functor in parallel according to the execution policy.
Execution policy for parallel work over a league of teams of threads.