// Include guard and header dependencies of the Kokkos parallel dispatch
// header. The profiling interface header sits behind KOKKOSP_ENABLE_PROFILING.
// NOTE(review): this listing carries the original file's line numbers inline
// and skips lines, so every declaration below is a fragment.
47 #ifndef KOKKOS_PARALLEL_HPP 48 #define KOKKOS_PARALLEL_HPP 51 #include <Kokkos_Core_fwd.hpp> 52 #include <Kokkos_View.hpp> 53 #include <Kokkos_ExecPolicy.hpp> 55 #ifdef KOKKOSP_ENABLE_PROFILING 56 #include <impl/Kokkos_Profiling_Interface.hpp> 60 #include <impl/Kokkos_AllocationTracker.hpp> 61 #include <impl/Kokkos_Tags.hpp> 62 #include <impl/Kokkos_Traits.hpp> 63 #include <impl/Kokkos_FunctorAdapter.hpp> 65 #ifdef KOKKOS_HAVE_DEBUG 83 template<
// Primary template: both enable parameters default to void, and the
// execution_space typedef is Kokkos::DefaultExecutionSpace.
// NOTE(review): the struct name is not visible in this fragment — presumably
// FunctorPolicyExecutionSpace, which later code in this file queries; confirm.
class Functor
85 ,
class EnableFunctor = void
86 ,
class EnablePolicy =
void 89 typedef Kokkos::DefaultExecutionSpace execution_space ;
// Partial specialization keyed on enable_if_type of both
// Functor::device_type and Policy::execution_space; its execution_space
// typedef is the policy's execution space.
// NOTE(review): the specialization header line is missing from this listing.
92 template<
class Functor ,
class Policy >
95 , typename enable_if_type< typename Functor::device_type >::type
96 , typename enable_if_type< typename Policy ::execution_space >::type
99 typedef typename Policy ::execution_space execution_space ;
// Partial specialization keyed on enable_if_type of both
// Functor::execution_space and Policy::execution_space; its execution_space
// typedef is the policy's execution space.
// NOTE(review): the specialization header line is missing from this listing.
102 template<
class Functor ,
class Policy >
105 , typename enable_if_type< typename Functor::execution_space >::type
106 , typename enable_if_type< typename Policy ::execution_space >::type
109 typedef typename Policy ::execution_space execution_space ;
// Partial specialization keyed only on enable_if_type of
// Policy::execution_space (EnableFunctor stays a free template parameter);
// its execution_space typedef is the policy's execution space.
// NOTE(review): the specialization header line is missing from this listing.
112 template<
class Functor ,
class Policy ,
class EnableFunctor >
116 , typename enable_if_type< typename Policy::execution_space >::type
119 typedef typename Policy ::execution_space execution_space ;
// Partial specialization keyed only on enable_if_type of
// Functor::device_type (EnablePolicy stays a free template parameter);
// its execution_space typedef is Functor::device_type.
// NOTE(review): the specialization header line is missing from this listing.
122 template<
class Functor ,
class Policy ,
class EnablePolicy >
125 , typename enable_if_type< typename Functor::device_type >::type
129 typedef typename Functor::device_type execution_space ;
// Partial specialization keyed only on enable_if_type of
// Functor::execution_space (EnablePolicy stays a free template parameter);
// its execution_space typedef is Functor::execution_space.
// NOTE(review): the specialization header line is missing from this listing.
132 template<
class Functor ,
class Policy ,
class EnablePolicy >
135 , typename enable_if_type< typename Functor::execution_space >::type
139 typedef typename Functor::execution_space execution_space ;
// Forward declaration of the ParallelFor class template, parameterized on the
// functor type and the execution policy. No definition appears in this listing.
149 template<
class FunctorType ,
class ExecPolicy >
class ParallelFor ;
// parallel_for overload that takes an execution policy.
// NOTE(review): the line naming the function and its first parameter is
// missing here; the identification rests on the beginParallelFor /
// endParallelFor calls below.
195 template<
class ExecPolicy ,
class FunctorType >
198 ,
const FunctorType & functor
199 ,
// Optional kernel label; defaults to the empty string.
const std::string& str =
"" 200 ,
// Unnamed defaulted pointer parameter whose type is formed through
// Impl::enable_if on "ExecPolicy is not integral".
typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type * = 0
// Profiling hook, compiled under KOKKOSP_ENABLE_PROFILING and executed only
// when a profiling library is loaded. An empty label is replaced by
// typeid(FunctorType).name().
203 #ifdef KOKKOSP_ENABLE_PROFILING 205 if(Kokkos::Experimental::profileLibraryLoaded()) {
206 Kokkos::Experimental::beginParallelFor(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
// Allocation tracking is claimed-and-disabled, then released-and-enabled.
// NOTE(review): the statement that ran between these two calls (original
// line 211) is not shown — presumably the closure construction; confirm.
210 Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
212 Kokkos::Impl::shared_allocation_tracking_release_and_enable();
// End-of-kernel profiling notification using the kpID filled in above.
216 #ifdef KOKKOSP_ENABLE_PROFILING 217 if(Kokkos::Experimental::profileLibraryLoaded()) {
218 Kokkos::Experimental::endParallelFor(kpID);
// parallel_for overload without an explicit policy type.
// NOTE(review): the first parameter is missing from this listing — presumably
// a work count, by analogy with the parallel_scan overload that takes
// `const size_t work_count`; confirm.
223 template<
class FunctorType >
226 ,
const FunctorType & functor
227 ,
// Optional kernel label; defaults to the empty string. The execution space is
// looked up through FunctorPolicyExecutionSpace< FunctorType , void >.
const std::string& str =
"" 231 Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
// Profiling start hook; an empty label falls back to
// typeid(FunctorType).name().
235 #ifdef KOKKOSP_ENABLE_PROFILING 237 if(Kokkos::Experimental::profileLibraryLoaded()) {
238 Kokkos::Experimental::beginParallelFor(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
// Allocation tracking is disabled and re-enabled around a statement that is
// not shown here (original line 243).
242 Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
244 Kokkos::Impl::shared_allocation_tracking_release_and_enable();
// Profiling end hook for the same kpID.
248 #ifdef KOKKOSP_ENABLE_PROFILING 249 if(Kokkos::Experimental::profileLibraryLoaded()) {
250 Kokkos::Experimental::endParallelFor(kpID);
// Label-first parallel_for overload (identified by the debug messages below).
// NOTE(review): the first parameter — presumably `const std::string& str`,
// since `str` is streamed below — and the forwarding call between the two
// debug prints are missing from this listing.
255 template<
class ExecPolicy ,
class FunctorType >
258 ,
const ExecPolicy & policy
259 ,
const FunctorType & functor )
// With KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES set, the kernel label is
// printed to std::cout before the kernel ...
261 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 263 std::cout <<
"KOKKOS_DEBUG Start parallel_for kernel: " << str << std::endl;
// ... and again after it.
268 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 270 std::cout <<
"KOKKOS_DEBUG End parallel_for kernel: " << str << std::endl;
// parallel_reduce overload that takes an execution policy and no result
// argument.
// NOTE(review): the line naming the function and its first parameter is
// missing; the identification rests on the beginParallelReduce /
// endParallelReduce calls below.
312 template<
class ExecPolicy ,
class FunctorType >
315 ,
const FunctorType & functor
316 ,
// Optional kernel label; defaults to the empty string.
const std::string& str =
"" 317 ,
// Unnamed defaulted pointer parameter whose type is formed through
// Impl::enable_if on "ExecPolicy is not integral".
typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type * = 0
// Value traits of the functor for the policy's work tag.
324 typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag > ValueTraits ;
// Chooses value_type when StaticValueSize is non-zero, pointer_type otherwise.
// NOTE(review): the rest of this typedef is missing — presumably the data type
// of a Kokkos::MemoryUnmanaged view; confirm.
326 typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
327 ,
typename ValueTraits::value_type
328 ,
typename ValueTraits::pointer_type
333 , Kokkos::MemoryUnmanaged
// Profiling start hook; an empty label falls back to
// typeid(FunctorType).name().
337 #ifdef KOKKOSP_ENABLE_PROFILING 339 if(Kokkos::Experimental::profileLibraryLoaded()) {
340 Kokkos::Experimental::beginParallelReduce(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
// Allocation tracking is disabled and re-enabled around a statement that is
// not shown here (original line 345).
344 Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
346 Kokkos::Impl::shared_allocation_tracking_release_and_enable();
// Profiling end hook for the same kpID.
350 #ifdef KOKKOSP_ENABLE_PROFILING 351 if(Kokkos::Experimental::profileLibraryLoaded()) {
352 Kokkos::Experimental::endParallelReduce(kpID);
// parallel_reduce overload without an explicit policy type and without a
// result argument.
// NOTE(review): the first parameter is missing from this listing — presumably
// a work count; confirm.
358 template<
class FunctorType >
361 ,
const FunctorType & functor
362 ,
// Optional kernel label; defaults to the empty string. The execution space is
// looked up through FunctorPolicyExecutionSpace< FunctorType , void >.
const std::string& str =
"" 366 Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
// Value traits of the functor with a void work tag.
371 typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
// Chooses value_type when StaticValueSize is non-zero, pointer_type otherwise.
// NOTE(review): the rest of this typedef is missing — presumably the data type
// of a Kokkos::MemoryUnmanaged view; confirm.
373 typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
374 ,
typename ValueTraits::value_type
375 ,
typename ValueTraits::pointer_type
380 , Kokkos::MemoryUnmanaged
// Profiling start hook; an empty label falls back to
// typeid(FunctorType).name().
384 #ifdef KOKKOSP_ENABLE_PROFILING 386 if(Kokkos::Experimental::profileLibraryLoaded()) {
387 Kokkos::Experimental::beginParallelReduce(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
// Allocation tracking is disabled and re-enabled around a statement that is
// not shown here (original line 392).
391 Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
393 Kokkos::Impl::shared_allocation_tracking_release_and_enable();
// Profiling end hook for the same kpID.
397 #ifdef KOKKOSP_ENABLE_PROFILING 398 if(Kokkos::Experimental::profileLibraryLoaded()) {
399 Kokkos::Experimental::endParallelReduce(kpID);
// parallel_reduce overload that takes an execution policy and a caller-supplied
// result view.
// NOTE(review): the line naming the function and its first parameter is
// missing from this listing.
406 template<
class ExecPolicy ,
class FunctorType ,
class ViewType >
409 ,
const FunctorType & functor
410 ,
const ViewType & result_view
411 ,
// Optional kernel label; defaults to the empty string.
const std::string& str =
"" 412 ,
// Enabling condition: ViewType must satisfy Kokkos::is_view and ExecPolicy
// must not be integral. When KOKKOS_HAVE_CUDA is defined, the policy's
// execution space must additionally not be Kokkos::Cuda.
typename Impl::enable_if<
413 ( Kokkos::is_view<ViewType>::value && ! Impl::is_integral< ExecPolicy >::value
414 #ifdef KOKKOS_HAVE_CUDA
415 && ! Impl::is_same<typename ExecPolicy::execution_space,Kokkos::Cuda>::value
// Profiling start hook; an empty label falls back to
// typeid(FunctorType).name().
420 #ifdef KOKKOSP_ENABLE_PROFILING 422 if(Kokkos::Experimental::profileLibraryLoaded()) {
423 Kokkos::Experimental::beginParallelReduce(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
// Allocation tracking is disabled and re-enabled around a statement that is
// not shown here (original line 428).
427 Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
429 Kokkos::Impl::shared_allocation_tracking_release_and_enable();
// Profiling end hook for the same kpID.
433 #ifdef KOKKOSP_ENABLE_PROFILING 434 if(Kokkos::Experimental::profileLibraryLoaded()) {
435 Kokkos::Experimental::endParallelReduce(kpID);
// parallel_reduce overload that takes an execution policy and a reference to
// the result value (result_ref).
// NOTE(review): the line naming the function and its first parameter is
// missing from this listing.
442 template<
class ExecPolicy ,
class FunctorType >
444 ,
const FunctorType & functor
// Two alternative parameter lists follow. This one, under KOKKOS_HAVE_CUDA,
// requires ExecPolicy to be non-integral and its execution space not to be
// Kokkos::Cuda.
445 #ifdef KOKKOS_HAVE_CUDA
446 ,
typename Impl::enable_if<
447 ( ! Impl::is_integral< ExecPolicy >::value &&
448 ! Impl::is_same<typename ExecPolicy::execution_space,Kokkos::Cuda>::value )
449 ,
typename Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag >::reference_type>::type result_ref
450 ,
const std::string& str =
"" 451 ,
typename Impl::enable_if<! Impl::is_same<typename ExecPolicy::execution_space,Kokkos::Cuda>::value >::type* = 0
// Second variant: only requires ExecPolicy to be non-integral.
// NOTE(review): the preprocessor line separating the two variants
// (presumably #else) is missing from this listing.
454 ,
typename Impl::enable_if<
455 ( ! Impl::is_integral< ExecPolicy >::value)
456 ,
typename Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag >::reference_type
458 ,
const std::string& str =
"" 462 typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag > ValueTraits ;
463 typedef Kokkos::Impl::FunctorValueOps< FunctorType , typename ExecPolicy::work_tag > ValueOps ;
// Chooses value_type when StaticValueSize is non-zero, pointer_type otherwise.
// NOTE(review): the rest of this typedef is missing — presumably the data type
// of a Kokkos::MemoryUnmanaged view; confirm.
468 typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
469 ,
typename ValueTraits::value_type
470 ,
typename ValueTraits::pointer_type
475 , Kokkos::MemoryUnmanaged
// result_view is built from the pointer ValueOps derives from result_ref and
// the value count ValueTraits reports for the functor.
477 result_view( ValueOps::pointer( result_ref )
478 , ValueTraits::value_count( functor )
// Profiling start hook; an empty label falls back to
// typeid(FunctorType).name().
481 #ifdef KOKKOSP_ENABLE_PROFILING 483 if(Kokkos::Experimental::profileLibraryLoaded()) {
484 Kokkos::Experimental::beginParallelReduce(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
// Allocation tracking is disabled and re-enabled around a statement that is
// not shown here (original line 489).
488 Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
490 Kokkos::Impl::shared_allocation_tracking_release_and_enable();
// Profiling end hook for the same kpID.
494 #ifdef KOKKOSP_ENABLE_PROFILING 495 if(Kokkos::Experimental::profileLibraryLoaded()) {
496 Kokkos::Experimental::endParallelReduce(kpID);
// parallel_reduce overload without an explicit policy type, writing into a
// caller-supplied result view.
// NOTE(review): the first parameter is missing from this listing — presumably
// a work count; confirm.
503 template<
class FunctorType ,
class ViewType >
506 ,
const FunctorType & functor
507 ,
const ViewType & result_view
508 ,
// Optional kernel label; defaults to the empty string.
const std::string& str =
"" 509 ,
// Enabling condition starts with Kokkos::is_view<ViewType>. Under
// KOKKOS_HAVE_CUDA an extra clause involving the functor's execution space
// follows.
// NOTE(review): that clause is only partly visible; by analogy with the
// policy-taking overloads it presumably excludes Kokkos::Cuda — confirm.
typename Impl::enable_if<( Kokkos::is_view<ViewType>::value
510 #ifdef KOKKOS_HAVE_CUDA
512 typename Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space,
518 Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
// Profiling start hook; an empty label falls back to
// typeid(FunctorType).name().
523 #ifdef KOKKOSP_ENABLE_PROFILING 525 if(Kokkos::Experimental::profileLibraryLoaded()) {
526 Kokkos::Experimental::beginParallelReduce(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
// Allocation tracking is disabled and re-enabled around a statement that is
// not shown here (original line 531).
530 Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
532 Kokkos::Impl::shared_allocation_tracking_release_and_enable();
// Profiling end hook for the same kpID.
536 #ifdef KOKKOSP_ENABLE_PROFILING 537 if(Kokkos::Experimental::profileLibraryLoaded()) {
538 Kokkos::Experimental::endParallelReduce(kpID);
// parallel_reduce overload without an explicit policy type, writing into a
// result passed by reference.
// NOTE(review): the first parameter is missing from this listing — presumably
// a work count; confirm.
545 template<
class FunctorType >
548 ,
const FunctorType & functor
549 ,
// The result's reference type comes from FunctorValueTraits. If FunctorType is
// itself an execution policy or an integral type, void is substituted for it
// before the traits are queried.
typename Kokkos::Impl::FunctorValueTraits<
550 typename Impl::if_c<Impl::is_execution_policy<FunctorType>::value ||
551 Impl::is_integral<FunctorType>::value,
552 void,FunctorType>::type
553 ,
void >::reference_type result
554 ,
// Optional kernel label; defaults to the empty string.
const std::string& str =
"" 555 ,
// Enabling condition; under KOKKOS_HAVE_CUDA it gains a clause involving the
// functor's execution space.
// NOTE(review): that clause is only partly visible here; confirm its meaning
// against the full file.
typename Impl::enable_if<
true 556 #ifdef KOKKOS_HAVE_CUDA
558 typename Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space,
563 typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
564 typedef Kokkos::Impl::FunctorValueOps< FunctorType , void > ValueOps ;
567 Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
// Chooses value_type when StaticValueSize is non-zero, pointer_type otherwise.
// NOTE(review): the rest of this typedef is missing — presumably the data type
// of a Kokkos::MemoryUnmanaged view; confirm.
575 typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
576 ,
typename ValueTraits::value_type
577 ,
typename ValueTraits::pointer_type
582 , Kokkos::MemoryUnmanaged
// result_view is built from the pointer ValueOps derives from result and the
// value count ValueTraits reports for the functor.
584 result_view( ValueOps::pointer( result )
585 , ValueTraits::value_count( functor )
// Profiling start hook; an empty label falls back to
// typeid(FunctorType).name().
588 #ifdef KOKKOSP_ENABLE_PROFILING 590 if(Kokkos::Experimental::profileLibraryLoaded()) {
591 Kokkos::Experimental::beginParallelReduce(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
// Allocation tracking is disabled and re-enabled around a statement that is
// not shown here (original line 596).
595 Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
597 Kokkos::Impl::shared_allocation_tracking_release_and_enable();
// Profiling end hook for the same kpID.
601 #ifdef KOKKOSP_ENABLE_PROFILING 602 if(Kokkos::Experimental::profileLibraryLoaded()) {
603 Kokkos::Experimental::endParallelReduce(kpID);
// Label-first parallel_reduce overload taking the result as a pointer; it is
// introduced under `#ifndef KOKKOS_HAVE_CUDA`.
// NOTE(review): the first parameter — presumably `const std::string& str`,
// since `str` is streamed below — and the forwarding call between the two
// debug prints are missing from this listing.
608 #ifndef KOKKOS_HAVE_CUDA 609 template<
class ExecPolicy ,
class FunctorType ,
class ResultType >
612 ,
const ExecPolicy & policy
613 ,
const FunctorType & functor
614 , ResultType * result)
// With KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES set, the kernel label is
// printed to std::cout before the kernel ...
616 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 618 std::cout <<
"KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl;
// ... and again after it.
623 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 625 std::cout <<
"KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl;
// Label-first parallel_reduce overload taking the result by reference.
// NOTE(review): the first parameter — presumably `const std::string& str`,
// since `str` is streamed below — and the forwarding call between the two
// debug prints are missing from this listing.
630 template<
class ExecPolicy ,
class FunctorType ,
class ResultType >
633 ,
const ExecPolicy & policy
634 ,
const FunctorType & functor
635 , ResultType & result)
// With KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES set, the kernel label is
// printed to std::cout before the kernel ...
637 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 639 std::cout <<
"KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl;
// ... and again after it.
644 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 646 std::cout <<
"KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl;
// Label-first parallel_reduce overload with no result argument.
// NOTE(review): the first parameter — presumably `const std::string& str`,
// since `str` is streamed below — and the forwarding call between the two
// debug prints are missing from this listing.
651 template<
class ExecPolicy ,
class FunctorType >
654 ,
const ExecPolicy & policy
655 ,
const FunctorType & functor)
// With KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES set, the kernel label is
// printed to std::cout before the kernel ...
657 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 659 std::cout <<
"KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl;
// ... and again after it.
664 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 666 std::cout <<
"KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl;
// parallel_scan overload that takes an execution policy, a functor and an
// optional kernel label.
833 template<
class ExecutionPolicy ,
class FunctorType >
835 void parallel_scan(
const ExecutionPolicy & policy
836 ,
const FunctorType & functor
837 ,
// Optional kernel label; defaults to the empty string.
const std::string& str =
"" 838 ,
// Unnamed defaulted pointer parameter whose type is formed through
// Impl::enable_if on "ExecutionPolicy is not integral".
typename Impl::enable_if< ! Impl::is_integral< ExecutionPolicy >::value >::type * = 0
// Profiling start hook, compiled under KOKKOSP_ENABLE_PROFILING and executed
// only when a profiling library is loaded. An empty label falls back to
// typeid(FunctorType).name().
841 #ifdef KOKKOSP_ENABLE_PROFILING 843 if(Kokkos::Experimental::profileLibraryLoaded()) {
844 Kokkos::Experimental::beginParallelScan(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
// Allocation tracking is disabled and re-enabled around a statement that is
// not shown here (original line 849).
848 Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
850 Kokkos::Impl::shared_allocation_tracking_release_and_enable();
// Profiling end hook for the same kpID.
854 #ifdef KOKKOSP_ENABLE_PROFILING 855 if(Kokkos::Experimental::profileLibraryLoaded()) {
856 Kokkos::Experimental::endParallelScan(kpID);
// parallel_scan overload that takes a work count instead of a policy, plus a
// functor and an optional kernel label.
862 template<
class FunctorType >
864 void parallel_scan(
const size_t work_count
865 ,
const FunctorType & functor
866 ,
// Optional kernel label; defaults to the empty string.
const std::string& str =
"" )
// The execution space is looked up through
// FunctorPolicyExecutionSpace< FunctorType , void >.
869 Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
// Profiling start hook; an empty label falls back to
// typeid(FunctorType).name().
874 #ifdef KOKKOSP_ENABLE_PROFILING 876 if(Kokkos::Experimental::profileLibraryLoaded()) {
877 Kokkos::Experimental::beginParallelScan(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
// Allocation tracking is disabled and re-enabled around a statement that is
// not shown here (original line 882).
881 Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
883 Kokkos::Impl::shared_allocation_tracking_release_and_enable();
// Profiling end hook for the same kpID.
887 #ifdef KOKKOSP_ENABLE_PROFILING 888 if(Kokkos::Experimental::profileLibraryLoaded()) {
889 Kokkos::Experimental::endParallelScan(kpID);
// Label-first parallel_scan overload: forwards to the
// (policy, functor, str) form, optionally bracketing the call with debug
// output.
895 template<
class ExecutionPolicy ,
class FunctorType >
897 void parallel_scan(
const std::string& str
898 ,
const ExecutionPolicy & policy
899 ,
const FunctorType & functor)
// With KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES set, print the label before the
// kernel.
901 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 903 std::cout <<
"KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
// Delegate to the policy-first overload, passing the label last.
906 parallel_scan(policy,functor,str);
// With KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES set, print the label after the
// kernel.
908 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 910 std::cout <<
"KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl;
// Primary template of FunctorTeamShmemSize: value() ignores both the functor
// and the integer argument and returns 0.
923 template<
class FunctorType ,
class Enable =
void >
924 struct FunctorTeamShmemSize
926 static inline size_t value(
const FunctorType & ,
int ) {
return 0 ; }
// Specialization whose enable condition takes the address of
// FunctorType::team_shmem_size; value() forwards team_size to
// f.team_shmem_size() and returns the result.
929 template<
class FunctorType >
930 struct FunctorTeamShmemSize< FunctorType , typename Impl::enable_if< 0 < sizeof( & FunctorType::team_shmem_size ) >::type >
932 static inline size_t value(
const FunctorType & f ,
int team_size ) {
return f.team_shmem_size( team_size ) ; }
// Specialization whose enable condition takes the address of
// FunctorType::shmem_size; value() forwards team_size to f.shmem_size() and
// returns the result.
935 template<
class FunctorType >
936 struct FunctorTeamShmemSize< FunctorType , typename Impl::enable_if< 0 < sizeof( & FunctorType::shmem_size ) >::type >
938 static inline size_t value(
const FunctorType & f ,
int team_size ) {
return f.shmem_size( team_size ) ; }
Implementation detail of parallel_scan.
void parallel_reduce(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< !Impl::is_integral< ExecPolicy >::value >::type *=0)
Parallel reduction.
View to an array of data.
Memory management for host memory.
Implementation of the ParallelFor operator that has a partial specialization for the device...
Given a Functor and an Execution Policy, query an execution space.
Execution policy for work over a range of an integral type.
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< !Impl::is_integral< ExecPolicy >::value >::type *=0)
Execute functor in parallel according to the execution policy.
Implementation detail of parallel_reduce.