// NOTE(review): this file is an extraction-garbled copy of a Kokkos CUDA
// memory-space header; original source line numbers are fused into the text
// and structural lines (the `class CudaSpace {` header, access specifiers,
// braces) are missing from this view. Comments annotate visible fragments only.
//
// CudaSpace: memory space for CUDA device (global) memory.
44 #ifndef KOKKOS_CUDASPACE_HPP 45 #define KOKKOS_CUDASPACE_HPP 47 #include <Kokkos_Core_fwd.hpp> 49 #if defined( KOKKOS_HAVE_CUDA ) 55 #include <Kokkos_HostSpace.hpp> 57 #include <impl/Kokkos_AllocationTracker.hpp> 59 #include <Cuda/Kokkos_Cuda_abort.hpp> 60 #include <Cuda/Kokkos_Cuda_BasicAllocators.hpp> 72 typedef CudaSpace memory_space ;
// The execution space paired with this memory space is Kokkos::Cuda.
73 typedef Kokkos::Cuda execution_space ;
76 typedef unsigned int size_type ;
// Legacy (pre-experimental-View) allocator selection.
80 #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) 82 typedef Impl::CudaMallocAllocator allocator;
// Allocate device memory and register it with the allocation tracker;
// 'label' is a human-readable name used in debugging/profiling output.
90 static Impl::AllocationTracker allocate_and_track(
const std::string & label,
const size_t size );
// Bind a CUDA texture object to a tracked allocation (only compiled when
// the translation unit is processed by nvcc).
96 #if defined( __CUDACC__ ) 97 static void texture_object_attach( Impl::AllocationTracker
const & tracker
99 , ::cudaChannelFormatDesc
const & desc
// Copy/assign/destroy are trivial: the space object itself holds no state
// visible here.
108 CudaSpace(
const CudaSpace & rhs ) = default ;
109 CudaSpace & operator = (
const CudaSpace & rhs ) = default ;
110 ~CudaSpace() = default ;
// Allocate 'arg_alloc_size' bytes of device memory.
113 void * allocate(
const size_t arg_alloc_size )
const ;
// Release a previous allocation; the size is passed back by the caller.
116 void deallocate(
void *
const arg_alloc_ptr
117 ,
const size_t arg_alloc_size )
const ;
// Report an illegal host-side access to CudaSpace memory (aborts).
121 static void access_error();
122 static void access_error(
const void *
const );
// Free functions: initialize / fetch (optionally release) the global
// device-side lock array used by Kokkos' CUDA backend.
138 void init_lock_array_cuda_space();
147 int* lock_array_cuda_space_ptr(
bool deallocate =
false);
// CudaUVMSpace: memory space for CUDA unified (managed) memory, accessible
// from both host and device. Class header line is missing from this
// extraction; fragments below are its member declarations.
163 typedef CudaUVMSpace memory_space ;
164 typedef Cuda execution_space ;
166 typedef unsigned int size_type ;
// Legacy allocator used before the experimental View implementation.
173 #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) 175 typedef Impl::CudaUVMAllocator allocator;
// Allocate UVM memory and register it with the allocation tracker.
183 static Impl::AllocationTracker allocate_and_track(
const std::string & label,
const size_t size );
// Bind a CUDA texture object to a tracked UVM allocation (nvcc only).
189 #if defined( __CUDACC__ ) 190 static void texture_object_attach( Impl::AllocationTracker
const & tracker
192 , ::cudaChannelFormatDesc
const & desc
// Trivial copy/assign/destroy.
201 CudaUVMSpace(
const CudaUVMSpace & rhs ) = default ;
202 CudaUVMSpace & operator = (
const CudaUVMSpace & rhs ) = default ;
203 ~CudaUVMSpace() = default ;
// Allocate 'arg_alloc_size' bytes of managed memory.
206 void * allocate(
const size_t arg_alloc_size )
const ;
// Release a previous allocation; caller supplies the original size.
209 void deallocate(
void *
const arg_alloc_ptr
210 ,
const size_t arg_alloc_size )
const ;
// CudaHostPinnedSpace: host memory that is page-locked (pinned) for fast
// asynchronous transfers to/from the CUDA device. Executes on the host —
// note the execution_space typedef below. Interior structural lines are
// missing from this extraction.
229 class CudaHostPinnedSpace {
// Pinned memory is host-resident, so its execution space is the host's.
234 typedef HostSpace::execution_space execution_space ;
235 typedef CudaHostPinnedSpace memory_space ;
237 typedef unsigned int size_type ;
// Legacy allocator selection.
241 #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) 243 typedef Impl::CudaHostAllocator allocator ;
// Allocate pinned host memory and register it with the tracker.
251 static Impl::AllocationTracker allocate_and_track(
const std::string & label,
const size_t size );
// Default-constructible; copy/assign/destroy are trivial.
257 CudaHostPinnedSpace();
258 CudaHostPinnedSpace(
const CudaHostPinnedSpace & rhs ) = default ;
259 CudaHostPinnedSpace & operator = (
const CudaHostPinnedSpace & rhs ) = default ;
260 ~CudaHostPinnedSpace() = default ;
// Allocate 'arg_alloc_size' bytes of pinned host memory.
263 void * allocate(
const size_t arg_alloc_size )
const ;
// Release a previous allocation; caller supplies the original size.
266 void deallocate(
void *
const arg_alloc_ptr
267 ,
const size_t arg_alloc_size )
const ;
// Asynchronously copy 'n' bytes from 'src' to 'dst' using the CUDA backend.
// Declaration only; the definition lives in the CUDA implementation TU.
void DeepCopyAsyncCuda( void * dst , const void * src , size_t n );
282 template<>
struct DeepCopy< CudaSpace , CudaSpace , Cuda>
284 DeepCopy(
void * dst ,
const void * src ,
size_t );
285 DeepCopy(
const Cuda & ,
void * dst ,
const void * src ,
size_t );
288 template<>
struct DeepCopy< CudaSpace , HostSpace , Cuda >
290 DeepCopy(
void * dst ,
const void * src ,
size_t );
291 DeepCopy(
const Cuda & ,
void * dst ,
const void * src ,
size_t );
294 template<>
struct DeepCopy< HostSpace , CudaSpace , Cuda >
296 DeepCopy(
void * dst ,
const void * src ,
size_t );
297 DeepCopy(
const Cuda & ,
void * dst ,
const void * src ,
size_t );
300 template<
class ExecutionSpace>
struct DeepCopy< CudaSpace , CudaSpace , ExecutionSpace >
303 DeepCopy(
void * dst ,
const void * src ,
size_t n )
304 { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
307 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
310 DeepCopyAsyncCuda (dst,src,n);
314 template<
class ExecutionSpace>
struct DeepCopy< CudaSpace , HostSpace , ExecutionSpace >
317 DeepCopy(
void * dst ,
const void * src ,
size_t n )
318 { (void) DeepCopy< CudaSpace , HostSpace , Cuda>( dst , src , n ); }
321 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
324 DeepCopyAsyncCuda (dst,src,n);
328 template<
class ExecutionSpace>
329 struct DeepCopy< HostSpace , CudaSpace , ExecutionSpace >
332 DeepCopy(
void * dst ,
const void * src ,
size_t n )
333 { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
336 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
339 DeepCopyAsyncCuda (dst,src,n);
343 template<
class ExecutionSpace>
344 struct DeepCopy< CudaSpace , CudaUVMSpace , ExecutionSpace >
347 DeepCopy(
void * dst ,
const void * src ,
size_t n )
348 { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
351 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
354 DeepCopyAsyncCuda (dst,src,n);
358 template<
class ExecutionSpace>
359 struct DeepCopy< CudaSpace , CudaHostPinnedSpace , ExecutionSpace>
362 DeepCopy(
void * dst ,
const void * src ,
size_t n )
363 { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
366 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
369 DeepCopyAsyncCuda (dst,src,n);
374 template<
class ExecutionSpace>
375 struct DeepCopy< CudaUVMSpace , CudaSpace , ExecutionSpace>
378 DeepCopy(
void * dst ,
const void * src ,
size_t n )
379 { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
382 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
385 DeepCopyAsyncCuda (dst,src,n);
389 template<
class ExecutionSpace>
390 struct DeepCopy< CudaUVMSpace , CudaUVMSpace , ExecutionSpace>
393 DeepCopy(
void * dst ,
const void * src ,
size_t n )
394 { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
397 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
400 DeepCopyAsyncCuda (dst,src,n);
404 template<
class ExecutionSpace>
405 struct DeepCopy< CudaUVMSpace , CudaHostPinnedSpace , ExecutionSpace>
408 DeepCopy(
void * dst ,
const void * src ,
size_t n )
409 { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
412 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
415 DeepCopyAsyncCuda (dst,src,n);
419 template<
class ExecutionSpace>
struct DeepCopy< CudaUVMSpace , HostSpace , ExecutionSpace >
422 DeepCopy(
void * dst ,
const void * src ,
size_t n )
423 { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
426 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
429 DeepCopyAsyncCuda (dst,src,n);
434 template<
class ExecutionSpace>
struct DeepCopy< CudaHostPinnedSpace , CudaSpace , ExecutionSpace >
437 DeepCopy(
void * dst ,
const void * src ,
size_t n )
438 { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
441 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
444 DeepCopyAsyncCuda (dst,src,n);
448 template<
class ExecutionSpace>
struct DeepCopy< CudaHostPinnedSpace , CudaUVMSpace , ExecutionSpace >
451 DeepCopy(
void * dst ,
const void * src ,
size_t n )
452 { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
455 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
458 DeepCopyAsyncCuda (dst,src,n);
462 template<
class ExecutionSpace>
struct DeepCopy< CudaHostPinnedSpace , CudaHostPinnedSpace , ExecutionSpace >
465 DeepCopy(
void * dst ,
const void * src ,
size_t n )
466 { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
469 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
472 DeepCopyAsyncCuda (dst,src,n);
476 template<
class ExecutionSpace>
struct DeepCopy< CudaHostPinnedSpace , HostSpace , ExecutionSpace >
479 DeepCopy(
void * dst ,
const void * src ,
size_t n )
480 { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
483 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
486 DeepCopyAsyncCuda (dst,src,n);
491 template<
class ExecutionSpace>
struct DeepCopy< HostSpace , CudaUVMSpace , ExecutionSpace >
494 DeepCopy(
void * dst ,
const void * src ,
size_t n )
495 { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
498 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
501 DeepCopyAsyncCuda (dst,src,n);
505 template<
class ExecutionSpace>
struct DeepCopy< HostSpace , CudaHostPinnedSpace , ExecutionSpace >
508 DeepCopy(
void * dst ,
const void * src ,
size_t n )
509 { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
512 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
515 DeepCopyAsyncCuda (dst,src,n);
// Fragment of VerifyExecutionCanAccessMemorySpace — the specialization
// header is missing from this extraction; presumably the
// < Kokkos::CudaSpace , Kokkos::HostSpace > case (TODO confirm), given the
// abort message below. Device code must never touch plain host memory.
532 enum { value =
false };
// Device-callable check: always aborts.
533 KOKKOS_INLINE_FUNCTION
static void verify(
void )
534 { Kokkos::abort(
"Cuda code attempted to access HostSpace memory"); }
// Pointer-taking overload: same behavior.
536 KOKKOS_INLINE_FUNCTION
static void verify(
const void * )
537 { Kokkos::abort(
"Cuda code attempted to access HostSpace memory"); }
542 struct VerifyExecutionCanAccessMemorySpace<
Kokkos::CudaSpace , Kokkos::CudaUVMSpace >
544 enum { value =
true };
545 KOKKOS_INLINE_FUNCTION
static void verify(
void ) { }
546 KOKKOS_INLINE_FUNCTION
static void verify(
const void * ) { }
551 struct VerifyExecutionCanAccessMemorySpace<
Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >
553 enum { value =
true };
554 KOKKOS_INLINE_FUNCTION
static void verify(
void ) { }
555 KOKKOS_INLINE_FUNCTION
static void verify(
const void * ) { }
// Catch-all: Cuda execution accessing any other (non-CudaSpace) memory
// space is forbidden and aborts. The second template argument's line is
// missing from this extraction — presumably 'OtherSpace' (TODO confirm).
559 template<
class OtherSpace >
560 struct VerifyExecutionCanAccessMemorySpace<
// enable_if excludes the CudaSpace/CudaSpace case handled elsewhere.
561 typename enable_if< ! is_same<
Kokkos::CudaSpace,OtherSpace>::value , Kokkos::CudaSpace >::type ,
564 enum { value =
false };
// Device-callable check: always aborts.
565 KOKKOS_INLINE_FUNCTION
static void verify(
void )
566 { Kokkos::abort(
"Cuda code attempted to access unknown Space memory"); }
// Pointer-taking overload: same behavior.
568 KOKKOS_INLINE_FUNCTION
static void verify(
const void * )
569 { Kokkos::abort(
"Cuda code attempted to access unknown Space memory"); }
575 struct VerifyExecutionCanAccessMemorySpace<
Kokkos::HostSpace , Kokkos::CudaSpace >
577 enum { value =
false };
578 inline static void verify(
void ) { CudaSpace::access_error(); }
579 inline static void verify(
const void * p ) { CudaSpace::access_error(p); }
584 struct VerifyExecutionCanAccessMemorySpace<
Kokkos::HostSpace , Kokkos::CudaUVMSpace >
586 enum { value =
true };
587 inline static void verify(
void ) { }
588 inline static void verify(
const void * ) { }
593 struct VerifyExecutionCanAccessMemorySpace<
Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >
595 enum { value =
true };
596 KOKKOS_INLINE_FUNCTION
static void verify(
void ) {}
597 KOKKOS_INLINE_FUNCTION
static void verify(
const void * ) {}
// SharedAllocationRecord specialization for CudaSpace: reference-counted
// record for a device-memory allocation, with optional CUDA texture-object
// binding. Interior structural lines (access specifiers, braces) are
// missing from this extraction; fragments are annotated in place.
611 class SharedAllocationRecord<
Kokkos::CudaSpace , void >
612 :
public SharedAllocationRecord< void , void >
// UVM records reuse this record's texture-object machinery.
616 friend class SharedAllocationRecord<
Kokkos::CudaUVMSpace , void > ;
618 typedef SharedAllocationRecord< void , void > RecordBase ;
// Records are identity objects: non-copyable, non-assignable.
620 SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
621 SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
// Deallocation callback registered with the base record.
623 static void deallocate( RecordBase * );
// Create a CUDA texture object covering [alloc_ptr, alloc_ptr+alloc_size)
// with element size 'sizeof_alias'.
625 static ::cudaTextureObject_t
626 attach_texture_object( const unsigned sizeof_alias
627 , void * const alloc_ptr
628 , const size_t alloc_size );
// Root of the doubly-linked list of live CudaSpace records.
630 static RecordBase s_root_record ;
// Lazily-created texture object (0 == not yet attached).
632 ::cudaTextureObject_t m_tex_obj ;
633 const Kokkos::CudaSpace m_space ;
637 ~SharedAllocationRecord();
638 SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}
// Construct a record for a fresh allocation of 'arg_alloc_size' bytes.
640 SharedAllocationRecord(
const Kokkos::CudaSpace & arg_space
641 ,
const std::string & arg_label
642 ,
const size_t arg_alloc_size
643 ,
const RecordBase::function_type arg_dealloc = & deallocate
// Human-readable label (copied back from device-side header).
648 std::string get_label()
const ;
650 static SharedAllocationRecord * allocate(
const Kokkos::CudaSpace & arg_space
651 ,
const std::string & arg_label
652 ,
const size_t arg_alloc_size );
// Allocate + increment reference count; returns the user data pointer.
656 void * allocate_tracked(
const Kokkos::CudaSpace & arg_space
657 ,
const std::string & arg_label
658 ,
const size_t arg_alloc_size );
// Reallocate tracked memory, preserving contents up to the smaller size.
662 void * reallocate_tracked(
void *
const arg_alloc_ptr
663 ,
const size_t arg_alloc_size );
// Decrement reference count; deallocates when it reaches zero.
667 void deallocate_tracked(
void *
const arg_alloc_ptr );
// Recover the record from a user data pointer.
669 static SharedAllocationRecord * get_record(
void * arg_alloc_ptr );
// Lazily attach (once) a texture object aliased as AliasType over the
// whole allocation. Only int / ::int2 / ::int4 aliases are supported.
671 template<
typename AliasType >
673 ::cudaTextureObject_t attach_texture_object()
675 static_assert( ( std::is_same< AliasType , int >::value ||
676 std::is_same< AliasType , ::int2 >::value ||
677 std::is_same< AliasType , ::int4 >::value )
678 ,
"Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );
680 if ( m_tex_obj == 0 ) {
681 m_tex_obj = attach_texture_object(
sizeof(AliasType)
682 , (
void*) RecordBase::m_alloc_ptr
683 , RecordBase::m_alloc_size );
// Offset (in AliasType units) of 'ptr' from the allocation base —
// used to index texture fetches.
689 template<
typename AliasType >
691 int attach_texture_object_offset(
const AliasType *
const ptr )
694 return ptr -
reinterpret_cast<AliasType*
>( RecordBase::m_alloc_ptr );
// Debug dump of all live CudaSpace records.
697 static void print_records( std::ostream & ,
const Kokkos::CudaSpace & ,
bool detail =
false );
// SharedAllocationRecord specialization for CudaUVMSpace (unified memory).
// Mirrors the CudaSpace record; texture-object creation is delegated to
// the CudaSpace record's static helper. Structural lines missing from
// this extraction.
702 class SharedAllocationRecord<
Kokkos::CudaUVMSpace , void >
703 :
public SharedAllocationRecord< void , void >
707 typedef SharedAllocationRecord< void , void > RecordBase ;
// Non-copyable, non-assignable identity object.
709 SharedAllocationRecord(
const SharedAllocationRecord & ) = delete ;
710 SharedAllocationRecord & operator = (
const SharedAllocationRecord & ) = delete ;
// Deallocation callback registered with the base record.
712 static void deallocate( RecordBase * );
// Root of the list of live CudaUVMSpace records.
714 static RecordBase s_root_record ;
// Lazily-created texture object (0 == not yet attached).
716 ::cudaTextureObject_t m_tex_obj ;
717 const Kokkos::CudaUVMSpace m_space ;
721 ~SharedAllocationRecord();
722 SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}
// Construct a record for a fresh UVM allocation.
724 SharedAllocationRecord(
const Kokkos::CudaUVMSpace & arg_space
725 ,
const std::string & arg_label
726 ,
const size_t arg_alloc_size
727 ,
const RecordBase::function_type arg_dealloc = & deallocate
732 std::string get_label()
const ;
734 static SharedAllocationRecord * allocate(
const Kokkos::CudaUVMSpace & arg_space
735 ,
const std::string & arg_label
736 ,
const size_t arg_alloc_size
// Allocate + track; returns the user data pointer.
741 void * allocate_tracked(
const Kokkos::CudaUVMSpace & arg_space
742 ,
const std::string & arg_label
743 ,
const size_t arg_alloc_size );
// Reallocate tracked memory, preserving contents up to the smaller size.
747 void * reallocate_tracked(
void *
const arg_alloc_ptr
748 ,
const size_t arg_alloc_size );
// Decrement reference count; deallocates when it reaches zero.
752 void deallocate_tracked(
void *
const arg_alloc_ptr );
754 static SharedAllocationRecord * get_record(
void * arg_alloc_ptr );
// Lazily attach (once) a texture object aliased as AliasType; the actual
// creation is done by the CudaSpace record's static helper.
757 template<
typename AliasType >
759 ::cudaTextureObject_t attach_texture_object()
761 static_assert( ( std::is_same< AliasType , int >::value ||
762 std::is_same< AliasType , ::int2 >::value ||
763 std::is_same< AliasType , ::int4 >::value )
764 ,
"Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );
766 if ( m_tex_obj == 0 ) {
767 m_tex_obj = SharedAllocationRecord< Kokkos::CudaSpace , void >::
768 attach_texture_object(
sizeof(AliasType)
769 , (
void*) RecordBase::m_alloc_ptr
770 , RecordBase::m_alloc_size );
// Offset (in AliasType units) of 'ptr' from the allocation base.
776 template<
typename AliasType >
778 int attach_texture_object_offset(
const AliasType *
const ptr )
781 return ptr -
reinterpret_cast<AliasType*
>( RecordBase::m_alloc_ptr );
// Debug dump of all live CudaUVMSpace records.
784 static void print_records( std::ostream & ,
const Kokkos::CudaUVMSpace & ,
bool detail =
false );
// SharedAllocationRecord specialization for CudaHostPinnedSpace. Pinned
// host memory supports no texture objects, so there is no m_tex_obj here.
// Structural lines missing from this extraction.
788 class SharedAllocationRecord<
Kokkos::CudaHostPinnedSpace , void >
789 :
public SharedAllocationRecord< void , void >
793 typedef SharedAllocationRecord< void , void > RecordBase ;
// Non-copyable, non-assignable identity object.
795 SharedAllocationRecord(
const SharedAllocationRecord & ) = delete ;
796 SharedAllocationRecord & operator = (
const SharedAllocationRecord & ) = delete ;
// Deallocation callback registered with the base record.
798 static void deallocate( RecordBase * );
// Root of the list of live CudaHostPinnedSpace records.
800 static RecordBase s_root_record ;
802 const Kokkos::CudaHostPinnedSpace m_space ;
806 ~SharedAllocationRecord();
807 SharedAllocationRecord() : RecordBase(), m_space() {}
// Construct a record for a fresh pinned-host allocation.
809 SharedAllocationRecord(
const Kokkos::CudaHostPinnedSpace & arg_space
810 ,
const std::string & arg_label
811 ,
const size_t arg_alloc_size
812 ,
const RecordBase::function_type arg_dealloc = & deallocate
817 std::string get_label()
const ;
819 static SharedAllocationRecord * allocate(
const Kokkos::CudaHostPinnedSpace & arg_space
820 ,
const std::string & arg_label
821 ,
const size_t arg_alloc_size
// Allocate + track; returns the user data pointer.
825 void * allocate_tracked(
const Kokkos::CudaHostPinnedSpace & arg_space
826 ,
const std::string & arg_label
827 ,
const size_t arg_alloc_size );
// Reallocate tracked memory, preserving contents up to the smaller size.
831 void * reallocate_tracked(
void *
const arg_alloc_ptr
832 ,
const size_t arg_alloc_size );
// Decrement reference count; deallocates when it reaches zero.
836 void deallocate_tracked(
void *
const arg_alloc_ptr );
839 static SharedAllocationRecord * get_record(
void * arg_alloc_ptr );
// Debug dump of all live CudaHostPinnedSpace records.
841 static void print_records( std::ostream & ,
const Kokkos::CudaHostPinnedSpace & ,
bool detail =
false );
// Stray documentation residue from the extraction (belongs to HostSpace /
// hwloc docs, not to this header):
//   Memory space for main process and CPU execution spaces.
//   Memory management for host memory.
//   bool available() — query if hwloc is available.