Kokkos_CudaSpace.hpp
/*
//@HEADER
// ************************************************************************
//
//                        Kokkos v. 2.0
//              Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/

#ifndef KOKKOS_CUDASPACE_HPP
#define KOKKOS_CUDASPACE_HPP

#include <Kokkos_Core_fwd.hpp>

#if defined( KOKKOS_HAVE_CUDA )

#include <iosfwd>
#include <typeinfo>
#include <string>

#include <Kokkos_HostSpace.hpp>

#include <impl/Kokkos_AllocationTracker.hpp>

#include <Cuda/Kokkos_Cuda_abort.hpp>
#include <Cuda/Kokkos_Cuda_BasicAllocators.hpp>

/*--------------------------------------------------------------------------*/

namespace Kokkos {

/** \brief  Cuda on-device memory management */
class CudaSpace {
public:

  //! Tag this class as a kokkos memory space
  typedef CudaSpace     memory_space ;
  typedef Kokkos::Cuda  execution_space ;

  typedef unsigned int  size_type ;

  /*--------------------------------*/

#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )

  typedef Impl::CudaMallocAllocator allocator ;

  /** \brief  Allocate a contiguous block of memory and
   *          return an AllocationTracker that owns it.
   */
  static Impl::AllocationTracker allocate_and_track( const std::string & label , const size_t size );

  /*--------------------------------*/

#if defined( __CUDACC__ )
  static void texture_object_attach( Impl::AllocationTracker const & tracker
                                   , unsigned type_size
                                   , ::cudaChannelFormatDesc const & desc
                                   );
#endif

#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */

  /*--------------------------------*/

  CudaSpace();
  CudaSpace( const CudaSpace & rhs ) = default ;
  CudaSpace & operator = ( const CudaSpace & rhs ) = default ;
  ~CudaSpace() = default ;

  /**\brief  Allocate untracked memory in the cuda space */
  void * allocate( const size_t arg_alloc_size ) const ;

  /**\brief  Deallocate untracked memory in the cuda space */
  void deallocate( void * const arg_alloc_ptr
                 , const size_t arg_alloc_size ) const ;

  /*--------------------------------*/
  /** \brief  Error reporting for a HostSpace attempt to access CudaSpace */
  static void access_error();
  static void access_error( const void * const );

private:

  int m_device ; ///< Which Cuda device this space instance is attached to

  // friend class Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > ;
};
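
/* Example (illustrative sketch, not part of the original header): raw,
   untracked device allocation through CudaSpace. Assumes Kokkos has been
   initialized with Cuda enabled; the allocation is typically backed by
   cudaMalloc.

     Kokkos::CudaSpace cuda_space ;
     const size_t bytes = 1024 * sizeof(double);
     void * ptr = cuda_space.allocate( bytes );
     // ... use ptr from device code ...
     cuda_space.deallocate( ptr , bytes );
*/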

namespace Impl {

/// \brief  Initialize the lock array maintained in the Cuda memory space
///         (used internally, e.g. by Cuda atomic operations).
void init_lock_array_cuda_space();

/// \brief  Retrieve the device pointer to the lock array;
///         passing deallocate = true releases it instead.
int * lock_array_cuda_space_ptr( bool deallocate = false );

} // namespace Impl
} // namespace Kokkos
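
/* Sketch (assumption: these Impl functions are internal plumbing normally
   invoked by Kokkos itself during initialization and shutdown, not by user
   code):

     Kokkos::Impl::init_lock_array_cuda_space();               // set up the array
     int * locks = Kokkos::Impl::lock_array_cuda_space_ptr();  // device pointer
     Kokkos::Impl::lock_array_cuda_space_ptr( true );          // presumably releases it
*/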

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

namespace Kokkos {

/** \brief  Cuda memory that is accessible to both the Host and Cuda
 *          execution spaces through Cuda's unified virtual memory.
 */
class CudaUVMSpace {
public:

  //! Tag this class as a kokkos memory space
  typedef CudaUVMSpace  memory_space ;
  typedef Cuda          execution_space ;
  typedef unsigned int  size_type ;

  /** \brief  If UVM capability is available */
  static bool available();

  /*--------------------------------*/

#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )

  typedef Impl::CudaUVMAllocator allocator ;

  /** \brief  Allocate a contiguous block of memory and
   *          return an AllocationTracker that owns it.
   */
  static Impl::AllocationTracker allocate_and_track( const std::string & label , const size_t size );

#if defined( __CUDACC__ )
  static void texture_object_attach( Impl::AllocationTracker const & tracker
                                   , unsigned type_size
                                   , ::cudaChannelFormatDesc const & desc
                                   );
#endif

#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */

  /*--------------------------------*/

  CudaUVMSpace();
  CudaUVMSpace( const CudaUVMSpace & rhs ) = default ;
  CudaUVMSpace & operator = ( const CudaUVMSpace & rhs ) = default ;
  ~CudaUVMSpace() = default ;

  /**\brief  Allocate untracked memory in the cuda space */
  void * allocate( const size_t arg_alloc_size ) const ;

  /**\brief  Deallocate untracked memory in the cuda space */
  void deallocate( void * const arg_alloc_ptr
                 , const size_t arg_alloc_size ) const ;

  /*--------------------------------*/

private:

  int m_device ;
};
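
/* Example (illustrative sketch): a CudaUVMSpace allocation is managed
   (unified) memory, so the same pointer is usable from host code and from
   Cuda kernels without explicit copies. Availability should be checked
   first.

     if ( Kokkos::CudaUVMSpace::available() ) {
       Kokkos::CudaUVMSpace uvm_space ;
       double * p = (double *) uvm_space.allocate( 100 * sizeof(double) );
       p[0] = 1.0 ;  // direct host write
       // ... launch device work that reads/writes p, then fence ...
       uvm_space.deallocate( p , 100 * sizeof(double) );
     }
*/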

} // namespace Kokkos

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

namespace Kokkos {

/** \brief  Host memory that is accessible to the Cuda execution space
 *          through Cuda's host-pinned memory allocation.
 */
class CudaHostPinnedSpace {
public:

  /** \brief  Memory is in HostSpace so the host execution space
   *          is the preferred execution space.
   */
  typedef HostSpace::execution_space  execution_space ;
  typedef CudaHostPinnedSpace         memory_space ;
  typedef unsigned int                size_type ;

  /*--------------------------------*/

#if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )

  typedef Impl::CudaHostAllocator allocator ;

  /** \brief  Allocate a contiguous block of memory and
   *          return an AllocationTracker that owns it.
   */
  static Impl::AllocationTracker allocate_and_track( const std::string & label , const size_t size );

#endif /* #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) */

  /*--------------------------------*/

  CudaHostPinnedSpace();
  CudaHostPinnedSpace( const CudaHostPinnedSpace & rhs ) = default ;
  CudaHostPinnedSpace & operator = ( const CudaHostPinnedSpace & rhs ) = default ;
  ~CudaHostPinnedSpace() = default ;

  /**\brief  Allocate untracked memory in the space */
  void * allocate( const size_t arg_alloc_size ) const ;

  /**\brief  Deallocate untracked memory in the space */
  void deallocate( void * const arg_alloc_ptr
                 , const size_t arg_alloc_size ) const ;

  /*--------------------------------*/
};
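
/* Example (illustrative sketch): CudaHostPinnedSpace provides page-locked
   (pinned) host memory, typically via cudaHostAlloc. It lives on the host
   but is directly addressable by the device, and pinned buffers speed up
   host <-> device transfers.

     Kokkos::CudaHostPinnedSpace pinned_space ;
     void * buf = pinned_space.allocate( 1 << 20 );  // 1 MiB staging buffer
     // ... stage transfers or share data with device kernels through buf ...
     pinned_space.deallocate( buf , 1 << 20 );
*/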

} // namespace Kokkos

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

namespace Kokkos {
namespace Impl {

void DeepCopyAsyncCuda( void * dst , const void * src , size_t n );

template<> struct DeepCopy< CudaSpace , CudaSpace , Cuda >
{
  DeepCopy( void * dst , const void * src , size_t );
  DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};

template<> struct DeepCopy< CudaSpace , HostSpace , Cuda >
{
  DeepCopy( void * dst , const void * src , size_t );
  DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};

template<> struct DeepCopy< HostSpace , CudaSpace , Cuda >
{
  DeepCopy( void * dst , const void * src , size_t );
  DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};
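
/* The three specializations above are the blocking copy paths for the Cuda
   execution space: device-to-device, host-to-device, and device-to-host,
   typically realized with cudaMemcpy. They are normally reached through the
   public Kokkos::deep_copy API rather than used directly; a sketch (view
   names are illustrative):

     const size_t n = 100;
     Kokkos::View< double* , Kokkos::CudaSpace > dev ( "dev" , n );
     Kokkos::View< double* , Kokkos::HostSpace > host( "host" , n );
     Kokkos::deep_copy( dev , host );  // resolves to DeepCopy< CudaSpace , HostSpace , ... >
*/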

template< class ExecutionSpace >
struct DeepCopy< CudaSpace , CudaSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace >
struct DeepCopy< CudaSpace , HostSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace >
struct DeepCopy< HostSpace , CudaSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace >
struct DeepCopy< CudaSpace , CudaUVMSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace >
struct DeepCopy< CudaSpace , CudaHostPinnedSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace >
struct DeepCopy< CudaUVMSpace , CudaSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace >
struct DeepCopy< CudaUVMSpace , CudaUVMSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace >
struct DeepCopy< CudaUVMSpace , CudaHostPinnedSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace >
struct DeepCopy< CudaUVMSpace , HostSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace >
struct DeepCopy< CudaHostPinnedSpace , CudaSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace >
struct DeepCopy< CudaHostPinnedSpace , CudaUVMSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace >
struct DeepCopy< CudaHostPinnedSpace , CudaHostPinnedSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace >
struct DeepCopy< CudaHostPinnedSpace , HostSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace >
struct DeepCopy< HostSpace , CudaUVMSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};

template< class ExecutionSpace >
struct DeepCopy< HostSpace , CudaHostPinnedSpace , ExecutionSpace >
{
  inline
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }

  inline
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};
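
/* All templated specializations above follow one pattern: the synchronous
   constructor delegates to the blocking Cuda copy, while the constructor
   taking an execution space instance first fences that space and then
   issues DeepCopyAsyncCuda, the asynchronous Cuda-side copy. UVM and
   host-pinned memory reuse the plain CudaSpace / HostSpace routines because
   their pointers are directly usable in those directions. Sketch (assuming
   an enabled OpenMP backend with instance `omp`):

     DeepCopy< CudaSpace , CudaSpace , Kokkos::OpenMP >( omp , dst , src , n );
     // equivalent to:  omp.fence();  DeepCopyAsyncCuda( dst , src , n );
*/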

} // namespace Impl
} // namespace Kokkos

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Impl {

/** Running in CudaSpace attempting to access HostSpace: error */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::HostSpace >
{
  enum { value = false };
  KOKKOS_INLINE_FUNCTION static void verify( void )
    { Kokkos::abort("Cuda code attempted to access HostSpace memory"); }

  KOKKOS_INLINE_FUNCTION static void verify( const void * )
    { Kokkos::abort("Cuda code attempted to access HostSpace memory"); }
};

/** Running in CudaSpace accessing CudaUVMSpace: ok */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaUVMSpace >
{
  enum { value = true };
  KOKKOS_INLINE_FUNCTION static void verify( void ) { }
  KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
};

/** Running in CudaSpace accessing CudaHostPinnedSpace: ok */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >
{
  enum { value = true };
  KOKKOS_INLINE_FUNCTION static void verify( void ) { }
  KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
};

/** Running in CudaSpace attempting to access an unknown space: error */
template< class OtherSpace >
struct VerifyExecutionCanAccessMemorySpace<
  typename enable_if< ! is_same< Kokkos::CudaSpace , OtherSpace >::value , Kokkos::CudaSpace >::type ,
  OtherSpace >
{
  enum { value = false };
  KOKKOS_INLINE_FUNCTION static void verify( void )
    { Kokkos::abort("Cuda code attempted to access unknown Space memory"); }

  KOKKOS_INLINE_FUNCTION static void verify( const void * )
    { Kokkos::abort("Cuda code attempted to access unknown Space memory"); }
};
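
/* The device-side checks above make an illegal dereference explicit: host
   memory touched from a Cuda kernel triggers Kokkos::abort with a message
   instead of undefined behavior, while UVM and host-pinned memory are
   device-accessible. Compile-time queries (illustrative):

     VerifyExecutionCanAccessMemorySpace< CudaSpace , CudaUVMSpace >::value  // true
     VerifyExecutionCanAccessMemorySpace< CudaSpace , HostSpace    >::value  // false
*/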

//----------------------------------------------------------------------------
/** Running in HostSpace attempting to access CudaSpace: error */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaSpace >
{
  enum { value = false };
  inline static void verify( void ) { CudaSpace::access_error(); }
  inline static void verify( const void * p ) { CudaSpace::access_error(p); }
};

/** Running in HostSpace accessing CudaUVMSpace: ok */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaUVMSpace >
{
  enum { value = true };
  inline static void verify( void ) { }
  inline static void verify( const void * ) { }
};

/** Running in HostSpace accessing CudaHostPinnedSpace: ok */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >
{
  enum { value = true };
  KOKKOS_INLINE_FUNCTION static void verify( void ) {}
  KOKKOS_INLINE_FUNCTION static void verify( const void * ) {}
};
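
/* Host-side counterpart: host code touching CudaSpace memory is routed to
   CudaSpace::access_error(), which reports the illegal access at runtime;
   UVM and host-pinned memory are host-accessible, so their verify() hooks
   are no-ops. Compile-time queries (illustrative):

     VerifyExecutionCanAccessMemorySpace< HostSpace , CudaUVMSpace >::value  // true
     VerifyExecutionCanAccessMemorySpace< HostSpace , CudaSpace    >::value  // false
*/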

} // namespace Impl
} // namespace Kokkos

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Experimental {
namespace Impl {

template<>
class SharedAllocationRecord< Kokkos::CudaSpace , void >
  : public SharedAllocationRecord< void , void >
{
private:

  friend class SharedAllocationRecord< Kokkos::CudaUVMSpace , void > ;

  typedef SharedAllocationRecord< void , void > RecordBase ;

  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;

  static void deallocate( RecordBase * );

  static ::cudaTextureObject_t
  attach_texture_object( const unsigned sizeof_alias
                       , void * const   alloc_ptr
                       , const size_t   alloc_size );

  static RecordBase s_root_record ;

  ::cudaTextureObject_t   m_tex_obj ;
  const Kokkos::CudaSpace m_space ;

protected:

  ~SharedAllocationRecord();
  SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}

  SharedAllocationRecord( const Kokkos::CudaSpace        & arg_space
                        , const std::string              & arg_label
                        , const size_t                     arg_alloc_size
                        , const RecordBase::function_type  arg_dealloc = & deallocate
                        );

public:

  std::string get_label() const ;

  static SharedAllocationRecord * allocate( const Kokkos::CudaSpace & arg_space
                                          , const std::string       & arg_label
                                          , const size_t              arg_alloc_size );

  /**\brief  Allocate tracked memory in the space */
  static
  void * allocate_tracked( const Kokkos::CudaSpace & arg_space
                         , const std::string & arg_label
                         , const size_t arg_alloc_size );

  /**\brief  Reallocate tracked memory in the space */
  static
  void * reallocate_tracked( void * const arg_alloc_ptr
                           , const size_t arg_alloc_size );

  /**\brief  Deallocate tracked memory in the space */
  static
  void deallocate_tracked( void * const arg_alloc_ptr );

  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );

  template< typename AliasType >
  inline
  ::cudaTextureObject_t attach_texture_object()
    {
      static_assert( ( std::is_same< AliasType , int >::value ||
                       std::is_same< AliasType , ::int2 >::value ||
                       std::is_same< AliasType , ::int4 >::value )
                   , "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );

      if ( m_tex_obj == 0 ) {
        m_tex_obj = attach_texture_object( sizeof(AliasType)
                                         , (void*) RecordBase::m_alloc_ptr
                                         , RecordBase::m_alloc_size );
      }

      return m_tex_obj ;
    }

  template< typename AliasType >
  inline
  int attach_texture_object_offset( const AliasType * const ptr )
    {
      // Texture object is attached to the entire allocation range
      return ptr - reinterpret_cast<AliasType*>( RecordBase::m_alloc_ptr );
    }

  static void print_records( std::ostream & , const Kokkos::CudaSpace & , bool detail = false );
};
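
/* Example (illustrative sketch): the tracked interface above is what Kokkos
   uses internally for reference-counted device allocations. Direct use
   would look roughly like:

     typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > RecordCuda ;

     const size_t bytes = 256 * sizeof(double);
     void * p = RecordCuda::allocate_tracked( Kokkos::CudaSpace() , "my_buffer" , bytes );
     p = RecordCuda::reallocate_tracked( p , 2 * bytes );  // grow, tracking preserved
     RecordCuda::deallocate_tracked( p );

   attach_texture_object<int>() lazily creates a cudaTextureObject_t bound
   to the whole allocation, enabling texture-path (read-only cache) loads;
   the static_assert restricts the alias type to int, ::int2, or ::int4.
*/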

template<>
class SharedAllocationRecord< Kokkos::CudaUVMSpace , void >
  : public SharedAllocationRecord< void , void >
{
private:

  typedef SharedAllocationRecord< void , void > RecordBase ;

  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;

  static void deallocate( RecordBase * );

  static RecordBase s_root_record ;

  ::cudaTextureObject_t      m_tex_obj ;
  const Kokkos::CudaUVMSpace m_space ;

protected:

  ~SharedAllocationRecord();
  SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}

  SharedAllocationRecord( const Kokkos::CudaUVMSpace     & arg_space
                        , const std::string              & arg_label
                        , const size_t                     arg_alloc_size
                        , const RecordBase::function_type  arg_dealloc = & deallocate
                        );

public:

  std::string get_label() const ;

  static SharedAllocationRecord * allocate( const Kokkos::CudaUVMSpace & arg_space
                                          , const std::string          & arg_label
                                          , const size_t                 arg_alloc_size
                                          );

  /**\brief  Allocate tracked memory in the space */
  static
  void * allocate_tracked( const Kokkos::CudaUVMSpace & arg_space
                         , const std::string & arg_label
                         , const size_t arg_alloc_size );

  /**\brief  Reallocate tracked memory in the space */
  static
  void * reallocate_tracked( void * const arg_alloc_ptr
                           , const size_t arg_alloc_size );

  /**\brief  Deallocate tracked memory in the space */
  static
  void deallocate_tracked( void * const arg_alloc_ptr );

  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );

  template< typename AliasType >
  inline
  ::cudaTextureObject_t attach_texture_object()
    {
      static_assert( ( std::is_same< AliasType , int >::value ||
                       std::is_same< AliasType , ::int2 >::value ||
                       std::is_same< AliasType , ::int4 >::value )
                   , "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );

      if ( m_tex_obj == 0 ) {
        m_tex_obj = SharedAllocationRecord< Kokkos::CudaSpace , void >::
          attach_texture_object( sizeof(AliasType)
                               , (void*) RecordBase::m_alloc_ptr
                               , RecordBase::m_alloc_size );
      }

      return m_tex_obj ;
    }

  template< typename AliasType >
  inline
  int attach_texture_object_offset( const AliasType * const ptr )
    {
      // Texture object is attached to the entire allocation range
      return ptr - reinterpret_cast<AliasType*>( RecordBase::m_alloc_ptr );
    }

  static void print_records( std::ostream & , const Kokkos::CudaUVMSpace & , bool detail = false );
};

template<>
class SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >
  : public SharedAllocationRecord< void , void >
{
private:

  typedef SharedAllocationRecord< void , void > RecordBase ;

  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;

  static void deallocate( RecordBase * );

  static RecordBase s_root_record ;

  const Kokkos::CudaHostPinnedSpace m_space ;

protected:

  ~SharedAllocationRecord();
  SharedAllocationRecord() : RecordBase(), m_space() {}

  SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space
                        , const std::string                 & arg_label
                        , const size_t                        arg_alloc_size
                        , const RecordBase::function_type     arg_dealloc = & deallocate
                        );

public:

  std::string get_label() const ;

  static SharedAllocationRecord * allocate( const Kokkos::CudaHostPinnedSpace & arg_space
                                          , const std::string                 & arg_label
                                          , const size_t                        arg_alloc_size
                                          );

  /**\brief  Allocate tracked memory in the space */
  static
  void * allocate_tracked( const Kokkos::CudaHostPinnedSpace & arg_space
                         , const std::string & arg_label
                         , const size_t arg_alloc_size );

  /**\brief  Reallocate tracked memory in the space */
  static
  void * reallocate_tracked( void * const arg_alloc_ptr
                           , const size_t arg_alloc_size );

  /**\brief  Deallocate tracked memory in the space */
  static
  void deallocate_tracked( void * const arg_alloc_ptr );

  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );

  static void print_records( std::ostream & , const Kokkos::CudaHostPinnedSpace & , bool detail = false );
};
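
/* Debugging sketch: each record class above exposes print_records, which
   walks the space's allocation list rooted at its s_root_record and writes
   it to the given stream; detail = true includes per-record information.
   Requires <iostream> for std::cout.

     Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >
       ::print_records( std::cout , Kokkos::CudaSpace() , true );
*/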

} // namespace Impl
} // namespace Experimental
} // namespace Kokkos

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #define KOKKOS_CUDASPACE_HPP */