Kokkos Core Kernels Package  Version of the Day
Kokkos_SegmentedView.hpp
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 2.0
6 // Copyright (2014) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
39 //
40 // ************************************************************************
41 //@HEADER
42 */
43 
44 #ifndef KOKKOS_SEGMENTED_VIEW_HPP_
45 #define KOKKOS_SEGMENTED_VIEW_HPP_
46 
47 #include <Kokkos_Core.hpp>
48 #include <impl/Kokkos_Error.hpp>
49 #include <cstdio>
50 
51 #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
52 
53 namespace Kokkos {
54 namespace Experimental {
55 
56 namespace Impl {
57 
58 template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type>
59 struct delete_segmented_view;
60 
61 template<class MemorySpace>
62 inline
63 void DeviceSetAllocatableMemorySize(size_t) {}
64 
65 #if defined( KOKKOS_HAVE_CUDA )
66 
67 template<>
68 inline
69 void DeviceSetAllocatableMemorySize<Kokkos::CudaSpace>(size_t size) {
70 #ifdef __CUDACC__
71  size_t size_limit;
72  cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
73  if(size_limit<size)
74  cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size);
75  cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
76 #endif
77 }
78 
79 template<>
80 inline
81 void DeviceSetAllocatableMemorySize<Kokkos::CudaUVMSpace>(size_t size) {
82 #ifdef __CUDACC__
83  size_t size_limit;
84  cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
85  if(size_limit<size)
86  cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size);
87  cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
88 #endif
89 }
90 
91 #endif /* #if defined( KOKKOS_HAVE_CUDA ) */
92 
93 }
94 
95 template< class DataType ,
96  class Arg1Type = void ,
97  class Arg2Type = void ,
98  class Arg3Type = void>
99 class SegmentedView : public Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
100 {
101 public:
103 
105 
107  typedef Kokkos::View< typename traits::data_type ,
108  typename traits::array_layout ,
109  typename traits::memory_space ,
110  Kokkos::MemoryUnmanaged > t_dev ;
111 
112 
113 private:
115 
118 
119  size_t segment_length_;
120  size_t segment_length_m1_;
121  int max_segments_;
122 
123  int segment_length_log2;
124 
125  // Dimensions, cardinality, capacity, and offset computation for
126  // multidimensional array view of contiguous memory.
127  // Inherits from Impl::Shape
128  typedef Kokkos::Impl::ViewOffset< typename traits::shape_type
129  , typename traits::array_layout
130  > offset_map_type ;
131 
132  offset_map_type m_offset_map ;
133 
134  typedef Kokkos::View< typename traits::array_intrinsic_type ,
135  typename traits::array_layout ,
136  typename traits::memory_space ,
137  typename traits::memory_traits > array_type ;
138 
139  typedef Kokkos::View< typename traits::const_data_type ,
140  typename traits::array_layout ,
141  typename traits::memory_space ,
142  typename traits::memory_traits > const_type ;
143 
144  typedef Kokkos::View< typename traits::non_const_data_type ,
145  typename traits::array_layout ,
146  typename traits::memory_space ,
147  typename traits::memory_traits > non_const_type ;
148 
149  typedef Kokkos::View< typename traits::non_const_data_type ,
150  typename traits::array_layout ,
151  HostSpace ,
152  void > HostMirror ;
153 
154  template< bool Accessible >
155  KOKKOS_INLINE_FUNCTION
156  typename Kokkos::Impl::enable_if< Accessible , typename traits::size_type >::type
157  dimension_0_intern() const { return nsegments_() * segment_length_ ; }
158 
159  template< bool Accessible >
160  KOKKOS_INLINE_FUNCTION
161  typename Kokkos::Impl::enable_if< ! Accessible , typename traits::size_type >::type
162  dimension_0_intern() const
163  {
164  // In Host space
165  int n = 0 ;
166 #if ! defined( __CUDA_ARCH__ )
167  Kokkos::Impl::DeepCopy< HostSpace , typename traits::memory_space >( & n , nsegments_.ptr_on_device() , sizeof(int) );
168 #endif
169 
170  return n * segment_length_ ;
171  }
172 
173 public:
174 
175  enum { Rank = traits::rank };
176 
177  KOKKOS_INLINE_FUNCTION offset_map_type shape() const { return m_offset_map ; }
178 
179  /* \brief return (current) size of dimension 0 */
180  KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_0() const {
181  enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
182  Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value };
183  int n = SegmentedView::dimension_0_intern< Accessible >();
184  return n ;
185  }
186 
187  /* \brief return size of dimension 1 */
188  KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_1() const { return m_offset_map.N1 ; }
189  /* \brief return size of dimension 2 */
190  KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_2() const { return m_offset_map.N2 ; }
191  /* \brief return size of dimension 3 */
192  KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_3() const { return m_offset_map.N3 ; }
193  /* \brief return size of dimension 4 */
194  KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_4() const { return m_offset_map.N4 ; }
195  /* \brief return size of dimension 5 */
196  KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_5() const { return m_offset_map.N5 ; }
197  /* \brief return size of dimension 6 */
198  KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_6() const { return m_offset_map.N6 ; }
199  /* \brief return size of dimension 7 */
200  KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_7() const { return m_offset_map.N7 ; }
201 
202  /* \brief return size of dimension 2 */
203  KOKKOS_INLINE_FUNCTION typename traits::size_type size() const {
204  return dimension_0() *
205  m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 *
206  m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7 ;
207  }
208 
209  template< typename iType >
210  KOKKOS_INLINE_FUNCTION
211  typename traits::size_type dimension( const iType & i ) const {
212  if(i==0)
213  return dimension_0();
214  else
215  return Kokkos::Impl::dimension( m_offset_map , i );
216  }
217 
218  KOKKOS_INLINE_FUNCTION
219  typename traits::size_type capacity() {
220  return segments_.dimension_0() *
221  m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 *
222  m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7;
223  }
224 
225  KOKKOS_INLINE_FUNCTION
226  typename traits::size_type get_num_segments() {
227  enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
228  Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value };
229  int n = SegmentedView::dimension_0_intern< Accessible >();
230  return n/segment_length_ ;
231  }
232 
233  KOKKOS_INLINE_FUNCTION
234  typename traits::size_type get_max_segments() {
235  return max_segments_;
236  }
237 
254  template< class LabelType >
255  SegmentedView(const LabelType & label ,
256  const size_t view_length ,
257  const size_t n0 ,
258  const size_t n1 = 0 ,
259  const size_t n2 = 0 ,
260  const size_t n3 = 0 ,
261  const size_t n4 = 0 ,
262  const size_t n5 = 0 ,
263  const size_t n6 = 0 ,
264  const size_t n7 = 0
265  ): segment_length_(view_length),segment_length_m1_(view_length-1)
266  {
267  segment_length_log2 = -1;
268  size_t l = segment_length_;
269  while(l>0) {
270  l>>=1;
271  segment_length_log2++;
272  }
273  l = 1<<segment_length_log2;
274  if(l!=segment_length_)
275  Kokkos::Impl::throw_runtime_exception("Kokkos::SegmentedView requires a 'power of 2' segment length");
276 
277  max_segments_ = (n0+segment_length_m1_)/segment_length_;
278 
279  Impl::DeviceSetAllocatableMemorySize<typename traits::memory_space>(segment_length_*max_segments_*sizeof(typename traits::value_type));
280 
281  segments_ = Kokkos::View<t_dev*,typename traits::execution_space>(label , max_segments_);
284  m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7, n0*n1*n2*n3*n4*n5*n6*n7 );
285 
286  }
287 
288  KOKKOS_INLINE_FUNCTION
289  SegmentedView(const SegmentedView& src):
290  segments_(src.segments_),
291  realloc_lock (src.realloc_lock),
292  nsegments_ (src.nsegments_),
293  segment_length_(src.segment_length_),
294  segment_length_m1_(src.segment_length_m1_),
295  max_segments_ (src.max_segments_),
296  segment_length_log2(src.segment_length_log2),
297  m_offset_map (src.m_offset_map)
298  {}
299 
300  KOKKOS_INLINE_FUNCTION
301  SegmentedView& operator= (const SegmentedView& src) {
302  segments_ = src.segments_;
303  realloc_lock = src.realloc_lock;
304  nsegments_ = src.nsegments_;
305  segment_length_= src.segment_length_;
306  segment_length_m1_= src.segment_length_m1_;
307  max_segments_ = src.max_segments_;
308  segment_length_log2= src.segment_length_log2;
309  m_offset_map = src.m_offset_map;
310  return *this;
311  }
312 
313  ~SegmentedView() {
314  if ( !segments_.tracker().ref_counting()) { return; }
315  size_t ref_count = segments_.tracker().ref_count();
316  if(ref_count == 1u) {
317  Kokkos::fence();
319  Kokkos::deep_copy(h_nviews,nsegments_);
320  Kokkos::parallel_for(h_nviews(),Impl::delete_segmented_view<DataType , Arg1Type , Arg2Type, Arg3Type>(*this));
321  }
322  }
323 
324  KOKKOS_INLINE_FUNCTION
325  t_dev get_segment(const int& i) const {
326  return segments_[i];
327  }
328 
329  template< class MemberType>
330  KOKKOS_INLINE_FUNCTION
331  void grow (MemberType& team_member, const size_t& growSize) const {
332  if (growSize>max_segments_*segment_length_) {
333  printf ("Exceeding maxSize: %lu %lu\n", growSize, max_segments_*segment_length_);
334  return;
335  }
336 
337  if(team_member.team_rank()==0) {
338  bool too_small = growSize > segment_length_ * nsegments_();
339  if (too_small) {
340  while(Kokkos::atomic_compare_exchange(&realloc_lock(),0,1) )
341  ; // get the lock
342  too_small = growSize > segment_length_ * nsegments_(); // Recheck once we have the lock
343  if(too_small) {
344  while(too_small) {
345  const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
346  m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
347  typename traits::non_const_value_type* const ptr = new typename traits::non_const_value_type[alloc_size];
348 
349  segments_(nsegments_()) =
350  t_dev(ptr,segment_length_,m_offset_map.N1,m_offset_map.N2,m_offset_map.N3,m_offset_map.N4,m_offset_map.N5,m_offset_map.N6,m_offset_map.N7);
351  nsegments_()++;
352  too_small = growSize > segment_length_ * nsegments_();
353  }
354  }
355  realloc_lock() = 0; //release the lock
356  }
357  }
358  team_member.team_barrier();
359  }
360 
361  KOKKOS_INLINE_FUNCTION
362  void grow_non_thread_safe (const size_t& growSize) const {
363  if (growSize>max_segments_*segment_length_) {
364  printf ("Exceeding maxSize: %lu %lu\n", growSize, max_segments_*segment_length_);
365  return;
366  }
367  bool too_small = growSize > segment_length_ * nsegments_();
368  if(too_small) {
369  while(too_small) {
370  const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
371  m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
372  typename traits::non_const_value_type* const ptr =
373  new typename traits::non_const_value_type[alloc_size];
374 
375  segments_(nsegments_()) =
376  t_dev (ptr, segment_length_, m_offset_map.N1, m_offset_map.N2,
377  m_offset_map.N3, m_offset_map.N4, m_offset_map.N5,
378  m_offset_map.N6, m_offset_map.N7);
379  nsegments_()++;
380  too_small = growSize > segment_length_ * nsegments_();
381  }
382  }
383  }
384 
385  template< typename iType0 >
386  KOKKOS_FORCEINLINE_FUNCTION
387  typename std::enable_if<( std::is_integral<iType0>::value && traits::rank == 1 )
388  , typename traits::value_type &
389  >::type
390  operator() ( const iType0 & i0 ) const
391  {
392  return segments_[i0>>segment_length_log2](i0&(segment_length_m1_));
393  }
394 
395  template< typename iType0 , typename iType1 >
396  KOKKOS_FORCEINLINE_FUNCTION
397  typename std::enable_if<( std::is_integral<iType0>::value &&
398  std::is_integral<iType1>::value &&
399  traits::rank == 2 )
400  , typename traits::value_type &
401  >::type
402  operator() ( const iType0 & i0 , const iType1 & i1 ) const
403  {
404  return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1);
405  }
406 
407  template< typename iType0 , typename iType1 , typename iType2 >
408  KOKKOS_FORCEINLINE_FUNCTION
409  typename std::enable_if<( std::is_integral<iType0>::value &&
410  std::is_integral<iType1>::value &&
411  std::is_integral<iType2>::value &&
412  traits::rank == 3 )
413  , typename traits::value_type &
414  >::type
415  operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
416  {
417  return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2);
418  }
419 
420  template< typename iType0 , typename iType1 , typename iType2 , typename iType3 >
421  KOKKOS_FORCEINLINE_FUNCTION
422  typename std::enable_if<( std::is_integral<iType0>::value &&
423  std::is_integral<iType1>::value &&
424  std::is_integral<iType2>::value &&
425  std::is_integral<iType3>::value &&
426  traits::rank == 4 )
427  , typename traits::value_type &
428  >::type
429  operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
430  {
431  return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3);
432  }
433 
434  template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
435  typename iType4 >
436  KOKKOS_FORCEINLINE_FUNCTION
437  typename std::enable_if<( std::is_integral<iType0>::value &&
438  std::is_integral<iType1>::value &&
439  std::is_integral<iType2>::value &&
440  std::is_integral<iType3>::value &&
441  std::is_integral<iType4>::value &&
442  traits::rank == 5 )
443  , typename traits::value_type &
444  >::type
445  operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
446  const iType4 & i4 ) const
447  {
448  return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4);
449  }
450 
451  template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
452  typename iType4 , typename iType5 >
453  KOKKOS_FORCEINLINE_FUNCTION
454  typename std::enable_if<( std::is_integral<iType0>::value &&
455  std::is_integral<iType1>::value &&
456  std::is_integral<iType2>::value &&
457  std::is_integral<iType3>::value &&
458  std::is_integral<iType4>::value &&
459  std::is_integral<iType5>::value &&
460  traits::rank == 6 )
461  , typename traits::value_type &
462  >::type
463  operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
464  const iType4 & i4 , const iType5 & i5 ) const
465  {
466  return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5);
467  }
468 
469  template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
470  typename iType4 , typename iType5 , typename iType6 >
471  KOKKOS_FORCEINLINE_FUNCTION
472  typename std::enable_if<( std::is_integral<iType0>::value &&
473  std::is_integral<iType1>::value &&
474  std::is_integral<iType2>::value &&
475  std::is_integral<iType3>::value &&
476  std::is_integral<iType4>::value &&
477  std::is_integral<iType5>::value &&
478  std::is_integral<iType6>::value &&
479  traits::rank == 7 )
480  , typename traits::value_type &
481  >::type
482  operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
483  const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const
484  {
485  return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6);
486  }
487 
488  template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
489  typename iType4 , typename iType5 , typename iType6 , typename iType7 >
490  KOKKOS_FORCEINLINE_FUNCTION
491  typename std::enable_if<( std::is_integral<iType0>::value &&
492  std::is_integral<iType1>::value &&
493  std::is_integral<iType2>::value &&
494  std::is_integral<iType3>::value &&
495  std::is_integral<iType4>::value &&
496  std::is_integral<iType5>::value &&
497  std::is_integral<iType6>::value &&
498  std::is_integral<iType7>::value &&
499  traits::rank == 8 )
500  , typename traits::value_type &
501  >::type
502  operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
503  const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const
504  {
505  return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6,i7);
506  }
507 };
508 
509 namespace Impl {
510 template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type>
511 struct delete_segmented_view {
512  typedef SegmentedView<DataType , Arg1Type , Arg2Type, Arg3Type> view_type;
513  typedef typename view_type::execution_space execution_space;
514 
515  view_type view_;
516  delete_segmented_view(view_type view):view_(view) {
517  }
518 
519  KOKKOS_INLINE_FUNCTION
520  void operator() (int i) const {
521  delete [] view_.get_segment(i).ptr_on_device();
522  }
523 };
524 
525 }
526 }
527 }
528 
529 #endif
530 
531 #endif
void deep_copy(const View< DT, DL, DD, DM, DS > &dst, typename Impl::enable_if<(Impl::is_same< typename ViewTraits< DT, DL, DD, DM >::non_const_value_type, typename ViewTraits< DT, DL, DD, DM >::value_type >::value), typename ViewTraits< DT, DL, DD, DM >::const_value_type >::type &value)
Deep copy a value into a view.
View to an array of data.
Traits class for accessing attributes of a View.
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< !Impl::is_integral< ExecPolicy >::value >::type *=0)
Execute functor in parallel according to the execution policy.