44 #ifndef KOKKOS_SEGMENTED_VIEW_HPP_ 45 #define KOKKOS_SEGMENTED_VIEW_HPP_ 47 #include <Kokkos_Core.hpp> 48 #include <impl/Kokkos_Error.hpp> 51 #if ! defined( KOKKOS_USING_EXPERIMENTAL_VIEW ) 58 template<
class DataType,
class Arg1Type,
class Arg2Type,
class Arg3Type>
59 struct delete_segmented_view;
/// Primary template: accepts a byte count and does nothing with it.
/// Explicit specializations for particular memory spaces supply real work.
template< class MemorySpace >
void DeviceSetAllocatableMemorySize( size_t /* unused */ )
{
}
// Specializations of DeviceSetAllocatableMemorySize for the CUDA memory
// spaces; compiled only when KOKKOS_HAVE_CUDA is defined.
// NOTE(review): the declaration of size_limit, the closing braces and the
// matching #endif are not visible in this excerpt -- confirm against the
// full file before editing.
65 #if defined( KOKKOS_HAVE_CUDA ) 69 void DeviceSetAllocatableMemorySize<Kokkos::CudaSpace>(
size_t size) {
// Read the current cudaLimitMallocHeapSize value into size_limit.
72 cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
// Request a device malloc heap of twice the requested byte count.
// NOTE(review): none of the CUDA return codes are inspected on the visible lines.
74 cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size);
// Read the limit back after the update.
75 cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
// Same query / set / re-query sequence for Kokkos::CudaUVMSpace.
81 void DeviceSetAllocatableMemorySize<Kokkos::CudaUVMSpace>(
size_t size) {
84 cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
86 cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size);
87 cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
// SegmentedView: class template derived from Kokkos::ViewTraits of the same
// four template arguments. Arg1Type..Arg3Type default to void.
// NOTE(review): several declarations in this region (opening of the t_dev
// typedef, some data members, end of the ViewOffset typedef) are not visible
// in this excerpt.
95 template<
class DataType ,
96 class Arg1Type = void ,
97 class Arg2Type = void ,
98 class Arg3Type =
void>
99 class SegmentedView :
public Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
// Tail of the t_dev typedef: a view type using the traits' layout and memory
// space with the Kokkos::MemoryUnmanaged trait.
108 typename traits::array_layout ,
109 typename traits::memory_space ,
110 Kokkos::MemoryUnmanaged > t_dev ;
// Segment length as passed to the allocating constructor (view_length).
119 size_t segment_length_;
// segment_length_ - 1; used by operator() as a bit mask on the first index.
120 size_t segment_length_m1_;
// Shift amount applied to the first index by operator() to pick the segment.
123 int segment_length_log2;
// Offset-map type built from the traits' shape and layout (remaining template
// arguments are not visible here).
128 typedef Kokkos::Impl::ViewOffset<
typename traits::shape_type
129 ,
typename traits::array_layout
// Holds the extents N1..N7 read by the dimension_*() accessors.
132 offset_map_type m_offset_map ;
// Kokkos::View aliases that share this view's layout, memory space and memory
// traits and differ only in the data type.
134 typedef Kokkos::View<
typename traits::array_intrinsic_type ,
135 typename traits::array_layout ,
136 typename traits::memory_space ,
137 typename traits::memory_traits > array_type ;
139 typedef Kokkos::View<
typename traits::const_data_type ,
140 typename traits::array_layout ,
141 typename traits::memory_space ,
142 typename traits::memory_traits > const_type ;
144 typedef Kokkos::View<
typename traits::non_const_data_type ,
145 typename traits::array_layout ,
146 typename traits::memory_space ,
147 typename traits::memory_traits > non_const_type ;
// Start of a further View typedef; its remaining arguments and name are not
// visible in this excerpt.
149 typedef Kokkos::View<
typename traits::non_const_data_type ,
150 typename traits::array_layout ,
154 template<
bool Accessible >
155 KOKKOS_INLINE_FUNCTION
156 typename Kokkos::Impl::enable_if< Accessible , typename traits::size_type >::type
157 dimension_0_intern()
const {
return nsegments_() * segment_length_ ; }
// Overload enabled when Accessible is false.
// When not compiling for __CUDA_ARCH__, copies sizeof(int) bytes from the
// device pointer of nsegments_ into n through
// Kokkos::Impl::DeepCopy< HostSpace , memory_space >, then returns
// n * segment_length_.
// NOTE(review): the declaration of n, the opening/closing braces and the
// matching #endif are not visible in this excerpt.
159 template<
bool Accessible >
160 KOKKOS_INLINE_FUNCTION
161 typename Kokkos::Impl::enable_if< ! Accessible , typename traits::size_type >::type
162 dimension_0_intern()
const 166 #if ! defined( __CUDA_ARCH__ ) 167 Kokkos::Impl::DeepCopy< HostSpace , typename traits::memory_space >( & n , nsegments_.ptr_on_device() ,
sizeof(int) );
170 return n * segment_length_ ;
175 enum { Rank = traits::rank };
177 KOKKOS_INLINE_FUNCTION offset_map_type shape()
const {
return m_offset_map ; }
180 KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_0()
const {
181 enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
182 Kokkos::Impl::ActiveExecutionMemorySpace,
typename traits::memory_space >::value };
183 int n = SegmentedView::dimension_0_intern< Accessible >();
188 KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_1()
const {
return m_offset_map.N1 ; }
190 KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_2()
const {
return m_offset_map.N2 ; }
192 KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_3()
const {
return m_offset_map.N3 ; }
194 KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_4()
const {
return m_offset_map.N4 ; }
196 KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_5()
const {
return m_offset_map.N5 ; }
198 KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_6()
const {
return m_offset_map.N6 ; }
200 KOKKOS_INLINE_FUNCTION
typename traits::size_type dimension_7()
const {
return m_offset_map.N7 ; }
203 KOKKOS_INLINE_FUNCTION
typename traits::size_type size()
const {
204 return dimension_0() *
205 m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 *
206 m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7 ;
// Extent lookup by runtime index i.
// Two returns are visible: one through dimension_0() and one through
// Kokkos::Impl::dimension( m_offset_map , i ).
// NOTE(review): the condition selecting between them is not visible in this
// excerpt -- presumably dimension_0() serves i == 0; confirm against the full file.
209 template<
typename iType >
210 KOKKOS_INLINE_FUNCTION
211 typename traits::size_type dimension(
const iType & i )
const {
213 return dimension_0();
215 return Kokkos::Impl::dimension( m_offset_map , i );
218 KOKKOS_INLINE_FUNCTION
219 typename traits::size_type capacity() {
220 return segments_.dimension_0() *
221 m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 *
222 m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7;
225 KOKKOS_INLINE_FUNCTION
226 typename traits::size_type get_num_segments() {
227 enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
228 Kokkos::Impl::ActiveExecutionMemorySpace,
typename traits::memory_space >::value };
229 int n = SegmentedView::dimension_0_intern< Accessible >();
230 return n/segment_length_ ;
233 KOKKOS_INLINE_FUNCTION
234 typename traits::size_type get_max_segments() {
235 return max_segments_;
// Allocating constructor.
//   label        label argument (its use is not visible in this excerpt)
//   view_length  segment length; must be a power of two (checked below)
//   n1..n6       trailing extents, each defaulting to 0
// NOTE(review): the declarations of n0 and n7, the loop header around the
// log2 computation and the allocation of the member views are not visible
// in this excerpt.
254 template<
class LabelType >
255 SegmentedView(
const LabelType & label ,
256 const size_t view_length ,
258 const size_t n1 = 0 ,
259 const size_t n2 = 0 ,
260 const size_t n3 = 0 ,
261 const size_t n4 = 0 ,
262 const size_t n5 = 0 ,
263 const size_t n6 = 0 ,
265 ): segment_length_(view_length),segment_length_m1_(view_length-1)
// Derive segment_length_log2 from segment_length_, starting from -1 and
// incrementing (the surrounding loop is not visible here).
267 segment_length_log2 = -1;
268 size_t l = segment_length_;
271 segment_length_log2++;
// Rebuild the length from the exponent and reject anything that does not
// round-trip, i.e. is not a power of two.
// NOTE(review): `1<<segment_length_log2` shifts an int, not a size_t --
// check behavior for very large or zero segment lengths.
273 l = 1<<segment_length_log2;
274 if(l!=segment_length_)
275 Kokkos::Impl::throw_runtime_exception(
"Kokkos::SegmentedView requires a 'power of 2' segment length");
// Number of segments needed to cover n0 entries, rounded up.
277 max_segments_ = (n0+segment_length_m1_)/segment_length_;
// Pass the byte size of max_segments_ segments of segment_length_ values to
// the memory-space specific DeviceSetAllocatableMemorySize hook.
279 Impl::DeviceSetAllocatableMemorySize<typename traits::memory_space>(segment_length_*max_segments_*
sizeof(
typename traits::value_type));
// Record the extents in the offset map.
// NOTE(review): n1..n6 default to 0, so the product passed as the last
// argument is 0 whenever a default is used -- confirm this is intended.
284 m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7, n0*n1*n2*n3*n4*n5*n6*n7 );
// Copy constructor: copy-initializes each listed member from src.
// NOTE(review): the constructor body is not visible in this excerpt.
288 KOKKOS_INLINE_FUNCTION
289 SegmentedView(
const SegmentedView& src):
290 segments_(src.segments_),
291 realloc_lock (src.realloc_lock),
292 nsegments_ (src.nsegments_),
293 segment_length_(src.segment_length_),
294 segment_length_m1_(src.segment_length_m1_),
295 max_segments_ (src.max_segments_),
296 segment_length_log2(src.segment_length_log2),
297 m_offset_map (src.m_offset_map)
// Copy assignment: assigns each listed member from src, in the same order as
// the copy constructor's initializer list.
// NOTE(review): the return statement and closing brace are not visible in
// this excerpt -- confirm the function returns *this.
300 KOKKOS_INLINE_FUNCTION
301 SegmentedView& operator= (
const SegmentedView& src) {
302 segments_ = src.segments_;
303 realloc_lock = src.realloc_lock;
304 nsegments_ = src.nsegments_;
305 segment_length_= src.segment_length_;
306 segment_length_m1_= src.segment_length_m1_;
307 max_segments_ = src.max_segments_;
308 segment_length_log2= src.segment_length_log2;
309 m_offset_map = src.m_offset_map;
// NOTE(review): the enclosing function signature is not visible in this
// excerpt; presumably this is the destructor body -- confirm.
// Nothing to do when the tracker of segments_ is not reference counting.
314 if ( !segments_.tracker().ref_counting()) {
return; }
315 size_t ref_count = segments_.tracker().ref_count();
// When the reference count is exactly one, launch delete_segmented_view over
// h_nviews() work items to release the segment buffers.
316 if(ref_count == 1u) {
320 Kokkos::parallel_for(h_nviews(),Impl::delete_segmented_view<DataType , Arg1Type , Arg2Type, Arg3Type>(*
this));
// Returns a t_dev for segment index i.
// NOTE(review): the function body is not visible in this excerpt.
324 KOKKOS_INLINE_FUNCTION
325 t_dev get_segment(
const int& i)
const {
// Team-level grow: adds segments until growSize entries fit.
//   team_member  team handle; only team_rank() == 0 performs the allocation,
//                and team_barrier() is called at the end
//   growSize     requested number of entries along the first dimension
// NOTE(review): loop headers, the lock release, the update of nsegments_ and
// several closing braces are not visible in this excerpt.
329 template<
class MemberType>
330 KOKKOS_INLINE_FUNCTION
331 void grow (MemberType& team_member,
const size_t& growSize)
const {
// Report requests beyond max_segments_ * segment_length_.
// NOTE(review): size_t values are printed with %lu -- check on targets where
// size_t is not unsigned long.
332 if (growSize>max_segments_*segment_length_) {
333 printf (
"Exceeding maxSize: %lu %lu\n", growSize, max_segments_*segment_length_);
337 if(team_member.team_rank()==0) {
338 bool too_small = growSize > segment_length_ * nsegments_();
// Spin on realloc_lock via compare-exchange (0 -> 1); presumably loops until
// the lock is obtained -- confirm against atomic_compare_exchange semantics.
340 while(Kokkos::atomic_compare_exchange(&realloc_lock(),0,1) )
// Re-evaluate the condition after the lock attempt.
342 too_small = growSize > segment_length_ * nsegments_();
// Element count of one segment: segment_length_ times extents N1..N7.
345 const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
346 m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
// Allocate the segment storage with new[].
347 typename traits::non_const_value_type*
const ptr =
new typename traits::non_const_value_type[alloc_size];
// Store a t_dev wrapping the new buffer at index nsegments_().
349 segments_(nsegments_()) =
350 t_dev(ptr,segment_length_,m_offset_map.N1,m_offset_map.N2,m_offset_map.N3,m_offset_map.N4,m_offset_map.N5,m_offset_map.N6,m_offset_map.N7);
352 too_small = growSize > segment_length_ * nsegments_();
// All team members meet here before returning.
358 team_member.team_barrier();
// Grow variant without team handle: no use of realloc_lock and no barrier
// appears on the visible lines.
//   growSize  requested number of entries along the first dimension
// NOTE(review): the loop header, the update of nsegments_ and the closing
// braces are not visible in this excerpt.
361 KOKKOS_INLINE_FUNCTION
362 void grow_non_thread_safe (
const size_t& growSize)
const {
// Report requests beyond max_segments_ * segment_length_.
363 if (growSize>max_segments_*segment_length_) {
364 printf (
"Exceeding maxSize: %lu %lu\n", growSize, max_segments_*segment_length_);
367 bool too_small = growSize > segment_length_ * nsegments_();
// Element count of one segment: segment_length_ times extents N1..N7.
370 const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
371 m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
// Allocate the segment storage with new[].
372 typename traits::non_const_value_type*
const ptr =
373 new typename traits::non_const_value_type[alloc_size];
// Store a t_dev wrapping the new buffer at index nsegments_().
375 segments_(nsegments_()) =
376 t_dev (ptr, segment_length_, m_offset_map.N1, m_offset_map.N2,
377 m_offset_map.N3, m_offset_map.N4, m_offset_map.N5,
378 m_offset_map.N6, m_offset_map.N7);
380 too_small = growSize > segment_length_ * nsegments_();
// Rank-1 element access, enabled for integral index types when
// traits::rank == 1.
// i0 >> segment_length_log2 selects the segment; i0 & segment_length_m1_ is
// the index inside that segment.
// NOTE(review): the end of the enable_if and the braces are not visible here.
385 template<
typename iType0 >
386 KOKKOS_FORCEINLINE_FUNCTION
387 typename std::enable_if<( std::is_integral<iType0>::value && traits::rank == 1 )
388 ,
typename traits::value_type &
390 operator() (
const iType0 & i0 )
const 392 return segments_[i0>>segment_length_log2](i0&(segment_length_m1_));
// Two-index element access for integral index types.
// i0 >> segment_length_log2 selects the segment; i0 & segment_length_m1_ and
// i1 index inside that segment.
// NOTE(review): the rank condition, the end of the enable_if and the braces
// are not visible in this excerpt.
395 template<
typename iType0 ,
typename iType1 >
396 KOKKOS_FORCEINLINE_FUNCTION
397 typename std::enable_if<( std::is_integral<iType0>::value &&
398 std::is_integral<iType1>::value &&
400 ,
typename traits::value_type &
402 operator() (
const iType0 & i0 ,
const iType1 & i1 )
const 404 return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1);
// Three-index element access for integral index types.
// i0 >> segment_length_log2 selects the segment; i0 & segment_length_m1_, i1
// and i2 index inside that segment.
// NOTE(review): the rank condition, the end of the enable_if and the braces
// are not visible in this excerpt.
407 template<
typename iType0 ,
typename iType1 ,
typename iType2 >
408 KOKKOS_FORCEINLINE_FUNCTION
409 typename std::enable_if<( std::is_integral<iType0>::value &&
410 std::is_integral<iType1>::value &&
411 std::is_integral<iType2>::value &&
413 ,
typename traits::value_type &
415 operator() (
const iType0 & i0 ,
const iType1 & i1 ,
const iType2 & i2 )
const 417 return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2);
// Four-index element access for integral index types.
// i0 >> segment_length_log2 selects the segment; i0 & segment_length_m1_ and
// i1..i3 index inside that segment.
// NOTE(review): the rank condition, the end of the enable_if and the braces
// are not visible in this excerpt.
420 template<
typename iType0 ,
typename iType1 ,
typename iType2 ,
typename iType3 >
421 KOKKOS_FORCEINLINE_FUNCTION
422 typename std::enable_if<( std::is_integral<iType0>::value &&
423 std::is_integral<iType1>::value &&
424 std::is_integral<iType2>::value &&
425 std::is_integral<iType3>::value &&
427 ,
typename traits::value_type &
429 operator() (
const iType0 & i0 ,
const iType1 & i1 ,
const iType2 & i2 ,
const iType3 & i3 )
const 431 return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3);
// Five-index element access for integral index types.
// i0 >> segment_length_log2 selects the segment; i0 & segment_length_m1_ and
// i1..i4 index inside that segment.
// NOTE(review): the declaration of iType4, the rank condition, the end of
// the enable_if and the braces are not visible in this excerpt.
434 template<
typename iType0 ,
typename iType1 ,
typename iType2 ,
typename iType3 ,
436 KOKKOS_FORCEINLINE_FUNCTION
437 typename std::enable_if<( std::is_integral<iType0>::value &&
438 std::is_integral<iType1>::value &&
439 std::is_integral<iType2>::value &&
440 std::is_integral<iType3>::value &&
441 std::is_integral<iType4>::value &&
443 ,
typename traits::value_type &
445 operator() (
const iType0 & i0 ,
const iType1 & i1 ,
const iType2 & i2 ,
const iType3 & i3 ,
446 const iType4 & i4 )
const 448 return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4);
// Six-index element access for integral index types.
// i0 >> segment_length_log2 selects the segment; i0 & segment_length_m1_ and
// i1..i5 index inside that segment.
// NOTE(review): the rank condition, the end of the enable_if and the braces
// are not visible in this excerpt.
451 template<
typename iType0 ,
typename iType1 ,
typename iType2 ,
typename iType3 ,
452 typename iType4 ,
typename iType5 >
453 KOKKOS_FORCEINLINE_FUNCTION
454 typename std::enable_if<( std::is_integral<iType0>::value &&
455 std::is_integral<iType1>::value &&
456 std::is_integral<iType2>::value &&
457 std::is_integral<iType3>::value &&
458 std::is_integral<iType4>::value &&
459 std::is_integral<iType5>::value &&
461 ,
typename traits::value_type &
463 operator() (
const iType0 & i0 ,
const iType1 & i1 ,
const iType2 & i2 ,
const iType3 & i3 ,
464 const iType4 & i4 ,
const iType5 & i5 )
const 466 return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5);
// Seven-index element access for integral index types.
// i0 >> segment_length_log2 selects the segment; i0 & segment_length_m1_ and
// i1..i6 index inside that segment.
// NOTE(review): the rank condition, the end of the enable_if and the braces
// are not visible in this excerpt.
469 template<
typename iType0 ,
typename iType1 ,
typename iType2 ,
typename iType3 ,
470 typename iType4 ,
typename iType5 ,
typename iType6 >
471 KOKKOS_FORCEINLINE_FUNCTION
472 typename std::enable_if<( std::is_integral<iType0>::value &&
473 std::is_integral<iType1>::value &&
474 std::is_integral<iType2>::value &&
475 std::is_integral<iType3>::value &&
476 std::is_integral<iType4>::value &&
477 std::is_integral<iType5>::value &&
478 std::is_integral<iType6>::value &&
480 ,
typename traits::value_type &
482 operator() (
const iType0 & i0 ,
const iType1 & i1 ,
const iType2 & i2 ,
const iType3 & i3 ,
483 const iType4 & i4 ,
const iType5 & i5 ,
const iType6 & i6 )
const 485 return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6);
// Eight-index element access for integral index types.
// i0 >> segment_length_log2 selects the segment; i0 & segment_length_m1_ and
// i1..i7 index inside that segment.
// NOTE(review): the rank condition, the end of the enable_if and the braces
// are not visible in this excerpt.
488 template<
typename iType0 ,
typename iType1 ,
typename iType2 ,
typename iType3 ,
489 typename iType4 ,
typename iType5 ,
typename iType6 ,
typename iType7 >
490 KOKKOS_FORCEINLINE_FUNCTION
491 typename std::enable_if<( std::is_integral<iType0>::value &&
492 std::is_integral<iType1>::value &&
493 std::is_integral<iType2>::value &&
494 std::is_integral<iType3>::value &&
495 std::is_integral<iType4>::value &&
496 std::is_integral<iType5>::value &&
497 std::is_integral<iType6>::value &&
498 std::is_integral<iType7>::value &&
500 ,
typename traits::value_type &
502 operator() (
const iType0 & i0 ,
const iType1 & i1 ,
const iType2 & i2 ,
const iType3 & i3 ,
503 const iType4 & i4 ,
const iType5 & i5 ,
const iType6 & i6 ,
const iType7 & i7 )
const 505 return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6,i7);
// Functor that releases the storage of one segment per work item.
// It is constructed from a SegmentedView (taken by value) and, for index i,
// applies delete[] to the pointer returned by get_segment(i).ptr_on_device().
// NOTE(review): the declaration of view_ and the closing braces are not
// visible in this excerpt.
510 template<
class DataType,
class Arg1Type,
class Arg2Type,
class Arg3Type>
511 struct delete_segmented_view {
512 typedef SegmentedView<DataType , Arg1Type , Arg2Type, Arg3Type> view_type;
// Execution space used when this functor is dispatched.
513 typedef typename view_type::execution_space execution_space;
// Keeps a copy of the view being torn down.
516 delete_segmented_view(view_type view):view_(view) {
519 KOKKOS_INLINE_FUNCTION
520 void operator() (
int i)
const {
521 delete [] view_.get_segment(i).ptr_on_device();
void deep_copy(const View< DT, DL, DD, DM, DS > &dst, typename Impl::enable_if<(Impl::is_same< typename ViewTraits< DT, DL, DD, DM >::non_const_value_type, typename ViewTraits< DT, DL, DD, DM >::value_type >::value), typename ViewTraits< DT, DL, DD, DM >::const_value_type >::type &value)
Deep copy a value into a view.
View to an array of data.
Traits class for accessing attributes of a View.
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< !Impl::is_integral< ExecPolicy >::value >::type *=0)
Execute functor in parallel according to the execution policy.