Tpetra parallel linear algebra  Version of the Day
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 // mfh 13/14 Sep 2013 The "should use as<size_t>" comments are both
45 // incorrect (as() is not a device function) and usually irrelevant
46 // (it would only matter if LocalOrdinal were bigger than size_t on a
47 // particular platform, which is unlikely).
48 
49 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
50 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
51 
52 #include "Kokkos_Core.hpp"
53 #include "Kokkos_ArithTraits.hpp"
54 
55 namespace Tpetra {
56 namespace KokkosRefactor {
57 namespace Details {
58 
59  // Functors for implementing packAndPrepare and unpackAndCombine
60  // through parallel_for
61 
62  template <typename DstView, typename SrcView, typename IdxView>
63  struct PackArraySingleColumn {
64  typedef typename DstView::execution_space execution_space;
65  typedef typename execution_space::size_type size_type;
66 
67  DstView dst;
68  SrcView src;
69  IdxView idx;
70  size_t col;
71 
72  PackArraySingleColumn(const DstView& dst_,
73  const SrcView& src_,
74  const IdxView& idx_,
75  size_t col_) :
76  dst(dst_), src(src_), idx(idx_), col(col_) {}
77 
78  KOKKOS_INLINE_FUNCTION
79  void operator()( const size_type k ) const {
80  dst(k) = src(idx(k), col);
81  }
82 
83  static void pack(const DstView& dst,
84  const SrcView& src,
85  const IdxView& idx,
86  size_t col) {
87  Kokkos::parallel_for( idx.size(),
88  PackArraySingleColumn(dst,src,idx,col) );
89  }
90  };
91 
92  // To do: Add enable_if<> restrictions on DstView::Rank == 1,
93  // SrcView::Rank == 2
94  template <typename DstView, typename SrcView, typename IdxView>
95  void pack_array_single_column(const DstView& dst,
96  const SrcView& src,
97  const IdxView& idx,
98  size_t col) {
99  PackArraySingleColumn<DstView,SrcView,IdxView>::pack(
100  dst, src, idx, col);
101  }
102 
103  template <typename DstView, typename SrcView, typename IdxView>
104  struct PackArrayMultiColumn {
105  typedef typename DstView::execution_space execution_space;
106  typedef typename execution_space::size_type size_type;
107 
108  DstView dst;
109  SrcView src;
110  IdxView idx;
111  size_t numCols;
112 
113  PackArrayMultiColumn(const DstView& dst_,
114  const SrcView& src_,
115  const IdxView& idx_,
116  size_t numCols_) :
117  dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
118 
119  KOKKOS_INLINE_FUNCTION
120  void operator()( const size_type k ) const {
121  const typename IdxView::value_type localRow = idx(k);
122  const size_t offset = k*numCols;
123  for (size_t j = 0; j < numCols; ++j)
124  dst(offset + j) = src(localRow, j);
125  }
126 
127  static void pack(const DstView& dst,
128  const SrcView& src,
129  const IdxView& idx,
130  size_t numCols) {
131  Kokkos::parallel_for( idx.size(),
132  PackArrayMultiColumn(dst,src,idx,numCols) );
133  }
134  };
135 
136  // To do: Add enable_if<> restrictions on DstView::Rank == 1,
137  // SrcView::Rank == 2
138  template <typename DstView, typename SrcView, typename IdxView>
139  void pack_array_multi_column(const DstView& dst,
140  const SrcView& src,
141  const IdxView& idx,
142  size_t numCols) {
143  PackArrayMultiColumn<DstView,SrcView,IdxView>::pack(
144  dst, src, idx, numCols);
145  }
146 
147  template <typename DstView, typename SrcView, typename IdxView,
148  typename ColView>
149  struct PackArrayMultiColumnVariableStride {
150  typedef typename DstView::execution_space execution_space;
151  typedef typename execution_space::size_type size_type;
152 
153  DstView dst;
154  SrcView src;
155  IdxView idx;
156  ColView col;
157  size_t numCols;
158 
159  PackArrayMultiColumnVariableStride(const DstView& dst_,
160  const SrcView& src_,
161  const IdxView& idx_,
162  const ColView& col_,
163  size_t numCols_) :
164  dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
165 
166  KOKKOS_INLINE_FUNCTION
167  void operator()( const size_type k ) const {
168  const typename IdxView::value_type localRow = idx(k);
169  const size_t offset = k*numCols;
170  for (size_t j = 0; j < numCols; ++j)
171  dst(offset + j) = src(localRow, col(j));
172  }
173 
174  static void pack(const DstView& dst,
175  const SrcView& src,
176  const IdxView& idx,
177  const ColView& col,
178  size_t numCols) {
179  Kokkos::parallel_for( idx.size(),
180  PackArrayMultiColumnVariableStride(
181  dst,src,idx,col,numCols) );
182  }
183  };
184 
185  // To do: Add enable_if<> restrictions on DstView::Rank == 1,
186  // SrcView::Rank == 2
187  template <typename DstView, typename SrcView, typename IdxView,
188  typename ColView>
189  void pack_array_multi_column_variable_stride(const DstView& dst,
190  const SrcView& src,
191  const IdxView& idx,
192  const ColView& col,
193  size_t numCols) {
194  PackArrayMultiColumnVariableStride<DstView,SrcView,IdxView,ColView>::pack(
195  dst, src, idx, col, numCols);
196  }
197 
198  struct InsertOp {
199  template <typename Scalar>
200  KOKKOS_INLINE_FUNCTION
201  void operator() (Scalar& dest, const Scalar& src) const {
202  Kokkos::atomic_assign(&dest, src);
203  }
204  };
205  struct AddOp {
206  template <typename Scalar>
207  KOKKOS_INLINE_FUNCTION
208  void operator() (Scalar& dest, const Scalar& src) const {
209  Kokkos::atomic_add(&dest, src);
210  }
211  };
212  struct AbsMaxOp {
213  // ETP: Is this really what we want? This seems very odd if
214  // Scalar != SCT::mag_type (e.g., Scalar == std::complex<T>)
215  template <typename T>
216  KOKKOS_INLINE_FUNCTION
217  T max(const T& a, const T& b) const { return a > b ? a : b; }
218 
219  template <typename Scalar>
220  KOKKOS_INLINE_FUNCTION
221  void operator() (Scalar& dest, const Scalar& src) const {
222  typedef Kokkos::Details::ArithTraits<Scalar> SCT;
223  Kokkos::atomic_assign(&dest, Scalar(max(SCT::abs(dest),SCT::abs(src))));
224  }
225  };
226 
227  template <typename DstView, typename SrcView, typename IdxView, typename Op>
228  struct UnpackArrayMultiColumn {
229  typedef typename DstView::execution_space execution_space;
230  typedef typename execution_space::size_type size_type;
231 
232  DstView dst;
233  SrcView src;
234  IdxView idx;
235  Op op;
236  size_t numCols;
237 
238  UnpackArrayMultiColumn(const DstView& dst_,
239  const SrcView& src_,
240  const IdxView& idx_,
241  const Op& op_,
242  size_t numCols_) :
243  dst(dst_), src(src_), idx(idx_), op(op_), numCols(numCols_) {}
244 
245  KOKKOS_INLINE_FUNCTION
246  void operator()( const size_type k ) const {
247  const typename IdxView::value_type localRow = idx(k);
248  const size_t offset = k*numCols;
249  for (size_t j = 0; j < numCols; ++j)
250  op( dst(localRow,j), src(offset+j) );
251  }
252 
253  static void unpack(const DstView& dst,
254  const SrcView& src,
255  const IdxView& idx,
256  const Op& op,
257  size_t numCols) {
258  Kokkos::parallel_for( idx.size(),
259  UnpackArrayMultiColumn(dst,src,idx,op,numCols) );
260  }
261  };
262 
263  // To do: Add enable_if<> restrictions on DstView::Rank == 2,
264  // SrcView::Rank == 1
265  template <typename DstView, typename SrcView, typename IdxView, typename Op>
266  void unpack_array_multi_column(const DstView& dst,
267  const SrcView& src,
268  const IdxView& idx,
269  const Op& op,
270  size_t numCols) {
271  UnpackArrayMultiColumn<DstView,SrcView,IdxView,Op>::unpack(
272  dst, src, idx, op, numCols);
273  }
274 
275  template <typename DstView, typename SrcView, typename IdxView,
276  typename ColView, typename Op>
277  struct UnpackArrayMultiColumnVariableStride {
278  typedef typename DstView::execution_space execution_space;
279  typedef typename execution_space::size_type size_type;
280 
281  DstView dst;
282  SrcView src;
283  IdxView idx;
284  ColView col;
285  Op op;
286  size_t numCols;
287 
288  UnpackArrayMultiColumnVariableStride(const DstView& dst_,
289  const SrcView& src_,
290  const IdxView& idx_,
291  const ColView& col_,
292  const Op& op_,
293  size_t numCols_) :
294  dst(dst_), src(src_), idx(idx_), col(col_), op(op_), numCols(numCols_) {}
295 
296  KOKKOS_INLINE_FUNCTION
297  void operator()( const size_type k ) const {
298  const typename IdxView::value_type localRow = idx(k);
299  const size_t offset = k*numCols;
300  for (size_t j = 0; j < numCols; ++j)
301  op( dst(localRow,col(j)), src(offset+j) );
302  }
303 
304  static void unpack(const DstView& dst,
305  const SrcView& src,
306  const IdxView& idx,
307  const ColView& col,
308  const Op& op,
309  size_t numCols) {
310  Kokkos::parallel_for( idx.size(),
311  UnpackArrayMultiColumnVariableStride(
312  dst,src,idx,col,op,numCols) );
313  }
314  };
315 
316  // To do: Add enable_if<> restrictions on DstView::Rank == 2,
317  // SrcView::Rank == 1
318  template <typename DstView, typename SrcView,typename IdxView,
319  typename ColView, typename Op>
320  void unpack_array_multi_column_variable_stride(const DstView& dst,
321  const SrcView& src,
322  const IdxView& idx,
323  const ColView& col,
324  const Op& op,
325  size_t numCols) {
326  UnpackArrayMultiColumnVariableStride<DstView,SrcView,IdxView,ColView,Op>::unpack(
327  dst, src, idx, col, op, numCols);
328  }
329 
330  template <typename DstView, typename SrcView,
331  typename DstIdxView, typename SrcIdxView>
332  struct PermuteArrayMultiColumn {
333  typedef typename DstView::execution_space execution_space;
334  typedef typename execution_space::size_type size_type;
335 
336  DstView dst;
337  SrcView src;
338  DstIdxView dst_idx;
339  SrcIdxView src_idx;
340  size_t numCols;
341 
342  PermuteArrayMultiColumn(const DstView& dst_,
343  const SrcView& src_,
344  const DstIdxView& dst_idx_,
345  const SrcIdxView& src_idx_,
346  size_t numCols_) :
347  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
348  numCols(numCols_) {}
349 
350  KOKKOS_INLINE_FUNCTION
351  void operator()( const size_type k ) const {
352  const typename DstIdxView::value_type toRow = dst_idx(k);
353  const typename SrcIdxView::value_type fromRow = src_idx(k);
354  for (size_t j = 0; j < numCols; ++j)
355  dst(toRow, j) = src(fromRow, j);
356  }
357 
358  static void permute(const DstView& dst,
359  const SrcView& src,
360  const DstIdxView& dst_idx,
361  const SrcIdxView& src_idx,
362  size_t numCols) {
363  const size_type n = std::min( dst_idx.size(), src_idx.size() );
364  Kokkos::parallel_for(
365  n, PermuteArrayMultiColumn(dst,src,dst_idx,src_idx,numCols) );
366  }
367  };
368 
369  // To do: Add enable_if<> restrictions on DstView::Rank == 2,
370  // SrcView::Rank == 2
371  template <typename DstView, typename SrcView,
372  typename DstIdxView, typename SrcIdxView>
373  void permute_array_multi_column(const DstView& dst,
374  const SrcView& src,
375  const DstIdxView& dst_idx,
376  const SrcIdxView& src_idx,
377  size_t numCols) {
378  PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView>::permute(
379  dst, src, dst_idx, src_idx, numCols);
380  }
381 
382  template <typename DstView, typename SrcView,
383  typename DstIdxView, typename SrcIdxView,
384  typename DstColView, typename SrcColView>
385  struct PermuteArrayMultiColumnVariableStride {
386  typedef typename DstView::execution_space execution_space;
387  typedef typename execution_space::size_type size_type;
388 
389  DstView dst;
390  SrcView src;
391  DstIdxView dst_idx;
392  SrcIdxView src_idx;
393  DstColView dst_col;
394  SrcColView src_col;
395  size_t numCols;
396 
397  PermuteArrayMultiColumnVariableStride(const DstView& dst_,
398  const SrcView& src_,
399  const DstIdxView& dst_idx_,
400  const SrcIdxView& src_idx_,
401  const DstColView& dst_col_,
402  const SrcColView& src_col_,
403  size_t numCols_) :
404  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
405  dst_col(dst_col_), src_col(src_col_),
406  numCols(numCols_) {}
407 
408  KOKKOS_INLINE_FUNCTION
409  void operator()( const size_type k ) const {
410  const typename DstIdxView::value_type toRow = dst_idx(k);
411  const typename SrcIdxView::value_type fromRow = src_idx(k);
412  for (size_t j = 0; j < numCols; ++j)
413  dst(toRow, dst_col(j)) = src(fromRow, src_col(j));
414  }
415 
416  static void permute(const DstView& dst,
417  const SrcView& src,
418  const DstIdxView& dst_idx,
419  const SrcIdxView& src_idx,
420  const DstColView& dst_col,
421  const SrcColView& src_col,
422  size_t numCols) {
423  const size_type n = std::min( dst_idx.size(), src_idx.size() );
424  Kokkos::parallel_for(
425  n, PermuteArrayMultiColumnVariableStride(
426  dst,src,dst_idx,src_idx,dst_col,src_col,numCols) );
427  }
428  };
429 
430  // To do: Add enable_if<> restrictions on DstView::Rank == 2,
431  // SrcView::Rank == 2
432  template <typename DstView, typename SrcView,
433  typename DstIdxView, typename SrcIdxView,
434  typename DstColView, typename SrcColView>
435  void permute_array_multi_column_variable_stride(const DstView& dst,
436  const SrcView& src,
437  const DstIdxView& dst_idx,
438  const SrcIdxView& src_idx,
439  const DstColView& dst_col,
440  const SrcColView& src_col,
441  size_t numCols) {
442  PermuteArrayMultiColumnVariableStride<DstView,SrcView,
443  DstIdxView,SrcIdxView,DstColView,SrcColView>::permute(
444  dst, src, dst_idx, src_idx, dst_col, src_col, numCols);
445  }
446 
447 } // Details namespace
448 } // KokkosRefactor namespace
449 } // Tpetra namespace
450 
451 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Implementation details of Tpetra.