Tpetra_CrsMatrix_def.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_CRSMATRIX_DEF_HPP
43 #define TPETRA_CRSMATRIX_DEF_HPP
44 
52 
53 #include "Tpetra_RowMatrix.hpp"
54 #include "Tpetra_Import_Util.hpp"
55 #include "Tpetra_Import_Util2.hpp"
56 #include "Tpetra_Util.hpp"
57 #include "Teuchos_SerialDenseMatrix.hpp"
58 #include "Teuchos_as.hpp"
59 #include "Teuchos_ArrayRCP.hpp"
60 #include <typeinfo>
61 
62 // CrsMatrix relies on template methods implemented in Tpetra_CrsGraph_def.hpp
63 //
64 // FIXME (mfh 01 Oct 2015) If I comment this out, I get link errors
65 // for CrsGraph's templated methods, even though supposedly the ETI
66 // for CrsGraph instantiates them.
67 #include "Tpetra_CrsGraph_def.hpp"
68 
69 namespace Tpetra {
70  //
71  // Users must never rely on anything in the Details namespace.
72  //
73  namespace Details {
83  template<class Scalar>
84  struct AbsMax {
86  Scalar operator() (const Scalar& x, const Scalar& y) {
87  typedef Teuchos::ScalarTraits<Scalar> STS;
88  return std::max (STS::magnitude (x), STS::magnitude (y));
89  }
90  };
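 // Example (illustrative sketch, not part of this header's interface):
 // AbsMax is a binary functor that returns the larger magnitude of its
 // two arguments.  For instance, with a real Scalar type:
 //
 //   Details::AbsMax<double> absMax;
 //   const double z = absMax (-4.0, 3.0); // z == 4.0 == max(|-4.0|, |3.0|)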
91 
103  template <class Ordinal, class Scalar>
104  struct CrsIJV {
110  CrsIJV () :
111  i (Teuchos::OrdinalTraits<Ordinal>::invalid ()),
112  j (Teuchos::OrdinalTraits<Ordinal>::invalid ()),
113  v (Teuchos::ScalarTraits<Scalar>::zero ())
114  {}
115 
121  CrsIJV (Ordinal row, Ordinal col, const Scalar &val) :
122  i (row), j (col), v (val)
123  {}
124 
130  bool operator< (const CrsIJV<Ordinal, Scalar>& rhs) const {
131  // FIXME (mfh 10 May 2013): This is what I found when I moved
132  // this operator out of the std namespace to be an instance
133  // method of CrsIJV. It's a little odd to me that it doesn't
134  // include the column index in the sort order (for the usual
135  // lexicographic sort). It doesn't really matter because
136  // CrsMatrix will sort rows by column index anyway, but it's
137  // still odd.
138  return this->i < rhs.i;
139  }
140 
141  Ordinal i;
142  Ordinal j;
143  Scalar v;
144  };
145 
146  } // namespace Details
147 } // namespace Tpetra
148 
149 namespace Teuchos {
150  // SerializationTraits specialization for Tpetra::Details::CrsIJV.
151  //
152  // Tpetra::Details::CrsIJV can be serialized using
153  // DirectSerialization. This lets Comm send and receive instances
154  // of this class.
155  //
156  // NOTE (mfh 16 Dec 2012): This won't work if Scalar does not
157  // support direct serialization ("just taking the address"). The
158  // usual Scalar types (float, double, dd_real, qd_real, or
159  // std::complex<T> for any of these types) _do_ support direct
160  // serialization.
161  template <typename Ordinal, typename Scalar>
162  class SerializationTraits<int, Tpetra::Details::CrsIJV<Ordinal, Scalar> >
163  : public DirectSerializationTraits<int, Tpetra::Details::CrsIJV<Ordinal, Scalar> >
164  {};
165 } // namespace Teuchos
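// Example (illustrative sketch, not part of this header's interface):
// CrsIJV packs one (row index, column index, value) triple, and the
// SerializationTraits specialization above lets a buffer of such triples
// travel over a Teuchos::Comm as raw bytes.  Sorting a buffer groups its
// entries by row, since operator< compares row indices only:
//
//   std::vector<Tpetra::Details::CrsIJV<int, double> > buf;
//   buf.push_back (Tpetra::Details::CrsIJV<int, double> (1, 0, 2.0));
//   buf.push_back (Tpetra::Details::CrsIJV<int, double> (0, 3, 1.0));
//   std::sort (buf.begin (), buf.end ()); // row 0's entry now comes first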
166 
167 namespace Tpetra {
168 
169  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
171  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
172  size_t maxNumEntriesPerRow,
173  ProfileType pftype,
174  const RCP<Teuchos::ParameterList>& params) :
175  dist_object_type (rowMap),
176  storageStatus_ (pftype == StaticProfile ?
177  Details::STORAGE_1D_UNPACKED :
178  Details::STORAGE_2D),
179  fillComplete_ (false),
180  frobNorm_ (-STM::one ())
181  {
182  using Teuchos::rcp;
183  try {
184  myGraph_ = rcp (new crs_graph_type (rowMap, maxNumEntriesPerRow,
185  pftype, params));
186  }
187  catch (std::exception& e) {
188  TEUCHOS_TEST_FOR_EXCEPTION(
189  true, std::runtime_error, "Tpetra::CrsMatrix constructor: Caught "
190  "exception while allocating CrsGraph: " << e.what ());
191  }
192  staticGraph_ = myGraph_;
193  resumeFill (params);
194  checkInternalState ();
195  }
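 // Example (illustrative sketch, not normative): typical use of the
 // constructor above.  The typedefs, the Map size, and the per-row entry
 // bound are assumptions made only for this example.
 //
 //   typedef Tpetra::Map<> map_type;
 //   typedef Tpetra::CrsMatrix<> crs_matrix_type;
 //
 //   Teuchos::RCP<const Teuchos::Comm<int> > comm =
 //     Teuchos::DefaultComm<int>::getComm ();
 //   Teuchos::RCP<const map_type> rowMap =
 //     Teuchos::rcp (new map_type (100, 0, comm)); // 100 global rows, index base 0
 //   crs_matrix_type A (rowMap, 3); // at most 3 entries per row
 //   // ... insertGlobalValues() on owned rows while fill is active ...
 //   A.fillComplete ();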
196 
197  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
199  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
200  const Teuchos::ArrayRCP<const size_t>& NumEntriesPerRowToAlloc,
201  ProfileType pftype,
202  const Teuchos::RCP<Teuchos::ParameterList>& params) :
203  dist_object_type (rowMap),
204  storageStatus_ (pftype == StaticProfile ?
205  Details::STORAGE_1D_UNPACKED :
206  Details::STORAGE_2D),
207  fillComplete_ (false),
208  frobNorm_ (-STM::one ())
209  {
210  using Teuchos::rcp;
211  try {
212  myGraph_ = rcp (new Graph (rowMap, NumEntriesPerRowToAlloc, pftype, params));
213  }
214  catch (std::exception &e) {
215  TEUCHOS_TEST_FOR_EXCEPTION(
216  true, std::runtime_error, "Tpetra::CrsMatrix constructor: Caught "
217  "exception while allocating CrsGraph: " << e.what ());
218  }
219  staticGraph_ = myGraph_;
220  resumeFill (params);
221  checkInternalState ();
222  }
223 
224  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
226  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
227  const Teuchos::RCP<const map_type>& colMap,
228  size_t maxNumEntriesPerRow,
229  ProfileType pftype,
230  const Teuchos::RCP<Teuchos::ParameterList>& params) :
231  dist_object_type (rowMap),
232  storageStatus_ (pftype == StaticProfile ?
233  Details::STORAGE_1D_UNPACKED :
234  Details::STORAGE_2D),
235  fillComplete_ (false),
236  frobNorm_ (-STM::one ())
237  {
238  using Teuchos::rcp;
239  TEUCHOS_TEST_FOR_EXCEPTION(! staticGraph_.is_null(), std::logic_error,
240  "Tpetra::CrsMatrix ctor (row Map, col Map, maxNumEntriesPerRow, ...): "
241  "staticGraph_ is not null at the beginning of the constructor. "
242  "Please report this bug to the Tpetra developers.");
243  TEUCHOS_TEST_FOR_EXCEPTION(! myGraph_.is_null(), std::logic_error,
244  "Tpetra::CrsMatrix ctor (row Map, col Map, maxNumEntriesPerRow, ...): "
245  "myGraph_ is not null at the beginning of the constructor. "
246  "Please report this bug to the Tpetra developers.");
247  try {
248  myGraph_ = rcp (new Graph (rowMap, colMap, maxNumEntriesPerRow,
249  pftype, params));
250  }
251  catch (std::exception &e) {
252  TEUCHOS_TEST_FOR_EXCEPTION(
253  true, std::runtime_error, "Tpetra::CrsMatrix constructor: Caught "
254  "exception while allocating CrsGraph: " << e.what ());
255  }
256  staticGraph_ = myGraph_;
257  resumeFill (params);
258  checkInternalState ();
259  }
260 
261  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
263  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
264  const Teuchos::RCP<const map_type>& colMap,
265  const Teuchos::ArrayRCP<const size_t>& numEntPerRow,
266  ProfileType pftype,
267  const Teuchos::RCP<Teuchos::ParameterList>& params) :
268  dist_object_type (rowMap),
269  storageStatus_ (pftype == StaticProfile ?
270  Details::STORAGE_1D_UNPACKED :
271  Details::STORAGE_2D),
272  fillComplete_ (false),
273  frobNorm_ (-STM::one ())
274  {
275  using Teuchos::rcp;
276  try {
277  myGraph_ = rcp (new Graph (rowMap, colMap, numEntPerRow, pftype, params));
278  }
279  catch (std::exception &e) {
280  TEUCHOS_TEST_FOR_EXCEPTION(
281  true, std::runtime_error, "Tpetra::CrsMatrix constructor: Caught "
282  "exception while allocating CrsGraph: " << e.what ());
283  }
284  staticGraph_ = myGraph_;
285  resumeFill (params);
286  checkInternalState ();
287  }
288 
289  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
291  CrsMatrix (const Teuchos::RCP<const crs_graph_type>& graph,
292  const Teuchos::RCP<Teuchos::ParameterList>& params) :
293  dist_object_type (graph->getRowMap ()),
294  staticGraph_ (graph),
295  storageStatus_ (Details::STORAGE_1D_PACKED),
296  fillComplete_ (false),
297  frobNorm_ (-STM::one ())
298  {
299  const char tfecfFuncName[] = "CrsMatrix(graph[,params])";
300  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(staticGraph_.is_null (),
301  std::runtime_error, ": When calling the CrsMatrix constructor that "
302  "accepts a static graph, the pointer to the graph must not be null.");
303  // We prohibit the case where the graph is not yet filled.
304  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! staticGraph_->isFillComplete (),
305  std::runtime_error, ": The specified graph is not fill-complete. You "
306  "must invoke fillComplete() on the graph before using it to construct a "
307  "CrsMatrix. Note that calling resumeFill() makes the graph not fill-"
308  "complete, even if you had previously called fillComplete(). In that "
309  "case, you must call fillComplete() on the graph again.");
310  // The graph already has entries, so the matrix should have entries as well, initialized to zero.  There is no need or point in allocating lazily here.
311  // The first argument (LocalIndices) is ignored, because the graph is already allocated (locally or globally indexed; it does not matter which).
312  allocateValues (LocalIndices, GraphAlreadyAllocated);
313  resumeFill (params);
314  checkInternalState ();
315  }
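 // Example (illustrative sketch, not normative): constructing a matrix
 // whose structure comes from an already fill-complete graph, reusing the
 // rowMap and typedefs assumed in the earlier sketch.  Only the matrix's
 // values may change after construction:
 //
 //   typedef Tpetra::CrsGraph<> crs_graph_type;
 //
 //   Teuchos::RCP<crs_graph_type> G =
 //     Teuchos::rcp (new crs_graph_type (rowMap, 3));
 //   // ... insertGlobalIndices() on owned rows ...
 //   G->fillComplete ();
 //   crs_matrix_type A (G);  // entries exist and start out as zero
 //   // ... replaceGlobalValues() / sumIntoGlobalValues(), but no inserts ...
 //   A.fillComplete (G->getDomainMap (), G->getRangeMap ());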
316 
317  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
319  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
320  const Teuchos::RCP<const map_type>& colMap,
321  const typename local_matrix_type::row_map_type& rowPointers,
322  const typename local_graph_type::entries_type::non_const_type& columnIndices,
323  const typename local_matrix_type::values_type& values,
324  const Teuchos::RCP<Teuchos::ParameterList>& params) :
325  dist_object_type (rowMap),
326  storageStatus_ (Details::STORAGE_1D_PACKED),
327  fillComplete_ (false),
328  frobNorm_ (-STM::one ())
329  {
330  using Teuchos::rcp;
331  try {
332  myGraph_ = rcp (new Graph (rowMap, colMap, rowPointers,
333  columnIndices, params));
334  }
335  catch (std::exception &e) {
336  TEUCHOS_TEST_FOR_EXCEPTION(
337  true, std::runtime_error, "Tpetra::CrsMatrix constructor: Caught "
338  "exception while allocating CrsGraph: " << e.what ());
339  }
340  staticGraph_ = myGraph_;
341  k_values1D_ = values;
342  resumeFill (params);
343  checkInternalState ();
344  }
345 
346  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
348  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
349  const Teuchos::RCP<const map_type>& colMap,
350  const Teuchos::ArrayRCP<size_t> & rowPointers,
351  const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
352  const Teuchos::ArrayRCP<Scalar> & values,
353  const Teuchos::RCP<Teuchos::ParameterList>& params) :
354  dist_object_type (rowMap),
355  storageStatus_ (Details::STORAGE_1D_PACKED),
356  fillComplete_ (false),
357  frobNorm_ (-STM::one ())
358  {
359  using Teuchos::rcp;
360  try {
361  myGraph_ = rcp (new Graph (rowMap, colMap, rowPointers,
362  columnIndices, params));
363  }
364  catch (std::exception &e) {
365  TEUCHOS_TEST_FOR_EXCEPTION(
366  true, std::runtime_error, "Tpetra::CrsMatrix constructor: Caught "
367  "exception while allocating CrsGraph: " << e.what ());
368  }
369  staticGraph_ = myGraph_;
370  // FIXME (mfh 05 Aug 2014) It should be possible to convince the
371  // ArrayRCP to relinquish its allocation, but that might require
372  // passing the ArrayRCP in by nonconst reference.
373  Teuchos::ArrayRCP<impl_scalar_type> vals =
374  Teuchos::arcp_reinterpret_cast<impl_scalar_type> (values);
375  k_values1D_ = Kokkos::Compat::getKokkosViewDeepCopy<device_type> (vals ());
376  resumeFill (params);
377  checkInternalState ();
378  }
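 // Example (illustrative sketch, not normative): handing prebuilt local
 // compressed-sparse-row arrays to the constructor above, for a process
 // that owns two rows of an identity matrix.  The Maps are assumptions,
 // and the element types assume the default LocalOrdinal (int) and
 // Scalar (double):
 //
 //   Teuchos::ArrayRCP<size_t> ptr = Teuchos::arcp<size_t> (3);
 //   Teuchos::ArrayRCP<int>    ind = Teuchos::arcp<int> (2);
 //   Teuchos::ArrayRCP<double> val = Teuchos::arcp<double> (2);
 //   ptr[0] = 0; ptr[1] = 1; ptr[2] = 2; // row offsets
 //   ind[0] = 0; ind[1] = 1;             // local column indices
 //   val[0] = 1.0; val[1] = 1.0;         // matrix values
 //   crs_matrix_type A (rowMap, colMap, ptr, ind, val);
 //   A.fillComplete (domainMap, rangeMap);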
379 
380  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
382  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
383  const Teuchos::RCP<const map_type>& colMap,
384  const local_matrix_type& lclMatrix,
385  const Teuchos::RCP<Teuchos::ParameterList>& params) :
386  dist_object_type (rowMap),
387  lclMatrix_ (lclMatrix),
388  storageStatus_ (Details::STORAGE_1D_PACKED),
389  fillComplete_ (false),
390  frobNorm_ (-STM::one ())
391  {
392  using Teuchos::ArrayRCP;
393  using Teuchos::arcp;
394  using Teuchos::rcp;
395  using Teuchos::RCP;
396  const char tfecfFuncName[] = "Tpetra::CrsMatrix(rowMap,colMap,lclMatrix,params): ";
397 
398  try {
399  myGraph_ = rcp (new Graph (rowMap, colMap, lclMatrix.graph, params));
400  }
401  catch (std::exception &e) {
402  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
403  true, std::runtime_error, "Caught exception while allocating "
404  "CrsGraph: " << e.what ());
405  }
406  staticGraph_ = myGraph_;
408 
409  k_values1D_ = lclMatrix_.values;
410 
411  // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used.
412 
413  // Now we're fill complete!
414  fillComplete_ = true;
415 
416  // Sanity checks at the end.
417 #ifdef HAVE_TPETRA_DEBUG
418  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive (), std::logic_error,
419  "We're at the end of fillComplete(), but isFillActive() is true. "
420  "Please report this bug to the Tpetra developers.");
421  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete (), std::logic_error,
422  "We're at the end of fillComplete(), but isFillComplete() is false. "
423  "Please report this bug to the Tpetra developers.");
424 #endif // HAVE_TPETRA_DEBUG
425  checkInternalState ();
426  }
427 
428  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
431  {}
432 
433  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
434  Teuchos::RCP<const Teuchos::Comm<int> >
436  getComm () const {
437  return getCrsGraph ()->getComm ();
438  }
439 
440  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
441  Teuchos::RCP<Node>
443  getNode () const {
444  return getCrsGraph ()->getNode ();
445  }
446 
447  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
450  getProfileType () const {
451  return getCrsGraph ()->getProfileType ();
452  }
453 
454  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
455  bool
457  isFillComplete () const {
458  return fillComplete_;
459  }
460 
461  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
462  bool
464  isFillActive () const {
465  return ! fillComplete_;
466  }
467 
468  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
469  bool
472  return getCrsGraph()->isStorageOptimized();
473  }
474 
475  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
476  bool
479  return getCrsGraph ()->isLocallyIndexed ();
480  }
481 
482  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
483  bool
486  return getCrsGraph ()->isGloballyIndexed ();
487  }
488 
489  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
490  bool
492  hasColMap () const {
493  return getCrsGraph ()->hasColMap ();
494  }
495 
496  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
500  return getCrsGraph ()->getGlobalNumEntries ();
501  }
502 
503  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
504  size_t
507  return getCrsGraph ()->getNodeNumEntries ();
508  }
509 
510  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
514  return getCrsGraph ()->getGlobalNumRows ();
515  }
516 
517  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
521  return getCrsGraph ()->getGlobalNumCols ();
522  }
523 
524  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
525  size_t
527  getNodeNumRows () const {
528  return getCrsGraph ()->getNodeNumRows ();
529  }
530 
531  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
532  size_t
534  getNodeNumCols () const {
535  return getCrsGraph ()->getNodeNumCols ();
536  }
537 
538  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
542  return getCrsGraph ()->getGlobalNumDiags ();
543  }
544 
545  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
546  size_t
549  return getCrsGraph ()->getNodeNumDiags ();
550  }
551 
552  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
553  size_t
555  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const {
556  return getCrsGraph ()->getNumEntriesInGlobalRow (globalRow);
557  }
558 
559  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
560  size_t
562  getNumEntriesInLocalRow (LocalOrdinal localRow) const {
563  return getCrsGraph ()->getNumEntriesInLocalRow (localRow);
564  }
565 
566  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
567  size_t
570  return getCrsGraph ()->getGlobalMaxNumRowEntries ();
571  }
572 
573  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
574  size_t
577  return getCrsGraph ()->getNodeMaxNumRowEntries ();
578  }
579 
580  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
581  GlobalOrdinal
583  getIndexBase () const {
584  return getRowMap ()->getIndexBase ();
585  }
586 
587  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
588  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
590  getRowMap () const {
591  return getCrsGraph ()->getRowMap ();
592  }
593 
594  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
595  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
597  getColMap () const {
598  return getCrsGraph ()->getColMap ();
599  }
600 
601  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
602  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
604  getDomainMap () const {
605  return getCrsGraph ()->getDomainMap ();
606  }
607 
608  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
609  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
611  getRangeMap () const {
612  return getCrsGraph()->getRangeMap();
613  }
614 
615  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
616  Teuchos::RCP<const RowGraph<LocalOrdinal, GlobalOrdinal, Node> >
618  getGraph () const {
619  if (staticGraph_ != Teuchos::null) {
620  return staticGraph_;
621  }
622  return myGraph_;
623  }
624 
625  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
626  Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic> >
628  getCrsGraph () const {
629  if (staticGraph_ != Teuchos::null) {
630  return staticGraph_;
631  }
632  return myGraph_;
633  }
634 
635  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
636  bool
639  return getCrsGraph ()->isLowerTriangular ();
640  }
641 
642  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
643  bool
646  return getCrsGraph ()->isUpperTriangular ();
647  }
648 
649  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
650  bool
652  isStaticGraph () const {
653  return myGraph_.is_null ();
654  }
655 
656  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
657  bool
660  return true;
661  }
662 
663  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
664  bool
667  return true;
668  }
669 
670  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
671  void
673  allocateValues (ELocalGlobal lg, GraphAllocationStatus gas)
674  {
675 #ifdef HAVE_TPETRA_DEBUG
676  // If the graph indices are already allocated, then gas should be
677  // GraphAlreadyAllocated. Otherwise, gas should be
678  // GraphNotYetAllocated.
679  if ((gas == GraphAlreadyAllocated) != staticGraph_->indicesAreAllocated()) {
680  const std::string err1 ("allocateValues: The caller has asserted that "
681  "the graph is ");
682  const std::string err2 ("already allocated, but the static graph says "
683  "that its indices are ");
684  const std::string err3 ("already allocated. Please report this bug to "
685  "the Tpetra developers.");
686  TEUCHOS_TEST_FOR_EXCEPTION(gas == GraphAlreadyAllocated && ! staticGraph_->indicesAreAllocated(),
687  std::logic_error, err1 << err2 << "not " << err3);
688  TEUCHOS_TEST_FOR_EXCEPTION(gas != GraphAlreadyAllocated && staticGraph_->indicesAreAllocated(),
689  std::logic_error, err1 << "not " << err2 << err3);
690  }
691 
692  // If the graph is unallocated, then it had better be a
693  // matrix-owned graph. ("Matrix-owned graph" means that the
694  // matrix gets to define the graph structure. If the CrsMatrix
695  // constructor that takes an RCP<const CrsGraph> was used, then
696  // the matrix does _not_ own the graph.)
697  TEUCHOS_TEST_FOR_EXCEPTION(
698  ! staticGraph_->indicesAreAllocated() && myGraph_.is_null(),
699  std::logic_error,
700  "allocateValues: The static graph says that its indices are not "
701  "allocated, but the graph is not owned by the matrix. Please report "
702  "this bug to the Tpetra developers.");
703 #endif // HAVE_TPETRA_DEBUG
704 
705  if (gas == GraphNotYetAllocated) {
706  myGraph_->allocateIndices (lg);
707  }
708 
709  // Allocate matrix values.
710  if (getProfileType () == StaticProfile) {
711  // "Static profile" means that the number of matrix entries in
712  // each row was fixed at the time the CrsMatrix constructor was
713  // called. This lets us use 1-D storage for the matrix's
714  // values. ("1-D storage" means the same as that used by the
715  // three arrays in the classic compressed sparse row format.)
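 // For example (illustrative), the 3 x 3 matrix
 //   [ 2 0 1 ]
 //   [ 0 3 0 ]
 //   [ 4 0 5 ]
 // stored that way has ptr = {0, 2, 3, 5}, ind = {0, 2, 1, 0, 2},
 // and val = {2, 1, 3, 4, 5}.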
716 
717  const size_t lclNumRows = staticGraph_->getNodeNumRows ();
718  typename Graph::local_graph_type::row_map_type k_ptrs =
719  staticGraph_->k_rowPtrs_;
720  TEUCHOS_TEST_FOR_EXCEPTION(
721  k_ptrs.dimension_0 () != lclNumRows+1, std::logic_error,
722  "Tpetra::CrsMatrix::allocateValues: With StaticProfile, row offsets "
723  "array has length " << k_ptrs.dimension_0 () << " != (lclNumRows+1) = "
724  << (lclNumRows+1) << ".");
725  // FIXME (mfh 08 Aug 2014) This assumes UVM. We could fix this
726  // either by storing the row offsets in the graph as a DualView,
727  // or by making a device View of that entry, and copying it back
728  // to host.
729  const size_t lclTotalNumEntries = k_ptrs(lclNumRows);
730 
731  // Allocate array of (packed???) matrix values.
732  typedef typename local_matrix_type::values_type values_type;
733  k_values1D_ = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
734  }
735  else {
736  // "Dynamic profile" means the number of matrix entries in each
737  // row is not fixed and may expand. Thus, we store the matrix's
738  // values in "2-D storage," meaning an array of arrays. The
739  // outer array has as many inner arrays as there are rows in the
740  // matrix, and each inner array stores the values in that row.
741  values2D_ = staticGraph_->template allocateValues2D<impl_scalar_type> ();
742  }
743  }
744 
745  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
746  void
748  getAllValues (Teuchos::ArrayRCP<const size_t>& rowPointers,
749  Teuchos::ArrayRCP<const LocalOrdinal>& columnIndices,
750  Teuchos::ArrayRCP<const Scalar>& values) const
751  {
752  const char tfecfFuncName[] = "getAllValues: ";
753  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
754  columnIndices.size () != values.size (), std::runtime_error,
755  "Requires that columnIndices and values are the same size.");
756 
757  RCP<const crs_graph_type> relevantGraph = getCrsGraph ();
758  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
759  relevantGraph.is_null (), std::runtime_error,
760  "Requires that getCrsGraph() is not null.");
761  try {
762  rowPointers = relevantGraph->getNodeRowPtrs ();
763  }
764  catch (std::exception &e) {
765  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
766  true, std::runtime_error,
767  "Caught exception while calling graph->getNodeRowPtrs(): "
768  << e.what ());
769  }
770  try {
771  columnIndices = relevantGraph->getNodePackedIndices ();
772  }
773  catch (std::exception &e) {
774  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
775  true, std::runtime_error,
776  "Caught exception while calling graph->getNodePackedIndices(): "
777  << e.what ());
778  }
779  Teuchos::ArrayRCP<const impl_scalar_type> vals =
780  Kokkos::Compat::persistingView (k_values1D_);
781  values = Teuchos::arcp_reinterpret_cast<const Scalar> (vals);
782  }
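 // Example (illustrative sketch, not normative): retrieving the packed
 // local storage after fillComplete, using the matrix A and default
 // LocalOrdinal/Scalar types assumed in the earlier sketches:
 //
 //   Teuchos::ArrayRCP<const size_t> ptr;
 //   Teuchos::ArrayRCP<const int>    ind;
 //   Teuchos::ArrayRCP<const double> val;
 //   A.getAllValues (ptr, ind, val);
 //   // Local row r holds column indices ind[ptr[r]] .. ind[ptr[r+1]-1]
 //   // and values val[ptr[r]] .. val[ptr[r+1]-1].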
783 
784  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
785  void
787  fillLocalGraphAndMatrix (const Teuchos::RCP<Teuchos::ParameterList>& params)
788  {
789  using Kokkos::create_mirror_view;
790  using Teuchos::arcp_const_cast;
791  using Teuchos::ArrayRCP;
792  using Teuchos::null;
793  using Teuchos::RCP;
794  using Teuchos::rcp;
795  typedef ArrayRCP<size_t>::size_type size_type;
796  typedef typename local_matrix_type::row_map_type row_map_type;
797  typedef typename Graph::t_numRowEntries_ row_entries_type;
798  typedef typename Graph::local_graph_type::entries_type::non_const_type lclinds_1d_type;
799  typedef typename local_matrix_type::values_type values_type;
800 
801  // fillComplete() only calls fillLocalGraphAndMatrix() if the
802  // matrix owns the graph, which means myGraph_ is not null.
803  TEUCHOS_TEST_FOR_EXCEPTION(
804  myGraph_.is_null (), std::logic_error, "Tpetra::CrsMatrix::"
805  "fillLocalGraphAndMatrix (called from fillComplete or "
806  "expertStaticFillComplete): The nonconst graph (myGraph_) is null. This "
807  "means that the matrix has a const (a.k.a. \"static\") graph. This may "
808  "mean that fillComplete or expertStaticFillComplete has a bug, since it "
809  "should never call fillLocalGraphAndMatrix in that case. "
810  "Please report this bug to the Tpetra developers.");
811 
812  const size_t lclNumRows = this->getNodeNumRows ();
813 
814  // This method's goal is to fill in the three arrays (compressed
815  // sparse row format) that define the sparse graph's and matrix's
816  // structure, and the sparse matrix's values.
817  //
818  // Use the nonconst version of row_map_type for k_ptrs,
819  // because row_map_type is const and we need to modify k_ptrs here.
820  typename row_map_type::non_const_type k_ptrs;
821  row_map_type k_ptrs_const;
822  lclinds_1d_type k_inds;
823  values_type k_vals;
824 
825  // Get references to the data in myGraph_, so we can modify them
826  // as well. Note that we only call fillLocalGraphAndMatrix() if
827  // the matrix owns the graph, which means myGraph_ is not null.
828  lclinds_1d_type k_lclInds1D_ = myGraph_->k_lclInds1D_;
829 
830  // The number of entries in each locally owned row. This is a
831  // DualView. 2-D storage lives on host and is currently not
832  // thread-safe for parallel kernels even on host, so we have to
833  // work sequentially with host storage in that case.
834  row_entries_type k_numRowEnt = myGraph_->k_numRowEntries_;
835  typename row_entries_type::t_host h_numRowEnt = k_numRowEnt.h_view;
836 
837  if (getProfileType () == DynamicProfile) {
838  // Pack 2-D storage (DynamicProfile) into 1-D packed storage.
839  //
840  // DynamicProfile means that the matrix's column indices and
841  // values are currently stored in a 2-D "unpacked" format, in
842  // the arrays-of-arrays myGraph_->lclInds2D_ (for column
843  // indices) and values2D_ (for values). We allocate 1-D storage
844  // (k_inds resp. k_vals), and then copy from 2-D storage
845  // (lclInds2D_ resp. values2D_) into 1-D storage (k_inds
846  // resp. k_vals).
847  TEUCHOS_TEST_FOR_EXCEPTION(
848  static_cast<size_t> (k_numRowEnt.dimension_0 ()) != lclNumRows,
849  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix (called "
850  "from fillComplete or expertStaticFillComplete): For the "
851  "DynamicProfile branch, k_numRowEnt has the wrong length. "
852  "k_numRowEnt.dimension_0() = " << k_numRowEnt.dimension_0 ()
853  << " != getNodeNumRows() = " << lclNumRows << ".");
854 
855  // Pack the row offsets into k_ptrs, by doing a sum-scan of
856  // the array of valid entry counts per row (h_numRowEnt).
857  //
858  // Total number of entries in the matrix on the calling
859  // process. We will compute this in the loop below. It's
860  // cheap to compute and useful as a sanity check.
861  size_t lclTotalNumEntries = 0;
862  // This will be a host view of packed row offsets.
863  typename row_map_type::non_const_type::HostMirror h_ptrs;
864  {
865  // Allocate the packed row offsets array. We use a nonconst
866  // temporary (packedRowOffsets) here, because k_ptrs is const.
867  // We will assign packedRowOffsets to k_ptrs below.
868  typename row_map_type::non_const_type packedRowOffsets ("Tpetra::CrsGraph::ptr",
869  lclNumRows+1);
870  //
871  // FIXME hack until we get parallel_scan in kokkos
872  //
873  h_ptrs = create_mirror_view (packedRowOffsets);
874  h_ptrs(0) = 0;
875  for (size_type i = 0; i < static_cast<size_type> (lclNumRows); ++i) {
876  const size_t numEnt = h_numRowEnt(i);
877  lclTotalNumEntries += numEnt;
878  h_ptrs(i+1) = h_ptrs(i) + numEnt;
879  }
880  Kokkos::deep_copy (packedRowOffsets, h_ptrs);
881  // packedRowOffsets is modifiable; k_ptrs isn't, so we have to
882  // use packedRowOffsets in the loop above and assign here.
883  k_ptrs = packedRowOffsets;
884  k_ptrs_const = k_ptrs;
885  }
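 // For example (illustrative): if h_numRowEnt = {2, 0, 3}, the sum-scan
 // above produces h_ptrs = {0, 2, 2, 5} and lclTotalNumEntries = 5.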
886 
887  TEUCHOS_TEST_FOR_EXCEPTION(
888  static_cast<size_t> (k_ptrs.dimension_0 ()) != lclNumRows + 1,
889  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: In "
890  "DynamicProfile branch, after packing k_ptrs, k_ptrs.dimension_0()"
891  " = " << k_ptrs.dimension_0 () << " != (lclNumRows+1) = "
892  << (lclNumRows+1) << ".");
893  TEUCHOS_TEST_FOR_EXCEPTION(
894  static_cast<size_t> (h_ptrs.dimension_0 ()) != lclNumRows + 1,
895  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: In "
896  "DynamicProfile branch, after packing h_ptrs, h_ptrs.dimension_0()"
897  " = " << h_ptrs.dimension_0 () << " != (lclNumRows+1) = "
898  << (lclNumRows+1) << ".");
899  // FIXME (mfh 08 Aug 2014) This assumes UVM.
900  TEUCHOS_TEST_FOR_EXCEPTION(
901  k_ptrs(lclNumRows) != lclTotalNumEntries, std::logic_error,
902  "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: In DynamicProfile branch, "
903  "after packing k_ptrs, k_ptrs(lclNumRows = " << lclNumRows << ") = " <<
904  k_ptrs(lclNumRows) << " != total number of entries on the calling "
905  "process = " << lclTotalNumEntries << ".");
906 
907  // Allocate the arrays of packed column indices and values.
908  k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
909  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
910 
911  // We need host views of the above, since 2-D storage lives on host.
912  typename lclinds_1d_type::HostMirror h_inds = create_mirror_view (k_inds);
913  typename values_type::HostMirror h_vals = create_mirror_view (k_vals);
914 
915  // Pack the column indices and values on the host.
916  ArrayRCP<Array<LocalOrdinal> > lclInds2D = myGraph_->lclInds2D_;
917  for (size_t row = 0; row < lclNumRows; ++row) {
918  const size_t numEnt = h_numRowEnt(row);
919  std::copy (lclInds2D[row].begin(),
920  lclInds2D[row].begin() + numEnt,
921  h_inds.ptr_on_device() + h_ptrs(row));
922  std::copy (values2D_[row].begin(),
923  values2D_[row].begin() + numEnt,
924  h_vals.ptr_on_device() + h_ptrs(row));
925  }
926  // Copy the packed column indices and values to the device.
927  Kokkos::deep_copy (k_inds, h_inds);
928  Kokkos::deep_copy (k_vals, h_vals);
929 
930  // Sanity check of packed row offsets.
931  if (k_ptrs.dimension_0 () != 0) {
932  const size_t numOffsets = static_cast<size_t> (k_ptrs.dimension_0 ());
933  TEUCHOS_TEST_FOR_EXCEPTION(
934  static_cast<size_t> (k_ptrs(numOffsets-1)) != k_vals.dimension_0 (),
935  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: "
936  "In DynamicProfile branch, after packing, k_ptrs(" << (numOffsets-1)
937  << ") = " << k_ptrs(numOffsets-1) << " != k_vals.dimension_0() = "
938  << k_vals.dimension_0 () << ".");
939  TEUCHOS_TEST_FOR_EXCEPTION(
940  static_cast<size_t> (k_ptrs(numOffsets-1)) != k_inds.dimension_0 (),
941  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: "
942  "In DynamicProfile branch, after packing, k_ptrs(" << (numOffsets-1)
943  << ") = " << k_ptrs(numOffsets-1) << " != k_inds.dimension_0() = "
944  << k_inds.dimension_0 () << ".");
945  }
946  }
947  else if (getProfileType () == StaticProfile) {
948  // StaticProfile means that the matrix's column indices and
949  // values are currently stored in a 1-D format, with row offsets
950  // in k_rowPtrs_ and local column indices in k_lclInds1D_.
951 
952  // StaticProfile also means that the graph's array of row
953  // offsets must already be allocated.
954  typename Graph::local_graph_type::row_map_type curRowOffsets =
955  myGraph_->k_rowPtrs_;
956  TEUCHOS_TEST_FOR_EXCEPTION(
957  curRowOffsets.dimension_0 () == 0, std::logic_error,
958  "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: curRowOffsets has size zero, but it should not.");
959  TEUCHOS_TEST_FOR_EXCEPTION(
960  curRowOffsets.dimension_0 () != lclNumRows + 1, std::logic_error,
961  "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: curRowOffsets has size "
962  << curRowOffsets.dimension_0 () << " != lclNumRows + 1 = "
963  << (lclNumRows + 1) << ".")
964  {
965  const size_t numOffsets = curRowOffsets.dimension_0 ();
966  // FIXME (mfh 06 Aug 2014) This relies on UVM.
967  TEUCHOS_TEST_FOR_EXCEPTION(
968  numOffsets != 0 &&
969  myGraph_->k_lclInds1D_.dimension_0 () != curRowOffsets(numOffsets - 1),
970  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: "
971  "numOffsets = " << numOffsets << " != 0 and "
972  "myGraph_->k_lclInds1D_.dimension_0() = "
973  << myGraph_->k_lclInds1D_.dimension_0 ()
974  << " != curRowOffsets(" << numOffsets << ") = "
975  << curRowOffsets(numOffsets - 1) << ".");
976  }
977 
978  if (myGraph_->nodeNumEntries_ != myGraph_->nodeNumAllocated_) {
979  // The matrix's current 1-D storage is "unpacked." This means
980  // the row offsets may differ from what the final row offsets
981  // should be. This could happen, for example, if the user
982  // specified StaticProfile in the constructor and set an upper
983  // bound on the number of entries per row, but didn't fill all
984  // those entries.
985  TEUCHOS_TEST_FOR_EXCEPTION(
986  static_cast<size_t> (k_numRowEnt.dimension_0 ()) != lclNumRows,
987  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix (called"
988  " from fillComplete or expertStaticFillComplete): In StaticProfile "
989  "unpacked branch, k_numRowEnt has the wrong length. "
990  "k_numRowEnt.dimension_0() = " << k_numRowEnt.dimension_0 ()
991  << " != getNodeNumRows() = " << lclNumRows << ".");
992 
993  if (curRowOffsets.dimension_0 () != 0) {
994  const size_t numOffsets =
995  static_cast<size_t> (curRowOffsets.dimension_0 ());
996  TEUCHOS_TEST_FOR_EXCEPTION(
997  curRowOffsets(numOffsets-1) != static_cast<size_t> (k_values1D_.dimension_0 ()),
998  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: "
999  "In StaticProfile branch, before allocating or packing, "
1000  "curRowOffsets(" << (numOffsets-1) << ") = "
1001  << curRowOffsets(numOffsets - 1)
1002  << " != k_values1D_.dimension_0() = "
1003  << k_values1D_.dimension_0 () << ".");
1004  TEUCHOS_TEST_FOR_EXCEPTION(
1005  static_cast<size_t> (curRowOffsets(numOffsets - 1)) !=
1006  myGraph_->k_lclInds1D_.dimension_0 (),
1007  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: "
1008  "In StaticProfile branch, before allocating or packing, "
1009  "curRowOffsets(" << (numOffsets-1) << ") = "
1010  << curRowOffsets(numOffsets - 1)
1011  << " != myGraph_->k_lclInds1D_.dimension_0() = "
1012  << myGraph_->k_lclInds1D_.dimension_0 () << ".");
1013  }
1014 
1015  // Pack the row offsets into k_ptrs, by doing a sum-scan of
1016  // the array of valid entry counts per row (h_numRowEnt).
1017 
1018  // Total number of entries in the matrix on the calling
1019  // process. We will compute this in the loop below. It's
1020  // cheap to compute and useful as a sanity check.
1021  size_t lclTotalNumEntries = 0;
1022  // This will be a host view of packed row offsets.
1023  typename row_map_type::non_const_type::HostMirror h_ptrs;
1024  {
1025  // Allocate the packed row offsets array. We use a nonconst
1026  // temporary (packedRowOffsets) here, because k_ptrs is
1027  // const. We will assign packedRowOffsets to k_ptrs below.
1028  typename row_map_type::non_const_type packedRowOffsets ("Tpetra::CrsGraph::ptr",
1029  lclNumRows+1);
1030  //
1031  // FIXME hack until we get parallel_scan in Kokkos
1032  //
1033  // Unlike in the 2-D storage case above, we don't need the
1034  // host view of the packed row offsets array after packing
1035  // the row offsets.
1036  h_ptrs = create_mirror_view (packedRowOffsets);
1037  h_ptrs(0) = 0;
1038  for (size_type i = 0; i < static_cast<size_type> (lclNumRows); ++i) {
1039  const size_t numEnt = h_numRowEnt(i);
1040  lclTotalNumEntries += numEnt;
1041  h_ptrs(i+1) = h_ptrs(i) + numEnt;
1042  }
1043  Kokkos::deep_copy (packedRowOffsets, h_ptrs);
1044  // packedRowOffsets is modifiable; k_ptrs isn't, so we have
1045  // to use packedRowOffsets in the loop above and assign here.
1046  k_ptrs = packedRowOffsets;
1047  k_ptrs_const = k_ptrs;
1048  }
1049 
1050  TEUCHOS_TEST_FOR_EXCEPTION(
1051  static_cast<size_t> (k_ptrs.dimension_0 ()) != lclNumRows + 1,
1052  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: For "
1053  "the StaticProfile unpacked-but-pack branch, after packing k_ptrs, "
1054  "k_ptrs.dimension_0() = " << k_ptrs.dimension_0 () << " != "
1055  "lclNumRows+1 = " << (lclNumRows+1) << ".");
1056  // FIXME (mfh 06 Aug 2014) This assumes UVM.
1057  TEUCHOS_TEST_FOR_EXCEPTION(
1058  k_ptrs(lclNumRows) != lclTotalNumEntries, std::logic_error,
1059  "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: In StaticProfile "
1060  "unpacked-but-pack branch, after filling k_ptrs, k_ptrs(lclNumRows="
1061  << lclNumRows << ") = " << k_ptrs(lclNumRows) << " != total number "
1062  "of entries on the calling process = " << lclTotalNumEntries << ".");
1063 
1064  // Allocate the arrays of packed column indices and values.
1065  k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
1066  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1067 
1068  // curRowOffsets (myGraph_->k_rowPtrs_) (???), k_lclInds1D_,
1069  // and k_values1D_ are currently unpacked. Pack them, using
1070  // the packed row offsets array k_ptrs that we created above.
1071  //
1072  // FIXME (mfh 06 Aug 2014) If "Optimize Storage" is false, we
1073  // need to keep around the unpacked row offsets, column
1074  // indices, and values arrays.
1075 
1076  // Pack the column indices from unpacked k_lclInds1D_ into
1077  // packed k_inds. We will replace k_lclInds1D_ below.
1078  typedef pack_functor<typename Graph::local_graph_type::entries_type::non_const_type,
1079  typename Graph::local_graph_type::row_map_type>
1080  inds_packer_type;
1081  inds_packer_type indsPacker (k_inds, myGraph_->k_lclInds1D_,
1082  k_ptrs, curRowOffsets);
1083  Kokkos::parallel_for (lclNumRows, indsPacker);
1084 
1085  // Pack the values from unpacked k_values1D_ into packed
1086  // k_vals. We will replace k_values1D_ below.
1087  typedef pack_functor<values_type, row_map_type> vals_packer_type;
1088  vals_packer_type valsPacker (k_vals, this->k_values1D_,
1089  k_ptrs, curRowOffsets);
1090  Kokkos::parallel_for (lclNumRows, valsPacker);
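 // For example (illustrative): a row with 2 valid entries sitting in an
 // unpacked slot of capacity 4 has those 2 entries copied from offset
 // curRowOffsets(r) in the unpacked arrays to offset k_ptrs(r) in the
 // packed arrays; the 2 unused slots are simply not copied.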
1091 
1092  TEUCHOS_TEST_FOR_EXCEPTION(
1093  k_ptrs.dimension_0 () == 0, std::logic_error, "Tpetra::CrsMatrix::"
1094  "fillLocalGraphAndMatrix: In StaticProfile \"Optimize Storage\" = "
1095  "true branch, after packing, k_ptrs.dimension_0() = 0. This "
1096  "probably means that k_rowPtrs_ was never allocated.");
1097  if (k_ptrs.dimension_0 () != 0) {
1098  const size_t numOffsets = static_cast<size_t> (k_ptrs.dimension_0 ());
1099  TEUCHOS_TEST_FOR_EXCEPTION(
1100  static_cast<size_t> (k_ptrs(numOffsets - 1)) != k_vals.dimension_0 (),
1101  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: "
1102  "In StaticProfile \"Optimize Storage\"=true branch, after packing, "
1103  "k_ptrs(" << (numOffsets-1) << ") = " << k_ptrs(numOffsets-1) <<
1104  " != k_vals.dimension_0() = " << k_vals.dimension_0 () << ".");
1105  TEUCHOS_TEST_FOR_EXCEPTION(
1106  static_cast<size_t> (k_ptrs(numOffsets - 1)) != k_inds.dimension_0 (),
1107  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: "
1108  "In StaticProfile \"Optimize Storage\"=true branch, after packing, "
1109  "k_ptrs(" << (numOffsets-1) << ") = " << k_ptrs(numOffsets-1) <<
1110  " != k_inds.dimension_0() = " << k_inds.dimension_0 () << ".");
1111  }
1112  }
1113  else { // We don't have to pack, so just set the pointers.
1114  k_ptrs_const = myGraph_->k_rowPtrs_;
1115  k_inds = myGraph_->k_lclInds1D_;
1116  k_vals = this->k_values1D_;
1117 
1118  TEUCHOS_TEST_FOR_EXCEPTION(
1119  k_ptrs_const.dimension_0 () == 0, std::logic_error, "Tpetra::CrsMatrix::"
1120  "fillLocalGraphAndMatrix: In StaticProfile \"Optimize Storage\" = "
1121  "false branch, k_ptrs_const.dimension_0() = 0. This probably means that "
1122  "k_rowPtrs_ was never allocated.");
1123  if (k_ptrs_const.dimension_0 () != 0) {
1124  const size_t numOffsets = static_cast<size_t> (k_ptrs_const.dimension_0 ());
1125  TEUCHOS_TEST_FOR_EXCEPTION(
1126  static_cast<size_t> (k_ptrs_const(numOffsets - 1)) != k_vals.dimension_0 (),
1127  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: "
1128  "In StaticProfile \"Optimize Storage\" = false branch, "
1129  "k_ptrs_const(" << (numOffsets-1) << ") = " << k_ptrs_const(numOffsets - 1)
1130  << " != k_vals.dimension_0() = " << k_vals.dimension_0 () << ".");
1131  TEUCHOS_TEST_FOR_EXCEPTION(
1132  static_cast<size_t> (k_ptrs_const(numOffsets - 1)) != k_inds.dimension_0 (),
1133  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: "
1134  "In StaticProfile \"Optimize Storage\" = false branch, "
1135  "k_ptrs_const(" << (numOffsets-1) << ") = " << k_ptrs_const(numOffsets - 1)
1136  << " != k_inds.dimension_0() = " << k_inds.dimension_0 () << ".");
1137  }
1138  }
1139  }
1140 
1141  // Extra sanity checks.
1142  TEUCHOS_TEST_FOR_EXCEPTION(
1143  static_cast<size_t> (k_ptrs_const.dimension_0 ()) != lclNumRows + 1,
1144  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: After "
1145  "packing, k_ptrs_const.dimension_0() = " << k_ptrs_const.dimension_0 ()
1146  << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
1147  if (k_ptrs_const.dimension_0 () != 0) {
1148  const size_t numOffsets = static_cast<size_t> (k_ptrs_const.dimension_0 ());
1149  TEUCHOS_TEST_FOR_EXCEPTION(
1150  static_cast<size_t> (k_ptrs_const(numOffsets - 1)) != k_vals.dimension_0 (),
1151  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: After "
1152  "packing, k_ptrs_const(" << (numOffsets-1) << ") = " << k_ptrs_const(numOffsets-1)
1153  << " != k_vals.dimension_0() = " << k_vals.dimension_0 () << ".");
1154  TEUCHOS_TEST_FOR_EXCEPTION(
1155  static_cast<size_t> (k_ptrs_const(numOffsets - 1)) != k_inds.dimension_0 (),
1156  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: After "
1157  "packing, k_ptrs_const(" << (numOffsets-1) << ") = " << k_ptrs_const(numOffsets-1)
1158  << " != k_inds.dimension_0() = " << k_inds.dimension_0 () << ".");
1159  }
1160 
1161  // May we ditch the old allocations for the packed (and otherwise
1162  // "optimized") allocations, later in this routine? Optimize
1163  // storage if the graph is not static, or if the graph already has
1164  // optimized storage.
1165  const bool defaultOptStorage =
1166  ! isStaticGraph () || staticGraph_->isStorageOptimized ();
1167  const bool requestOptimizedStorage =
1168  (! params.is_null () && params->get ("Optimize Storage", defaultOptStorage)) ||
1169  (params.is_null () && defaultOptStorage);
1170 
1171  // The graph has optimized storage when indices are allocated,
1172  // myGraph_->k_numRowEntries_ is empty, and there are more than
1173  // zero rows on this process. It's impossible for the graph to
1174  // have dynamic profile (getProfileType() == DynamicProfile) and
1175  // be optimized (isStorageOptimized()).
1176  if (requestOptimizedStorage) {
1177  // Free the old, unpacked, unoptimized allocations.
1178  // Change the graph from dynamic to static allocation profile
1179 
1180  // Free graph data structures that are only needed for 2-D or
1181  // unpacked 1-D storage.
1182  myGraph_->lclInds2D_ = null; // legacy KokkosClassic 2-D storage
1183  myGraph_->k_numRowEntries_ = row_entries_type ();
1184 
1185  // Free the matrix's 2-D storage.
1186  this->values2D_ = null;
1187 
1188  // Keep the new 1-D packed allocations.
1189  myGraph_->k_rowPtrs_ = k_ptrs_const;
1190  myGraph_->k_lclInds1D_ = k_inds;
1191  this->k_values1D_ = k_vals;
1192 
1193  // Storage is packed now, so the number of allocated entries is
1194  // the same as the actual number of entries.
1195  myGraph_->nodeNumAllocated_ = myGraph_->nodeNumEntries_;
1196  // The graph is definitely StaticProfile now, whether or not it
1197  // was before.
1198  myGraph_->pftype_ = StaticProfile;
1199  myGraph_->storageStatus_ = Details::STORAGE_1D_PACKED;
1200  this->storageStatus_ = Details::STORAGE_1D_PACKED;
1201  }
1202 
1203  // Make the local graph, using the arrays of row offsets and
1204  // column indices that we built above. The local graph should be
1205  // null, but we delete it first so that any memory can be freed
1206  // before we allocate the new one.
1207  //
1208  // FIXME (mfh 06,28 Aug 2014) It would make more sense for
1209  // Tpetra::CrsGraph to have a protected method that accepts k_inds
1210  // and k_ptrs, and creates the local graph lclGraph_.
1211  myGraph_->lclGraph_ =
1212  typename Graph::local_graph_type (k_inds, k_ptrs_const);
1213 
1214  // Make the local matrix, using the local graph and vals array.
1215  lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
1216  getNodeNumCols (), k_vals,
1217  myGraph_->lclGraph_);
1218  }
1219 
1220  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1221  void
1223  fillLocalMatrix (const Teuchos::RCP<Teuchos::ParameterList>& params)
1224  {
1225  using Kokkos::create_mirror_view;
1226  using Teuchos::ArrayRCP;
1227  using Teuchos::Array;
1228  using Teuchos::null;
1229  using Teuchos::RCP;
1230  using Teuchos::rcp;
1231  typedef LocalOrdinal LO;
1232  typedef typename Graph::t_numRowEntries_ row_entries_type;
1233  typedef typename Graph::local_graph_type::row_map_type row_map_type;
1234  typedef typename row_map_type::non_const_type non_const_row_map_type;
1235  typedef typename local_matrix_type::values_type values_type;
1236 
1237  const size_t lclNumRows = getNodeNumRows();
1238  const map_type& rowMap = * (getRowMap ());
1239  RCP<node_type> node = rowMap.getNode ();
1240 
1241  // The goals of this routine are first, to allocate and fill
1242  // packed 1-D storage (see below for an explanation) in the vals
1243  // array, and second, to give vals to the local matrix and
1244  // finalize the local matrix. We only need k_ptrs, the packed 1-D
1245  // row offsets, within the scope of this routine, since we're only
1246  // filling the local matrix here (use fillLocalGraphAndMatrix() to
1247  // fill both the graph and the matrix at the same time).
1248 
1249  // get data from staticGraph_
1250  ArrayRCP<Array<LO> > lclInds2D = staticGraph_->lclInds2D_;
1251  size_t nodeNumEntries = staticGraph_->nodeNumEntries_;
1252  size_t nodeNumAllocated = staticGraph_->nodeNumAllocated_;
1253  row_map_type k_rowPtrs_ = staticGraph_->lclGraph_.row_map;
1254 
1255  row_map_type k_ptrs; // "packed" row offsets array
1256  values_type k_vals; // "packed" values array
1257 
1258  // May we ditch the old allocations for the packed (and otherwise
1259  // "optimized") allocations, later in this routine? Request
1260  // optimized storage by default.
1261  bool requestOptimizedStorage = true;
1262  const bool default_OptimizeStorage =
1263  ! isStaticGraph () || staticGraph_->isStorageOptimized ();
1264  if (! params.is_null () && ! params->get ("Optimize Storage", default_OptimizeStorage)) {
1265  requestOptimizedStorage = false;
1266  }
1267  // If we're not allowed to change a static graph, then we can't
1268  // change the storage of the matrix, either. This means that if
1269  // the graph's storage isn't already optimized, we can't optimize
1270  // the matrix's storage either. Check and give warning, as
1271  // appropriate.
1272  if (! staticGraph_->isStorageOptimized () && requestOptimizedStorage) {
1273  TPETRA_ABUSE_WARNING(true, std::runtime_error,
1274  "::fillLocalMatrix(): You requested optimized storage by setting the "
1275  "\"Optimize Storage\" flag to \"true\" in the parameter list, or by virtue "
1276  "of default behavior. However, the associated CrsGraph was filled separately "
1277  "and requested not to optimize storage. Therefore, the CrsMatrix cannot "
1278  "optimize storage.");
1279  requestOptimizedStorage = false;
1280  }
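 // Example (illustrative sketch, not normative): callers control this
 // behavior through the "Optimize Storage" parameter of fillComplete,
 // e.g., using the matrix A and Maps assumed in the earlier sketches:
 //
 //   Teuchos::RCP<Teuchos::ParameterList> p = Teuchos::parameterList ();
 //   p->set ("Optimize Storage", false); // keep unpacked storage
 //   A.fillComplete (domainMap, rangeMap, p);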
1281 
1282  // The number of entries in each locally owned row. This is a
1283  // DualView. 2-D storage lives on host and is currently not
1284  // thread-safe for parallel kernels even on host, so we have to
1285  // work sequentially with host storage in that case.
1286  row_entries_type k_numRowEnt = staticGraph_->k_numRowEntries_;
1287  typename row_entries_type::t_host h_numRowEnt = k_numRowEnt.h_view;
1288 
1289  if (getProfileType() == DynamicProfile) {
1290  // Pack 2-D storage (DynamicProfile) into 1-D packed storage.
1291  //
1292  // DynamicProfile means that the matrix's values are currently
1293  // stored in a 2-D "unpacked" format, in the array-of-arrays
1294  // values2D_. We allocate 1-D storage and then copy from 2-D
1295  // storage in values2D_ into 1-D storage in k_vals. Since we're
1296  // only allocating the local matrix here, not the local graph,
1297  // we don't need to keep the row offsets array, but we do need
1298  // it here temporarily in order to convert to 1-D storage. (The
1299  // allocStorage() function needs it.) We'll free ptrs later in
1300  // this method.
1301  //
1302  // FIXME (mfh 08 Aug 2014) If we're in this method, then the
1303  // graph should already have packed 1-D storage. Why can't we
1304  // just use the graph's current row offsets array?
1305 
1306  // Pack the row offsets into k_ptrs, by doing a sum-scan of
1307  // the array of valid entry counts per row (h_numRowEnt).
1308  //
1309  // Total number of entries in the matrix on the calling
1310  // process. We will compute this in the loop below. It's
1311  // cheap to compute and useful as a sanity check.
1312  size_t lclTotalNumEntries = 0;
1313  // This will be a host view of packed row offsets.
1314  typename non_const_row_map_type::HostMirror h_ptrs;
1315  {
1316  non_const_row_map_type packedRowOffsets ("Tpetra::CrsGraph::ptr",
1317  lclNumRows+1);
1318  //
1319  // FIXME hack until we get parallel_scan in Kokkos
1320  //
1321  h_ptrs = create_mirror_view (packedRowOffsets);
1322  h_ptrs(0) = 0;
1323  for (size_t i = 0; i < lclNumRows; ++i) {
1324  const size_t numEnt = h_numRowEnt(i);
1325  lclTotalNumEntries += numEnt;
1326  h_ptrs(i+1) = h_ptrs(i) + numEnt;
1327  }
1328  Kokkos::deep_copy (packedRowOffsets, h_ptrs);
1329  k_ptrs = packedRowOffsets;
1330  }
1331 
1332  TEUCHOS_TEST_FOR_EXCEPTION(
1333  static_cast<size_t> (k_ptrs.dimension_0 ()) != lclNumRows + 1,
1334  std::logic_error, "Tpetra::CrsMatrix::fillLocalMatrix: In "
1335  "DynamicProfile branch, after packing k_ptrs, k_ptrs.dimension_0()"
1336  " = " << k_ptrs.dimension_0 () << " != (lclNumRows+1) = "
1337  << (lclNumRows+1) << ".");
1338  TEUCHOS_TEST_FOR_EXCEPTION(
1339  static_cast<size_t> (h_ptrs.dimension_0 ()) != lclNumRows + 1,
1340  std::logic_error, "Tpetra::CrsMatrix::fillLocalMatrix: In "
1341  "DynamicProfile branch, after packing h_ptrs, h_ptrs.dimension_0()"
1342  " = " << h_ptrs.dimension_0 () << " != (lclNumRows+1) = "
1343  << (lclNumRows+1) << ".");
1344  // FIXME (mfh 08 Aug 2014) This assumes UVM.
1345  TEUCHOS_TEST_FOR_EXCEPTION(
1346  k_ptrs(lclNumRows) != lclTotalNumEntries, std::logic_error,
1347  "Tpetra::CrsMatrix::fillLocalMatrix: In DynamicProfile branch, "
1348  "after packing k_ptrs, k_ptrs(lclNumRows = " << lclNumRows << ") = " <<
1349  k_ptrs(lclNumRows) << " != total number of entries on the calling "
1350  "process = " << lclTotalNumEntries << ".");
1351 
1352  // Allocate the array of packed values.
1353  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1354  // We need a host view of the above, since 2-D storage lives on host.
1355  typename values_type::HostMirror h_vals = create_mirror_view (k_vals);
1356  // Pack the values on the host.
1357  for (size_t lclRow = 0; lclRow < lclNumRows; ++lclRow) {
1358  const size_t numEnt = h_numRowEnt(lclRow);
1359  std::copy (values2D_[lclRow].begin(),
1360  values2D_[lclRow].begin() + numEnt,
1361  h_vals.ptr_on_device() + h_ptrs(lclRow));
1362  }
1363  // Copy the packed values to the device.
1364  Kokkos::deep_copy (k_vals, h_vals);
1365 
1366  // Sanity check of packed row offsets.
1367  if (k_ptrs.dimension_0 () != 0) {
1368  const size_t numOffsets = static_cast<size_t> (k_ptrs.dimension_0 ());
1369  TEUCHOS_TEST_FOR_EXCEPTION(
1370  static_cast<size_t> (k_ptrs(numOffsets-1)) != k_vals.dimension_0 (),
1371  std::logic_error, "Tpetra::CrsMatrix::fillLocalMatrix: "
1372  "In DynamicProfile branch, after packing, k_ptrs(" << (numOffsets-1)
1373  << ") = " << k_ptrs(numOffsets-1) << " != k_vals.dimension_0() = "
1374  << k_vals.dimension_0 () << ".");
1375  }
1376  }
1377  else if (getProfileType () == StaticProfile) {
1378  // StaticProfile means that the matrix's values are currently
1379  // stored in a 1-D format. However, this format is "unpacked";
1380  // it doesn't necessarily have the same row offsets as indicated
1381  // by the ptrs array returned by allocRowPtrs. This could
1382  // happen, for example, if the user specified StaticProfile in
1383  // the constructor and fixed the number of matrix entries in
1384  // each row, but didn't fill all those entries.
1385  //
1386  // As above, we don't need to keep the "packed" row offsets
1387  // array ptrs beyond this method, but we do need it temporarily,
1388  // so we have to allocate it. We'll free ptrs later in this method.
1389  //
1390  // Note that this routine checks whether storage has already
1391  // been packed. This is a common case for solution of nonlinear
1392  // PDEs using the finite element method, as long as the
1393  // structure of the sparse matrix does not change between linear
1394  // solves.
1395  if (nodeNumEntries != nodeNumAllocated) {
1396  // We have to pack the 1-D storage, since the user didn't fill
1397  // up all requested storage.
1398  non_const_row_map_type tmpk_ptrs ("Tpetra::CrsGraph::ptr",
1399  lclNumRows+1);
1400  // Total number of entries in the matrix on the calling
1401  // process. We will compute this in the loop below. It's
1402  // cheap to compute and useful as a sanity check.
1403  size_t lclTotalNumEntries = 0;
1404  k_ptrs = tmpk_ptrs;
1405  {
1406  //
1407  // FIXME hack until we get parallel_scan in Kokkos
1408  //
1409  typename non_const_row_map_type::HostMirror h_ptrs =
1410  create_mirror_view (tmpk_ptrs);
1411  h_ptrs(0) = 0;
1412  for (size_t i = 0; i < lclNumRows; ++i) {
1413  const size_t numEnt = h_numRowEnt(i);
1414  lclTotalNumEntries += numEnt;
1415  h_ptrs(i+1) = h_ptrs(i) + numEnt;
1416  }
1417  Kokkos::deep_copy (tmpk_ptrs, h_ptrs);
1418  }
1419 
1420  // Allocate the "packed" values array.
1421  // It has exactly the right number of entries.
1422  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1423 
1424  // Pack k_values1D_ into k_vals. We will replace k_values1D_ below.
1425  typedef pack_functor<values_type, row_map_type> packer_type;
1426  packer_type valsPacker (k_vals, k_values1D_, tmpk_ptrs, k_rowPtrs_);
1427  Kokkos::parallel_for (lclNumRows, valsPacker);
1428  }
1429  else { // We don't have to pack, so just set the pointer.
1430  k_vals = k_values1D_;
1431  }
1432  }
1433 
1434  // May we ditch the old allocations for the packed one?
1435  if (requestOptimizedStorage) {
1436  // The user requested optimized storage, so we can dump the
1437  // unpacked 2-D and 1-D storage, and keep the packed storage.
1438  values2D_ = null;
1439  k_values1D_ = k_vals;
1440  this->storageStatus_ = Details::STORAGE_1D_PACKED;
1441  }
1442 
1443  // Build the local sparse matrix object. At this point, the local
1444  // matrix certainly has a column Map. Remember that the local
1445  // matrix's number of columns comes from the column Map, not the
1446  // domain Map.
1447  lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
1448  getColMap ()->getNodeNumElements (),
1449  k_vals,
1450  staticGraph_->getLocalGraph ());
1451  }
1452 
1453  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1454  void
1455  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
1456  insertLocalValues (const LocalOrdinal localRow,
1457  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1458  const Teuchos::ArrayView<const Scalar>& values)
1459  {
1460  using Teuchos::Array;
1461  using Teuchos::ArrayView;
1462  using Teuchos::av_reinterpret_cast;
1463  using Teuchos::toString;
1464  using std::endl;
1465  const char tfecfFuncName[] = "insertLocalValues";
1466 
1467  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillActive (), std::runtime_error,
1468  ": Fill is not active. After calling fillComplete, you must call "
1469  "resumeFill before you may insert entries into the matrix again.");
1470  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isStaticGraph (), std::runtime_error,
1471  " cannot insert indices with static graph; use replaceLocalValues() instead.");
1472  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(myGraph_->isGloballyIndexed(),
1473  std::runtime_error, ": graph indices are global; use insertGlobalValues().");
1474  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! hasColMap (), std::runtime_error,
1475  " cannot insert local indices without a column map.");
1476  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(values.size() != indices.size(),
1477  std::runtime_error, ": values.size() must equal indices.size().");
1478  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1479  ! getRowMap()->isNodeLocalElement(localRow), std::runtime_error,
1480  ": Local row index " << localRow << " does not belong to this process.");
1481 
1482  if (! myGraph_->indicesAreAllocated ()) {
1483  try {
1484  allocateValues (LocalIndices, GraphNotYetAllocated);
1485  }
1486  catch (std::exception& e) {
1487  TEUCHOS_TEST_FOR_EXCEPTION(
1488  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1489  "allocateValues(LocalIndices,GraphNotYetAllocated) threw an "
1490  "exception: " << e.what ());
1491  }
1492  }
1493 
1494  const size_t numEntriesToAdd = static_cast<size_t> (indices.size ());
1495 #ifdef HAVE_TPETRA_DEBUG
1496  // In a debug build, if the matrix has a column Map, test whether
1497  // any of the given column indices are not in the column Map.
1498  // Keep track of the invalid column indices so we can tell the
1499  // user about them.
1500  if (hasColMap ()) {
1501  const map_type& colMap = * (getColMap ());
1502  Array<LocalOrdinal> badColInds;
1503  bool allInColMap = true;
1504  for (size_t k = 0; k < numEntriesToAdd; ++k) {
1505  if (! colMap.isNodeLocalElement (indices[k])) {
1506  allInColMap = false;
1507  badColInds.push_back (indices[k]);
1508  }
1509  }
1510  if (! allInColMap) {
1511  std::ostringstream os;
1512  os << "Tpetra::CrsMatrix::insertLocalValues: You attempted to insert "
1513  "entries in owned row " << localRow << ", at the following column "
1514  "indices: " << toString (indices) << "." << endl;
1515  os << "Of those, the following indices are not in the column Map on "
1516  "this process: " << toString (badColInds) << "." << endl << "Since "
1517  "the matrix has a column Map already, it is invalid to insert "
1518  "entries at those locations.";
1519  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
1520  }
1521  }
1522 #endif // HAVE_TPETRA_DEBUG
1523 
1524 #ifdef HAVE_TPETRA_DEBUG
1525  RowInfo rowInfo;
1526  try {
1527  rowInfo = myGraph_->getRowInfo (localRow);
1528  } catch (std::exception& e) {
1529  TEUCHOS_TEST_FOR_EXCEPTION(
1530  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1531  "myGraph_->getRowInfo threw an exception: " << e.what ());
1532  }
1533 #else
1534  RowInfo rowInfo = myGraph_->getRowInfo (localRow);
1535 #endif // HAVE_TPETRA_DEBUG
1536 
1537  const size_t curNumEntries = rowInfo.numEntries;
1538  const size_t newNumEntries = curNumEntries + numEntriesToAdd;
1539  if (newNumEntries > rowInfo.allocSize) {
1540  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1541  getProfileType() == StaticProfile, std::runtime_error,
1542  ": new indices exceed statically allocated graph structure.");
1543 
1544  // Make space for the new matrix entries.
1545  try {
1546  rowInfo = myGraph_->template updateLocalAllocAndValues<impl_scalar_type> (rowInfo,
1547  newNumEntries,
1548  values2D_[localRow]);
1549  } catch (std::exception& e) {
1550  TEUCHOS_TEST_FOR_EXCEPTION(
1551  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1552  "myGraph_->updateGlobalAllocAndValues threw an exception: "
1553  << e.what ());
1554  }
1555  }
1556  typename Graph::SLocalGlobalViews indsView;
1557  indsView.linds = indices;
1558 
1559 #ifdef HAVE_TPETRA_DEBUG
1560  ArrayView<impl_scalar_type> valsView;
1561  try {
1562  valsView = this->getViewNonConst (rowInfo);
1563  } catch (std::exception& e) {
1564  TEUCHOS_TEST_FOR_EXCEPTION(
1565  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1566  "getViewNonConst threw an exception: " << e.what ());
1567  }
1568 #else
1569  ArrayView<impl_scalar_type> valsView = this->getViewNonConst (rowInfo);
1570 #endif // HAVE_TPETRA_DEBUG
1571 
1572  ArrayView<const impl_scalar_type> valsIn =
1573  av_reinterpret_cast<const impl_scalar_type> (values);
1574  try {
1575  myGraph_->template insertIndicesAndValues<impl_scalar_type> (rowInfo, indsView,
1576  valsView, valsIn,
1577  LocalIndices,
1578  LocalIndices);
1579  } catch (std::exception& e) {
1580  TEUCHOS_TEST_FOR_EXCEPTION(
1581  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1582  "myGraph_->insertIndicesAndValues threw an exception: "
1583  << e.what ());
1584  }
1585 
1586 #ifdef HAVE_TPETRA_DEBUG
1587  const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow (localRow);
1588  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1589  chkNewNumEntries != newNumEntries, std::logic_error,
1590  ": The row should have " << newNumEntries << " entries after insert, but "
1591  "instead has " << chkNewNumEntries << ". Please report this bug to the "
1592  "Tpetra developers.");
1593  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isLocallyIndexed(), std::logic_error,
1594  ": At end of insertLocalValues(), this CrsMatrix is not locally indexed. "
1595  "Please report this bug to the Tpetra developers.");
1596 #endif // HAVE_TPETRA_DEBUG
1597  }
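
 // Usage sketch (caller-side code, not part of this file): inserting two
 // entries with local column indices.  The names A, lclRow, and the
 // column indices are hypothetical; this assumes A is fill active, has a
 // column Map, owns local row lclRow, and that columns 0 and 1 are in
 // its column Map on the calling process.
 //
 //   Teuchos::Array<LocalOrdinal> cols (2);
 //   Teuchos::Array<Scalar> vals (2);
 //   cols[0] = 0;  vals[0] = Teuchos::ScalarTraits<Scalar>::one ();
 //   cols[1] = 1;  vals[1] = vals[0] + vals[0];
 //   A.insertLocalValues (lclRow, cols (), vals ());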
1598 
1599  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1600  void
1601  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
1602  insertLocalValuesFiltered (const LocalOrdinal localRow,
1603  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1604  const Teuchos::ArrayView<const Scalar>& values)
1605  {
1606  using Teuchos::Array;
1607  using Teuchos::ArrayView;
1608  using Teuchos::av_reinterpret_cast;
1609  const char tfecfFuncName[] = "insertLocalValuesFiltered: ";
1610 
1611  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillActive (), std::runtime_error,
1612  "Requires that fill is active.");
1613  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isStaticGraph (), std::runtime_error,
1614  "Cannot insert indices with static graph; use replaceLocalValues() instead.");
1615  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(myGraph_->isGloballyIndexed(),
1616  std::runtime_error, "Graph indices are global; use insertGlobalValues().");
1617  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1618  ! hasColMap (), std::runtime_error, "The matrix has no column Map yet, "
1619  "so you cannot insert local indices. If you created the matrix without "
1620  "a column Map (or without a fill-complete graph), you must call "
1621  "fillComplete to create the column Map, before you may work with local "
1622  "indices.");
1623  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1624  values.size () != indices.size (), std::runtime_error, "values.size() = "
1625  << values.size () << " != indices.size() = " << indices.size ()<< ".");
1626  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1627  ! getRowMap()->isNodeLocalElement (localRow), std::runtime_error,
1628  "Local row index " << localRow << " does not belong to this process.");
1629  if (! myGraph_->indicesAreAllocated ()) {
1630  allocateValues (LocalIndices, GraphNotYetAllocated);
1631  }
1632  // Use the graph to filter incoming entries whose column indices
1633  // aren't in the column Map.
1634  Array<LocalOrdinal> f_inds (indices);
1635  ArrayView<const impl_scalar_type> valsIn =
1636  av_reinterpret_cast<const impl_scalar_type> (values);
1637  Array<impl_scalar_type> f_vals (valsIn);
1638  const size_t numFilteredEntries =
1639  myGraph_->template filterLocalIndicesAndValues<impl_scalar_type> (f_inds (),
1640  f_vals ());
1641  if (numFilteredEntries > 0) {
1642  RowInfo rowInfo = myGraph_->getRowInfo (localRow);
1643  const size_t curNumEntries = rowInfo.numEntries;
1644  const size_t newNumEntries = curNumEntries + numFilteredEntries;
1645  if (newNumEntries > rowInfo.allocSize) {
1646  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1647  getProfileType () == StaticProfile, std::runtime_error,
1648  ": new indices exceed statically allocated graph structure. "
1649  "newNumEntries (" << newNumEntries << " > rowInfo.allocSize ("
1650  << rowInfo.allocSize << ").");
1651  // Make space for the new matrix entries.
1652  rowInfo =
1653  myGraph_->template updateLocalAllocAndValues<impl_scalar_type> (rowInfo,
1654  newNumEntries,
1655  values2D_[localRow]);
1656  }
1657  typename Graph::SLocalGlobalViews inds_view;
1658  inds_view.linds = f_inds (0, numFilteredEntries);
1659  myGraph_->template insertIndicesAndValues<impl_scalar_type> (rowInfo, inds_view,
1660  this->getViewNonConst (rowInfo),
1661  f_vals, LocalIndices,
1662  LocalIndices);
1663 #ifdef HAVE_TPETRA_DEBUG
1664  const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow (localRow);
1665  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(chkNewNumEntries != newNumEntries,
1666  std::logic_error, ": Internal logic error. Please contact Tpetra team.");
1667 #endif // HAVE_TPETRA_DEBUG
1668  }
1669 #ifdef HAVE_TPETRA_DEBUG
1670  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isLocallyIndexed(), std::logic_error,
1671  ": At end of insertLocalValues(), this CrsMatrix is not locally indexed. "
1672  "Please report this bug to the Tpetra developers.");
1673 #endif // HAVE_TPETRA_DEBUG
1674  }
1675 
1676 
1677  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1678  void
1679  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
1680  insertGlobalValues (const GlobalOrdinal globalRow,
1681  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1682  const Teuchos::ArrayView<const Scalar>& values)
1683  {
1684  using Teuchos::Array;
1685  using Teuchos::ArrayView;
1686  using Teuchos::av_reinterpret_cast;
1687  using Teuchos::toString;
1688  using std::endl;
1689  typedef LocalOrdinal LO;
1690  typedef GlobalOrdinal GO;
1691  typedef typename ArrayView<const GO>::size_type size_type;
1692  const char tfecfFuncName[] = "insertGlobalValues: ";
1693 
1694 #ifdef HAVE_TPETRA_DEBUG
1695  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1696  values.size () != indices.size (), std::runtime_error,
1697  "values.size() = " << values.size() << " != indices.size() = "
1698  << indices.size() << ".");
1699 #endif // HAVE_TPETRA_DEBUG
1700 
1701  const LO localRow = getRowMap ()->getLocalElement (globalRow);
1702 
1703  if (localRow == OTL::invalid ()) { // globalRow _not_ owned by calling process
1704  insertNonownedGlobalValues (globalRow, indices, values);
1705  }
1706  else { // globalRow _is_ owned by calling process
1707  if (this->isStaticGraph ()) {
1708  // Uh oh! Not allowed to insert into owned rows in that case.
1709  std::ostringstream err;
1710  const int myRank = getRowMap ()->getComm ()->getRank ();
1711  const int numProcs = getRowMap ()->getComm ()->getSize ();
1712 
1713  err << "The matrix was constructed with a constant (\"static\") graph, "
1714  "yet the given global row index " << globalRow << " is in the row "
1715  "Map on the calling process (with rank " << myRank << ", of " <<
1716  numProcs << " process(es)). In this case, you may not insert new "
1717  "entries into rows owned by the calling process.";
1718 
1719  if (! getRowMap ()->isNodeGlobalElement (globalRow)) {
1720  err << " Furthermore, GID->LID conversion with the row Map claims that "
1721  "the global row index is owned on the calling process, yet "
1722  "getRowMap()->isNodeGlobalElement(globalRow) returns false. That's"
1723  " weird! This might indicate a Map bug. Please report this to the"
1724  " Tpetra developers.";
1725  }
1726  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1727  this->isStaticGraph (), std::runtime_error, err.str ());
1728  }
1729 
1730  if (! myGraph_->indicesAreAllocated ()) {
1731  try {
1732  allocateValues (GlobalIndices, GraphNotYetAllocated);
1733  }
1734  catch (std::exception& e) {
1735  TEUCHOS_TEST_FOR_EXCEPTION(
1736  true, std::runtime_error, "Tpetra::CrsMatrix::insertGlobalValues: "
1737  "allocateValues(GlobalIndices,GraphNotYetAllocated) threw an "
1738  "exception: " << e.what ());
1739  }
1740  }
1741 
1742  const size_type numEntriesToInsert = indices.size ();
1743  // If the matrix has a column Map, check at this point whether
1744  // the column indices belong to the column Map.
1745  //
1746  // FIXME (mfh 16 May 2013) We may want to consider deferring the
1747  // test to the CrsGraph method, since it may have to do this
1748  // anyway.
1749  if (hasColMap ()) {
1750  const map_type& colMap = * (getColMap ());
1751  // In a debug build, keep track of the nonowned ("bad") column
1752  // indices, so that we can display them in the exception
1753  // message. In a release build, just ditch the loop early if
1754  // we encounter a nonowned column index.
1755 #ifdef HAVE_TPETRA_DEBUG
1756  Array<GO> badColInds;
1757 #endif // HAVE_TPETRA_DEBUG
1758  bool allInColMap = true;
1759  for (size_type k = 0; k < numEntriesToInsert; ++k) {
1760  if (! colMap.isNodeGlobalElement (indices[k])) {
1761  allInColMap = false;
1762 #ifdef HAVE_TPETRA_DEBUG
1763  badColInds.push_back (indices[k]);
1764 #else
1765  break;
1766 #endif // HAVE_TPETRA_DEBUG
1767  }
1768  }
1769  if (! allInColMap) {
1770  std::ostringstream os;
1771  os << "You attempted to insert entries in owned row " << globalRow
1772  << ", at the following column indices: " << toString (indices)
1773  << "." << endl;
1774 #ifdef HAVE_TPETRA_DEBUG
1775  os << "Of those, the following indices are not in the column Map on "
1776  "this process: " << toString (badColInds) << "." << endl << "Since "
1777  "the matrix has a column Map already, it is invalid to insert "
1778  "entries at those locations.";
1779 #else
1780  os << "At least one of those indices is not in the column Map on this "
1781  "process." << endl << "It is invalid to insert into columns not in "
1782  "the column Map on the process that owns the row.";
1783 #endif // HAVE_TPETRA_DEBUG
1784  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1785  ! allInColMap, std::invalid_argument, os.str ());
1786  }
1787  }
1788 
1789  typename Graph::SLocalGlobalViews inds_view;
1790  ArrayView<const impl_scalar_type> vals_view;
1791 
1792  inds_view.ginds = indices;
1793  vals_view = av_reinterpret_cast<const impl_scalar_type> (values);
1794 
1795 #ifdef HAVE_TPETRA_DEBUG
1796  RowInfo rowInfo;
1797  try {
1798  rowInfo = myGraph_->getRowInfo (localRow);
1799  } catch (std::exception& e) {
1800  TEUCHOS_TEST_FOR_EXCEPTION(
1801  true, std::runtime_error, "myGraph_->getRowInfo(localRow=" << localRow
1802  << ") threw an exception: " << e.what ());
1803  }
1804 #else
1805  RowInfo rowInfo = myGraph_->getRowInfo (localRow);
1806 #endif // HAVE_TPETRA_DEBUG
1807 
1808  const size_t curNumEntries = rowInfo.numEntries;
1809  const size_t newNumEntries =
1810  curNumEntries + static_cast<size_t> (numEntriesToInsert);
1811  if (newNumEntries > rowInfo.allocSize) {
1812  TEUCHOS_TEST_FOR_EXCEPTION(
1813  getProfileType () == StaticProfile && newNumEntries > rowInfo.allocSize,
1814  std::runtime_error, "Tpetra::CrsMatrix::insertGlobalValues: new "
1815  "indices exceed statically allocated graph structure. curNumEntries"
1816  " (" << curNumEntries << ") + numEntriesToInsert (" <<
1817  numEntriesToInsert << ") > allocSize (" << rowInfo.allocSize << ").");
1818 
1819  // Update allocation only as much as necessary
1820  try {
1821  rowInfo =
1822  myGraph_->template updateGlobalAllocAndValues<impl_scalar_type> (rowInfo,
1823  newNumEntries,
1824  values2D_[localRow]);
1825  } catch (std::exception& e) {
1826  TEUCHOS_TEST_FOR_EXCEPTION(
1827  true, std::runtime_error, "myGraph_->updateGlobalAllocAndValues"
1828  "(...) threw an exception: " << e.what ());
1829  }
1830  }
1831  try {
1832  if (isGloballyIndexed ()) {
1833  // lg=GlobalIndices, I=GlobalIndices means the method calls
1834  // getGlobalViewNonConst() and does direct copying, which
1835  // should be reasonably fast.
1836  myGraph_->template insertIndicesAndValues<impl_scalar_type> (rowInfo, inds_view,
1837  this->getViewNonConst (rowInfo),
1838  vals_view,
1839  GlobalIndices, GlobalIndices);
1840  }
1841  else {
1842  // lg=GlobalIndices, I=LocalIndices means the method calls
1843  // the Map's getLocalElement() method once per entry to
1844  // insert. This may be slow.
1845  myGraph_->template insertIndicesAndValues<impl_scalar_type> (rowInfo, inds_view,
1846  this->getViewNonConst (rowInfo),
1847  vals_view,
1848  GlobalIndices, LocalIndices);
1849  }
1850  }
1851  catch (std::exception& e) {
1852  TEUCHOS_TEST_FOR_EXCEPTION(
1853  true, std::runtime_error, "myGraph_->insertIndicesAndValues(...) "
1854  "threw an exception: " << e.what ());
1855  }
1856 
1857 #ifdef HAVE_TPETRA_DEBUG
1858  const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow (localRow);
1859  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(chkNewNumEntries != newNumEntries,
1860  std::logic_error, ": There should be a total of " << newNumEntries
1861  << " entries in the row, but the graph now reports " << chkNewNumEntries
1862  << " entries. Please report this bug to the Tpetra developers.");
1863 #endif // HAVE_TPETRA_DEBUG
1864  }
1865  }
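
 // Usage sketch (caller-side code, not part of this file): assembling an
 // interior row of a 1-D Laplacian with insertGlobalValues, then calling
 // fillComplete once all processes are done inserting.  A and gblRow are
 // hypothetical names.
 //
 //   const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
 //   Teuchos::Array<GlobalOrdinal> cols (3);
 //   Teuchos::Array<Scalar> vals (3);
 //   cols[0] = gblRow - 1;  vals[0] = -ONE;
 //   cols[1] = gblRow;      vals[1] = ONE + ONE;
 //   cols[2] = gblRow + 1;  vals[2] = -ONE;
 //   A.insertGlobalValues (gblRow, cols (), vals ());
 //   // ... after all insertions, on all processes:
 //   A.fillComplete ();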
1866 
1867 
1868  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1869  void
1870  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
1871  insertGlobalValuesFiltered (const GlobalOrdinal globalRow,
1872  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1873  const Teuchos::ArrayView<const Scalar>& values)
1874  {
1875  using Teuchos::Array;
1876  using Teuchos::ArrayView;
1877  using Teuchos::av_reinterpret_cast;
1878  typedef LocalOrdinal LO;
1879  typedef GlobalOrdinal GO;
1880  typedef impl_scalar_type ST;
1881  const char tfecfFuncName[] = "insertGlobalValuesFiltered: ";
1882 
1883  // mfh 14 Dec 2012: Defer test for static graph until we know that
1884  // globalRow is in the row Map. If it's not in the row Map, it
1885  // doesn't matter whether or not the graph is static; the data
1886  // just get stashed for later use by globalAssemble().
1887  //
1888  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1889  // isStaticGraph(), std::runtime_error,
1890  // ": matrix was constructed with static graph. Cannot insert new entries.");
1891 #ifdef HAVE_TPETRA_DEBUG
1892  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1893  values.size () != indices.size (), std::runtime_error,
1894  "values.size() = " << values.size() << " != indices.size() = "
1895  << indices.size() << ".");
1896 #endif // HAVE_TPETRA_DEBUG
1897 
1898  ArrayView<const ST> valsIn = av_reinterpret_cast<const ST> (values);
1899  const LO lrow = getRowMap ()->getLocalElement (globalRow);
1900 
1901  if (lrow != Teuchos::OrdinalTraits<LO>::invalid ()) { // globalRow is in our row Map.
1902  // If the matrix has a static graph, this process is not allowed
1903  // to insert into rows it owns.
1904  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1905  this->isStaticGraph (), std::runtime_error,
1906  "The matrix was constructed with a static graph. In that case, "
1907  "it is forbidden to insert new entries into rows owned by the "
1908  "calling process.");
1909  if (! myGraph_->indicesAreAllocated ()) {
1910  allocateValues (GlobalIndices, GraphNotYetAllocated);
1911  }
1912  typename Graph::SLocalGlobalViews inds_view;
1913  ArrayView<const ST> vals_view;
1914 
1915  // We have to declare these Arrays here rather than in the
1916  // hasColMap() if branch, so that views to them will remain
1917  // valid for the whole scope.
1918  Array<GO> filtered_indices;
1919  Array<ST> filtered_values;
1920  if (hasColMap ()) { // We have a column Map.
1921  // Use column Map to filter the indices and corresponding
1922  // values, so that we only insert entries into columns we own.
1923  filtered_indices.assign (indices.begin (), indices.end ());
1924  filtered_values.assign (valsIn.begin (), valsIn.end ());
1925  const size_t numFilteredEntries =
1926  myGraph_->template filterGlobalIndicesAndValues<ST> (filtered_indices (),
1927  filtered_values ());
1928  inds_view.ginds = filtered_indices (0, numFilteredEntries);
1929  vals_view = filtered_values (0, numFilteredEntries);
1930  }
1931  else { // we don't have a column Map.
1932  inds_view.ginds = indices;
1933  vals_view = valsIn;
1934  }
1935  const size_t numFilteredEntries = vals_view.size ();
1936  // add the new indices and values
1937  if (numFilteredEntries > 0) {
1938  RowInfo rowInfo = myGraph_->getRowInfo (lrow);
1939  const size_t curNumEntries = rowInfo.numEntries;
1940  const size_t newNumEntries = curNumEntries + numFilteredEntries;
1941  if (newNumEntries > rowInfo.allocSize) {
1942  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1943  getProfileType () == StaticProfile, std::runtime_error,
1944  "New indices exceed statically allocated graph structure.");
1945 
1946  // Update allocation only as much as necessary
1947  rowInfo = myGraph_->template updateGlobalAllocAndValues<ST> (rowInfo,
1948  newNumEntries,
1949  values2D_[lrow]);
1950  }
1951  if (isGloballyIndexed ()) {
1952  // lg=GlobalIndices, I=GlobalIndices means the method calls
1953  // getGlobalViewNonConst() and does direct copying, which
1954  // should be reasonably fast.
1955  myGraph_->template insertIndicesAndValues<ST> (rowInfo, inds_view,
1956  this->getViewNonConst (rowInfo),
1957  vals_view,
1958  GlobalIndices, GlobalIndices);
1959  }
1960  else {
1961  // lg=GlobalIndices, I=LocalIndices means the method calls
1962  // the Map's getLocalElement() method once per entry to
1963  // insert. This may be slow.
1964  myGraph_->template insertIndicesAndValues<ST> (rowInfo, inds_view,
1965  this->getViewNonConst (rowInfo),
1966  vals_view,
1967  GlobalIndices, LocalIndices);
1968  }
1969 #ifdef HAVE_TPETRA_DEBUG
1970  {
1971  const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow (lrow);
1972  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(chkNewNumEntries != newNumEntries,
1973  std::logic_error, ": There should be a total of " << newNumEntries
1974  << " entries in the row, but the graph now reports " << chkNewNumEntries
1975  << " entries. Please report this bug to the Tpetra developers.");
1976  }
1977 #endif // HAVE_TPETRA_DEBUG
1978  }
1979  }
1980  else { // The calling process doesn't own the given row.
1981  insertNonownedGlobalValues (globalRow, indices, values);
1982  }
1983  }
1984 
1985 
1986  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1987  LocalOrdinal
1988  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
1989  replaceLocalValues (const LocalOrdinal localRow,
1990  const Teuchos::ArrayView<const LocalOrdinal> &indices,
1991  const Teuchos::ArrayView<const Scalar>& values) const
1992  {
1993  using Kokkos::MemoryUnmanaged;
1994  using Kokkos::View;
1995  typedef impl_scalar_type ST;
1996  typedef LocalOrdinal LO;
1997  typedef device_type DD;
1998  typedef typename View<LO*, DD>::HostMirror::device_type HD;
1999  typedef View<const ST*, HD, MemoryUnmanaged> ISVT;
2000  typedef View<const LO*, HD, MemoryUnmanaged> LIVT;
2001 
2002  if (! isFillActive () || staticGraph_.is_null ()) {
2003  // Fill must be active and the graph must exist.
2004  return Teuchos::OrdinalTraits<LO>::invalid ();
2005  }
2006 
2007  const RowInfo rowInfo = staticGraph_->getRowInfo (localRow);
2008  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2009  // The input local row is invalid on the calling process,
2010  // which means that the calling process replaced zero entries.
2011  return static_cast<LO> (0);
2012  }
2013 
2014  auto curVals = this->getRowViewNonConst (rowInfo);
2015  typedef typename std::remove_const<typename std::remove_reference<decltype (curVals)>::type>::type OSVT;
2016  const ST* valsRaw = reinterpret_cast<const ST*> (values.getRawPtr ());
2017  ISVT valsIn (valsRaw, values.size ());
2018  LIVT indsIn (indices.getRawPtr (), indices.size ());
2019  return staticGraph_->template replaceLocalValues<OSVT, LIVT, ISVT> (rowInfo,
2020  curVals,
2021  indsIn,
2022  valsIn);
2023  }
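
 // Usage sketch (caller-side code, not part of this file): overwriting an
 // existing entry with replaceLocalValues.  A and lclRow are hypothetical;
 // only entries whose column indices are already present in the row are
 // replaced, and the return value counts how many were.
 //
 //   Teuchos::Array<LocalOrdinal> cols (1);
 //   Teuchos::Array<Scalar> vals (1);
 //   cols[0] = 0;  // a local column index already stored in row lclRow
 //   vals[0] = Teuchos::ScalarTraits<Scalar>::one ();
 //   const LocalOrdinal numReplaced =
 //     A.replaceLocalValues (lclRow, cols (), vals ());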
2024 
2025 
2026  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2027  LocalOrdinal
2028  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2029  replaceGlobalValues (const GlobalOrdinal globalRow,
2030  const Kokkos::View<const GlobalOrdinal*, device_type,
2031  Kokkos::MemoryUnmanaged>& inputInds,
2032  const Kokkos::View<const impl_scalar_type*, device_type,
2033  Kokkos::MemoryUnmanaged>& inputVals) const
2034  {
2035  typedef impl_scalar_type ST;
2036  typedef device_type DD;
2037  // project2nd is a binary function that returns its second
2038  // argument. This replaces entries in the given row with their
2039  // corresponding entry of values.
2040  typedef Tpetra::project2nd<ST, ST> BF;
2041 
2042  // It doesn't make sense for replace to use atomic updates, since
2043  // the result of multiple threads replacing the same value
2044  // concurrently is undefined.
2045  return this->template transformGlobalValues<BF, DD> (globalRow, inputInds,
2046  inputVals, BF (), false);
2047  }
2048 
2049 
2050  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2051  LocalOrdinal
2052  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2053  replaceGlobalValues (const GlobalOrdinal globalRow,
2054  const Teuchos::ArrayView<const GlobalOrdinal>& inputInds,
2055  const Teuchos::ArrayView<const Scalar>& inputVals) const
2056  {
2057  using Kokkos::MemoryUnmanaged;
2058  using Kokkos::View;
2059  typedef impl_scalar_type ST;
2060  typedef GlobalOrdinal GO;
2061  typedef device_type DD;
2062  typedef typename View<GO*, DD>::HostMirror::device_type HD;
2063  // project2nd is a binary function that returns its second
2064  // argument. This replaces entries in the given row with their
2065  // corresponding entry of values.
2066  typedef Tpetra::project2nd<ST, ST> BF;
2067 
2068  const ST* const rawInputVals =
2069  reinterpret_cast<const ST*> (inputVals.getRawPtr ());
2070  // 'indices' and 'values' come from the user, so they are host data.
2071  View<const ST*, HD, MemoryUnmanaged> inputValsK (rawInputVals,
2072  inputVals.size ());
2073  View<const GO*, HD, MemoryUnmanaged> inputIndsK (inputInds.getRawPtr (),
2074  inputInds.size ());
2075  // It doesn't make sense for replace to use atomic updates, since
2076  // the result of multiple threads replacing the same value
2077  // concurrently is undefined.
2078  return this->template transformGlobalValues<BF, HD> (globalRow, inputIndsK,
2079  inputValsK, BF (), false);
2080  }
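
 // Usage sketch (caller-side code, not part of this file) for the
 // Teuchos::ArrayView overload of replaceGlobalValues; A and gblRow are
 // hypothetical.  Entries at column indices not already stored in the row
 // are skipped and not counted in the return value.
 //
 //   Teuchos::Array<GlobalOrdinal> cols (1);
 //   Teuchos::Array<Scalar> vals (1);
 //   cols[0] = gblRow;  // replace the diagonal entry, if it is stored
 //   vals[0] = Teuchos::ScalarTraits<Scalar>::one ();
 //   const LocalOrdinal numReplaced =
 //     A.replaceGlobalValues (gblRow, cols (), vals ());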
2081 
2082 
2083  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2084  LocalOrdinal
2085  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2086  sumIntoGlobalValues (const GlobalOrdinal globalRow,
2087  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2088  const Teuchos::ArrayView<const Scalar>& values,
2089  const bool atomic)
2090  {
2091  using Kokkos::MemoryUnmanaged;
2092  using Kokkos::View;
2093  typedef impl_scalar_type ST;
2094  typedef LocalOrdinal LO;
2095  typedef GlobalOrdinal GO;
2096  typedef device_type DD;
2097  typedef typename View<LO*, DD>::HostMirror::device_type HD;
2098 
2099  if (! isFillActive ()) {
2100  // Fill must be active in order to call this method.
2101  return Teuchos::OrdinalTraits<LO>::invalid ();
2102  }
2103 
2104  // mfh 26 Nov 2015: Avoid calling getRowMap() or getCrsGraph(),
2105  // because they touch RCP's reference count, which is not thread
2106  // safe. Dereferencing an RCP or calling op-> does not touch the
2107  // reference count.
2108  const LO lrow = this->staticGraph_.is_null () ?
2109  myGraph_->rowMap_->getLocalElement (globalRow) :
2110  staticGraph_->rowMap_->getLocalElement (globalRow);
2111  //const LO lrow = this->getRowMap ()->getLocalElement (globalRow);
2112 
2113  if (lrow == Teuchos::OrdinalTraits<LO>::invalid ()) {
2114  // globalRow is not in the row Map, so stash the given entries
2115  // away in a separate data structure. globalAssemble() (called
2116  // during fillComplete()) will exchange that data and sum it in
2117  // using sumIntoGlobalValues().
2118  this->insertNonownedGlobalValues (globalRow, indices, values);
2119  // FIXME (mfh 08 Jul 2014) It's not clear what to return here,
2120  // since we won't know whether the given indices were valid
2121  // until globalAssemble (called in fillComplete) is called.
2122  // That's why insertNonownedGlobalValues doesn't return
2123  // anything. Just for consistency, I'll return the number of
2124  // entries that the user gave us.
2125  return static_cast<LO> (indices.size ());
2126  }
2127 
2128  if (staticGraph_.is_null ()) {
2129  return Teuchos::OrdinalTraits<LO>::invalid ();
2130  }
2131  const RowInfo rowInfo = this->staticGraph_->getRowInfo (lrow);
2132 
2133  auto curVals = this->getRowViewNonConst (rowInfo);
2134  const ST* valsRaw = reinterpret_cast<const ST*> (values.getRawPtr ());
2135  View<const ST*, HD, MemoryUnmanaged> valsIn (valsRaw, values.size ());
2136  View<const GO*, HD, MemoryUnmanaged> indsIn (indices.getRawPtr (),
2137  indices.size ());
2138  return staticGraph_->template sumIntoGlobalValues<ST, HD, DD> (rowInfo,
2139  curVals,
2140  indsIn,
2141  valsIn,
2142  atomic);
2143  }
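
 // Usage sketch (caller-side code, not part of this file): accumulating a
 // contribution with sumIntoGlobalValues, as in finite-element assembly.
 // A, gblRow, and gblCol are hypothetical.  If gblRow is not owned by the
 // calling process, the contribution is stashed and exchanged by
 // globalAssemble() during fillComplete(), as the comments above explain.
 //
 //   Teuchos::Array<GlobalOrdinal> cols (1);
 //   Teuchos::Array<Scalar> vals (1);
 //   cols[0] = gblCol;
 //   vals[0] = Teuchos::ScalarTraits<Scalar>::one ();
 //   // Last argument: whether to use atomic updates (useful if multiple
 //   // host threads may update the same row concurrently).
 //   A.sumIntoGlobalValues (gblRow, cols (), vals (), false);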
2144 
2145 
2146  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2147  LocalOrdinal
2148  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2149  sumIntoLocalValues (const LocalOrdinal localRow,
2150  const Teuchos::ArrayView<const LocalOrdinal>& indices,
2151  const Teuchos::ArrayView<const Scalar>& values,
2152  const bool atomic) const
2153  {
2154  using Kokkos::MemoryUnmanaged;
2155  using Kokkos::View;
2156  typedef impl_scalar_type ST;
2157  typedef LocalOrdinal LO;
2158  typedef device_type DD;
2159  typedef typename View<LO*, DD>::HostMirror::device_type HD;
2160 
2161  typedef View<const ST*, HD, MemoryUnmanaged> IVT;
2162  typedef View<const LO*, HD, MemoryUnmanaged> IIT;
2163 
2164  const ST* valsRaw = reinterpret_cast<const ST*> (values.getRawPtr ());
2165  IVT valsIn (valsRaw, values.size ());
2166  IIT indsIn (indices.getRawPtr (), indices.size ());
2167  return this->template sumIntoLocalValues<IIT, IVT> (localRow, indsIn,
2168  valsIn, atomic);
2169  }
2170 
2171  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2172  Teuchos::ArrayView<const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::impl_scalar_type>
2173  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2174  getView (RowInfo rowinfo) const
2175  {
2176  using Kokkos::MemoryUnmanaged;
2177  using Kokkos::View;
2178  using Teuchos::ArrayView;
2179  typedef impl_scalar_type ST;
2180  typedef std::pair<size_t, size_t> range_type;
2181 
2182  if (k_values1D_.dimension_0 () != 0 && rowinfo.allocSize > 0) {
2183 #ifdef HAVE_TPETRA_DEBUG
2184  TEUCHOS_TEST_FOR_EXCEPTION(
2185  rowinfo.offset1D + rowinfo.allocSize > k_values1D_.dimension_0 (),
2186  std::range_error, "Tpetra::CrsMatrix::getView: Invalid access "
2187  "to 1-D storage of values." << std::endl << "rowinfo.offset1D (" <<
2188  rowinfo.offset1D << ") + rowinfo.allocSize (" << rowinfo.allocSize <<
2189  ") > k_values1D_.dimension_0() (" << k_values1D_.dimension_0 () << ").");
2190 #endif // HAVE_TPETRA_DEBUG
2191  range_type range (rowinfo.offset1D, rowinfo.offset1D + rowinfo.allocSize);
2192  typedef View<const ST*, execution_space, MemoryUnmanaged> subview_type;
2193  // mfh 23 Nov 2015: Don't just create a subview of k_values1D_
2194  // directly, because that first creates a _managed_ subview,
2195  // then returns an unmanaged version of that. That touches the
2196  // reference count, which costs performance in a measurable way.
2197  // Instead, we create a temporary unmanaged view, then create
2198  // the subview from that.
2199  subview_type sv = Kokkos::subview (subview_type (k_values1D_), range);
2200  const ST* const sv_raw = (rowinfo.allocSize == 0) ? NULL : sv.ptr_on_device ();
2201  return ArrayView<const ST> (sv_raw, rowinfo.allocSize);
2202  }
2203  else if (values2D_ != null) {
2204  return values2D_[rowinfo.localRow] ();
2205  }
2206  else {
2207  return ArrayView<impl_scalar_type> ();
2208  }
2209  }
2210 
2211  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2212  Kokkos::View<const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::impl_scalar_type*,
2213  typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::execution_space,
2214  Kokkos::MemoryUnmanaged>
2215  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2216  getRowView (const RowInfo& rowInfo) const
2217  {
2218  using Kokkos::MemoryUnmanaged;
2219  using Kokkos::View;
2220  typedef impl_scalar_type ST;
2221  typedef View<const ST*, execution_space, MemoryUnmanaged> subview_type;
2222  typedef std::pair<size_t, size_t> range_type;
2223 
2224  if (k_values1D_.dimension_0 () != 0 && rowInfo.allocSize > 0) {
2225 #ifdef HAVE_TPETRA_DEBUG
2226  TEUCHOS_TEST_FOR_EXCEPTION(
2227  rowInfo.offset1D + rowInfo.allocSize > k_values1D_.dimension_0 (),
2228  std::range_error, "Tpetra::CrsMatrix::getRowView: Invalid access "
2229  "to 1-D storage of values." << std::endl << "rowInfo.offset1D (" <<
2230  rowInfo.offset1D << ") + rowInfo.allocSize (" << rowInfo.allocSize <<
2231  ") > k_values1D_.dimension_0() (" << k_values1D_.dimension_0 () << ").");
2232 #endif // HAVE_TPETRA_DEBUG
2233  range_type range (rowInfo.offset1D, rowInfo.offset1D + rowInfo.allocSize);
2234  // mfh 23 Nov 2015: Don't just create a subview of k_values1D_
2235  // directly, because that first creates a _managed_ subview,
2236  // then returns an unmanaged version of that. That touches the
2237  // reference count, which costs performance in a measurable way.
2238  // Instead, we create a temporary unmanaged view, then create
2239  // the subview from that.
2240  return Kokkos::subview (subview_type (k_values1D_), range);
2241  }
2242  else if (values2D_ != null) {
2243  Teuchos::ArrayView<const ST> rowView = values2D_[rowInfo.localRow] ();
2244  return subview_type (rowView.getRawPtr (), rowView.size ());
2245  }
2246  else {
2247  return subview_type ();
2248  }
2249  }
2250 
2251  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2252  Kokkos::View<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::impl_scalar_type*,
2253  typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::execution_space,
2254  Kokkos::MemoryUnmanaged>
2255  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2256  getRowViewNonConst (const RowInfo& rowInfo) const
2257  {
2258  using Kokkos::MemoryUnmanaged;
2259  using Kokkos::View;
2260  typedef impl_scalar_type ST;
2261  typedef View<ST*, execution_space, MemoryUnmanaged> subview_type;
2262  typedef std::pair<size_t, size_t> range_type;
2263 
2264  if (k_values1D_.dimension_0 () != 0 && rowInfo.allocSize > 0) {
2265 #ifdef HAVE_TPETRA_DEBUG
2266  TEUCHOS_TEST_FOR_EXCEPTION(
2267  rowInfo.offset1D + rowInfo.allocSize > k_values1D_.dimension_0 (),
2268  std::range_error, "Tpetra::CrsMatrix::getRowViewNonConst: Invalid access "
2269  "to 1-D storage of values." << std::endl << "rowInfo.offset1D (" <<
2270  rowInfo.offset1D << ") + rowInfo.allocSize (" << rowInfo.allocSize <<
2271  ") > k_values1D_.dimension_0() (" << k_values1D_.dimension_0 () << ").");
2272 #endif // HAVE_TPETRA_DEBUG
2273  range_type range (rowInfo.offset1D, rowInfo.offset1D + rowInfo.allocSize);
2274  // mfh 23 Nov 2015: Don't just create a subview of k_values1D_
2275  // directly, because that first creates a _managed_ subview,
2276  // then returns an unmanaged version of that. That touches the
2277  // reference count, which costs performance in a measurable way.
2278  // Instead, we create a temporary unmanaged view, then create
2279  // the subview from that.
2280  return Kokkos::subview (subview_type (k_values1D_), range);
2281  }
2282  else if (values2D_ != null) {
2283  Teuchos::ArrayView<ST> rowView = values2D_[rowInfo.localRow] ();
2284  return subview_type (rowView.getRawPtr (), rowView.size ());
2285  }
2286  else {
2287  return subview_type ();
2288  }
2289  }
2290 
2291  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2292  Teuchos::ArrayView<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::impl_scalar_type>
2293  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2294  getViewNonConst (const RowInfo& rowinfo) const
2295  {
2296  return Teuchos::av_const_cast<impl_scalar_type> (this->getView (rowinfo));
2297  }
2298 
2299  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2300  void
2301  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2302  getLocalRowCopy (LocalOrdinal localRow,
2303  const Teuchos::ArrayView<LocalOrdinal>& indices,
2304  const Teuchos::ArrayView<Scalar>& values,
2305  size_t& numEntries) const
2306  {
2307  using Teuchos::ArrayView;
2308  using Teuchos::av_reinterpret_cast;
2309  typedef LocalOrdinal LO;
2310  typedef GlobalOrdinal GO;
2311 
2312  TEUCHOS_TEST_FOR_EXCEPTION(
2313  isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2314  "Tpetra::CrsMatrix::getLocalRowCopy: The matrix is globally indexed and "
2315  "does not have a column Map yet. That means we don't have local indices "
2316  "for columns yet, so it doesn't make sense to call this method. If the "
2317  "matrix doesn't have a column Map yet, you should call fillComplete on "
2318  "it first.");
2319  TEUCHOS_TEST_FOR_EXCEPTION(
2320  ! staticGraph_->hasRowInfo (), std::runtime_error,
2321  "Tpetra::CrsMatrix::getLocalRowCopy: The graph's row information was "
2322  "deleted at fillComplete().");
2323 
2324  if (! this->getRowMap ()->isNodeLocalElement (localRow)) {
2325  // The calling process owns no entries in this row.
2326  numEntries = 0;
2327  return;
2328  }
2329 
2330  const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
2331  const size_t theNumEntries = rowinfo.numEntries;
2332 
2333  TEUCHOS_TEST_FOR_EXCEPTION(
2334  static_cast<size_t> (indices.size ()) < theNumEntries ||
2335  static_cast<size_t> (values.size ()) < theNumEntries,
2336  std::runtime_error,
2337  "Tpetra::CrsMatrix::getLocalRowCopy: The given row " << localRow
2338  << " has " << theNumEntries << " entries. One or both of the given "
2339  "ArrayViews are not long enough to store that many entries. indices "
2340  "can store " << indices.size() << " entries and values can store "
2341  << values.size() << " entries.");
2342 
2343  numEntries = theNumEntries;
2344 
2345  if (staticGraph_->isLocallyIndexed ()) {
2346  ArrayView<const LO> indrowview = staticGraph_->getLocalView (rowinfo);
2347  ArrayView<const Scalar> valrowview =
2348  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
2349  std::copy (indrowview.begin (), indrowview.begin () + numEntries, indices.begin ());
2350  std::copy (valrowview.begin (), valrowview.begin () + numEntries, values.begin ());
2351  }
2352  else if (staticGraph_->isGloballyIndexed ()) {
2353  ArrayView<const GO> indrowview = staticGraph_->getGlobalView (rowinfo);
2354  ArrayView<const Scalar> valrowview =
2355  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
2356  std::copy (valrowview.begin (), valrowview.begin () + numEntries, values.begin ());
2357 
2358  const map_type& colMap = * (this->getColMap ());
2359  for (size_t j=0; j < numEntries; ++j) {
2360  indices[j] = colMap.getLocalElement (indrowview[j]);
2361  }
2362  }
2363  else {
2364  numEntries = 0;
2365  }
2366  }
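
 // Usage sketch (caller-side code, not part of this file): copying one
 // row into caller-allocated storage.  A and lclRow are hypothetical; the
 // arrays are sized with getNumEntriesInLocalRow, and numEnt reports how
 // many entries were actually copied.
 //
 //   const size_t maxNumEnt = A.getNumEntriesInLocalRow (lclRow);
 //   Teuchos::Array<LocalOrdinal> cols (maxNumEnt);
 //   Teuchos::Array<Scalar> vals (maxNumEnt);
 //   size_t numEnt = 0;
 //   A.getLocalRowCopy (lclRow, cols (), vals (), numEnt);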
2367 
2368  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2369  void
2370  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2371  getGlobalRowCopy (GlobalOrdinal globalRow,
2372  const Teuchos::ArrayView<GlobalOrdinal>& indices,
2373  const Teuchos::ArrayView<Scalar>& values,
2374  size_t& numEntries) const
2375  {
2376  using Teuchos::ArrayView;
2377  using Teuchos::av_reinterpret_cast;
2378  typedef LocalOrdinal LO;
2379  typedef GlobalOrdinal GO;
2380 
2381  const char tfecfFuncName[] = "getGlobalRowCopy: ";
2382  const LocalOrdinal lrow = getRowMap ()->getLocalElement (globalRow);
2383  if (lrow == OTL::invalid ()) {
2384  // The calling process owns no entries in this row.
2385  numEntries = 0;
2386  return;
2387  }
2388 
2389  const RowInfo rowinfo = staticGraph_->getRowInfo (lrow);
2390  const size_t theNumEntries = rowinfo.numEntries;
2391 
2392  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2393  static_cast<size_t> (indices.size ()) < theNumEntries ||
2394  static_cast<size_t> (values.size ()) < theNumEntries,
2395  std::runtime_error,
2396  "The given row " << globalRow << ", corresponding to local row " << lrow
2397  << ", has " << theNumEntries << " entries. One or both of the given "
2398  "ArrayView input arguments are not long enough to store that many "
2399  "entries. indices.size() = " << indices.size() << ", values.size() = "
2400  << values.size () << ", but the number of entries in the row is "
2401  << theNumEntries << ".");
2402 
2403  // Don't "commit" the value until we know that the input arrays are valid.
2404  numEntries = theNumEntries;
2405 
2406  if (staticGraph_->isGloballyIndexed ()) {
2407  ArrayView<const GO> indrowview = staticGraph_->getGlobalView (rowinfo);
2408  ArrayView<const Scalar> valrowview =
2409  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
2410  std::copy (indrowview.begin (), indrowview.begin () + numEntries, indices.begin ());
2411  std::copy (valrowview.begin (), valrowview.begin () + numEntries, values.begin ());
2412  }
2413  else if (staticGraph_->isLocallyIndexed ()) {
2414  ArrayView<const LO> indrowview = staticGraph_->getLocalView(rowinfo);
2415  ArrayView<const Scalar> valrowview =
2416  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
2417  std::copy (valrowview.begin (), valrowview.begin () + numEntries, values.begin ());
2418  for (size_t j = 0; j < numEntries; ++j) {
2419  indices[j] = getColMap ()->getGlobalElement (indrowview[j]);
2420  }
2421  }
2422  else {
2423 #ifdef HAVE_TPETRA_DEBUG
2424  // We should have taken one of the branches above if indices are allocated.
2425  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2426  staticGraph_->indicesAreAllocated (), std::logic_error,
2427  "Internal logic error. Please contact Tpetra team.");
2428 #endif // HAVE_TPETRA_DEBUG
2429  numEntries = 0;
2430  }
2431  }
2432 
2433  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2434  void
2435  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2436  getLocalRowView (LocalOrdinal localRow,
2437  Teuchos::ArrayView<const LocalOrdinal>& indices,
2438  Teuchos::ArrayView<const Scalar>& values) const
2439  {
2440  using Teuchos::ArrayView;
2441  using Teuchos::av_reinterpret_cast;
2442  typedef LocalOrdinal LO;
2443 
2444  const char tfecfFuncName[] = "getLocalRowView: ";
2445  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2446  isGloballyIndexed (), std::runtime_error, "The matrix currently stores "
2447  "its indices as global indices, so you cannot get a view with local "
2448  "column indices. If the matrix has a column Map, you may call "
2449  "getLocalRowCopy() to get local column indices; otherwise, you may get "
2450  "a view with global column indices by calling getGlobalRowCopy().");
2451  indices = Teuchos::null;
2452  values = Teuchos::null;
2453 #ifdef HAVE_TPETRA_DEBUG
2454  size_t numEntries = 0;
2455 #endif // HAVE_TPETRA_DEBUG
2456  if (getRowMap ()->isNodeLocalElement (localRow)) {
2457  const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
2458 #ifdef HAVE_TPETRA_DEBUG
2459  numEntries = rowinfo.numEntries;
2460 #endif // HAVE_TPETRA_DEBUG
2461  if (rowinfo.numEntries > 0) {
2462  ArrayView<const LO> indTmp = staticGraph_->getLocalView (rowinfo);
2463  ArrayView<const Scalar> valTmp =
2464  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
2465  indices = indTmp (0, rowinfo.numEntries);
2466  values = valTmp (0, rowinfo.numEntries);
2467  }
2468  }
2469 
2470 #ifdef HAVE_TPETRA_DEBUG
2471  const char suffix[] = ". This should never happen. Please report this "
2472  "bug to the Tpetra developers.";
2473  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2474  static_cast<size_t>(indices.size ()) != static_cast<size_t>(values.size ()), std::logic_error,
2475  "At the end of this method, for local row " << localRow << ", "
2476  "indices.size() = " << indices.size () << " != values.size () = "
2477  << values.size () << suffix);
2478  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2479  static_cast<size_t>(indices.size ()) != static_cast<size_t>(numEntries), std::logic_error,
2480  "At the end of this method, for local row " << localRow << ", "
2481  "indices.size() = " << indices.size () << " != numEntries = "
2482  << numEntries << suffix);
2483  const size_t expectedNumEntries = this->getNumEntriesInLocalRow (localRow);
2484  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2485  numEntries != expectedNumEntries, std::logic_error,
2486  "At the end of this method, for local row " << localRow << ", numEntries"
2487  " = " << numEntries << " != getNumEntriesInLocalRow(localRow)"
2488  " = "<< expectedNumEntries << suffix);
2489 #endif // HAVE_TPETRA_DEBUG
2490  }
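
 // Usage sketch (caller-side code, not part of this file): summing one
 // row's values through a view, without copying.  A and lclRow are
 // hypothetical; the matrix must be locally indexed (for example, after
 // fillComplete) for this method to succeed.
 //
 //   Teuchos::ArrayView<const LocalOrdinal> cols;
 //   Teuchos::ArrayView<const Scalar> vals;
 //   A.getLocalRowView (lclRow, cols, vals);
 //   Scalar rowSum = Teuchos::ScalarTraits<Scalar>::zero ();
 //   for (Teuchos_Ordinal k = 0; k < vals.size (); ++k) {
 //     rowSum += vals[k];
 //   }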
2491 
2492  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2493  void
2494  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2495  getGlobalRowView (GlobalOrdinal globalRow,
2496  Teuchos::ArrayView<const GlobalOrdinal>& indices,
2497  Teuchos::ArrayView<const Scalar>& values) const
2498  {
2499  using Teuchos::ArrayView;
2500  using Teuchos::av_reinterpret_cast;
2501  typedef GlobalOrdinal GO;
2502  const char tfecfFuncName[] = "getGlobalRowView: ";
2503 
2504  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2505  isLocallyIndexed (), std::runtime_error,
2506  "The matrix is locally indexed, so we cannot return a view of the row "
2507  "with global column indices. Use getGlobalRowCopy() instead.");
2508  indices = Teuchos::null;
2509  values = Teuchos::null;
2510  const LocalOrdinal lrow = getRowMap ()->getLocalElement (globalRow);
2511  if (lrow != Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
2512  // getRowInfo() requires a local row index, whether or not
2513  // storage has been optimized.
2514  const RowInfo rowinfo = staticGraph_->getRowInfo(lrow);
2515  if (rowinfo.numEntries > 0) {
2516  ArrayView<const GO> indTmp = staticGraph_->getGlobalView (rowinfo);
2517  ArrayView<const Scalar> valTmp =
2518  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
2519  indices = indTmp (0, rowinfo.numEntries);
2520  values = valTmp (0, rowinfo.numEntries);
2521  }
2522  }
2523 #ifdef HAVE_TPETRA_DEBUG
2524  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2525  static_cast<size_t> (indices.size ()) != this->getNumEntriesInGlobalRow (globalRow) ||
2526  indices.size () != values.size (),
2527  std::logic_error,
2528  "Violated stated post-conditions. Please contact Tpetra team.");
2529 #endif // HAVE_TPETRA_DEBUG
2530  }
2531 
2532  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2533  void
2534  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2535  scale (const Scalar& alpha)
2536  {
2537  typedef LocalOrdinal LO;
2538  typedef Kokkos::SparseRowView<local_matrix_type> row_view_type;
2539  typedef typename Teuchos::Array<Scalar>::size_type size_type;
2540  const char tfecfFuncName[] = "scale: ";
2541  const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
2542 
2543  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2544  ! isFillActive (), std::runtime_error,
2545  "Fill must be active before you may call this method. "
2546  "Please call resumeFill() to make fill active.");
2547 
2548  const size_t nlrs = staticGraph_->getNodeNumRows ();
2549  const size_t numAlloc = staticGraph_->getNodeAllocationSize ();
2550  const size_t numEntries = staticGraph_->getNodeNumEntries ();
2551  if (! staticGraph_->indicesAreAllocated () || nlrs == 0 ||
2552  numAlloc == 0 || numEntries == 0) {
2553  // do nothing
2554  }
2555  else {
2556  if (staticGraph_->getProfileType () == StaticProfile) {
2557  const LO lclNumRows = lclMatrix_.numRows ();
2558  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
2559  row_view_type row_i = lclMatrix_.template row<typename row_view_type::size_type> (lclRow);
2560  for (LO k = 0; k < row_i.length; ++k) {
2561  // FIXME (mfh 02 Jan 2015) This assumes CUDA UVM.
2562  row_i.value (k) *= theAlpha;
2563  }
2564  }
2565  }
2566  else if (staticGraph_->getProfileType () == DynamicProfile) {
2567  for (size_t row = 0; row < nlrs; ++row) {
2568  const size_type numEnt = getNumEntriesInLocalRow (row);
2569  Teuchos::ArrayView<impl_scalar_type> rowVals = values2D_[row] ();
2570  for (size_type k = 0; k < numEnt; ++k) {
2571  rowVals[k] *= theAlpha;
2572  }
2573  }
2574  }
2575  }
2576  }
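
 // Usage sketch (caller-side code, not part of this file): scaling every
 // stored entry of a fill-complete matrix by 2.  A is hypothetical; fill
 // must be active, hence the resumeFill() / fillComplete() pair.
 //
 //   const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
 //   A.resumeFill ();
 //   A.scale (ONE + ONE);
 //   A.fillComplete ();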
2577 
2578  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2579  void
2580  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2581  setAllToScalar (const Scalar& alpha)
2582  {
2583  const char tfecfFuncName[] = "setAllToScalar: ";
2584  const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
2585  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2586  ! isFillActive (), std::runtime_error,
2587  "Fill must be active before you may call this method. "
2588  "Please call resumeFill() to make fill active.");
2589 
2590  // Replace all values in the matrix.
2591  // It is easiest to replace all allocated values, instead of replacing only the ones with valid entries.
2592  // However, if there are no valid entries, we can short-circuit.
2593  // Furthermore, if the values aren't allocated, we can short-circuit (no entries have been inserted so far).
2594  const size_t nlrs = staticGraph_->getNodeNumRows(),
2595  numAlloc = staticGraph_->getNodeAllocationSize(),
2596  numEntries = staticGraph_->getNodeNumEntries();
2597  if (! staticGraph_->indicesAreAllocated () || numAlloc == 0 || numEntries == 0) {
2598  // do nothing
2599  }
2600  else {
2601  const ProfileType profType = staticGraph_->getProfileType ();
2602  if (profType == StaticProfile) {
2603  // FIXME (mfh 24 Dec 2014) Once CrsMatrix implements DualView
2604  // semantics, this would be the place to mark memory as
2605  // modified.
2606  Kokkos::deep_copy (k_values1D_, theAlpha);
2607  }
2608  else if (profType == DynamicProfile) {
2609  for (size_t row = 0; row < nlrs; ++row) {
2610  std::fill (values2D_[row].begin (), values2D_[row].end (), theAlpha);
2611  }
2612  }
2613  }
2614  }
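
 // Usage sketch (caller-side code, not part of this file): zeroing out all
 // stored values before reassembling on a fixed sparsity pattern.  A is
 // hypothetical.
 //
 //   A.resumeFill ();
 //   A.setAllToScalar (Teuchos::ScalarTraits<Scalar>::zero ());
 //   // ... sumIntoLocalValues() / sumIntoGlobalValues() calls here ...
 //   A.fillComplete ();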
2615 
2616  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2617  void
2618  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2619  setAllValues (const typename local_matrix_type::row_map_type& rowPointers,
2620  const typename local_graph_type::entries_type::non_const_type& columnIndices,
2621  const typename local_matrix_type::values_type& values)
2622  {
2623  const char tfecfFuncName[] = "setAllValues";
2624  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2625  columnIndices.size () != values.size (), std::runtime_error,
2626  ": columnIndices and values must have the same size. columnIndices.size() = "
2627  << columnIndices.size () << " != values.size() = " << values.size () << ".");
2628  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2629  myGraph_.is_null (), std::runtime_error, ": myGraph_ must not be null.");
2630 
2631  try {
2632  myGraph_->setAllIndices (rowPointers, columnIndices);
2633  }
2634  catch (std::exception &e) {
2635  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2636  true, std::runtime_error, ": Caught exception while calling myGraph_->"
2637  "setAllIndices(): " << e.what ());
2638  }
2639  k_values1D_ = values;
2640  checkInternalState ();
2641  }
2642 
2643  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2644  void
2645  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2646  setAllValues (const Teuchos::ArrayRCP<size_t>& rowPointers,
2647  const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices,
2648  const Teuchos::ArrayRCP<Scalar>& values)
2649  {
2650  const char tfecfFuncName[] = "setAllValues: ";
2651  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2652  columnIndices.size () != values.size (), std::runtime_error,
2653  "columnIndices.size() = " << columnIndices.size () << " != "
2654  "values.size() = " << values.size () << ".");
2655  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2656  myGraph_.is_null (), std::runtime_error, "myGraph_ must not be null.");
2657 
2658  try {
2659  myGraph_->setAllIndices (rowPointers, columnIndices);
2660  }
2661  catch (std::exception &e) {
2662  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2663  true, std::runtime_error, "Caught exception while calling myGraph_->"
2664  "setAllIndices(): " << e.what ());
2665  }
2666  Teuchos::ArrayRCP<impl_scalar_type> vals =
2667  Teuchos::arcp_reinterpret_cast<impl_scalar_type> (values);
2668  k_values1D_ = Kokkos::Compat::getKokkosViewDeepCopy<device_type> (vals ());
2669  checkInternalState ();
2670  }
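
 // Usage sketch (caller-side code, not part of this file) for the
 // Teuchos::ArrayRCP overload of setAllValues: handing the matrix a
 // prebuilt local CSR structure.  The sizes describe a hypothetical 2 x 2
 // local block with three stored entries; the matrix must own its graph
 // (myGraph_ nonnull) for this to be legal.
 //
 //   Teuchos::ArrayRCP<size_t> ptr = Teuchos::arcp<size_t> (3);  // lclNumRows+1 offsets
 //   Teuchos::ArrayRCP<LocalOrdinal> ind = Teuchos::arcp<LocalOrdinal> (3);
 //   Teuchos::ArrayRCP<Scalar> val = Teuchos::arcp<Scalar> (3);
 //   ptr[0] = 0; ptr[1] = 2; ptr[2] = 3;   // row 0: 2 entries, row 1: 1 entry
 //   ind[0] = 0; ind[1] = 1; ind[2] = 1;
 //   val[0] = val[1] = val[2] = Teuchos::ScalarTraits<Scalar>::one ();
 //   A.setAllValues (ptr, ind, val);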
2671 
2672  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2673  void
2674  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2675  getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
2676  {
2677  using Teuchos::ArrayRCP;
2678  using Teuchos::ArrayView;
2679  typedef LocalOrdinal LO;
2680  const char tfecfFuncName[] = "getLocalDiagOffsets";
2681 
2682  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2683  ! hasColMap (), std::runtime_error,
2684  ": This method requires that the matrix have a column Map.");
2685  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2686  staticGraph_.is_null (), std::runtime_error,
2687  ": This method requires that the matrix have a graph.");
2688 
2689  const map_type& rowMap = * (this->getRowMap ());
2690  const map_type& colMap = * (this->getColMap ());
2691 
2692  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
2693  if (static_cast<LO> (offsets.size ()) != myNumRows) {
2694  offsets.resize (myNumRows);
2695  }
2696 
2697 #ifdef HAVE_TPETRA_DEBUG
2698  bool allRowMapDiagEntriesInColMap = true;
2699  bool allDiagEntriesFound = true;
2700 #endif // HAVE_TPETRA_DEBUG
2701 
2702  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
2703  // setup, at least on the host.
2704  for (LO r = 0; r < myNumRows; ++r) {
2705  const GlobalOrdinal rgid = rowMap.getGlobalElement (r);
2706  const LO rlid = colMap.getLocalElement (rgid);
2707 
2708 #ifdef HAVE_TPETRA_DEBUG
2709  if (rlid == Teuchos::OrdinalTraits<LO>::invalid ()) {
2710  allRowMapDiagEntriesInColMap = false;
2711  }
2712 #endif // HAVE_TPETRA_DEBUG
2713 
2714  if (rlid != Teuchos::OrdinalTraits<LO>::invalid ()) {
2715  const RowInfo rowinfo = staticGraph_->getRowInfo (r);
2716  if (rowinfo.numEntries > 0) {
2717  offsets[r] = staticGraph_->findLocalIndex (rowinfo, rlid);
2718  }
2719  else {
2720  offsets[r] = Teuchos::OrdinalTraits<size_t>::invalid ();
2721 #ifdef HAVE_TPETRA_DEBUG
2722  allDiagEntriesFound = false;
2723 #endif // HAVE_TPETRA_DEBUG
2724  }
2725  }
2726  }
2727 
2728 #ifdef HAVE_TPETRA_DEBUG
2729  using Teuchos::reduceAll;
2730  using std::endl;
2731 
2732  const bool localSuccess =
2733  allRowMapDiagEntriesInColMap && allDiagEntriesFound;
2734  int localResults[3];
2735  localResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
2736  localResults[1] = allDiagEntriesFound ? 1 : 0;
2737  // min-all-reduce will compute least rank of all the processes
2738  // that didn't succeed.
2739  localResults[2] =
2740  ! localSuccess ? getComm ()->getRank () : getComm ()->getSize ();
2741  int globalResults[3];
2742  globalResults[0] = 0;
2743  globalResults[1] = 0;
2744  globalResults[2] = 0;
2745  reduceAll<int, int> (* (getComm ()), Teuchos::REDUCE_MIN,
2746  3, localResults, globalResults);
2747  if (globalResults[0] == 0 || globalResults[1] == 0) {
2748  std::ostringstream os; // build error message
2749  const bool both =
2750  globalResults[0] == 0 && globalResults[1] == 0;
2751  os << ": At least one process (including Process " << globalResults[2]
2752  << ") had the following issue" << (both ? "s" : "") << ":" << endl;
2753  if (globalResults[0] == 0) {
2754  os << " - The column Map does not contain at least one diagonal entry "
2755  "of the matrix." << endl;
2756  }
2757  if (globalResults[1] == 0) {
2758  os << " - At least one row on that process (or those processes) does not "
2759  "contain a diagonal entry." << endl;
2760  }
2761  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
2762  }
2763 #endif // HAVE_TPETRA_DEBUG
2764  }
2765 
2766  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2767  void
2768  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2769  getLocalDiagCopy (Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>& dvec) const
2770  {
2771  using Teuchos::ArrayRCP;
2772  using Teuchos::ArrayView;
2773  using Teuchos::av_reinterpret_cast;
2774  const char tfecfFuncName[] = "getLocalDiagCopy: ";
2775  typedef Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> vec_type;
2776  typedef typename vec_type::dual_view_type dual_view_type;
2777  typedef typename dual_view_type::host_mirror_space::execution_space host_execution_space;
2778 
2779  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2780  ! hasColMap (), std::runtime_error,
2781  "This method requires that the matrix have a column Map.");
2782  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2783  staticGraph_.is_null (), std::runtime_error,
2784  "This method requires that the matrix have a graph.");
2785  const map_type& rowMap = * (this->getRowMap ());
2786  const map_type& colMap = * (this->getColMap ());
2787 
2788 #ifdef HAVE_TPETRA_DEBUG
2789  // isCompatible() requires an all-reduce, and thus this check
2790  // should only be done in debug mode.
2791  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2792  ! dvec.getMap ()->isCompatible (rowMap), std::runtime_error,
2793  ": The input Vector's Map must be compatible with the CrsMatrix's row "
2794  "Map. You may check this by using Map's isCompatible method: "
2795  "dvec.getMap ()->isCompatible (A.getRowMap ());");
2796 #endif // HAVE_TPETRA_DEBUG
2797 
2798  // For now, we fill the Vector on the host and sync to device.
2799  // Later, we may write a parallel kernel that works entirely on
2800  // device.
2801  dual_view_type lclVec = dvec.getDualView ();
2802  lclVec.template modify<host_execution_space> ();
2803  typedef typename dual_view_type::t_host host_view_type;
2804  host_view_type lclVecHost = lclVec.h_view;
2805 
2806  // 1-D subview of lclVecHost. All the "typename" stuff ensures
2807  // that we get the same layout and memory traits as the original
2808  // 2-D view.
2809  typedef typename Kokkos::View<impl_scalar_type*,
2810  typename host_view_type::array_layout,
2811  typename host_view_type::device_type,
2812  typename host_view_type::memory_traits>
2813  host_view_1d_type;
2814  host_view_1d_type lclVecHost1d =
2815  Kokkos::subview (lclVecHost, Kokkos::ALL (), 0);
2816 
2817  // Find the diagonal entries and put them in lclVecHost1d.
2818  const LocalOrdinal myNumRows =
2819  static_cast<LocalOrdinal> (this->getNodeNumRows ());
2820  for (LocalOrdinal r = 0; r < myNumRows; ++r) {
2821  lclVecHost1d(r) = STS::zero (); // default value if no diag entry
2822  const GlobalOrdinal rgid = rowMap.getGlobalElement (r);
2823  const LocalOrdinal rlid = colMap.getLocalElement (rgid);
2824 
2825  if (rlid != Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
2826  const RowInfo rowinfo = staticGraph_->getRowInfo (r);
2827  if (rowinfo.numEntries > 0) {
2828  const size_t j = staticGraph_->findLocalIndex (rowinfo, rlid);
2829  if (j != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2830  // NOTE (mfh 02 Jan 2015) This technically does not assume
2831  // UVM, since getView and getViewNonConst are supposed to
2832  // return views of host data.
2833  ArrayView<const impl_scalar_type> view = this->getView (rowinfo);
2834  lclVecHost1d(r) = view[j];
2835  }
2836  }
2837  }
2838  }
2839  lclVec.template sync<execution_space> (); // sync changes back to device
2840  }
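 
 Illustrative usage sketch (not part of this file): the one-argument
 getLocalDiagCopy() above fills a Vector distributed by the row Map, writing
 zero for rows without a stored diagonal entry. The matrix A below is an
 assumed, already fill-complete CrsMatrix; the function name is hypothetical.
 
   #include "Tpetra_CrsMatrix.hpp"
   #include "Tpetra_Vector.hpp"
 
   typedef Tpetra::CrsMatrix<double> crs_matrix_type;
   typedef Tpetra::Vector<double>    vector_type;
 
   void copyDiagonal (const crs_matrix_type& A)
   {
     vector_type diag (A.getRowMap ()); // must be compatible with the row Map
     A.getLocalDiagCopy (diag);         // diag(i) = A(i,i), or 0 if not stored
   }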
2841 
2842  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2843  void
2844  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2845  getLocalDiagCopy (Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>& diag,
2846  const Teuchos::ArrayView<const size_t>& offsets) const
2847  {
2848  using Teuchos::ArrayRCP;
2849  using Teuchos::ArrayView;
2850  typedef Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> vec_type;
2851  typedef typename vec_type::dual_view_type dual_view_type;
2852  typedef typename dual_view_type::host_mirror_space::execution_space host_execution_space;
2853 
2854 #ifdef HAVE_TPETRA_DEBUG
2855  const char tfecfFuncName[] = "getLocalDiagCopy: ";
2856  const map_type& rowMap = * (this->getRowMap ());
2857  // isCompatible() requires an all-reduce, and thus this check
2858  // should only be done in debug mode.
2859  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2860  ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
2861  "The input Vector's Map must be compatible with (in the sense of Map::"
2862  "isCompatible) the CrsMatrix's row Map.");
2863 #endif // HAVE_TPETRA_DEBUG
2864 
2865  // For now, we fill the Vector on the host and sync to device.
2866  // Later, we may write a parallel kernel that works entirely on
2867  // device.
2868  dual_view_type lclVec = diag.getDualView ();
2869  lclVec.template modify<host_execution_space> ();
2870  typedef typename dual_view_type::t_host host_view_type;
2871  host_view_type lclVecHost = lclVec.h_view;
2872 
2873  // 1-D subview of lclVecHost. All the "typename" stuff ensures
2874  // that we get the same layout and memory traits as the original
2875  // 2-D view.
2876  typedef typename Kokkos::View<impl_scalar_type*,
2877  typename host_view_type::array_layout,
2878  typename host_view_type::device_type,
2879  typename host_view_type::memory_traits>
2880  host_view_1d_type;
2881  host_view_1d_type lclVecHost1d =
2882  Kokkos::subview (lclVecHost, Kokkos::ALL (), 0);
2883 
2884  Kokkos::View<const size_t*, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
2885  h_offsets(&offsets[0],offsets.size());
2886  // Find the diagonal entries and put them in lclVecHost1d.
2887  const LocalOrdinal myNumRows =
2888  static_cast<LocalOrdinal> (this->getNodeNumRows ());
2889  typedef Kokkos::RangePolicy<host_execution_space, LocalOrdinal> policy_type;
2890  Kokkos::parallel_for (policy_type (0, myNumRows), [&] (const LocalOrdinal& lclRow) {
2891  lclVecHost1d(lclRow) = STS::zero (); // default value if no diag entry
2892  if (h_offsets[lclRow] != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2893  //ArrayView<const LocalOrdinal> ind;
2894  //ArrayView<const Scalar> val;
2895  // NOTE (mfh 02 Jan 2015) This technically does not assume
2896  // UVM, since the get{Global,Local}RowView methods are
2897  // supposed to return views of host data.
2898  //this->getLocalRowView (i, ind, val);
2899  auto curRow = lclMatrix_.template rowConst<size_t>(lclRow);
2900  lclVecHost1d(lclRow) = static_cast<impl_scalar_type> (curRow.value(h_offsets[lclRow]));
2901  }
2902  });
2903  lclVec.template sync<execution_space> (); // sync changes back to device
2904  }
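 
 Illustrative usage sketch (not part of this file): when the diagonal must be
 extracted repeatedly while only the values change, the offsets overload above
 skips the per-row column search. The sketch assumes a fill-complete matrix A
 whose structure stays fixed between calls; names are hypothetical.
 
   #include "Tpetra_CrsMatrix.hpp"
   #include "Tpetra_Vector.hpp"
   #include "Teuchos_ArrayRCP.hpp"
 
   typedef Tpetra::CrsMatrix<double> crs_matrix_type;
   typedef Tpetra::Vector<double>    vector_type;
 
   void copyDiagonalWithOffsets (const crs_matrix_type& A)
   {
     // Pay for the diagonal search once; the offsets stay valid as long as
     // the graph of A does not change.
     Teuchos::ArrayRCP<size_t> offsets;
     A.getLocalDiagOffsets (offsets);
 
     vector_type diag (A.getRowMap ());
     A.getLocalDiagCopy (diag, offsets ()); // reuse offsets; no per-row search
   }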
2905 
2906 
2907  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2908  void
2909  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2910  leftScale (const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>& x)
2911  {
2912  using Teuchos::ArrayRCP;
2913  using Teuchos::ArrayView;
2914  using Teuchos::null;
2915  using Teuchos::RCP;
2916  using Teuchos::rcp;
2917  using Teuchos::rcpFromRef;
2918  typedef Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> vec_type;
2919  const char tfecfFuncName[] = "leftScale";
2920 
2921  // FIXME (mfh 06 Aug 2014) This doesn't make sense. The matrix
2922  // should only be modified when it is not fill complete.
2923  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2924  ! isFillComplete (), std::runtime_error,
2925  ": matrix must be fill complete.");
2926  RCP<const vec_type> xp;
2927 
2928  if (getRangeMap ()->isSameAs (* (x.getMap ()))){
2929  // Taken from Epetra: If we have a non-trivial exporter, we must
2930  // import elements that are permuted or are on other processors.
2931  // (We will use the exporter to perform the import ("reverse
2932  // mode").)
2933  if (getCrsGraph ()->getExporter () != null) {
2934  RCP<vec_type> tempVec = rcp (new vec_type (getRowMap ()));
2935  tempVec->doImport (x, * (getCrsGraph ()->getExporter ()), INSERT);
2936  xp = tempVec;
2937  }
2938  else {
2939  xp = rcpFromRef (x);
2940  }
2941  }
2942  else if (getRowMap ()->isSameAs (* (x.getMap ()))) {
2943  xp = rcpFromRef (x);
2944  }
2945  else {
2946  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::invalid_argument, ": The "
2947  "input scaling vector x's Map must be the same as either the row Map or "
2948  "the range Map of the CrsMatrix.");
2949  }
2950  ArrayRCP<const Scalar> vectorVals = xp->getData (0);
2951  ArrayView<impl_scalar_type> rowValues = null;
2952 
2953  const LocalOrdinal lclNumRows =
2954  static_cast<LocalOrdinal> (this->getNodeNumRows ());
2955  for (LocalOrdinal i = 0; i < lclNumRows; ++i) {
2956  const RowInfo rowinfo = staticGraph_->getRowInfo (i);
2957  rowValues = this->getViewNonConst (rowinfo);
2958  const impl_scalar_type scaleValue = static_cast<impl_scalar_type> (vectorVals[i]);
2959  for (size_t j = 0; j < rowinfo.numEntries; ++j) {
2960  rowValues[j] *= scaleValue;
2961  }
2962  }
2963  }
2964 
2965  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2966  void
2967  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
2968  rightScale (const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>& x)
2969  {
2970  using Teuchos::ArrayRCP;
2971  using Teuchos::ArrayView;
2972  using Teuchos::null;
2973  using Teuchos::RCP;
2974  using Teuchos::rcp;
2975  using Teuchos::rcpFromRef;
2976  typedef Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> vec_type;
2977  const char tfecfFuncName[] = "rightScale: ";
2978 
2979  // FIXME (mfh 06 Aug 2014) This doesn't make sense. The matrix
2980  // should only be modified when it is not fill complete.
2981  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2982  ! isFillComplete (), std::runtime_error, "Matrix must be fill complete.");
2983  RCP<const vec_type> xp;
2984  if (getDomainMap ()->isSameAs (* (x.getMap ()))) {
2985  // Taken from Epetra: If we have a non-trivial importer, we must
2986  // import elements that are permuted or are on other processes.
2987  // (We will use the importer to bring x onto the column Map.)
2988  if (getCrsGraph ()->getImporter () != null) {
2989  RCP<vec_type> tempVec = rcp (new vec_type (getColMap ()));
2990  tempVec->doImport (x, * (getCrsGraph ()->getImporter ()), INSERT);
2991  xp = tempVec;
2992  }
2993  else {
2994  xp = rcpFromRef (x);
2995  }
2996  }
2997  else if (getRowMap ()->isSameAs (* (x.getMap ()))) {
2998  xp = rcpFromRef (x);
2999  } else {
3000  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3001  true, std::runtime_error, "The input scaling vector x must have the same "
3002  "Map as either the domain Map or the row Map of the CrsMatrix.");
3003  }
3004 
3005  ArrayRCP<const Scalar> vectorVals = xp->getData (0);
3006  ArrayView<impl_scalar_type> rowValues = null;
3007 
3008  const LocalOrdinal lclNumRows =
3009  static_cast<LocalOrdinal> (this->getNodeNumRows ());
3010  for (LocalOrdinal i = 0; i < lclNumRows; ++i) {
3011  const RowInfo rowinfo = staticGraph_->getRowInfo (i);
3012  rowValues = this->getViewNonConst (rowinfo);
3013  ArrayView<const LocalOrdinal> colInds;
3014  getCrsGraph ()->getLocalRowView (i, colInds);
3015  for (size_t j = 0; j < rowinfo.numEntries; ++j) {
3016  rowValues[j] *= static_cast<impl_scalar_type> (vectorVals[colInds[j]]);
3017  }
3018  }
3019  }
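 
 Illustrative usage sketch (not part of this file): leftScale() computes
 A := diag(x) * A and rightScale() computes A := A * diag(x); both require a
 fill-complete matrix. The sketch below does a Jacobi-style row scaling
 A := inv(D) * A using the matrix's own diagonal, which is assumed nonzero
 everywhere; the function name is hypothetical.
 
   #include "Tpetra_CrsMatrix.hpp"
   #include "Tpetra_Vector.hpp"
 
   typedef Tpetra::CrsMatrix<double> crs_matrix_type;
   typedef Tpetra::Vector<double>    vector_type;
 
   void jacobiRowScale (crs_matrix_type& A)
   {
     vector_type d (A.getRowMap ());
     A.getLocalDiagCopy (d);   // d(i) = A(i,i)
 
     vector_type dInv (A.getRowMap ());
     dInv.reciprocal (d);      // dInv(i) = 1 / d(i)
 
     A.leftScale (dInv);       // A(i,j) *= dInv(i), i.e., A := inv(D) * A
   }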
3020 
3021  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3022  typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::mag_type
3023  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
3024  getFrobeniusNorm () const
3025  {
3026  using Teuchos::outArg;
3027  using Teuchos::REDUCE_SUM;
3028  using Teuchos::reduceAll;
3029  typedef typename Teuchos::ArrayRCP<const impl_scalar_type>::size_type size_type;
3030 
3031  // FIXME (mfh 05 Aug 2014) Write a thread-parallel kernel for the
3032  // local part of this computation. It could make sense to put
3033  // this operation in the Kokkos::CrsMatrix.
3034 
3035  // check the cache first
3036  mag_type frobNorm = frobNorm_;
3037  if (frobNorm == -STM::one ()) {
3038  mag_type mySum = STM::zero ();
3039  if (getNodeNumEntries() > 0) {
3040  if (isStorageOptimized ()) {
3041  // "Optimized" storage is packed storage. That means we can
3042  // iterate in one pass through the 1-D values array.
3043  const size_type numEntries =
3044  static_cast<size_type> (getNodeNumEntries ());
3045  for (size_type k = 0; k < numEntries; ++k) {
3046  // FIXME (mfh 05 Aug 2014) This assumes UVM.
3047  const impl_scalar_type val = k_values1D_(k);
3048  // Note (etp 06 Jan 2015) We need abs() here for composite types
3049  // (in general, if mag_type is on the left-hand-side, we need
3050  // abs() on the right-hand-side)
3051  const mag_type val_abs = STS::abs (val);
3052  mySum += val_abs * val_abs;
3053  }
3054  }
3055  else {
3056  const LocalOrdinal numRows =
3057  static_cast<LocalOrdinal> (this->getNodeNumRows ());
3058  for (LocalOrdinal r = 0; r < numRows; ++r) {
3059  const RowInfo rowInfo = myGraph_->getRowInfo (r);
3060  const size_type numEntries =
3061  static_cast<size_type> (rowInfo.numEntries);
3062  ArrayView<const impl_scalar_type> A_r =
3063  this->getView (rowInfo).view (0, numEntries);
3064  for (size_type k = 0; k < numEntries; ++k) {
3065  const impl_scalar_type val = A_r[k];
3066  const mag_type val_abs = STS::abs (val);
3067  mySum += val_abs * val_abs;
3068  }
3069  }
3070  }
3071  }
3072  mag_type totalSum = STM::zero ();
3073  reduceAll<int, mag_type> (* (getComm ()), REDUCE_SUM,
3074  mySum, outArg (totalSum));
3075  frobNorm = STM::sqrt (totalSum);
3076  }
3077  if (isFillComplete ()) {
3078  // Only cache the result if the matrix is fill complete.
3079  // Otherwise, the values might still change. resumeFill clears
3080  // the cache.
3081  frobNorm_ = frobNorm;
3082  }
3083  return frobNorm;
3084  }
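 
 Illustrative usage sketch (not part of this file): getFrobeniusNorm() above
 returns the square root of the global sum of |A(i,j)|^2 over all stored
 entries, at the cost of one all-reduce, and caches the result while the
 matrix remains fill complete. The matrix A and the function name below are
 assumptions made for the example.
 
   #include "Tpetra_CrsMatrix.hpp"
   #include <iostream>
 
   typedef Tpetra::CrsMatrix<double> crs_matrix_type;
 
   void printFrobeniusNorm (const crs_matrix_type& A)
   {
     // mag_type is double when Scalar is double.
     const double normF = A.getFrobeniusNorm ();
     if (A.getComm ()->getRank () == 0) {
       std::cout << "||A||_F = " << normF << std::endl;
     }
   }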
3085 
3086  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3087  void
3089  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
3090  {
3091  const char tfecfFuncName[] = "replaceColMap: ";
3092  // FIXME (mfh 06 Aug 2014) What if the graph is locally indexed?
3093  // Then replacing the column Map might mean that we need to
3094  // reindex the column indices.
3095  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3096  myGraph_.is_null (), std::runtime_error,
3097  "This method does not work if the matrix has a const graph. The whole "
3098  "idea of a const graph is that you are not allowed to change it, but "
3099  "this method necessarily must modify the graph, since the graph owns "
3100  "the matrix's column Map.");
3101  myGraph_->replaceColMap (newColMap);
3102  }
3103 
3104  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3105  void
3106  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
3107  reindexColumns (crs_graph_type* const graph,
3108  const Teuchos::RCP<const map_type>& newColMap,
3109  const Teuchos::RCP<const import_type>& newImport,
3110  const bool sortEachRow)
3111  {
3112  const char tfecfFuncName[] = "reindexColumns: ";
3113  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3114  graph == NULL && myGraph_.is_null (), std::invalid_argument,
3115  "The input graph is NULL, but the matrix does not own its graph.");
3116 
3117  crs_graph_type& theGraph = (graph == NULL) ? *myGraph_ : *graph;
3118  const bool sortGraph = false; // we'll sort graph & matrix together below
3119  theGraph.reindexColumns (newColMap, newImport, sortGraph);
3120  if (sortEachRow && theGraph.isLocallyIndexed () && ! theGraph.isSorted ()) {
3121  // We can't just call sortEntries() here, because that fails if
3122  // the matrix has a const graph. We want to use the given graph
3123  // in that case.
3124  const LocalOrdinal lclNumRows =
3125  static_cast<LocalOrdinal> (theGraph.getNodeNumRows ());
3126  for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
3127  const RowInfo rowInfo = theGraph.getRowInfo (row);
3128  Teuchos::ArrayView<impl_scalar_type> rv = this->getViewNonConst (rowInfo);
3129  theGraph.template sortRowIndicesAndValues<impl_scalar_type> (rowInfo, rv);
3130  }
3131  theGraph.indicesAreSorted_ = true;
3132  }
3133  }
3134 
3135  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3136  void
3138  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
3139  Teuchos::RCP<const import_type>& newImporter)
3140  {
3141  const char tfecfFuncName[] = "replaceDomainMapAndImporter: ";
3142  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3143  myGraph_.is_null (), std::runtime_error,
3144  "This method does not work if the matrix has a const graph. The whole "
3145  "idea of a const graph is that you are not allowed to change it, but this"
3146  " method necessarily must modify the graph, since the graph owns the "
3147  "matrix's domain Map and Import objects.");
3148  myGraph_->replaceDomainMapAndImporter (newDomainMap, newImporter);
3149  }
3150 
3151  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3152  void
3154  insertNonownedGlobalValues (const GlobalOrdinal globalRow,
3155  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
3156  const Teuchos::ArrayView<const Scalar>& values)
3157  {
3158  using Teuchos::Array;
3159  typedef GlobalOrdinal GO;
3160  typedef typename Array<GO>::size_type size_type;
3161 
3162  const size_type numToInsert = indices.size ();
3163  // Add the new data to the list of nonlocals.
3164  // This creates the arrays if they don't exist yet.
3165  std::pair<Array<GO>, Array<Scalar> >& curRow = nonlocals_[globalRow];
3166  Array<GO>& curRowInds = curRow.first;
3167  Array<Scalar>& curRowVals = curRow.second;
3168  const size_type newCapacity = curRowInds.size () + numToInsert;
3169  curRowInds.reserve (newCapacity);
3170  curRowVals.reserve (newCapacity);
3171  for (size_type k = 0; k < numToInsert; ++k) {
3172  curRowInds.push_back (indices[k]);
3173  curRowVals.push_back (values[k]);
3174  }
3175  }
3176 
3177  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3178  void
3179  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
3180  globalAssemble ()
3181  {
3182  using Teuchos::arcp;
3183  using Teuchos::Array;
3184  using Teuchos::ArrayRCP;
3185  using Teuchos::ArrayView;
3186  using Teuchos::CommRequest;
3187  using Teuchos::gatherAll;
3188  using Teuchos::isend;
3189  using Teuchos::ireceive;
3190  using Teuchos::null;
3191  using Teuchos::outArg;
3192  using Teuchos::RCP;
3193  using Teuchos::rcpFromRef;
3194  using Teuchos::REDUCE_MAX;
3195  using Teuchos::reduceAll;
3196  using Teuchos::SerialDenseMatrix;
3197  using Teuchos::tuple;
3198  using Teuchos::waitAll;
3199  using std::make_pair;
3200  using std::pair;
3201  typedef GlobalOrdinal GO;
3202  typedef typename Array<GO>::size_type size_type;
3203  // nonlocals_ contains the entries stored by previous calls to
3204  // insertGlobalValues() for nonowned rows.
3205  typedef std::map<GO, pair<Array<GO>, Array<Scalar> > > nonlocals_map_type;
3206  typedef typename nonlocals_map_type::const_iterator nonlocals_iter_type;
3207 
3208  const char tfecfFuncName[] = "globalAssemble";
3209  const Teuchos::Comm<int>& comm = * (getComm ());
3210  const int numImages = comm.getSize ();
3211  const int myImageID = comm.getRank ();
3212 
3213  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3214  ! isFillActive (), std::runtime_error, ": requires that fill is active.");
3215 
3216  // Determine (via a global all-reduce) if any processes have
3217  // nonlocal entries to share. This is necessary even if the
3218  // matrix has a static graph, because insertGlobalValues allows
3219  // nonlocal entries in that case.
3220  size_t MyNonlocals = static_cast<size_t> (nonlocals_.size ());
3221  size_t MaxGlobalNonlocals = 0;
3222  reduceAll<int, size_t> (comm, REDUCE_MAX, MyNonlocals,
3223  outArg (MaxGlobalNonlocals));
3224  if (MaxGlobalNonlocals == 0) {
3225  return; // no entries to share
3226  }
3227 
3228  // FIXME (mfh 14 Dec 2012) The code below reimplements an Export
3229  // operation. It would be better just to use an Export. See
3230  // Comment #34 in discussion of Bug 5782.
3231  //
3232  // mfh 24 Feb 2014: On the other hand, this is not technically an
3233  // Export, since the row Map might not necessarily be one-to-one.
3234 
3235  // compute a list of NLRs from nonlocals_ and use it to compute:
3236  // IdsAndRows: a vector of (id,row) pairs
3237  // NLR2Id: a map from NLR to the Id that owns it
3238  // globalNeighbors: a global graph of connectivity between images:
3239  // globalNeighbors(i,j) indicates that j sends to i
3240  // sendIDs: a list of all images I send to
3241  // recvIDs: a list of all images I receive from (constructed later)
3242  Array<pair<int,GlobalOrdinal> > IdsAndRows;
3243  std::map<GlobalOrdinal,int> NLR2Id;
3244  SerialDenseMatrix<int,char> globalNeighbors;
3245  Array<int> sendIDs, recvIDs;
3246  {
3247  // Construct the set of all nonowned rows encountered by this
3248  // process in insertGlobalValues() or sumIntoGlobalValues().
3249  std::set<GlobalOrdinal> setOfRows;
3250  for (nonlocals_iter_type iter = nonlocals_.begin ();
3251  iter != nonlocals_.end (); ++iter) {
3252  setOfRows.insert (iter->first);
3253  }
3254  // Copy the resulting set of nonowned rows into an Array.
3255  Array<GlobalOrdinal> NLRs (setOfRows.size ());
3256  std::copy (setOfRows.begin (), setOfRows.end (), NLRs.begin ());
3257 
3258  // get a list of ImageIDs for the non-local rows (NLRs)
3259  Array<int> NLRIds (NLRs.size ());
3260  {
3261  const LookupStatus stat =
3262  getRowMap ()->getRemoteIndexList (NLRs (), NLRIds ());
3263  const int lclerr = (stat == IDNotPresent ? 1 : 0);
3264  int gblerr;
3265  reduceAll<int, int> (comm, REDUCE_MAX, lclerr, outArg (gblerr));
3266  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3267  gblerr, std::runtime_error, ": non-local entries correspond to "
3268  "invalid rows.");
3269  }
3270 
3271  // build up a list of neighbors, as well as a map between NLRs and Ids
3272  // localNeighbors[i] != 0 iff I have data to send to image i
3273  // put NLRs,Ids into an array of pairs
3274  IdsAndRows.reserve (NLRs.size ());
3275  Array<char> localNeighbors (numImages, 0);
3276  typename Array<GO>::const_iterator nlr;
3277  typename Array<int>::const_iterator id;
3278  for (nlr = NLRs.begin (), id = NLRIds.begin ();
3279  nlr != NLRs.end (); ++nlr, ++id) {
3280  NLR2Id[*nlr] = *id;
3281  localNeighbors[*id] = 1;
3282  IdsAndRows.push_back (make_pair (*id, *nlr));
3283  }
3284  for (int j = 0; j < numImages; ++j) {
3285  if (localNeighbors[j]) {
3286  sendIDs.push_back (j);
3287  }
3288  }
3289  // sort IdsAndRows, by Ids first, then rows
3290  std::sort (IdsAndRows.begin (), IdsAndRows.end ());
3291  // gather from other nodes to form the full graph
3292  //
3293  // FIXME (mfh 24 Feb 2014) Ugh, this is awful!!! It's making a
3294  // P x P matrix which is the full graph of process connectivity.
3295  // Neither Export nor Import does this! It would probably be
3296  // more efficient to do the following:
3297  //
3298  // 1. Form the one-to-one version of the row Map, tgtMap
3299  // 2. Form the (possibly overlapping) Map srcMap, with the
3300  // global row indices which are the keys of nonlocals_ on
3301  // each process
3302  // 3. Construct an Export from srcMap to tgtMap
3303  // 4. Execute the Export with Tpetra::ADD
3304  globalNeighbors.shapeUninitialized (numImages, numImages);
3305  gatherAll (comm, numImages, localNeighbors.getRawPtr (),
3306  numImages*numImages, globalNeighbors.values ());
3307  // globalNeighbors at this point contains (on all images) the
3308  // connectivity between the images.
3309  // globalNeighbors(i,j) != 0 means that j sends to i (equivalently, i receives from j).
3310  }
3311 
3313  // FIGURE OUT WHO IS SENDING TO WHOM AND HOW MUCH
3314  // DO THIS IN THE PROCESS OF PACKING ALL OUTGOING DATA ACCORDING TO DESTINATION ID
3316 
3317  // loop over all columns to know from which images I can expect to receive something
3318  for (int j=0; j<numImages; ++j) {
3319  if (globalNeighbors (myImageID, j)) {
3320  recvIDs.push_back (j);
3321  }
3322  }
3323  const size_t numRecvs = recvIDs.size ();
3324 
3325  // we know how many we're sending to already
3326  // form a contiguous list of all data to be sent
3327  // track the number of entries for each ID
3328  Array<Details::CrsIJV<GlobalOrdinal, Scalar> > IJVSendBuffer;
3329  Array<size_t> sendSizes (sendIDs.size(), 0);
3330  size_t numSends = 0;
3331  for (typename Array<pair<int, GlobalOrdinal> >::const_iterator IdAndRow = IdsAndRows.begin();
3332  IdAndRow != IdsAndRows.end(); ++IdAndRow)
3333  {
3334  const int id = IdAndRow->first;
3335  const GO row = IdAndRow->second;
3336 
3337  // have we advanced to a new send?
3338  if (sendIDs[numSends] != id) {
3339  numSends++;
3340  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3341  sendIDs[numSends] != id, std::logic_error,
3342  ": internal logic error. Contact Tpetra team.");
3343  }
3344 
3345  // copy data for row into contiguous storage
3346  pair<Array<GO>, Array<Scalar> >& nonlocalsRow = nonlocals_[row];
3347  ArrayView<const GO> nonlocalsRow_colInds = nonlocalsRow.first ();
3348  ArrayView<const Scalar> nonlocalsRow_values = nonlocalsRow.second ();
3349  const size_type numNonlocalsRow = nonlocalsRow_colInds.size ();
3350 
3351  for (size_type k = 0; k < numNonlocalsRow; ++k) {
3352  const Scalar val = nonlocalsRow_values[k];
3353  const GO col = nonlocalsRow_colInds[k];
3354  IJVSendBuffer.push_back (Details::CrsIJV<GO, Scalar> (row, col, val));
3355  sendSizes[numSends]++;
3356  }
3357  }
3358  if (IdsAndRows.size () > 0) {
3359  numSends++; // one last increment, to make it a count instead of an index
3360  }
3361  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3362  static_cast<size_type> (numSends) != sendIDs.size(),
3363  std::logic_error, ": internal logic error. Contact Tpetra team.");
3364 
3365  // don't need this data anymore
3366  // clear it before we start allocating a bunch of new memory
3367  nonlocals_.clear ();
3368 
3370  // TRANSMIT SIZE INFO BETWEEN SENDERS AND RECEIVERS
3372  // perform non-blocking sends: send sizes to our recipients
3373  Array<RCP<CommRequest<int> > > sendRequests;
3374  for (size_t s = 0; s < numSends ; ++s) {
3375  // we'll fake the memory management, because all communication will be local to this method and the scope of our data
3376  sendRequests.push_back (isend<int, size_t> (comm, rcpFromRef (sendSizes[s]), sendIDs[s]));
3377  }
3378  // perform non-blocking receives: receive sizes from our senders
3379  Array<RCP<CommRequest<int> > > recvRequests;
3380  Array<size_t> recvSizes (numRecvs);
3381  for (size_t r = 0; r < numRecvs; ++r) {
3382  // we'll fake the memory management, because all communication
3383  // will be local to this method and the scope of our data
3384  recvRequests.push_back (ireceive<int, size_t> (comm, rcpFromRef (recvSizes[r]), recvIDs[r]));
3385  }
3386  // wait on all
3387  if (! sendRequests.empty ()) {
3388  waitAll (comm, sendRequests ());
3389  }
3390  if (! recvRequests.empty ()) {
3391  waitAll (comm, recvRequests ());
3392  }
3393  comm.barrier ();
3394  sendRequests.clear ();
3395  recvRequests.clear ();
3396 
3398  // NOW SEND/RECEIVE ALL ROW DATA
3400  // from the size info, build the ArrayViews into IJVSendBuffer
3401  Array<ArrayView<Details::CrsIJV<GO, Scalar> > > sendBuffers (numSends, null);
3402  {
3403  size_t cur = 0;
3404  for (size_t s=0; s<numSends; ++s) {
3405  sendBuffers[s] = IJVSendBuffer (cur, sendSizes[s]);
3406  cur += sendSizes[s];
3407  }
3408  }
3409  // perform non-blocking sends
3410  for (size_t s = 0; s < numSends; ++s) {
3411  // we'll fake the memory management, because all communication
3412  // will be local to this method and the scope of our data
3413  ArrayRCP<Details::CrsIJV<GO, Scalar> > tmparcp =
3414  arcp (sendBuffers[s].getRawPtr (), 0, sendBuffers[s].size (), false);
3415  sendRequests.push_back (isend<int, Details::CrsIJV<GlobalOrdinal,Scalar> > (comm, tmparcp, sendIDs[s]));
3416  }
3417  // calculate amount of storage needed for receives
3418  // setup pointers for the receives as well
3419  size_t totalRecvSize = std::accumulate (recvSizes.begin (), recvSizes.end (), 0);
3420  Array<Details::CrsIJV<GO, Scalar> > IJVRecvBuffer (totalRecvSize);
3421  // from the size info, build the ArrayViews into IJVRecvBuffer
3422  Array<ArrayView<Details::CrsIJV<GO, Scalar> > > recvBuffers (numRecvs, null);
3423  {
3424  size_t cur = 0;
3425  for (size_t r = 0; r < numRecvs; ++r) {
3426  recvBuffers[r] = IJVRecvBuffer (cur, recvSizes[r]);
3427  cur += recvSizes[r];
3428  }
3429  }
3430  // perform non-blocking recvs
3431  for (size_t r = 0; r < numRecvs ; ++r) {
3432  // we'll fake the memory management, because all communication
3433  // will be local to this method and the scope of our data
3434  ArrayRCP<Details::CrsIJV<GO, Scalar> > tmparcp =
3435  arcp (recvBuffers[r].getRawPtr (), 0, recvBuffers[r].size (), false);
3436  recvRequests.push_back (ireceive (comm, tmparcp, recvIDs[r]));
3437  }
3438  // perform waits
3439  if (! sendRequests.empty ()) {
3440  waitAll (comm, sendRequests ());
3441  }
3442  if (! recvRequests.empty ()) {
3443  waitAll (comm, recvRequests ());
3444  }
3445  comm.barrier ();
3446  sendRequests.clear ();
3447  recvRequests.clear ();
3448 
3450  // NOW PROCESS THE RECEIVED ROW DATA
3452  // TODO: Instead of adding one entry at a time, add one row at a time.
3453  // This requires re-sorting: entries arrive sorted by sending process, so
3454  // entries for a particular row may be noncontiguous if we received them
3455  // from several different processes, and rearranging the data may not be worth the trouble.
3456 
3457  typedef typename Array<Details::CrsIJV<GO, Scalar> >::const_iterator ijv_iter_type;
3458  if (this->isStaticGraph ()) {
3459  for (ijv_iter_type ijv = IJVRecvBuffer.begin ();
3460  ijv != IJVRecvBuffer.end (); ++ijv) {
3461  sumIntoGlobalValues (ijv->i, tuple (ijv->j), tuple (ijv->v));
3462  }
3463  }
3464  else { // Dynamic graph; can use insertGlobalValues ()
3465  for (ijv_iter_type ijv = IJVRecvBuffer.begin ();
3466  ijv != IJVRecvBuffer.end (); ++ijv) {
3467  try {
3468  insertGlobalValues (ijv->i, tuple (ijv->j), tuple (ijv->v));
3469  }
3470  catch (std::runtime_error &e) {
3471  std::ostringstream outmsg;
3472  outmsg << e.what() << std::endl
3473  << "caught in globalAssemble() in " << __FILE__ << ":" << __LINE__
3474  << std::endl ;
3475  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, outmsg.str());
3476  }
3477  }
3478  }
3479 
3480  // WHEW! THAT WAS TIRING!
3481  }
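 
 Illustrative usage sketch (not part of this file): globalAssemble() above
 ships the entries accumulated in nonlocals_ (rows inserted on processes that
 do not own them) to the owning processes. User code rarely calls it directly,
 because fillComplete() invokes it on demand, as in the sketch below; the row
 Map, values, and function name are assumptions made for the example.
 
   #include "Tpetra_CrsMatrix.hpp"
   #include "Teuchos_Tuple.hpp"
 
   typedef Tpetra::CrsMatrix<double> crs_matrix_type;
   typedef Tpetra::Map<> map_type;
   typedef map_type::global_ordinal_type GO;
 
   void insertIntoNonownedRow (const Teuchos::RCP<const map_type>& rowMap)
   {
     crs_matrix_type A (rowMap, 3);
 
     // Every process contributes an entry to global row 0, even if it does
     // not own that row; the entry is stashed until global assembly.
     A.insertGlobalValues (GO (0), Teuchos::tuple<GO> (0),
                           Teuchos::tuple<double> (1.0));
 
     // fillComplete() performs the globalAssemble() step: one all-reduce to
     // decide whether any process holds nonlocal entries, then the exchange.
     A.fillComplete ();
   }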
3482 
3483  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3484  void
3486  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3487  {
3488  if (! isStaticGraph ()) { // Don't resume fill of a nonowned graph.
3489  myGraph_->resumeFill (params);
3490  }
3491  clearGlobalConstants ();
3492  fillComplete_ = false;
3493  }
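 
 Illustrative usage sketch (not part of this file): resumeFill() above puts
 the matrix back into a fill-active state, and the cached Frobenius norm is
 cleared so that it will be recomputed on demand. A typical resume / modify /
 complete cycle on an assumed fill-complete matrix A might look as follows;
 the function name and the value 100.0 are hypothetical.
 
   #include "Tpetra_CrsMatrix.hpp"
   #include "Teuchos_Tuple.hpp"
 
   typedef Tpetra::CrsMatrix<double> crs_matrix_type;
   typedef Tpetra::Map<>::global_ordinal_type GO;
 
   void rescaleDiagonalEntry (crs_matrix_type& A, const GO gblRow)
   {
     A.resumeFill ();  // values may be modified again
 
     // Replace an existing entry; the structure (graph) does not change.
     A.replaceGlobalValues (gblRow, Teuchos::tuple<GO> (gblRow),
                            Teuchos::tuple<double> (100.0));
 
     A.fillComplete (A.getDomainMap (), A.getRangeMap ());
   }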
3494 
3495  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3496  void
3497  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
3498  computeGlobalConstants ()
3499  {
3500  // This method doesn't do anything. The analogous method in
3501  // CrsGraph does actually compute something.
3502  //
3503  // Oddly enough, clearGlobalConstants() clears frobNorm_ (by
3504  // setting it to -1), but computeGlobalConstants() does _not_
3505  // compute the Frobenius norm; this is done on demand in
3506  // getFrobeniusNorm(), and the result is cached there.
3507  }
3508 
3509  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3510  void
3511  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
3512  clearGlobalConstants () {
3513  // We use -1 to indicate that the Frobenius norm needs to be
3514  // recomputed, since the values might change between now and the
3515  // next fillComplete call.
3516  //
3517  // Oddly enough, clearGlobalConstants() clears frobNorm_, but
3518  // computeGlobalConstants() does _not_ compute the Frobenius norm;
3519  // this is done on demand in getFrobeniusNorm(), and the result is
3520  // cached there.
3521  frobNorm_ = -STM::one ();
3522  }
3523 
3524  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3525  void
3527  fillComplete (const RCP<ParameterList>& params)
3528  {
3529  TEUCHOS_TEST_FOR_EXCEPTION(
3530  getCrsGraph ().is_null (), std::logic_error, "Tpetra::CrsMatrix::"
3531  "fillComplete(params): getCrsGraph() returns null. "
3532  "This should not happen at this point. "
3533  "Please report this bug to the Tpetra developers.");
3534 
3535  if (isStaticGraph () && getCrsGraph ()->isFillComplete ()) {
3536  fillComplete (getCrsGraph ()->getDomainMap (),
3537  getCrsGraph ()->getRangeMap (), params);
3538  } else {
3539  fillComplete (getRowMap (), getRowMap (), params);
3540  }
3541  }
3542 
3543  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3544  void
3546  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3547  const Teuchos::RCP<const map_type>& rangeMap,
3548  const Teuchos::RCP<Teuchos::ParameterList>& params)
3549  {
3550  using Teuchos::ArrayRCP;
3551  using Teuchos::RCP;
3552  using Teuchos::rcp;
3553  const char tfecfFuncName[] = "fillComplete";
3554 
3555  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3556  ! isFillActive () || isFillComplete (),
3557  std::runtime_error, ": Matrix fill state must be active (isFillActive() "
3558  "must be true) before you may call fillComplete().");
3559  const int numProcs = getComm ()->getSize ();
3560 
3561  //
3562  // Read parameters from the input ParameterList.
3563  //
3564 
3565  // If true, the caller promises that no process did nonlocal
3566  // changes since the last call to fillComplete.
3567  bool assertNoNonlocalInserts = false;
3568  // If true, makeColMap sorts remote GIDs (within each remote
3569  // process' group).
3570  bool sortGhosts = true;
3571 
3572  if (! params.is_null ()) {
3573  assertNoNonlocalInserts = params->get ("No Nonlocal Changes",
3574  assertNoNonlocalInserts);
3575  if (params->isParameter ("sort column map ghost gids")) {
3576  sortGhosts = params->get ("sort column map ghost gids", sortGhosts);
3577  }
3578  else if (params->isParameter ("Sort column Map ghost GIDs")) {
3579  sortGhosts = params->get ("Sort column Map ghost GIDs", sortGhosts);
3580  }
3581  }
3582  // We also don't need to do global assembly if there is only one
3583  // process in the communicator.
3584  const bool needGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3585  // This parameter only matters if this matrix owns its graph.
3586  if (! myGraph_.is_null ()) {
3587  myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts;
3588  }
3589 
3590  if (! getCrsGraph()->indicesAreAllocated()) {
3591  if (hasColMap ()) {
3592  // We have a column Map, so use local indices.
3593  allocateValues (LocalIndices, GraphNotYetAllocated);
3594  } else {
3595  // We don't have a column Map, so use global indices.
3596  allocateValues (GlobalIndices, GraphNotYetAllocated);
3597  }
3598  }
3599  // Global assemble, if we need to. This call only costs a single
3600  // all-reduce if we didn't need global assembly after all.
3601  if (needGlobalAssemble) {
3602  globalAssemble ();
3603  }
3604  else {
3605  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3606  numProcs == 1 && nonlocals_.size() > 0,
3607  std::runtime_error, ": cannot have nonlocal entries on a serial run. "
3608  "An invalid entry (i.e., with row index not in the row Map) must have "
3609  "been submitted to the CrsMatrix.");
3610  }
3611 
3612  if (isStaticGraph ()) {
3613  // FIXME (mfh 18 Jun 2014) This check for correctness of the
3614  // input Maps incurs a penalty of two all-reduces for the
3615  // otherwise optimal const graph case.
3616  //
3617  // We could turn these (max) 2 all-reduces into (max) 1, by
3618  // fusing them. We could do this by adding a "locallySameAs"
3619  // method to Map, which would return one of four states:
3620  //
3621  // a. Certainly globally the same
3622  // b. Certainly globally not the same
3623  // c. Locally the same
3624  // d. Locally not the same
3625  //
3626  // The first two states don't require further communication.
3627  // The latter two states require an all-reduce to communicate
3628  // globally, but we only need one all-reduce, since we only need
3629  // to check whether at least one of the Maps is wrong.
3630  const bool domainMapsMatch = staticGraph_->getDomainMap ()->isSameAs (*domainMap);
3631  const bool rangeMapsMatch = staticGraph_->getRangeMap ()->isSameAs (*rangeMap);
3632 
3633  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3634  ! domainMapsMatch, std::runtime_error,
3635  ": The CrsMatrix's domain Map does not match the graph's domain Map. "
3636  "The graph cannot be changed because it was given to the CrsMatrix "
3637  "constructor as const. You can fix this by passing in the graph's "
3638  "domain Map and range Map to the matrix's fillComplete call.");
3639 
3640  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3641  ! rangeMapsMatch, std::runtime_error,
3642  ": The CrsMatrix's range Map does not match the graph's range Map. "
3643  "The graph cannot be changed because it was given to the CrsMatrix "
3644  "constructor as const. You can fix this by passing in the graph's "
3645  "domain Map and range Map to the matrix's fillComplete call.");
3646  }
3647  else {
3648  // Set the graph's domain and range Maps. This will clear the
3649  // Import if the domain Map has changed (is a different
3650  // pointer), and the Export if the range Map has changed (is a
3651  // different pointer).
3652  myGraph_->setDomainRangeMaps (domainMap, rangeMap);
3653 
3654  // Make the graph's column Map, if necessary.
3655  if (! myGraph_->hasColMap ()) {
3656  myGraph_->makeColMap ();
3657  }
3658 
3659  // Make indices local, if necessary. The method won't do
3660  // anything if the graph is already locally indexed.
3661  myGraph_->makeIndicesLocal ();
3662 
3663  if (! myGraph_->isSorted ()) {
3664  sortEntries ();
3665  }
3666  if (! myGraph_->isMerged ()) {
3667  mergeRedundantEntries ();
3668  }
3669  // Make the Import and Export, if they haven't been made already.
3670  myGraph_->makeImportExport ();
3671  myGraph_->computeGlobalConstants ();
3672  myGraph_->fillComplete_ = true;
3673  myGraph_->checkInternalState ();
3674  }
3676  // fill local objects; will fill and finalize local graph if appropriate
3677  if (myGraph_.is_null ()) {
3678  // The matrix does _not_ own the graph, and the graph's
3679  // structure is already fixed, so just fill the local matrix.
3680  fillLocalMatrix (params);
3681  } else {
3682  // The matrix _does_ own the graph, so fill the local graph at
3683  // the same time as the local matrix.
3684  fillLocalGraphAndMatrix (params);
3685  }
3686 
3687  // Once we've initialized the sparse kernels, we're done with the
3688  // local objects. We may now release them and their memory, since
3689  // they will persist in the local sparse ops if necessary. We
3690  // keep the local graph if the parameters tell us to do so.
3691 
3692  // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used.
3693 
3694  fillComplete_ = true; // Now we're fill complete!
3695  checkInternalState ();
3696  }
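 
 Illustrative usage sketch (not part of this file): the fillComplete()
 overload above honors two optional parameters, "No Nonlocal Changes" and
 "sort column map ghost gids" / "Sort column Map ghost GIDs". The sketch
 below supplies the first one; the Maps and the function name are assumed.
 
   #include "Tpetra_CrsMatrix.hpp"
   #include "Teuchos_ParameterList.hpp"
 
   typedef Tpetra::CrsMatrix<double> crs_matrix_type;
   typedef Tpetra::Map<> map_type;
 
   void finishFill (crs_matrix_type& A,
                    const Teuchos::RCP<const map_type>& domainMap,
                    const Teuchos::RCP<const map_type>& rangeMap)
   {
     Teuchos::RCP<Teuchos::ParameterList> params = Teuchos::parameterList ();
     // The caller's promise that no process inserted into nonowned rows;
     // this lets fillComplete() skip the globalAssemble() check entirely.
     params->set ("No Nonlocal Changes", true);
 
     A.fillComplete (domainMap, rangeMap, params);
   }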
3697 
3698  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3699  void
3701  expertStaticFillComplete (const Teuchos::RCP<const map_type> & domainMap,
3702  const Teuchos::RCP<const map_type> & rangeMap,
3703  const Teuchos::RCP<const import_type>& importer,
3704  const Teuchos::RCP<const export_type>& exporter,
3705  const Teuchos::RCP<Teuchos::ParameterList> &params)
3706  {
3707 #ifdef HAVE_TPETRA_MMM_TIMINGS
3708  std::string label;
3709  if(!params.is_null())
3710  label = params->get("Timer Label",label);
3711  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
3712  using Teuchos::TimeMonitor;
3713  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-Graph"))));
3714 #endif
3715 
3716  const char tfecfFuncName[] = "expertStaticFillComplete: ";
3717  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
3718  std::runtime_error, "Matrix fill state must be active (isFillActive() "
3719  "must be true) before calling fillComplete().");
3720  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3721  myGraph_.is_null (), std::logic_error, "myGraph_ is null. This is not allowed.");
3722 
3723 
3724  // We will presume globalAssemble is not needed, so we do the ESFC on the graph
3725  myGraph_->expertStaticFillComplete (domainMap, rangeMap, importer, exporter,params);
3726 
3727 #ifdef HAVE_TPETRA_MMM_TIMINGS
3728  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-cGC"))));
3729 #endif
3730 
3731  computeGlobalConstants ();
3732 
3733 #ifdef HAVE_TPETRA_MMM_TIMINGS
3734  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-fLGAM"))));
3735 #endif
3736 
3737  // Fill the local graph and matrix
3738  fillLocalGraphAndMatrix (params);
3739 
3740  // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used.
3741 
3742  // Now we're fill complete!
3743  fillComplete_ = true;
3744 
3745  // Sanity checks at the end.
3746 #ifdef HAVE_TPETRA_DEBUG
3747  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive(), std::logic_error,
3748  ": We're at the end of fillComplete(), but isFillActive() is true. "
3749  "Please report this bug to the Tpetra developers.");
3750  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete(), std::logic_error,
3751  ": We're at the end of fillComplete(), but isFillActive() is true. "
3752  "Please report this bug to the Tpetra developers.");
3753 #endif // HAVE_TPETRA_DEBUG
3754 
3755 #ifdef HAVE_TPETRA_MMM_TIMINGS
3756  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-cIS"))));
3757 #endif
3758 
3759  checkInternalState ();
3760  }
3761 
3762  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3763  void
3764  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
3765  sortEntries ()
3766  {
3767  TEUCHOS_TEST_FOR_EXCEPTION(
3768  isStaticGraph (), std::runtime_error, "Tpetra::CrsMatrix::sortEntries: "
3769  "Cannot sort with static graph.");
3770  if (! myGraph_->isSorted ()) {
3771  const LocalOrdinal lclNumRows =
3772  static_cast<LocalOrdinal> (this->getNodeNumRows ());
3773  for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
3774  const RowInfo rowInfo = myGraph_->getRowInfo (row);
3775  Teuchos::ArrayView<impl_scalar_type> rv = this->getViewNonConst (rowInfo);
3776  myGraph_->template sortRowIndicesAndValues<impl_scalar_type> (rowInfo, rv);
3777  }
3778  // we just sorted every row
3779  myGraph_->indicesAreSorted_ = true;
3780  }
3781  }
3782 
3783  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3784  void
3785  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
3786  mergeRedundantEntries ()
3787  {
3788  TEUCHOS_TEST_FOR_EXCEPTION(
3789  isStaticGraph (), std::runtime_error, "Tpetra::CrsMatrix::"
3790  "mergeRedundantEntries: Cannot merge with static graph.");
3791  if (! myGraph_->isMerged ()) {
3792  const LocalOrdinal lclNumRows =
3793  static_cast<LocalOrdinal> (this->getNodeNumRows ());
3794  for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
3795  const RowInfo rowInfo = myGraph_->getRowInfo (row);
3796  Teuchos::ArrayView<impl_scalar_type> rv = this->getViewNonConst (rowInfo);
3797  myGraph_->template mergeRowIndicesAndValues<impl_scalar_type> (rowInfo, rv);
3798  }
3799  myGraph_->noRedundancies_ = true; // we just merged every row
3800  }
3801  }
3802 
3803  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3804  void
3805  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
3806  applyNonTranspose (const MV& X_in,
3807  MV& Y_in,
3808  Scalar alpha,
3809  Scalar beta) const
3810  {
3811  using Teuchos::null;
3812  using Teuchos::RCP;
3813  using Teuchos::rcp;
3814  using Teuchos::rcp_const_cast;
3815  using Teuchos::rcpFromRef;
3816  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
3817  const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
3818 
3819  // mfh 05 Jun 2014: Special case for alpha == 0. I added this to
3820  // fix an Ifpack2 test (RILUKSingleProcessUnitTests), which was
3821  // failing only for the Kokkos refactor version of Tpetra. It's a
3822  // good idea regardless to have the bypass.
3823  if (alpha == ZERO) {
3824  if (beta == ZERO) {
3825  Y_in.putScalar (ZERO);
3826  } else if (beta != ONE) {
3827  Y_in.scale (beta);
3828  }
3829  return;
3830  }
3831 
3832  // It's possible that X is a view of Y or vice versa. We don't
3833  // allow this (apply() requires that X and Y not alias one
3834  // another), but it's helpful to detect and work around this case.
3835  // We don't try to detect the more subtle cases (e.g., one is a
3836  // subview of the other, but their initial pointers differ). We
3837  // only need to do this if this matrix's Import is trivial;
3838  // otherwise, we don't actually apply the operator from X into Y.
3839 
3840  RCP<const import_type> importer = this->getGraph ()->getImporter ();
3841  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
3842 
3843  // If beta == 0, then the output MV will be overwritten; none of
3844  // its entries should be read. (Sparse BLAS semantics say that we
3845  // must ignore any Inf or NaN entries in Y_in, if beta is zero.)
3846  // This matters if we need to do an Export operation; see below.
3847  const bool Y_is_overwritten = (beta == ZERO);
3848 
3849  // We treat the case of a replicated MV output specially.
3850  const bool Y_is_replicated = ! Y_in.isDistributed ();
3851 
3852  // This is part of the special case for replicated MV output.
3853  // We'll let each process do its thing, but do an all-reduce at
3854  // the end to sum up the results. Setting beta=0 on all processes
3855  // but Proc 0 makes the math work out for the all-reduce. (This
3856  // assumes that the replicated data is correctly replicated, so
3857  // that the data are the same on all processes.)
3858  if (Y_is_replicated && this->getComm ()->getRank () > 0) {
3859  beta = ZERO;
3860  }
3861 
3862  // Temporary MV for Import operation. After the block of code
3863  // below, this will be an (Imported if necessary) column Map MV
3864  // ready to give to localMultiply().
3865  RCP<const MV> X_colMap;
3866  if (importer.is_null ()) {
3867  if (! X_in.isConstantStride ()) {
3868  // Not all sparse mat-vec kernels can handle an input MV with
3869  // nonconstant stride correctly, so we have to copy it in that
3870  // case into a constant stride MV. To make a constant stride
3871  // copy of X_in, we force creation of the column (== domain)
3872  // Map MV (if it hasn't already been created, else fetch the
3873  // cached copy). This avoids creating a new MV each time.
3874  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in, true);
3875  Tpetra::deep_copy (*X_colMapNonConst, X_in);
3876  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
3877  }
3878  else {
3879  // The domain and column Maps are the same, so do the local
3880  // multiply using the domain Map input MV X_in.
3881  X_colMap = rcpFromRef (X_in);
3882  }
3883  }
3884  else {
3885  // We're doing an Import anyway, which will copy the relevant
3886  // elements of the domain Map MV X_in into a separate column Map
3887  // MV. Thus, we don't have to worry whether X_in is constant
3888  // stride.
3889  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
3890 
3891  // Import from the domain Map MV to the column Map MV.
3892  X_colMapNonConst->doImport (X_in, *importer, INSERT);
3893  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
3894  }
3895 
3896  // Temporary MV for Export operation, or for copying a nonconstant
3897  // stride output MV into a constant stride MV.
3898  RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
3899 
3900  // If we have a nontrivial Export object, we must perform an
3901  // Export. In that case, the local multiply result will go into
3902  // the row Map multivector. We don't have to make a
3903  // constant-stride version of Y_in in this case, because we had to
3904  // make a constant stride Y_rowMap MV and do an Export anyway.
3905  if (! exporter.is_null ()) {
3906  this->template localMultiply<Scalar, Scalar> (*X_colMap, *Y_rowMap,
3907  Teuchos::NO_TRANS,
3908  alpha, ZERO);
3909  // If we're overwriting the output MV Y_in completely (beta ==
3910  // 0), then make sure that it is filled with zeros before we do
3911  // the Export. Otherwise, the ADD combine mode will use data in
3912  // Y_in, which is supposed to be zero.
3913  if (Y_is_overwritten) {
3914  Y_in.putScalar (ZERO);
3915  }
3916  else {
3917  // Scale the output MV by beta, so that the Export sums in the
3918  // mat-vec contribution: Y_in = beta*Y_in + alpha*A*X_in.
3919  Y_in.scale (beta);
3920  }
3921  // Do the Export operation.
3922  Y_in.doExport (*Y_rowMap, *exporter, ADD);
3923  }
3924  else { // Don't do an Export: row Map and range Map are the same.
3925  //
3926  // If Y_in does not have constant stride, or if the column Map
3927  // MV aliases Y_in, then we can't let the kernel write directly
3928  // to Y_in. Instead, we have to use the cached row (== range)
3929  // Map MV as temporary storage.
3930  //
3931  // FIXME (mfh 05 Jun 2014) This test for aliasing only tests if
3932  // the user passed in the same MultiVector for both X and Y. It
3933  // won't detect whether one MultiVector views the other. We
3934  // should also check the MultiVectors' raw data pointers.
3935  if (! Y_in.isConstantStride () || X_colMap.getRawPtr () == &Y_in) {
3936  // Force creating the MV if it hasn't been created already.
3937  // This will reuse a previously created cached MV.
3938  Y_rowMap = getRowMapMultiVector (Y_in, true);
3939 
3940  // If beta == 0, we don't need to copy Y_in into Y_rowMap,
3941  // since we're overwriting it anyway.
3942  if (beta != ZERO) {
3943  Tpetra::deep_copy (*Y_rowMap, Y_in);
3944  }
3945  this->template localMultiply<Scalar, Scalar> (*X_colMap,
3946  *Y_rowMap,
3947  Teuchos::NO_TRANS,
3948  alpha, beta);
3949  Tpetra::deep_copy (Y_in, *Y_rowMap);
3950  }
3951  else {
3952  this->template localMultiply<Scalar, Scalar> (*X_colMap, Y_in,
3953  Teuchos::NO_TRANS,
3954  alpha, beta);
3955  }
3956  }
3957 
3958  // If the range Map is a locally replicated Map, sum up
3959  // contributions from each process. We set beta = 0 on all
3960  // processes but Proc 0 initially, so this will handle the scaling
3961  // factor beta correctly.
3962  if (Y_is_replicated) {
3963  Y_in.reduce ();
3964  }
3965  }
3966 
3967  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3968  void
3969  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
3970  applyTranspose (const MV& X_in,
3971  MV& Y_in,
3972  const Teuchos::ETransp mode,
3973  Scalar alpha,
3974  Scalar beta) const
3975  {
3976  using Teuchos::null;
3977  using Teuchos::RCP;
3978  using Teuchos::rcp;
3979  using Teuchos::rcp_const_cast;
3980  using Teuchos::rcpFromRef;
3981  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
3982 
3983  // Take shortcuts for alpha == 0.
3984  if (alpha == ZERO) {
3985  // Follow the Sparse BLAS convention by ignoring both the matrix
3986  // and X_in, in this case.
3987  if (beta == ZERO) {
3988  // Follow the Sparse BLAS convention by overwriting any Inf or
3989  // NaN values in Y_in, in this case.
3990  Y_in.putScalar (ZERO);
3991  }
3992  else {
3993  Y_in.scale (beta);
3994  }
3995  return;
3996  }
3997 
3998  const size_t numVectors = X_in.getNumVectors ();
3999 
4000  // We don't allow X_in and Y_in to alias one another. It's hard
4001  // to check this, because advanced users could create views from
4002  // raw pointers. However, if X_in and Y_in reference the same
4003  // object, we will do the user a favor by copying X into new
4004  // storage (with a warning). We only need to do this if we have
4005  // trivial importers; otherwise, we don't actually apply the
4006  // operator from X into Y.
4007  RCP<const import_type> importer = this->getGraph ()->getImporter ();
4008  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
4009  // access X indirectly, in case we need to create temporary storage
4010  RCP<const MV> X;
4011 
4012  // some parameters for below
4013  const bool Y_is_replicated = ! Y_in.isDistributed ();
4014  const bool Y_is_overwritten = (beta == ZERO);
4015  if (Y_is_replicated && this->getComm ()->getRank () > 0) {
4016  beta = ZERO;
4017  }
4018 
4019  // The kernels do not allow input or output with nonconstant stride.
4020  if (! X_in.isConstantStride () && importer.is_null ()) {
4021  X = rcp (new MV (X_in, Teuchos::Copy)); // Constant-stride copy of X_in
4022  } else {
4023  X = rcpFromRef (X_in); // Reference to X_in
4024  }
4025 
4026  // Set up temporary multivectors for Import and/or Export.
4027  if (importer != null) {
4028  if (importMV_ != null && importMV_->getNumVectors() != numVectors) {
4029  importMV_ = null;
4030  }
4031  if (importMV_ == null) {
4032  importMV_ = rcp (new MV (this->getColMap (), numVectors));
4033  }
4034  }
4035  if (exporter != null) {
4036  if (exportMV_ != null && exportMV_->getNumVectors() != numVectors) {
4037  exportMV_ = null;
4038  }
4039  if (exportMV_ == null) {
4040  exportMV_ = rcp (new MV (this->getRowMap (), numVectors));
4041  }
4042  }
4043 
4044  // If we have a non-trivial exporter, we must import elements that
4045  // are permuted or are on other processors.
4046  if (! exporter.is_null ()) {
4047  exportMV_->doImport (X_in, *exporter, INSERT);
4048  X = exportMV_; // multiply out of exportMV_
4049  }
4050 
4051  // If we have a non-trivial importer, we must export elements that
4052  // are permuted or belong to other processors. We will compute
4053  // solution into the to-be-exported MV; get a view.
4054  if (importer != null) {
4055  // FIXME (mfh 18 Apr 2015) Temporary fix suggested by Clark
4056  // Dohrmann on Fri 17 Apr 2015. At some point, we need to go
4057  // back and figure out why this helps. importMV_ SHOULD be
4058  // completely overwritten in the localMultiply() call below,
4059  // because beta == ZERO there.
4060  importMV_->putScalar (ZERO);
4061  // Do the local computation.
4062  this->template localMultiply<Scalar, Scalar> (*X, *importMV_, mode,
4063  alpha, ZERO);
4064  if (Y_is_overwritten) {
4065  Y_in.putScalar (ZERO);
4066  } else {
4067  Y_in.scale (beta);
4068  }
4069  Y_in.doExport (*importMV_, *importer, ADD);
4070  }
4071  // otherwise, multiply into Y
4072  else {
4073  // can't multiply in-situ; can't multiply into non-strided multivector
4074  //
4075  // FIXME (mfh 05 Jun 2014) This test for aliasing only tests if
4076  // the user passed in the same MultiVector for both X and Y. It
4077  // won't detect whether one MultiVector views the other. We
4078  // should also check the MultiVectors' raw data pointers.
4079  if (! Y_in.isConstantStride () || X.getRawPtr () == &Y_in) {
4080  // Make a deep copy of Y_in, into which to write the multiply result.
4081  MV Y (Y_in, Teuchos::Copy);
4082  this->template localMultiply<Scalar, Scalar> (*X, Y, mode, alpha, beta);
4083  Tpetra::deep_copy (Y_in, Y);
4084  } else {
4085  this->template localMultiply<Scalar, Scalar> (*X, Y_in, mode, alpha, beta);
4086  }
4087  }
4088 
4089  // If the range Map is a locally replicated map, sum the
4090  // contributions from each process. (That's why we set beta=0
4091  // above for all processes but Proc 0.)
4092  if (Y_is_replicated) {
4093  Y_in.reduce ();
4094  }
4095  }
4096 
4097  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4098  void
4102  Teuchos::ETransp mode,
4103  Scalar alpha,
4104  Scalar beta) const
4105  {
4106  TEUCHOS_TEST_FOR_EXCEPTION(
4107  ! isFillComplete (), std::runtime_error,
4108  "Tpetra::CrsMatrix::apply(): Cannot call apply() until fillComplete() "
4109  "has been called.");
4110 
4111  if (mode == Teuchos::NO_TRANS) {
4112  applyNonTranspose (X, Y, alpha, beta);
4113  } else {
4114  // Thyra implicitly assumed that Y gets set to zero (or overwritten)
4115  // when beta == 0. That was not the case for the transpose apply in a
4116  // multithreaded environment, where a multiply with subsequent atomic_adds
4117  // is used and zero is effectively not special-cased. Do the explicit set
4118  // to zero here; this also catches cases where Y contains NaN or Inf.
4119  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4120  if(beta == ZERO)
4121  Y.putScalar (ZERO);
4122  applyTranspose (X, Y, mode, alpha, beta);
4123  }
4124  }
4125 
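  // Example usage of apply() (a usage sketch, not code from this file):
  // compute Y := 2*A*X + 3*Y, then Y := A^T * X.  Here A is assumed to be
  // a fill-complete Tpetra::CrsMatrix<double>, X a MultiVector in A's
  // domain Map, and Y a MultiVector in A's range Map; for the transpose
  // apply, the roles of the domain and range Maps swap.
  //
  //   typedef Tpetra::CrsMatrix<double> crs_matrix_type;
  //   typedef crs_matrix_type::scalar_type scalar_type;
  //
  //   // Y := 2*A*X + 3*Y
  //   A.apply (X, Y, Teuchos::NO_TRANS,
  //            static_cast<scalar_type> (2.0),
  //            static_cast<scalar_type> (3.0));
  //
  //   // Y := A^T * X (alpha defaults to one and beta to zero, so Y is
  //   // overwritten; reusing the same X and Y assumes A's domain and
  //   // range Maps coincide).
  //   A.apply (X, Y, Teuchos::TRANS);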
4126  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4127  void
4132  const Scalar& dampingFactor,
4133  const ESweepDirection direction,
4134  const int numSweeps) const
4135  {
4136  reorderedGaussSeidel (B, X, D, Teuchos::null, dampingFactor, direction, numSweeps);
4137  }
4138 
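  // Example usage of gaussSeidel() (a usage sketch, not code from this
  // file): two symmetric sweeps on A*x = b with damping factor one.  This
  // method requires that A's row, domain, and range Maps all be the same,
  // and D is expected to hold the inverses of A's diagonal entries (built
  // here with getLocalDiagCopy() and reciprocal()).  A, x, and b are
  // placeholders for objects the caller already owns.
  //
  //   typedef Tpetra::Vector<double> vec_type;
  //   typedef Teuchos::ScalarTraits<double> STS;
  //
  //   vec_type invDiag (A.getRowMap ());
  //   A.getLocalDiagCopy (invDiag);  // invDiag := diag(A)
  //   invDiag.reciprocal (invDiag);  // invDiag := 1 ./ diag(A)
  //
  //   A.gaussSeidel (b, x, invDiag, STS::one (), Tpetra::Symmetric, 2);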
4139  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4140  void
4145  const Teuchos::ArrayView<LocalOrdinal>& rowIndices,
4146  const Scalar& dampingFactor,
4147  const ESweepDirection direction,
4148  const int numSweeps) const
4149  {
4150  using Teuchos::null;
4151  using Teuchos::RCP;
4152  using Teuchos::rcp;
4153  using Teuchos::rcp_const_cast;
4154  using Teuchos::rcpFromRef;
4155  typedef Scalar ST;
4156 
4157  TEUCHOS_TEST_FOR_EXCEPTION(
4158  isFillComplete() == false, std::runtime_error,
4159  "Tpetra::CrsMatrix::gaussSeidel: cannot call this method until "
4160  "fillComplete() has been called.");
4161  TEUCHOS_TEST_FOR_EXCEPTION(
4162  numSweeps < 0,
4163  std::invalid_argument,
4164  "Tpetra::CrsMatrix::gaussSeidel: The number of sweeps must be "
4165  "nonnegative, but you provided numSweeps = " << numSweeps << " < 0.");
4166 
4167  // Translate from global to local sweep direction.
4168  // While doing this, validate the input.
4169  KokkosClassic::ESweepDirection localDirection;
4170  if (direction == Forward) {
4171  localDirection = KokkosClassic::Forward;
4172  }
4173  else if (direction == Backward) {
4174  localDirection = KokkosClassic::Backward;
4175  }
4176  else if (direction == Symmetric) {
4177  // We'll control local sweep direction manually.
4178  localDirection = KokkosClassic::Forward;
4179  }
4180  else {
4181  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument,
4182  "Tpetra::CrsMatrix::gaussSeidel: The 'direction' enum does not have "
4183  "any of its valid values: Forward, Backward, or Symmetric.");
4184  }
4185 
4186  if (numSweeps == 0) {
4187  return; // Nothing to do.
4188  }
4189 
4190  // We don't need the Export object because this method assumes
4191  // that the row, domain, and range Maps are the same. We do need
4192  // the Import object, if there is one, though.
4193  RCP<const import_type> importer = this->getGraph()->getImporter();
4194  RCP<const export_type> exporter = this->getGraph()->getExporter();
4195  TEUCHOS_TEST_FOR_EXCEPTION(
4196  ! exporter.is_null (), std::runtime_error,
4197  "Tpetra's gaussSeidel implementation requires that the row, domain, "
4198  "and range Maps be the same. This cannot be the case, because the "
4199  "matrix has a nontrivial Export object.");
4200 
4201  RCP<const map_type> domainMap = this->getDomainMap ();
4202  RCP<const map_type> rangeMap = this->getRangeMap ();
4203  RCP<const map_type> rowMap = this->getGraph ()->getRowMap ();
4204  RCP<const map_type> colMap = this->getGraph ()->getColMap ();
4205 
4206 #ifdef HAVE_TEUCHOS_DEBUG
4207  {
4208  // The relation 'isSameAs' is transitive. It's also a
4209  // collective, so we don't have to do a "shared" test for
4210  // exception (i.e., a global reduction on the test value).
4211  TEUCHOS_TEST_FOR_EXCEPTION(
4212  ! X.getMap ()->isSameAs (*domainMap),
4213  std::runtime_error,
4214  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
4215  "multivector X be in the domain Map of the matrix.");
4216  TEUCHOS_TEST_FOR_EXCEPTION(
4217  ! B.getMap ()->isSameAs (*rangeMap),
4218  std::runtime_error,
4219  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
4220  "B be in the range Map of the matrix.");
4221  TEUCHOS_TEST_FOR_EXCEPTION(
4222  ! D.getMap ()->isSameAs (*rowMap),
4223  std::runtime_error,
4224  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
4225  "D be in the row Map of the matrix.");
4226  TEUCHOS_TEST_FOR_EXCEPTION(
4227  ! rowMap->isSameAs (*rangeMap),
4228  std::runtime_error,
4229  "Tpetra::CrsMatrix::gaussSeidel requires that the row Map and the "
4230  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
4231  TEUCHOS_TEST_FOR_EXCEPTION(
4232  ! domainMap->isSameAs (*rangeMap),
4233  std::runtime_error,
4234  "Tpetra::CrsMatrix::gaussSeidel requires that the domain Map and "
4235  "the range Map of the matrix be the same.");
4236  }
4237 #else
4238  // Forestall any compiler warnings for unused variables.
4239  (void) rangeMap;
4240  (void) rowMap;
4241 #endif // HAVE_TEUCHOS_DEBUG
4242 
4243  // If B is not constant stride, copy it into a constant stride
4244  // multivector. We'll handle the right-hand side B first and deal
4245  // with X right before the sweeps, to improve locality of the
4246  // first sweep. (If the problem is small enough, then that will
4247  // hopefully keep more of the entries of X in cache. This
4248  // optimizes for the typical case of a small number of sweeps.)
4249  RCP<const MV> B_in;
4250  if (B.isConstantStride()) {
4251  B_in = rcpFromRef (B);
4252  }
4253  else {
4254  // The range Map and row Map are the same in this case, so we
4255  // can use the (possibly cached) row Map multivector to store a
4256  // constant stride copy of B. We don't have to copy back, since
4257  // Gauss-Seidel won't modify B.
4258  RCP<MV> B_in_nonconst = getRowMapMultiVector (B, true);
4259  deep_copy (*B_in_nonconst, B); // Copy from B into B_in(_nonconst).
4260  B_in = rcp_const_cast<const MV> (B_in_nonconst);
4261 
4262  TPETRA_EFFICIENCY_WARNING(
4263  ! B.isConstantStride (),
4264  std::runtime_error,
4265  "gaussSeidel: The current implementation of the Gauss-Seidel kernel "
4266  "requires that X and B both have constant stride. Since B does not "
4267  "have constant stride, we had to make a copy. This is a limitation of "
4268  "the current implementation and not your fault, but we still report it "
4269  "as an efficiency warning for your information.");
4270  }
4271 
4272  // If X is not constant stride, copy it into a constant stride
4273  // multivector. Also, make the column Map multivector X_colMap,
4274  // and its domain Map view X_domainMap. (X actually must be a
4275  // domain Map view of a column Map multivector; exploit this, if X
4276  // has constant stride.)
4277 
4278  RCP<MV> X_domainMap;
4279  RCP<MV> X_colMap;
4280  bool copiedInput = false;
4281 
4282  if (importer.is_null ()) { // Domain and column Maps are the same.
4283  if (X.isConstantStride ()) {
4284  X_domainMap = rcpFromRef (X);
4285  X_colMap = X_domainMap;
4286  copiedInput = false;
4287  }
4288  else {
4289  // Get a temporary column Map multivector, make a domain Map
4290  // view of it, and copy X into the domain Map view. We have
4291  // to copy here because we won't be doing Import operations.
4292  X_colMap = getColumnMapMultiVector (X, true);
4293  X_domainMap = X_colMap; // Domain and column Maps are the same.
4294  deep_copy (*X_domainMap, X); // Copy X into the domain Map view.
4295  copiedInput = true;
4297  ! X.isConstantStride (), std::runtime_error,
4298  "Tpetra::CrsMatrix::gaussSeidel: The current implementation of the "
4299  "Gauss-Seidel kernel requires that X and B both have constant "
4300  "stride. Since X does not have constant stride, we had to make a "
4301  "copy. This is a limitation of the current implementation and not "
4302  "your fault, but we still report it as an efficiency warning for "
4303  "your information.");
4304  }
4305  }
4306  else { // We will be doing Import operations in the sweeps.
4307  if (X.isConstantStride ()) {
4308  X_domainMap = rcpFromRef (X);
4309  // This kernel assumes that X is a domain Map view of a column
4310  // Map multivector. We check that this is valid only if the CMake
4311  // option Teuchos_ENABLE_DEBUG is ON.
4312  X_colMap = X_domainMap->offsetViewNonConst (colMap, 0);
4313 
4314  // FIXME (mfh 19 Mar 2013) Do we need to fill the remote
4315  // entries of X_colMap with zeros? Do we need to fill all of
4316  // X_domainMap initially with zeros? Ifpack
4317  // (Ifpack_PointRelaxation.cpp, line 906) creates an entirely
4318  // new MultiVector each time.
4319 
4320  // Do the first Import for the first sweep. This simplifies
4321  // the logic in the sweeps.
4322  X_colMap->doImport (X, *importer, INSERT);
4323  copiedInput = false;
4324  }
4325  else {
4326  // Get a temporary column Map multivector X_colMap, and make a
4327  // domain Map view X_domainMap of it. Instead of copying, we
4328  // do an Import from X into X_domainMap. This saves us a
4329  // copy, since the Import has to copy the data anyway.
4330  X_colMap = getColumnMapMultiVector (X, true);
4331  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
4332  X_colMap->doImport (X, *importer, INSERT);
4333  copiedInput = true;
4334  TPETRA_EFFICIENCY_WARNING(
4335  ! X.isConstantStride (), std::runtime_error,
4336  "Tpetra::CrsMatrix::gaussSeidel: The current implementation of the "
4337  "Gauss-Seidel kernel requires that X and B both have constant stride. "
4338  "Since X does not have constant stride, we had to make a copy. "
4339  "This is a limitation of the current implementation and not your fault, "
4340  "but we still report it as an efficiency warning for your information.");
4341  }
4342  }
4343 
4344  for (int sweep = 0; sweep < numSweeps; ++sweep) {
4345  if (! importer.is_null () && sweep > 0) {
4346  // We already did the first Import for the zeroth sweep.
4347  X_colMap->doImport (*X_domainMap, *importer, INSERT);
4348  }
4349 
4350  // Do local Gauss-Seidel.
4351  if (direction != Symmetric) {
4352  if (rowIndices.is_null ()) {
4353  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
4354  dampingFactor,
4355  localDirection);
4356  }
4357  else {
4358  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
4359  D, rowIndices,
4360  dampingFactor,
4361  localDirection);
4362  }
4363  }
4364  else { // direction == Symmetric
4365  const bool doImportBetweenDirections = false;
4366  if (rowIndices.is_null ()) {
4367  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
4368  dampingFactor,
4369  KokkosClassic::Forward);
4370  // mfh 18 Mar 2013: Aztec's implementation of "symmetric
4371  // Gauss-Seidel" does _not_ do an Import between the forward
4372  // and backward sweeps. This makes sense, because Aztec
4373  // considers "symmetric Gauss-Seidel" a subdomain solver.
4374  if (doImportBetweenDirections) {
4375  // Communicate again before the Backward sweep.
4376  if (! importer.is_null ()) {
4377  X_colMap->doImport (*X_domainMap, *importer, INSERT);
4378  }
4379  }
4380  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
4381  dampingFactor,
4382  KokkosClassic::Backward);
4383  }
4384  else {
4385  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
4386  D, rowIndices,
4387  dampingFactor,
4388  KokkosClassic::Forward);
4389  if (doImportBetweenDirections) {
4390  // Communicate again before the Backward sweep.
4391  if (! importer.is_null ()) {
4392  X_colMap->doImport (*X_domainMap, *importer, INSERT);
4393  }
4394  }
4395  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
4396  D, rowIndices,
4397  dampingFactor,
4398  KokkosClassic::Backward);
4399  }
4400  }
4401  }
4402 
4403  if (copiedInput) {
4404  deep_copy (X, *X_domainMap); // Copy back from X_domainMap to X.
4405  }
4406  }
4407 
4408  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4409  void
4414  const Scalar& dampingFactor,
4415  const ESweepDirection direction,
4416  const int numSweeps,
4417  const bool zeroInitialGuess) const
4418  {
4419  reorderedGaussSeidelCopy (X, B, D, Teuchos::null, dampingFactor, direction,
4420  numSweeps, zeroInitialGuess);
4421  }
4422 
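  // Example usage of gaussSeidelCopy() (a usage sketch, not code from
  // this file): same setup as the gaussSeidel() example above, but note
  // that the solution vector x comes first in the argument list.  Passing
  // zeroInitialGuess = true tells the kernel to treat x as zero, which
  // avoids copying (or importing) x's initial contents.
  //
  //   A.gaussSeidelCopy (x, b, invDiag, STS::one (),
  //                      Tpetra::Symmetric, 2, true);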
4423  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4424  void
4429  const Teuchos::ArrayView<LocalOrdinal>& rowIndices,
4430  const Scalar& dampingFactor,
4431  const ESweepDirection direction,
4432  const int numSweeps,
4433  const bool zeroInitialGuess) const
4434  {
4435  using Teuchos::null;
4436  using Teuchos::RCP;
4437  using Teuchos::rcp;
4438  using Teuchos::rcpFromRef;
4439  using Teuchos::rcp_const_cast;
4440  typedef Scalar ST;
4441  const char prefix[] = "Tpetra::CrsMatrix::(reordered)gaussSeidelCopy: ";
4442  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4443 
4444  TEUCHOS_TEST_FOR_EXCEPTION(
4445  ! isFillComplete (), std::runtime_error,
4446  prefix << "The matrix is not fill complete.");
4447  TEUCHOS_TEST_FOR_EXCEPTION(
4448  numSweeps < 0, std::invalid_argument,
4449  prefix << "The number of sweeps must be nonnegative, "
4450  "but you provided numSweeps = " << numSweeps << " < 0.");
4451 
4452  // Translate from global to local sweep direction.
4453  // While doing this, validate the input.
4454  KokkosClassic::ESweepDirection localDirection;
4455  if (direction == Forward) {
4456  localDirection = KokkosClassic::Forward;
4457  }
4458  else if (direction == Backward) {
4459  localDirection = KokkosClassic::Backward;
4460  }
4461  else if (direction == Symmetric) {
4462  // We'll control local sweep direction manually.
4463  localDirection = KokkosClassic::Forward;
4464  }
4465  else {
4466  TEUCHOS_TEST_FOR_EXCEPTION(
4467  true, std::invalid_argument,
4468  prefix << "The 'direction' enum does not have any of its valid "
4469  "values: Forward, Backward, or Symmetric.");
4470  }
4471 
4472  if (numSweeps == 0) {
4473  return;
4474  }
4475 
4476  RCP<const import_type> importer = this->getGraph ()->getImporter ();
4477  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
4478  TEUCHOS_TEST_FOR_EXCEPTION(
4479  ! exporter.is_null (), std::runtime_error,
4480  "This method's implementation currently requires that the matrix's row, "
4481  "domain, and range Maps be the same. This cannot be the case, because "
4482  "the matrix has a nontrivial Export object.");
4483 
4484  RCP<const map_type> domainMap = this->getDomainMap ();
4485  RCP<const map_type> rangeMap = this->getRangeMap ();
4486  RCP<const map_type> rowMap = this->getGraph ()->getRowMap ();
4487  RCP<const map_type> colMap = this->getGraph ()->getColMap ();
4488 
4489 #ifdef HAVE_TEUCHOS_DEBUG
4490  {
4491  // The relation 'isSameAs' is transitive. It's also a
4492  // collective, so we don't have to do a "shared" test for
4493  // exception (i.e., a global reduction on the test value).
4494  TEUCHOS_TEST_FOR_EXCEPTION(
4495  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
4496  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
4497  "multivector X be in the domain Map of the matrix.");
4498  TEUCHOS_TEST_FOR_EXCEPTION(
4499  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
4500  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
4501  "B be in the range Map of the matrix.");
4502  TEUCHOS_TEST_FOR_EXCEPTION(
4503  ! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
4504  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
4505  "D be in the row Map of the matrix.");
4506  TEUCHOS_TEST_FOR_EXCEPTION(
4507  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
4508  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
4509  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
4510  TEUCHOS_TEST_FOR_EXCEPTION(
4511  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
4512  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
4513  "the range Map of the matrix be the same.");
4514  }
4515 #else
4516  // Forestall any compiler warnings for unused variables.
4517  (void) rangeMap;
4518  (void) rowMap;
4519 #endif // HAVE_TEUCHOS_DEBUG
4520 
4521  // Fetch a (possibly cached) temporary column Map multivector
4522  // X_colMap, and a domain Map view X_domainMap of it. Both have
4523  // constant stride by construction. We know that the domain Map
4524  // must include the column Map, because our Gauss-Seidel kernel
4525  // requires that the row Map, domain Map, and range Map are all
4526  // the same, and that each process owns all of its own diagonal
4527  // entries of the matrix.
4528 
4529  RCP<MV> X_colMap;
4530  RCP<MV> X_domainMap;
4531  bool copyBackOutput = false;
4532  if (importer.is_null ()) {
4533  if (X.isConstantStride ()) {
4534  X_colMap = rcpFromRef (X);
4535  X_domainMap = rcpFromRef (X);
4536  // Column Map and domain Map are the same, so there are no
4537  // remote entries. Thus, if we are not setting the initial
4538  // guess to zero, we don't have to worry about setting remote
4539  // entries to zero, even though we are not doing an Import in
4540  // this case.
4541  if (zeroInitialGuess) {
4542  X_colMap->putScalar (ZERO);
4543  }
4544  // No need to copy back to X at end.
4545  }
4546  else { // We must copy X into a constant stride multivector.
4547  // Just use the cached column Map multivector for that.
4548  // force=true means fill with zeros, so no need to fill
4549  // remote entries (not in domain Map) with zeros.
4550  X_colMap = getColumnMapMultiVector (X, true);
4551  // X_domainMap is always a domain Map view of the column Map
4552  // multivector. In this case, the domain and column Maps are
4553  // the same, so X_domainMap _is_ X_colMap.
4554  X_domainMap = X_colMap;
4555  if (! zeroInitialGuess) { // Don't copy if zero initial guess
4556  try {
4557  deep_copy (*X_domainMap , X); // Copy X into constant stride MV
4558  } catch (std::exception& e) {
4559  std::ostringstream os;
4560  os << "Tpetra::CrsMatrix::reorderedGaussSeidelCopy: "
4561  "deep_copy(*X_domainMap, X) threw an exception: "
4562  << e.what () << ".";
4563  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
4564  }
4565  }
4566  copyBackOutput = true; // Don't forget to copy back at end.
4567  TPETRA_EFFICIENCY_WARNING(
4568  ! X.isConstantStride (),
4569  std::runtime_error,
4570  "gaussSeidelCopy: The current implementation of the Gauss-Seidel "
4571  "kernel requires that X and B both have constant stride. Since X "
4572  "does not have constant stride, we had to make a copy. This is a "
4573  "limitation of the current implementation and not your fault, but we "
4574  "still report it as an efficiency warning for your information.");
4575  }
4576  }
4577  else { // Column Map and domain Map are _not_ the same.
4578  X_colMap = getColumnMapMultiVector (X);
4579  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
4580 
4581 #ifdef HAVE_TPETRA_DEBUG
4582  typename MV::dual_view_type X_colMap_view = X_colMap->getDualView ();
4583  typename MV::dual_view_type X_domainMap_view = X_domainMap->getDualView ();
4584 
4585  if (X_colMap->getLocalLength () != 0 && X_domainMap->getLocalLength ()) {
4586  TEUCHOS_TEST_FOR_EXCEPTION(
4587  X_colMap_view.h_view.ptr_on_device () != X_domainMap_view.h_view.ptr_on_device (),
4588  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: "
4589  "Pointer to start of column Map view of X is not equal to pointer to "
4590  "start of (domain Map view of) X. This may mean that "
4591  "Tpetra::MultiVector::offsetViewNonConst is broken. "
4592  "Please report this bug to the Tpetra developers.");
4593  }
4594 
4595  TEUCHOS_TEST_FOR_EXCEPTION(
4596  X_colMap_view.dimension_0 () < X_domainMap_view.dimension_0 () ||
4597  X_colMap->getLocalLength () < X_domainMap->getLocalLength (),
4598  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: "
4599  "X_colMap has fewer local rows than X_domainMap. "
4600  "X_colMap_view.dimension_0() = " << X_colMap_view.dimension_0 ()
4601  << ", X_domainMap_view.dimension_0() = "
4602  << X_domainMap_view.dimension_0 ()
4603  << ", X_colMap->getLocalLength() = " << X_colMap->getLocalLength ()
4604  << ", and X_domainMap->getLocalLength() = "
4605  << X_domainMap->getLocalLength ()
4606  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
4607  "is broken. Please report this bug to the Tpetra developers.");
4608 
4609  TEUCHOS_TEST_FOR_EXCEPTION(
4610  X_colMap->getNumVectors () != X_domainMap->getNumVectors (),
4611  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: "
4612  "X_colMap has a different number of columns than X_domainMap. "
4613  "X_colMap->getNumVectors() = " << X_colMap->getNumVectors ()
4614  << " != X_domainMap->getNumVectors() = "
4615  << X_domainMap->getNumVectors ()
4616  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
4617  "is broken. Please report this bug to the Tpetra developers.");
4618 #endif // HAVE_TPETRA_DEBUG
4619 
4620  if (zeroInitialGuess) {
4621  // No need for an Import, since we're filling with zeros.
4622  X_colMap->putScalar (ZERO);
4623  } else {
4624  // We could just copy X into X_domainMap. However, that
4625  // wastes a copy, because the Import also does a copy (plus
4626  // communication). Since the typical use case for
4627  // Gauss-Seidel is a small number of sweeps (2 is typical), we
4628  // don't want to waste that copy. Thus, we do the Import
4629  // here, and skip the first Import in the first sweep.
4630  // Importing directly from X effects the copy into X_domainMap
4631  // (which is a view of X_colMap).
4632  X_colMap->doImport (X, *importer, INSERT);
4633  }
4634  copyBackOutput = true; // Don't forget to copy back at end.
4635  } // if column and domain Maps are (not) the same
4636 
4637  // The Gauss-Seidel / SOR kernel expects multivectors of constant
4638  // stride. X_colMap is constant stride by construction, but B might not be. If
4639  // it's not, we have to make a copy.
4640  RCP<const MV> B_in;
4641  if (B.isConstantStride ()) {
4642  B_in = rcpFromRef (B);
4643  }
4644  else {
4645  // Range Map and row Map are the same in this case, so we can
4646  // use the cached row Map multivector to store a constant stride
4647  // copy of B.
4648  RCP<MV> B_in_nonconst = getRowMapMultiVector (B, true);
4649  try {
4650  deep_copy (*B_in_nonconst, B);
4651  } catch (std::exception& e) {
4652  std::ostringstream os;
4653  os << "Tpetra::CrsMatrix::reorderedGaussSeidelCopy: "
4654  "deep_copy(*B_in_nonconst, B) threw an exception: "
4655  << e.what () << ".";
4656  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
4657  }
4658  B_in = rcp_const_cast<const MV> (B_in_nonconst);
4659 
4660  TPETRA_EFFICIENCY_WARNING(
4661  ! B.isConstantStride (),
4662  std::runtime_error,
4663  "gaussSeidelCopy: The current implementation requires that B have "
4664  "constant stride. Since B does not have constant stride, we had to "
4665  "copy it into a separate constant-stride multivector. This is a "
4666  "limitation of the current implementation and not your fault, but we "
4667  "still report it as an efficiency warning for your information.");
4668  }
4669 
4670  for (int sweep = 0; sweep < numSweeps; ++sweep) {
4671  if (! importer.is_null () && sweep > 0) {
4672  // We already did the first Import for the zeroth sweep above,
4673  // if it was necessary.
4674  X_colMap->doImport (*X_domainMap, *importer, INSERT);
4675  }
4676 
4677  // Do local Gauss-Seidel.
4678  if (direction != Symmetric) {
4679  if (rowIndices.is_null ()) {
4680  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
4681  dampingFactor,
4682  localDirection);
4683  }
4684  else {
4685  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
4686  D, rowIndices,
4687  dampingFactor,
4688  localDirection);
4689  }
4690  }
4691  else { // direction == Symmetric
4692  if (rowIndices.is_null ()) {
4693  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
4694  dampingFactor,
4695  KokkosClassic::Forward);
4696  // mfh 18 Mar 2013: Aztec's implementation of "symmetric
4697  // Gauss-Seidel" does _not_ do an Import between the forward
4698  // and backward sweeps. This makes symmetric Gauss-Seidel a
4699  // symmetric preconditioner if the matrix A is symmetric. We
4700  // imitate Aztec's behavior here.
4701  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
4702  dampingFactor,
4703  KokkosClassic::Backward);
4704  }
4705  else {
4706  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
4707  D, rowIndices,
4708  dampingFactor,
4709  KokkosClassic::Forward);
4710  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
4711  D, rowIndices,
4712  dampingFactor,
4713  KokkosClassic::Backward);
4714 
4715  }
4716  }
4717  }
4718 
4719  if (copyBackOutput) {
4720  try {
4721  deep_copy (X , *X_domainMap); // Copy result back into X.
4722  } catch (std::exception& e) {
4723  TEUCHOS_TEST_FOR_EXCEPTION(
4724  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
4725  "threw an exception: " << e.what ());
4726  }
4727  }
4728  }
4729 
4730  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4731  template<class T>
4732  Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node, classic> >
4734  convert () const
4735  {
4736  using Teuchos::ArrayRCP;
4737  using Teuchos::RCP;
4738  using Teuchos::rcp;
4739  typedef CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node, classic> out_mat_type;
4740  typedef typename out_mat_type::local_matrix_type out_lcl_mat_type;
4741  typedef typename out_lcl_mat_type::values_type out_vals_type;
4742  typedef ArrayRCP<size_t>::size_type size_type;
4743  const char tfecfFuncName[] = "convert";
4744 
4745  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4746  ! isFillComplete (), std::runtime_error, "This matrix (the source of "
4747  "the conversion) is not fill complete. You must first call "
4748  "fillComplete() (possibly with the domain and range Map) without an "
4749  "intervening call to resumeFill(), before you may call this method.");
4750 
4751  // mfh 27 Feb 2014: It seems reasonable that if this matrix has a
4752  // const graph, then the returned matrix should also. However, if
4753  // this matrix does not have a const graph, then neither should
4754  // the returned matrix. The code below implements this strategy.
4755 
4756  RCP<out_mat_type> newmat; // the matrix to return
4757 
4758  if (this->isStaticGraph ()) {
4759  // This matrix has a const graph, so the returned matrix should too.
4760  newmat = rcp (new out_mat_type (this->getCrsGraph ()));
4761 
4762  // Convert the values from Scalar to T, and stuff them directly
4763  // into the matrix to return.
4764  const size_type numVals =
4765  static_cast<size_type> (this->lclMatrix_.values.dimension_0 ());
4766 
4767  // FIXME (mfh 05 Aug 2014) Write a copy kernel (impl_scalar_type and
4768  // T differ, so we can't use Kokkos::deep_copy).
4769  //
4770  // FIXME (mfh 05 Aug 2014) This assumes UVM.
4771  out_vals_type newVals1D ("Tpetra::CrsMatrix::val", numVals);
4772  for (size_type k = 0; k < numVals; ++k) {
4773  newVals1D(k) = static_cast<T> (this->k_values1D_(k));
4774  }
4775  newmat->lclMatrix_ =
4776  out_lcl_mat_type ("Tpetra::CrsMatrix::lclMatrix_",
4777  this->lclMatrix_.numCols (), newVals1D,
4778  this->lclMatrix_.graph);
4779  newmat->k_values1D_ = newVals1D;
4780  // Since newmat has a static (const) graph, the graph already
4781  // has a column Map, and Import and Export objects already exist
4782  // (if applicable). Thus, calling fillComplete is cheap.
4783  newmat->fillComplete (this->getDomainMap (), this->getRangeMap ());
4784  }
4785  else {
4786  // This matrix has a nonconst graph, so the returned matrix
4787  // should also have a nonconst graph. However, it's fine for
4788  // the returned matrix to have static profile. This will
4789  // certainly speed up its fillComplete.
4790 
4791  //
4792  // FIXME (mfh 05 Aug 2014) Instead of the slow stuff below, we
4793  // should copy the values and existing graph into a new local
4794  // matrix (lclMatrix), and then use the Tpetra::CrsMatrix
4795  // constructor that takes (rowMap, colMap, lclMatrix, params).
4796  //
4797 
4798  // Get this matrix's local data.
4799  ArrayRCP<const size_t> ptr;
4800  ArrayRCP<const LocalOrdinal> ind;
4801  ArrayRCP<const Scalar> oldVal;
4802  this->getAllValues (ptr, ind, oldVal);
4803 
4804  RCP<const map_type> rowMap = this->getRowMap ();
4805  RCP<const map_type> colMap = this->getColMap ();
4806 
4807  // Get an array of the number of entries in each (locally owned)
4808  // row, so that we can make the new matrix with static profile.
4809  const size_type numLocalRows =
4810  static_cast<size_type> (rowMap->getNodeNumElements ());
4811  ArrayRCP<size_t> numEntriesPerRow (numLocalRows);
4812  for (size_type localRow = 0; localRow < numLocalRows; ++localRow) {
4813  numEntriesPerRow[localRow] =
4814  static_cast<size_type> (getNumEntriesInLocalRow (localRow));
4815  }
4816 
4817  newmat = rcp (new out_mat_type (rowMap, colMap, numEntriesPerRow,
4818  StaticProfile));
4819 
4820  // Convert this matrix's values from Scalar to T.
4821  const size_type numVals = this->lclMatrix_.values.dimension_0 ();
4822  ArrayRCP<T> newVals1D (numVals);
4823  // FIXME (mfh 05 Aug 2014) This assumes UVM.
4824  for (size_type k = 0; k < numVals; ++k) {
4825  newVals1D[k] = static_cast<T> (this->k_values1D_(k));
4826  }
4827 
4828  // Give the new matrix all of its local data. We can call this
4829  // method because newmat was _not_ created with a const graph.
4830  // The data must be passed in as nonconst, so we have to copy it
4831  // first.
4832  ArrayRCP<size_t> newPtr (ptr.size ());
4833  std::copy (ptr.begin (), ptr.end (), newPtr.begin ());
4834  ArrayRCP<LocalOrdinal> newInd (ind.size ());
4835  std::copy (ind.begin (), ind.end (), newInd.begin ());
4836  newmat->setAllValues (newPtr, newInd, newVals1D);
4837 
4838  // We already have the Import and Export (if applicable) objects
4839  // from the graph, so we can save a lot of time by passing them
4840  // in to expertStaticFillComplete.
4841  RCP<const map_type> domainMap = this->getDomainMap ();
4842  RCP<const map_type> rangeMap = this->getRangeMap ();
4843  RCP<const import_type> importer = this->getCrsGraph ()->getImporter ();
4844  RCP<const export_type> exporter = this->getCrsGraph ()->getExporter ();
4845  newmat->expertStaticFillComplete (domainMap, rangeMap, importer, exporter);
4846  }
4847 
4848  return newmat;
4849  }
4850 
4851 
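  // Example usage of convert() (a usage sketch, not code from this file):
  // make a single-precision copy of a fill-complete double-precision
  // matrix.  A_dbl is a placeholder for a
  // Teuchos::RCP<Tpetra::CrsMatrix<double> > that the caller already owns.
  //
  //   Teuchos::RCP<Tpetra::CrsMatrix<float> > A_flt =
  //     A_dbl->convert<float> ();
  //
  //   // A_flt is returned fill complete.  If A_dbl has a const ("static")
  //   // graph, A_flt shares that graph; otherwise A_flt gets its own.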
4852  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4853  void
4856  {
4857 #ifdef HAVE_TPETRA_DEBUG
4858  const char tfecfFuncName[] = "checkInternalState: ";
4859  const char err[] = "Internal state is not consistent. "
4860  "Please report this bug to the Tpetra developers.";
4861 
4862  // This version of the graph (RCP<const crs_graph_type>) must
4863  // always be nonnull.
4864  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4865  staticGraph_.is_null (),
4866  std::logic_error, err);
4867  // myGraph == null means that the matrix has a const ("static")
4868  // graph. Otherwise, the matrix has a dynamic graph (it owns its
4869  // graph).
4870  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4871  ! myGraph_.is_null () && myGraph_ != staticGraph_,
4872  std::logic_error, err);
4873  // if matrix is fill complete, then graph must be fill complete
4874  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4875  isFillComplete () && ! staticGraph_->isFillComplete (),
4876  std::logic_error, err << " Specifically, the matrix is fill complete, "
4877  "but its graph is NOT fill complete.");
4878  // if matrix is storage optimized, it should have a 1D allocation
4879  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4880  isStorageOptimized () && ! values2D_.is_null (),
4881  std::logic_error, err);
4882  // if matrix/graph are static profile, then 2D allocation should not be present
4883  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4884  getProfileType() == StaticProfile && values2D_ != null,
4885  std::logic_error, err);
4886  // if matrix/graph are dynamic profile, then 1D allocation should not be present
4887  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4888  getProfileType() == DynamicProfile && k_values1D_.dimension_0 () > 0,
4889  std::logic_error, err);
4890  // if values are allocated and they are non-zero in number, then
4891  // one of the allocations should be present
4892  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4893  staticGraph_->indicesAreAllocated () &&
4894  staticGraph_->getNodeAllocationSize() > 0 &&
4895  staticGraph_->getNodeNumRows() > 0
4896  && values2D_.is_null () &&
4897  k_values1D_.dimension_0 () == 0,
4898  std::logic_error, err);
4899  // we cannot have both a 1D and 2D allocation
4900  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4901  k_values1D_.dimension_0 () > 0 && values2D_ != null,
4902  std::logic_error, err << " Specifically, k_values1D_ is allocated (has "
4903  "size " << k_values1D_.dimension_0 () << " > 0) and values2D_ is also "
4904  "allocated. CrsMatrix is not supposed to have both a 1-D and a 2-D "
4905  "allocation at the same time.");
4906 #endif
4907  }
4908 
4909  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4910  std::string
4913  {
4914  std::ostringstream os;
4915 
4916  os << "Tpetra::CrsMatrix (Kokkos refactor): {";
4917  if (this->getObjectLabel () != "") {
4918  os << "Label: \"" << this->getObjectLabel () << "\", ";
4919  }
4920  if (isFillComplete ()) {
4921  os << "isFillComplete: true"
4922  << ", global dimensions: [" << getGlobalNumRows () << ", "
4923  << getGlobalNumCols () << "]"
4924  << ", global number of entries: " << getGlobalNumEntries ()
4925  << "}";
4926  }
4927  else {
4928  os << "isFillComplete: false"
4929  << ", global dimensions: [" << getGlobalNumRows () << ", "
4930  << getGlobalNumCols () << "]}";
4931  }
4932  return os.str ();
4933  }
4934 
4935  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4936  void
4938  describe (Teuchos::FancyOStream &out,
4939  const Teuchos::EVerbosityLevel verbLevel) const
4940  {
4941  using std::endl;
4942  using std::setw;
4943  using Teuchos::Comm;
4944  using Teuchos::RCP;
4945  using Teuchos::TypeNameTraits;
4946  using Teuchos::VERB_DEFAULT;
4947  using Teuchos::VERB_NONE;
4948  using Teuchos::VERB_LOW;
4949  using Teuchos::VERB_MEDIUM;
4950  using Teuchos::VERB_HIGH;
4951  using Teuchos::VERB_EXTREME;
4952 
4953  const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
4954 
4955  if (vl == VERB_NONE) {
4956  return; // Don't print anything at all
4957  }
4958  // By convention, describe() always begins with a tab.
4959  Teuchos::OSTab tab0 (out);
4960 
4961  RCP<const Comm<int> > comm = this->getComm();
4962  const int myRank = comm->getRank();
4963  const int numProcs = comm->getSize();
4964  size_t width = 1;
4965  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
4966  ++width;
4967  }
4968  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
4969 
4970  // none: print nothing
4971  // low: print O(1) info from node 0
4972  // medium: print O(P) info, num entries per process
4973  // high: print O(N) info, num entries per row
4974  // extreme: print O(NNZ) info: print indices and values
4975  //
4976  // for medium and higher, print constituent objects at specified verbLevel
4977  if (myRank == 0) {
4978  out << "Tpetra::CrsMatrix (Kokkos refactor):" << endl;
4979  }
4980  Teuchos::OSTab tab1 (out);
4981 
4982  if (myRank == 0) {
4983  if (this->getObjectLabel () != "") {
4984  out << "Label: \"" << this->getObjectLabel () << "\", ";
4985  }
4986  {
4987  out << "Template parameters:" << endl;
4988  Teuchos::OSTab tab2 (out);
4989  out << "Scalar: " << TypeNameTraits<Scalar>::name () << endl
4990  << "LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
4991  << "GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
4992  << "Node: " << TypeNameTraits<Node>::name () << endl;
4993  }
4994  if (isFillComplete()) {
4995  out << "isFillComplete: true" << endl
4996  << "Global dimensions: [" << getGlobalNumRows () << ", "
4997  << getGlobalNumCols () << "]" << endl
4998  << "Global number of entries: " << getGlobalNumEntries () << endl
4999  << "Global number of diagonal entries: " << getGlobalNumDiags ()
5000  << endl << "Global max number of entries in a row: "
5001  << getGlobalMaxNumRowEntries () << endl;
5002  }
5003  else {
5004  out << "isFillComplete: false" << endl
5005  << "Global dimensions: [" << getGlobalNumRows () << ", "
5006  << getGlobalNumCols () << "]" << endl;
5007  }
5008  }
5009 
5010  if (vl < VERB_MEDIUM) {
5011  return; // all done!
5012  }
5013 
5014  // Describe the row Map.
5015  if (myRank == 0) {
5016  out << endl << "Row Map:" << endl;
5017  }
5018  if (getRowMap ().is_null ()) {
5019  if (myRank == 0) {
5020  out << "null" << endl;
5021  }
5022  }
5023  else {
5024  if (myRank == 0) {
5025  out << endl;
5026  }
5027  getRowMap ()->describe (out, vl);
5028  }
5029 
5030  // Describe the column Map.
5031  if (myRank == 0) {
5032  out << "Column Map: ";
5033  }
5034  if (getColMap ().is_null ()) {
5035  if (myRank == 0) {
5036  out << "null" << endl;
5037  }
5038  } else if (getColMap () == getRowMap ()) {
5039  if (myRank == 0) {
5040  out << "same as row Map" << endl;
5041  }
5042  } else {
5043  if (myRank == 0) {
5044  out << endl;
5045  }
5046  getColMap ()->describe (out, vl);
5047  }
5048 
5049  // Describe the domain Map.
5050  if (myRank == 0) {
5051  out << "Domain Map: ";
5052  }
5053  if (getDomainMap ().is_null ()) {
5054  if (myRank == 0) {
5055  out << "null" << endl;
5056  }
5057  } else if (getDomainMap () == getRowMap ()) {
5058  if (myRank == 0) {
5059  out << "same as row Map" << endl;
5060  }
5061  } else if (getDomainMap () == getColMap ()) {
5062  if (myRank == 0) {
5063  out << "same as column Map" << endl;
5064  }
5065  } else {
5066  if (myRank == 0) {
5067  out << endl;
5068  }
5069  getDomainMap ()->describe (out, vl);
5070  }
5071 
5072  // Describe the range Map.
5073  if (myRank == 0) {
5074  out << "Range Map: ";
5075  }
5076  if (getRangeMap ().is_null ()) {
5077  if (myRank == 0) {
5078  out << "null" << endl;
5079  }
5080  } else if (getRangeMap () == getDomainMap ()) {
5081  if (myRank == 0) {
5082  out << "same as domain Map" << endl;
5083  }
5084  } else if (getRangeMap () == getRowMap ()) {
5085  if (myRank == 0) {
5086  out << "same as row Map" << endl;
5087  }
5088  } else {
5089  if (myRank == 0) {
5090  out << endl;
5091  }
5092  getRangeMap ()->describe (out, vl);
5093  }
5094 
5095  // O(P) data
5096  for (int curRank = 0; curRank < numProcs; ++curRank) {
5097  if (myRank == curRank) {
5098  out << "Process rank: " << curRank << endl;
5099  Teuchos::OSTab tab2 (out);
5100  if (! staticGraph_->indicesAreAllocated ()) {
5101  out << "Graph indices not allocated" << endl;
5102  }
5103  else {
5104  out << "Number of allocated entries: "
5105  << staticGraph_->getNodeAllocationSize () << endl;
5106  }
5107  out << "Number of entries: " << getNodeNumEntries () << endl;
5108  if (isFillComplete ()) {
5109  out << "Number of diagonal entries: " << getNodeNumDiags () << endl;
5110  }
5111  out << "Max number of entries per row: " << getNodeMaxNumRowEntries ()
5112  << endl;
5113  }
5114  // Give output time to complete by executing some barriers.
5115  comm->barrier ();
5116  comm->barrier ();
5117  comm->barrier ();
5118  }
5119 
5120  if (vl < VERB_HIGH) {
5121  return; // all done!
5122  }
5123 
5124  // O(N) and O(NNZ) data
5125  for (int curRank = 0; curRank < numProcs; ++curRank) {
5126  if (myRank == curRank) {
5127  out << std::setw(width) << "Proc Rank"
5128  << std::setw(width) << "Global Row"
5129  << std::setw(width) << "Num Entries";
5130  if (vl == VERB_EXTREME) {
5131  out << std::setw(width) << "(Index,Value)";
5132  }
5133  out << endl;
5134  for (size_t r = 0; r < getNodeNumRows (); ++r) {
5135  const size_t nE = getNumEntriesInLocalRow(r);
5136  GlobalOrdinal gid = getRowMap()->getGlobalElement(r);
5137  out << std::setw(width) << myRank
5138  << std::setw(width) << gid
5139  << std::setw(width) << nE;
5140  if (vl == VERB_EXTREME) {
5141  if (isGloballyIndexed()) {
5142  ArrayView<const GlobalOrdinal> rowinds;
5143  ArrayView<const Scalar> rowvals;
5144  getGlobalRowView (gid, rowinds, rowvals);
5145  for (size_t j = 0; j < nE; ++j) {
5146  out << " (" << rowinds[j]
5147  << ", " << rowvals[j]
5148  << ") ";
5149  }
5150  }
5151  else if (isLocallyIndexed()) {
5152  ArrayView<const LocalOrdinal> rowinds;
5153  ArrayView<const Scalar> rowvals;
5154  getLocalRowView (r, rowinds, rowvals);
5155  for (size_t j=0; j < nE; ++j) {
5156  out << " (" << getColMap()->getGlobalElement(rowinds[j])
5157  << ", " << rowvals[j]
5158  << ") ";
5159  }
5160  } // globally or locally indexed
5161  } // vl == VERB_EXTREME
5162  out << endl;
5163  } // for each row r on this process
5164  } // if (myRank == curRank)
5165 
5166  // Give output time to complete
5167  comm->barrier ();
5168  comm->barrier ();
5169  comm->barrier ();
5170  } // for each process p
5171  }
5172 
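  // Example usage of describe() (a usage sketch, not code from this
  // file): print a summary of the matrix A to std::cout.  describe() is
  // collective, so all processes in A's communicator must call it.
  //
  //   Teuchos::RCP<Teuchos::FancyOStream> out =
  //     Teuchos::getFancyOStream (Teuchos::rcpFromRef (std::cout));
  //   A.describe (*out, Teuchos::VERB_MEDIUM);  // O(P) summary
  //   A.describe (*out, Teuchos::VERB_EXTREME); // prints every entry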
5173  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5174  bool
5177  {
5178  // It's not clear what kind of compatibility checks on sizes can
5179  // be performed here. Epetra_CrsGraph doesn't check any sizes for
5180  // compatibility.
5181 
5182  // Currently, the source object must be a RowMatrix with the same
5183  // four template parameters as the target CrsMatrix. We might
5184  // relax this requirement later.
5186  const row_matrix_type* srcRowMat =
5187  dynamic_cast<const row_matrix_type*> (&source);
5188  return (srcRowMat != NULL);
5189  }
5190 
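  // Example of the redistribution path that checkSizes(), copyAndPermute(),
  // packAndPrepare(), and pack() implement (a usage sketch, not code from
  // this file): exporting a fill-complete matrix A_src to a different row
  // distribution.  targetRowMap, domainMap, and rangeMap are placeholders
  // for Maps the caller already owns.
  //
  //   typedef Tpetra::CrsMatrix<double> crs_matrix_type;
  //   typedef crs_matrix_type::export_type export_type;
  //
  //   crs_matrix_type A_tgt (targetRowMap, 0);  // dynamic profile
  //   export_type exporter (A_src.getRowMap (), targetRowMap);
  //   A_tgt.doExport (A_src, exporter, Tpetra::ADD);
  //   A_tgt.fillComplete (domainMap, rangeMap);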
5191  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5192  void
5195  size_t numSameIDs,
5196  const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
5197  const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs)
5198  {
5199  using Teuchos::Array;
5200  using Teuchos::ArrayView;
5201  typedef LocalOrdinal LO;
5202  typedef GlobalOrdinal GO;
5203  typedef node_type NT;
5204  // Method name string for TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC.
5205  const char tfecfFuncName[] = "copyAndPermute: ";
5206 
5207  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5208  permuteToLIDs.size() != permuteFromLIDs.size(),
5209  std::invalid_argument, "permuteToLIDs.size() = " << permuteToLIDs.size()
5210  << "!= permuteFromLIDs.size() = " << permuteFromLIDs.size() << ".");
5211 
5212  // This dynamic cast should succeed, because we've already tested
5213  // it in checkSizes().
5214  typedef RowMatrix<Scalar, LO, GO, NT> row_matrix_type;
5215  const row_matrix_type& srcMat = dynamic_cast<const row_matrix_type&> (source);
5216 
5217  const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
5218  //
5219  // Copy the first numSameIDs rows from source to target (this matrix).
5220  // This involves copying rows corresponding to LIDs [0, numSame-1].
5221  //
5222  const map_type& srcRowMap = * (srcMat.getRowMap ());
5223  Array<GO> rowInds;
5224  Array<Scalar> rowVals;
5225  const LO numSameIDs_as_LID = static_cast<LO> (numSameIDs);
5226  for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5227  // Global ID for the current row index in the source matrix.
5228  // The first numSameIDs GIDs in the two input lists are the
5229  // same, so sourceGID == targetGID in this case.
5230  const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5231  const GO targetGID = sourceGID;
5232 
5233  // Input views for the combineGlobalValues() call below.
5234  ArrayView<const GO> rowIndsConstView;
5235  ArrayView<const Scalar> rowValsConstView;
5236 
5237  if (sourceIsLocallyIndexed) {
5238  const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5239  if (rowLength > static_cast<size_t> (rowInds.size())) {
5240  rowInds.resize (rowLength);
5241  rowVals.resize (rowLength);
5242  }
5243  // Resizing invalidates an Array's views, so we must make new
5244  // ones, even if rowLength hasn't changed.
5245  ArrayView<GO> rowIndsView = rowInds.view (0, rowLength);
5246  ArrayView<Scalar> rowValsView = rowVals.view (0, rowLength);
5247 
5248  // The source matrix is locally indexed, so we have to get a
5249  // copy. Really it's the GIDs that have to be copied (because
5250  // they have to be converted from LIDs).
5251  size_t checkRowLength = 0;
5252  srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView, checkRowLength);
5253 
5254 #ifdef HAVE_TPETRA_DEBUG
5255  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength,
5256  std::logic_error, "For global row index " << sourceGID << ", the source"
5257  " matrix's getNumEntriesInGlobalRow() method returns a row length of "
5258  << rowLength << ", but the getGlobalRowCopy() method reports that "
5259  "the row length is " << checkRowLength << ". Please report this bug "
5260  "to the Tpetra developers.");
5261 #endif // HAVE_TPETRA_DEBUG
5262 
5263  rowIndsConstView = rowIndsView.view (0, rowLength);
5264  rowValsConstView = rowValsView.view (0, rowLength);
5265  }
5266  else { // source matrix is globally indexed.
5267  srcMat.getGlobalRowView (sourceGID, rowIndsConstView, rowValsConstView);
5268  }
5269 
5270  // Combine the data into the target matrix.
5271  if (isStaticGraph()) {
5272  // Applying a permutation to a matrix with a static graph
5273  // means REPLACE-ing entries.
5274  combineGlobalValues (targetGID, rowIndsConstView, rowValsConstView, REPLACE);
5275  }
5276  else {
5277  // Applying a permutation to a matrix with a dynamic graph
5278  // means INSERT-ing entries. This has the same effect as
5279  // ADD, if the target graph already has an entry there.
5280  combineGlobalValues (targetGID, rowIndsConstView, rowValsConstView, INSERT);
5281  }
5282  } // For each of the consecutive source and target IDs that are the same
5283 
5284  //
5285  // Permute the remaining rows.
5286  //
5287  const map_type& tgtRowMap = * (this->getRowMap ());
5288  const size_t numPermuteToLIDs = static_cast<size_t> (permuteToLIDs.size ());
5289  for (size_t p = 0; p < numPermuteToLIDs; ++p) {
5290  const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5291  const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5292 
5293  // Input views for the combineGlobalValues() call below.
5294  ArrayView<const GO> rowIndsConstView;
5295  ArrayView<const Scalar> rowValsConstView;
5296 
5297  if (sourceIsLocallyIndexed) {
5298  const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5299  if (rowLength > static_cast<size_t> (rowInds.size ())) {
5300  rowInds.resize (rowLength);
5301  rowVals.resize (rowLength);
5302  }
5303  // Resizing invalidates an Array's views, so we must make new
5304  // ones, even if rowLength hasn't changed.
5305  ArrayView<GO> rowIndsView = rowInds.view (0, rowLength);
5306  ArrayView<Scalar> rowValsView = rowVals.view (0, rowLength);
5307 
5308  // The source matrix is locally indexed, so we have to get a
5309  // copy. Really it's the GIDs that have to be copied (because
5310  // they have to be converted from LIDs).
5311  size_t checkRowLength = 0;
5312  srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView, checkRowLength);
5313 
5314 #ifdef HAVE_TPETRA_DEBUG
5315  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength,
5316  std::logic_error, "For the source matrix's global row index "
5317  << sourceGID << ", the source matrix's getNumEntriesInGlobalRow() "
5318  "method returns a row length of " << rowLength << ", but the "
5319  "getGlobalRowCopy() method reports that the row length is "
5320  << checkRowLength << ". Please report this bug to the Tpetra "
5321  "developers.");
5322 #endif // HAVE_TPETRA_DEBUG
5323 
5324  rowIndsConstView = rowIndsView.view (0, rowLength);
5325  rowValsConstView = rowValsView.view (0, rowLength);
5326  }
5327  else {
5328  srcMat.getGlobalRowView (sourceGID, rowIndsConstView, rowValsConstView);
5329  }
5330 
5331  // Combine the data into the target matrix.
5332  if (isStaticGraph()) {
5333  this->combineGlobalValues (targetGID, rowIndsConstView,
5334  rowValsConstView, REPLACE);
5335  }
5336  else {
5337  this->combineGlobalValues (targetGID, rowIndsConstView,
5338  rowValsConstView, INSERT);
5339  }
5340  } // For each ID to permute
5341  }
5342 
5343  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5344  void
5346  packAndPrepare (const SrcDistObject& source,
5347  const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5348  Teuchos::Array<char>& exports,
5349  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5350  size_t& constantNumPackets,
5351  Distributor& distor)
5352  {
5353  using Teuchos::Array;
5354  using Teuchos::ArrayView;
5355  using Teuchos::av_reinterpret_cast;
5356  typedef LocalOrdinal LO;
5357  typedef GlobalOrdinal GO;
5358  const char tfecfFuncName[] = "packAndPrepare: ";
5359 
5360  // Attempt to cast the source object to RowMatrix. If the cast
5361  // succeeds, use the source object's pack method to pack its data
5362  // for communication. If the source object is really a CrsMatrix,
5363  // this will pick up the CrsMatrix's more efficient override. If
5364  // the RowMatrix cast fails, then the source object doesn't have
5365  // the right type.
5366  //
5367  // FIXME (mfh 30 Jun 2013) We don't even need the RowMatrix to
5368  // have the same Node type. Unfortunately, we don't have a way to
5369  // ask if the RowMatrix is "a RowMatrix with any Node type," since
5370  // RowMatrix doesn't have a base class. A hypothetical
5371  // RowMatrixBase<Scalar, LO, GO> class, which does not currently
5372  // exist, would satisfy this requirement.
5373  //
5374  // Why RowMatrixBase<Scalar, LO, GO>? The source object's Scalar
5375  // type doesn't technically need to match the target object's
5376  // Scalar type, so we could just have RowMatrixBase<LO, GO>. LO
5377  // and GO need not be the same, as long as there is no overflow of
5378  // the indices. However, checking for index overflow is global
5379  // and therefore undesirable.
5380  typedef RowMatrix<Scalar, LO, GO, Node> row_matrix_type;
5381  const row_matrix_type* srcRowMat =
5382  dynamic_cast<const row_matrix_type*> (&source);
5383  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5384  srcRowMat == NULL, std::invalid_argument,
5385  "The source object of the Import or Export operation is neither a "
5386  "CrsMatrix (with the same template parameters as the target object), "
5387  "nor a RowMatrix (with the same first four template parameters as the "
5388  "target object).");
5389 #ifdef HAVE_TPETRA_DEBUG
5390  {
5391  using Teuchos::reduceAll;
5392  std::ostringstream msg;
5393  int lclBad = 0;
5394  try {
5395  srcRowMat->pack (exportLIDs, exports, numPacketsPerLID,
5396  constantNumPackets, distor);
5397  } catch (std::exception& e) {
5398  lclBad = 1;
5399  msg << e.what ();
5400  }
5401  int gblBad = 0;
5402  const Teuchos::Comm<int>& comm = * (this->getComm ());
5403  reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
5404  lclBad, Teuchos::outArg (gblBad));
5405  if (gblBad != 0) {
5406  const int myRank = comm.getRank ();
5407  const int numProcs = comm.getSize ();
5408  for (int r = 0; r < numProcs; ++r) {
5409  if (r == myRank && lclBad != 0) {
5410  std::ostringstream os;
5411  os << "Proc " << myRank << ": " << msg.str () << std::endl;
5412  std::cerr << os.str ();
5413  }
5414  comm.barrier ();
5415  comm.barrier ();
5416  comm.barrier ();
5417  }
5418  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5419  true, std::logic_error, "pack() threw an exception on one or "
5420  "more participating processes.");
5421  }
5422  }
5423 #else
5424  srcRowMat->pack (exportLIDs, exports, numPacketsPerLID,
5425  constantNumPackets, distor);
5426 #endif // HAVE_TPETRA_DEBUG
5427  }
5428 
5429  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5430  bool
5432  packRow (char* const numEntOut,
5433  char* const valOut,
5434  char* const indOut,
5435  const size_t numEnt,
5436  const LocalOrdinal lclRow) const
5437  {
5438  using Teuchos::ArrayView;
5439  typedef LocalOrdinal LO;
5440  typedef GlobalOrdinal GO;
5441 
5442  const LO numEntLO = static_cast<LO> (numEnt);
5443  memcpy (numEntOut, &numEntLO, sizeof (LO));
5444  if (this->isLocallyIndexed ()) {
5445  // If the matrix is locally indexed on the calling process, we
5446  // have to use its column Map (which it _must_ have in this
5447  // case) to convert to global indices.
5448  ArrayView<const LO> indIn;
5449  ArrayView<const Scalar> valIn;
5450  this->getLocalRowView (lclRow, indIn, valIn);
5451  const map_type& colMap = * (this->getColMap ());
5452  // Copy column indices one at a time, so that we don't need
5453  // temporary storage.
5454  for (size_t k = 0; k < numEnt; ++k) {
5455  const GO gblIndIn = colMap.getGlobalElement (indIn[k]);
5456  memcpy (indOut + k * sizeof (GO), &gblIndIn, sizeof (GO));
5457  }
5458  memcpy (valOut, valIn.getRawPtr (), numEnt * sizeof (Scalar));
5459  }
5460  else if (this->isGloballyIndexed ()) {
5461  // If the matrix is globally indexed on the calling process,
5462  // then we can use the column indices directly. However, we
5463  // have to get the global row index. The calling process must
5464  // have a row Map, since otherwise it shouldn't be participating
5465  // in packing operations.
5466  ArrayView<const GO> indIn;
5467  ArrayView<const Scalar> valIn;
5468  const map_type& rowMap = * (this->getRowMap ());
5469  const GO gblRow = rowMap.getGlobalElement (lclRow);
5470  this->getGlobalRowView (gblRow, indIn, valIn);
5471  memcpy (indOut, indIn.getRawPtr (), numEnt * sizeof (GO));
5472  memcpy (valOut, valIn.getRawPtr (), numEnt * sizeof (Scalar));
5473  }
5474  else {
5475  if (numEnt != 0) {
5476  return false;
5477  }
5478  }
5479  return true;
5480  }
5481 
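  // Packed-row layout used by packRow() and its caller pack(): each
  // nonempty row occupies a contiguous region of the 'exports' buffer,
  //
  //   [ LO numEnt | numEnt Scalar values | numEnt GO global indices ]
  //
  // with no padding, since both packing and unpacking use memcpy.  For
  // example, with LO = int (4 bytes), GO = long long (8 bytes), and
  // Scalar = double (8 bytes), a row with numEnt = 3 occupies
  //   4 + 3*8 + 3*8 = 52 bytes.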
5482 
5483  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5484  bool
5486  unpackRow (Scalar* const valInTmp,
5487  GlobalOrdinal* const indInTmp,
5488  const size_t tmpSize,
5489  const char* const valIn,
5490  const char* const indIn,
5491  const size_t numEnt,
5492  const LocalOrdinal lclRow,
5493  const Tpetra::CombineMode combineMode)
5494  {
5495  if (tmpSize < numEnt || (numEnt != 0 && (valInTmp == NULL || indInTmp == NULL))) {
5496  return false;
5497  }
5498  memcpy (valInTmp, valIn, numEnt * sizeof (Scalar));
5499  memcpy (indInTmp, indIn, numEnt * sizeof (GlobalOrdinal));
5500  const GlobalOrdinal gblRow = this->getRowMap ()->getGlobalElement (lclRow);
5501  Teuchos::ArrayView<Scalar> val ((numEnt == 0) ? NULL : valInTmp, numEnt);
5502  Teuchos::ArrayView<GlobalOrdinal> ind ((numEnt == 0) ? NULL : indInTmp, numEnt);
5503  this->combineGlobalValues (gblRow, ind, val, combineMode);
5504  return true;
5505  }
5506 
5507 
5508  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5509  void
5511  allocatePackSpace (Teuchos::Array<char>& exports,
5512  size_t& totalNumEntries,
5513  const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs) const
5514  {
5515  typedef LocalOrdinal LO;
5516  typedef GlobalOrdinal GO;
5517  typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
5518  //const char tfecfFuncName[] = "allocatePackSpace: ";
5519  const size_type numExportLIDs = exportLIDs.size ();
5520 
5521  // Count the total number of entries to send.
5522  totalNumEntries = 0;
5523  for (size_type i = 0; i < numExportLIDs; ++i) {
5524  const LO lclRow = exportLIDs[i];
5525  size_t curNumEntries = this->getNumEntriesInLocalRow (lclRow);
5526  // FIXME (mfh 25 Jan 2015) We should actually report invalid row
5527  // indices as an error. Just consider them nonowned for now.
5528  if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid ()) {
5529  curNumEntries = 0;
5530  }
5531  totalNumEntries += curNumEntries;
5532  }
5533 
5534  // FIXME (mfh 24 Feb 2013) This code is only correct if
5535  // sizeof(Scalar) is a meaningful representation of the amount of
5536  // data in a Scalar instance. (LO and GO are always built-in
5537  // integer types.)
5538  //
5539  // Allocate the exports array. It does NOT need padding for
5540  // alignment, since we use memcpy to write to / read from send /
5541  // receive buffers.
5542  const size_t allocSize =
5543  static_cast<size_t> (numExportLIDs) * sizeof (LO) +
5544  totalNumEntries * (sizeof (Scalar) + sizeof (GO));
5545  if (static_cast<size_t> (exports.size ()) < allocSize) {
5546  exports.resize (allocSize);
5547  }
5548  }
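  // For example, assuming sizeof(LO) == 4, sizeof(GO) == 8, and
  // sizeof(Scalar) == 8 (say LO = int, GO = long long, Scalar = double),
  // packing two rows with 3 and 5 entries respectively needs
  //
  //   allocSize = 2 * sizeof(LO) + (3 + 5) * (sizeof(Scalar) + sizeof(GO))
  //             = 8 + 8 * 16 = 136 bytes.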
5549 
5550  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5551  void
5552  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
5553  pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5554  Teuchos::Array<char>& exports,
5555  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5556  size_t& constantNumPackets,
5557  Distributor& distor) const
5558  {
5559  using Teuchos::Array;
5560  using Teuchos::ArrayView;
5561  using Teuchos::av_reinterpret_cast;
5562  using Teuchos::RCP;
5563  typedef LocalOrdinal LO;
5564  typedef GlobalOrdinal GO;
5565  typedef typename ArrayView<const LO>::size_type size_type;
5566  const char tfecfFuncName[] = "pack: ";
5567 
5568  const size_type numExportLIDs = exportLIDs.size ();
5569  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5570  numExportLIDs != numPacketsPerLID.size (), std::invalid_argument,
5571  "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
5572  " = " << numPacketsPerLID.size () << ".");
5573 
5574  // Setting this to zero tells the caller to expect a possibly
5575  // different ("nonconstant") number of packets per local index
5576  // (i.e., a possibly different number of entries per row).
5577  constantNumPackets = 0;
5578 
5579  // The pack buffer 'exports' enters this method possibly
5580  // unallocated. Do the first two parts of "Count, allocate, fill,
5581  // compute."
5582  size_t totalNumEntries = 0;
5583  allocatePackSpace (exports, totalNumEntries, exportLIDs);
5584  const size_t bufSize = static_cast<size_t> (exports.size ());
5585 
5586  // Compute the number of "packets" (in this case, bytes) per
5587  // export LID (in this case, local index of the row to send), and
5588  // actually pack the data.
5589  //
5590  // FIXME (mfh 24 Feb 2013, 25 Jan 2015) This code is only correct
5591  // if sizeof(Scalar) is a meaningful representation of the amount
5592  // of data in a Scalar instance. (LO and GO are always built-in
5593  // integer types.)
5594 
5595  // Variables for error reporting in the loop.
5596  size_type firstBadIndex = 0; // only valid if outOfBounds == true.
5597  size_t firstBadOffset = 0; // only valid if outOfBounds == true.
5598  size_t firstBadNumBytes = 0; // only valid if outOfBounds == true.
5599  bool outOfBounds = false;
5600  bool packErr = false;
5601 
5602  char* const exportsRawPtr = exports.getRawPtr ();
5603  size_t offset = 0; // current index into 'exports' array.
5604  for (size_type i = 0; i < numExportLIDs; ++i) {
5605  const LO lclRow = exportLIDs[i];
5606  const size_t numEnt = this->getNumEntriesInLocalRow (lclRow);
5607 
5608  // Only pack this row if it has a nonzero number of entries.
5609  if (numEnt == 0) {
5610  numPacketsPerLID[i] = 0;
5611  }
5612  else {
5613  char* const numEntBeg = exportsRawPtr + offset;
5614  char* const numEntEnd = numEntBeg + sizeof (LO);
5615  char* const valBeg = numEntEnd;
5616  char* const valEnd = valBeg + numEnt * sizeof (Scalar);
5617  char* const indBeg = valEnd;
5618  const size_t numBytes = sizeof (LO) +
5619  numEnt * (sizeof (Scalar) + sizeof (GO));
5620  if (offset > bufSize || offset + numBytes > bufSize) {
5621  firstBadIndex = i;
5622  firstBadOffset = offset;
5623  firstBadNumBytes = numBytes;
5624  outOfBounds = true;
5625  break;
5626  }
5627  packErr = ! packRow (numEntBeg, valBeg, indBeg, numEnt, lclRow);
5628  if (packErr) {
5629  firstBadIndex = i;
5630  firstBadOffset = offset;
5631  firstBadNumBytes = numBytes;
5632  break;
5633  }
5634  // numPacketsPerLID[i] is the number of "packets" in the
5635  // current local row i. Packet=char (really "byte") so use
5636  // the number of bytes of the packed data for that row.
5637  numPacketsPerLID[i] = numBytes;
5638  offset += numBytes;
5639  }
5640  }
5641 
5642  TEUCHOS_TEST_FOR_EXCEPTION(
5643  outOfBounds, std::logic_error, "First invalid offset into 'exports' "
5644  "pack buffer at index i = " << firstBadIndex << ". exportLIDs[i]: "
5645  << exportLIDs[firstBadIndex] << ", bufSize: " << bufSize << ", offset: "
5646  << firstBadOffset << ", numBytes: " << firstBadNumBytes << ".");
5647  TEUCHOS_TEST_FOR_EXCEPTION(
5648  packErr, std::logic_error, "First error in packRow() at index i = "
5649  << firstBadIndex << ". exportLIDs[i]: " << exportLIDs[firstBadIndex]
5650  << ", bufSize: " << bufSize << ", offset: " << firstBadOffset
5651  << ", numBytes: " << firstBadNumBytes << ".");
5652  }
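  // pack() and unpackAndCombine() are normally invoked indirectly, through
  // the doImport() / doExport() methods that CrsMatrix inherits from
  // DistObject. A minimal usage sketch (hypothetical names; 'src' is fill
  // complete, 'tgt' is fill active, and 'exporter' maps src's row Map to
  // tgt's row Map):
  //
  //   tgt.doExport (src, exporter, Tpetra::ADD);
  //
  // The source side packs each exported row in the byte layout described
  // above; the target side unpacks those rows and combines the values
  // using the given combine mode (here, ADD).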
5653 
5654  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5655  void
5656  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
5657  combineGlobalValues (const GlobalOrdinal globalRowIndex,
5658  const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
5659  const Teuchos::ArrayView<const Scalar>& values,
5660  const Tpetra::CombineMode combineMode)
5661  {
5662  const char tfecfFuncName[] = "combineGlobalValues: ";
5663 
5664  if (isStaticGraph ()) {
5665  // INSERT doesn't make sense for a static graph, since you
5666  // aren't allowed to change the structure of the graph.
5667  // However, all the other combine modes work.
5668  if (combineMode == ADD) {
5669  sumIntoGlobalValues (globalRowIndex, columnIndices, values);
5670  }
5671  else if (combineMode == REPLACE) {
5672  replaceGlobalValues (globalRowIndex, columnIndices, values);
5673  }
5674  else if (combineMode == ABSMAX) {
5675  using Details::AbsMax;
5676  AbsMax<Scalar> f;
5677  this->template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex,
5678  columnIndices,
5679  values, f);
5680  }
5681  else if (combineMode == INSERT) {
5682  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5683  isStaticGraph () && combineMode == INSERT, std::invalid_argument,
5684  "INSERT combine mode is not allowed if the matrix has a static graph "
5685  "(i.e., was constructed with the CrsMatrix constructor that takes a "
5686  "const CrsGraph pointer).");
5687  }
5688  else {
5689  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5690  true, std::logic_error, "Invalid combine mode; should never get "
5691  "here! Please report this bug to the Tpetra developers.");
5692  }
5693  }
5694  else { // The matrix has a dynamic graph.
5695  if (combineMode == ADD || combineMode == INSERT) {
5696  // For a dynamic graph, all incoming column indices are
5697  // inserted into the target graph. Duplicate indices will
5698  // have their values summed. In this context, ADD and INSERT
5699  // are equivalent. We need to call insertGlobalValues()
5700  // anyway if the column indices don't yet exist in this row,
5701  // so we just call insertGlobalValues() for both cases.
5702  insertGlobalValuesFiltered (globalRowIndex, columnIndices, values);
5703  }
5704  // FIXME (mfh 14 Mar 2012):
5705  //
5706  // Implementing ABSMAX or REPLACE for a dynamic graph would
5707  // require modifying assembly to attach a possibly different
5708  // combine mode to each inserted (i, j, A_ij) entry. For
5709  // example, consider two different Export operations to the same
5710  // target CrsMatrix, the first with ABSMAX combine mode and the
5711  // second with REPLACE. This isn't a common use case, so we
5712  // won't mess with it for now.
5713  else if (combineMode == ABSMAX) {
5714  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5715  ! isStaticGraph () && combineMode == ABSMAX, std::logic_error,
5716  "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
5717  "implemented.");
5718  }
5719  else if (combineMode == REPLACE) {
5720  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5721  ! isStaticGraph () && combineMode == REPLACE, std::logic_error,
5722  "REPLACE combine mode when the matrix has a dynamic graph is not yet "
5723  "implemented.");
5724  }
5725  else {
5726  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5727  true, std::logic_error, "Should never get here! Please report this "
5728  "bug to the Tpetra developers.");
5729  }
5730  }
5731  }
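  // Summary of the combine modes accepted above:
  //
  //   Static graph: ADD, REPLACE, and ABSMAX work; INSERT is rejected,
  //     since the graph's structure may not change.
  //   Dynamic graph: ADD and INSERT both go through
  //     insertGlobalValuesFiltered() (duplicate entries get summed);
  //     ABSMAX and REPLACE are not yet implemented.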
5732 
5733 
5734  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5735  void
5736  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
5737  unpackAndCombine (const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
5738  const Teuchos::ArrayView<const char>& imports,
5739  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5740  size_t constantNumPackets,
5741  Distributor& distor,
5742  CombineMode combineMode)
5743  {
5744 #ifdef HAVE_TPETRA_DEBUG
5745  const char tfecfFuncName[] = "unpackAndCombine: ";
5746  const CombineMode validModes[4] = {ADD, REPLACE, ABSMAX, INSERT};
5747  const char* validModeNames[4] = {"ADD", "REPLACE", "ABSMAX", "INSERT"};
5748  const int numValidModes = 4;
5749 
5750  if (std::find (validModes, validModes+numValidModes, combineMode) ==
5751  validModes+numValidModes) {
5752  std::ostringstream os;
5753  os << "Invalid combine mode. Valid modes are {";
5754  for (int k = 0; k < numValidModes; ++k) {
5755  os << validModeNames[k];
5756  if (k < numValidModes - 1) {
5757  os << ", ";
5758  }
5759  }
5760  os << "}.";
5761  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5762  true, std::invalid_argument, os.str ());
5763  }
5764 
5765  {
5766  using Teuchos::reduceAll;
5767  std::ostringstream msg;
5768  int lclBad = 0;
5769  try {
5770  this->unpackAndCombineImpl (importLIDs, imports, numPacketsPerLID,
5771  constantNumPackets, distor, combineMode);
5772  } catch (std::exception& e) {
5773  lclBad = 1;
5774  msg << e.what ();
5775  }
5776  int gblBad = 0;
5777  const Teuchos::Comm<int>& comm = * (this->getComm ());
5778  reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
5779  lclBad, Teuchos::outArg (gblBad));
5780  if (gblBad != 0) {
5781  const int myRank = comm.getRank ();
5782  const int numProcs = comm.getSize ();
5783  for (int r = 0; r < numProcs; ++r) {
5784  if (r == myRank && lclBad != 0) {
5785  std::ostringstream os;
5786  os << "Proc " << myRank << ": " << msg.str () << std::endl;
5787  std::cerr << os.str ();
5788  }
5789  comm.barrier ();
5790  comm.barrier ();
5791  comm.barrier ();
5792  }
5793  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5794  true, std::logic_error, "unpackAndCombineImpl() threw an "
5795  "exception on one or more participating processes.");
5796  }
5797  }
5798 #else
5799  this->unpackAndCombineImpl (importLIDs, imports, numPacketsPerLID,
5800  constantNumPackets, distor, combineMode);
5801 #endif // HAVE_TPETRA_DEBUG
5802  }
5803 
5804  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5805  void
5806  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
5807  unpackAndCombineImpl (const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
5808  const Teuchos::ArrayView<const char>& imports,
5809  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5810  size_t constantNumPackets,
5811  Distributor & /* distor */,
5812  CombineMode combineMode)
5813  {
5814  typedef LocalOrdinal LO;
5815  typedef GlobalOrdinal GO;
5816  typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
5817  const char tfecfFuncName[] = "unpackAndCombine: ";
5818 
5819 #ifdef HAVE_TPETRA_DEBUG
5820  const CombineMode validModes[4] = {ADD, REPLACE, ABSMAX, INSERT};
5821  const char* validModeNames[4] = {"ADD", "REPLACE", "ABSMAX", "INSERT"};
5822  const int numValidModes = 4;
5823 
5824  if (std::find (validModes, validModes+numValidModes, combineMode) ==
5825  validModes+numValidModes) {
5826  std::ostringstream os;
5827  os << "Invalid combine mode. Valid modes are {";
5828  for (int k = 0; k < numValidModes; ++k) {
5829  os << validModeNames[k];
5830  if (k < numValidModes - 1) {
5831  os << ", ";
5832  }
5833  }
5834  os << "}.";
5835  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5836  true, std::invalid_argument, os.str ());
5837  }
5838 #endif // HAVE_TPETRA_DEBUG
5839 
5840  const size_type numImportLIDs = importLIDs.size ();
5841  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5842  numImportLIDs != numPacketsPerLID.size (), std::invalid_argument,
5843  "importLIDs.size() = " << numImportLIDs << " != numPacketsPerLID.size()"
5844  << " = " << numPacketsPerLID.size () << ".");
5845 
5846  // If a sanity check fails, keep track of some state at the
5847  // "first" place where it fails. After the first failure, "run
5848  // through the motions" until the end of this method, then raise
5849  // an error with an informative message.
5850  size_type firstBadIndex = 0;
5851  size_t firstBadOffset = 0;
5852  size_t firstBadExpectedNumBytes = 0;
5853  size_t firstBadNumBytes = 0;
5854  LO firstBadNumEnt = 0;
5855  // We have sanity checks for three kinds of errors:
5856  //
5857  // 1. Offset into array of all the incoming data (for all rows)
5858  // is out of bounds
5859  // 2. Too few bytes of incoming data for a row, given the
5860  // reported number of entries in those incoming data
5861  // 3. Error in unpacking the row's incoming data
5862  //
5863  bool outOfBounds = false;
5864  bool wrongNumBytes = false;
5865  bool unpackErr = false;
5866 
5867  const size_t bufSize = static_cast<size_t> (imports.size ());
5868  const char* const importsRawPtr = imports.getRawPtr ();
5869  size_t offset = 0;
5870 
5871  // Temporary storage for incoming values and indices. We need
5872  // this because the receive buffer does not align storage; it's
5873  // just contiguous bytes. In order to avoid violating ANSI
5874  // aliasing rules, we memcpy each incoming row's data into these
5875  // temporary arrays. We double their size every time we run out
5876  // of storage.
5877  Array<Scalar> valInTmp;
5878  Array<GO> indInTmp;
5879  for (size_type i = 0; i < numImportLIDs; ++i) {
5880  const LO lclRow = importLIDs[i];
5881  const size_t numBytes = numPacketsPerLID[i];
5882 
5883  if (numBytes > 0) { // there is actually something in the row
5884  const char* const numEntBeg = importsRawPtr + offset;
5885  const char* const numEntEnd = numEntBeg + sizeof (LO);
5886 
5887  // Now we know how many entries to expect in the received data
5888  // for this row.
5889  LO numEnt = 0;
5890  memcpy (&numEnt, numEntBeg, sizeof (LO));
5891 
5892  const char* const valBeg = numEntEnd;
5893  const char* const valEnd =
5894  valBeg + static_cast<size_t> (numEnt) * sizeof (Scalar);
5895  const char* const indBeg = valEnd;
5896  const size_t expectedNumBytes = sizeof (LO) +
5897  static_cast<size_t> (numEnt) * (sizeof (Scalar) + sizeof (GO));
5898 
5899  if (expectedNumBytes > numBytes) {
5900  firstBadIndex = i;
5901  firstBadOffset = offset;
5902  firstBadExpectedNumBytes = expectedNumBytes;
5903  firstBadNumBytes = numBytes;
5904  firstBadNumEnt = numEnt;
5905  wrongNumBytes = true;
5906  break;
5907  }
5908  if (offset > bufSize || offset + numBytes > bufSize) {
5909  firstBadIndex = i;
5910  firstBadOffset = offset;
5911  firstBadExpectedNumBytes = expectedNumBytes;
5912  firstBadNumBytes = numBytes;
5913  firstBadNumEnt = numEnt;
5914  outOfBounds = true;
5915  break;
5916  }
5917  size_t tmpNumEnt = static_cast<size_t> (valInTmp.size ());
5918  if (tmpNumEnt < static_cast<size_t> (numEnt) ||
5919  static_cast<size_t> (indInTmp.size ()) < static_cast<size_t> (numEnt)) {
5920  // Double the size of the temporary arrays for incoming data.
5921  tmpNumEnt = std::max (static_cast<size_t> (numEnt), tmpNumEnt * 2);
5922  valInTmp.resize (tmpNumEnt);
5923  indInTmp.resize (tmpNumEnt);
5924  }
5925  unpackErr =
5926  ! unpackRow (valInTmp.getRawPtr (), indInTmp.getRawPtr (), tmpNumEnt,
5927  valBeg, indBeg, numEnt, lclRow, combineMode);
5928  if (unpackErr) {
5929  firstBadIndex = i;
5930  firstBadOffset = offset;
5931  firstBadExpectedNumBytes = expectedNumBytes;
5932  firstBadNumBytes = numBytes;
5933  firstBadNumEnt = numEnt;
5934  break;
5935  }
5936  offset += numBytes;
5937  }
5938  }
5939 
5940  if (wrongNumBytes || outOfBounds || unpackErr) {
5941  std::ostringstream os;
5942  os << " importLIDs[i]: " << importLIDs[firstBadIndex]
5943  << ", bufSize: " << bufSize
5944  << ", offset: " << firstBadOffset
5945  << ", numBytes: " << firstBadNumBytes
5946  << ", expectedNumBytes: " << firstBadExpectedNumBytes
5947  << ", numEnt: " << firstBadNumEnt;
5948  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5949  wrongNumBytes, std::logic_error, "At index i = " << firstBadIndex
5950  << ", expectedNumBytes > numBytes." << os.str ());
5951  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5952  outOfBounds, std::logic_error, "First invalid offset into 'imports' "
5953  "unpack buffer at index i = " << firstBadIndex << "." << os.str ());
5954  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5955  unpackErr, std::logic_error, "First error in unpackRow() at index i = "
5956  << firstBadIndex << "." << os.str ());
5957  }
5958  }
5959 
5960  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5961  Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
5962  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
5963  getColumnMapMultiVector (const MV& X_domainMap,
5964  const bool force) const
5965  {
5966  using Teuchos::null;
5967  using Teuchos::RCP;
5968  using Teuchos::rcp;
5969 
5970  TEUCHOS_TEST_FOR_EXCEPTION(
5971  ! this->hasColMap (), std::runtime_error, "Tpetra::CrsMatrix::getColumn"
5972  "MapMultiVector: You may only call this method if the matrix has a "
5973  "column Map. If the matrix does not yet have a column Map, you should "
5974  "first call fillComplete (with domain and range Map if necessary).");
5975 
5976  // If the graph is not fill complete, then the Import object (if
5977  // one should exist) hasn't been constructed yet.
5978  TEUCHOS_TEST_FOR_EXCEPTION(
5979  ! this->getGraph ()->isFillComplete (), std::runtime_error, "Tpetra::"
5980  "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
5981  "this matrix's graph is fill complete.");
5982 
5983  const size_t numVecs = X_domainMap.getNumVectors ();
5984  RCP<const import_type> importer = this->getGraph ()->getImporter ();
5985  RCP<const map_type> colMap = this->getColMap ();
5986 
5987  RCP<MV> X_colMap; // null by default
5988 
5989  // If the Import object is trivial (null), then we don't need a
5990  // separate column Map multivector. Just return null in that
5991  // case. The caller is responsible for knowing not to use the
5992  // returned null pointer.
5993  //
5994  // If the Import is nontrivial, then we do need a separate
5995  // column Map multivector for the Import operation. Check in
5996  // that case if we have to (re)create the column Map
5997  // multivector.
5998  if (! importer.is_null () || force) {
5999  if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) {
6000  X_colMap = rcp (new MV (colMap, numVecs));
6001 
6002  // Cache the newly created multivector for later reuse.
6003  importMV_ = X_colMap;
6004  }
6005  else { // Yay, we can reuse the cached multivector!
6006  X_colMap = importMV_;
6007  // mfh 09 Jan 2013: We don't have to fill with zeros first,
6008  // because the Import uses INSERT combine mode, which overwrites
6009  // existing entries.
6010  //
6011  //X_colMap->putScalar (ZERO);
6012  }
6013  }
6014  return X_colMap;
6015  }
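  // A sketch (not a verbatim excerpt of apply()) of how the returned
  // multivector is typically used: if the Import is nontrivial, bring the
  // domain-Map vector's data into the column Map before the local sparse
  // matrix-vector multiply.
  //
  //   Teuchos::RCP<MV> X_colMap =
  //     this->getColumnMapMultiVector (X_domainMap, false);
  //   if (! X_colMap.is_null ()) {
  //     X_colMap->doImport (X_domainMap, *importer, INSERT);
  //   }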
6016 
6017  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6018  Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
6019  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
6020  getRowMapMultiVector (const MV& Y_rangeMap,
6021  const bool force) const
6022  {
6023  using Teuchos::null;
6024  using Teuchos::RCP;
6025  using Teuchos::rcp;
6026 
6027  // If the graph is not fill complete, then the Export object (if
6028  // one should exist) hasn't been constructed yet.
6029  TEUCHOS_TEST_FOR_EXCEPTION(
6030  ! this->getGraph ()->isFillComplete (), std::runtime_error, "Tpetra::"
6031  "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
6032  "matrix's graph is fill complete.");
6033 
6034  const size_t numVecs = Y_rangeMap.getNumVectors ();
6035  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
6036  // Every version of the constructor takes either a row Map, or a
6037  // graph (all of whose constructors take a row Map). Thus, the
6038  // matrix always has a row Map.
6039  RCP<const map_type> rowMap = this->getRowMap ();
6040 
6041  RCP<MV> Y_rowMap; // null by default
6042 
6043  // If the Export object is trivial (null), then we don't need a
6044  // separate row Map multivector. Just return null in that case.
6045  // The caller is responsible for knowing not to use the returned
6046  // null pointer.
6047  //
6048  // If the Export is nontrivial, then we do need a separate row
6049  // Map multivector for the Export operation. Check in that case
6050  // if we have to (re)create the row Map multivector.
6051  if (! exporter.is_null () || force) {
6052  if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) {
6053  Y_rowMap = rcp (new MV (rowMap, numVecs));
6054  exportMV_ = Y_rowMap; // Cache the newly created MV for later reuse.
6055  }
6056  else { // Yay, we can reuse the cached multivector!
6057  Y_rowMap = exportMV_;
6058  }
6059  }
6060  return Y_rowMap;
6061  }
6062 
6063  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6064  void
6065  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
6066  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
6067  {
6068  TEUCHOS_TEST_FOR_EXCEPTION(
6069  myGraph_.is_null (), std::logic_error, "Tpetra::CrsMatrix::"
6070  "removeEmptyProcessesInPlace: This method does not work when the matrix "
6071  "was created with a constant graph (that is, when it was created using "
6072  "the version of its constructor that takes an RCP<const CrsGraph>). "
6073  "This is because the matrix is not allowed to modify the graph in that "
6074  "case, but removing empty processes requires modifying the graph.");
6075  myGraph_->removeEmptyProcessesInPlace (newMap);
6076  // Even though CrsMatrix's row Map (as returned by getRowMap())
6077  // comes from its CrsGraph, CrsMatrix still implements DistObject,
6078  // so we also have to change the DistObject's Map.
6079  this->map_ = this->getRowMap ();
6080  // In the nonconst graph case, staticGraph_ is just a const
6081  // pointer to myGraph_. This assignment is probably redundant,
6082  // but it doesn't hurt.
6083  staticGraph_ = Teuchos::rcp_const_cast<const Graph> (myGraph_);
6084  }
6085 
6086  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6087  Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
6088  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
6089  add (const Scalar& alpha,
6090  const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& A,
6091  const Scalar& beta,
6092  const Teuchos::RCP<const map_type>& domainMap,
6093  const Teuchos::RCP<const map_type>& rangeMap,
6094  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6095  {
6096  using Teuchos::Array;
6097  using Teuchos::ArrayRCP;
6098  using Teuchos::ParameterList;
6099  using Teuchos::RCP;
6100  using Teuchos::rcp;
6101  using Teuchos::rcp_implicit_cast;
6102  using Teuchos::sublist;
6103  typedef LocalOrdinal LO;
6104  typedef GlobalOrdinal GO;
6105  typedef RowMatrix<Scalar, LO, GO, Node> row_matrix_type;
6106  typedef CrsMatrix<Scalar, LO, GO, Node, classic> crs_matrix_type;
6107 
6108  const crs_matrix_type& B = *this; // a convenient abbreviation
6109  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
6110  const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
6111 
6112  // If the user didn't supply a domain or range Map, then try to
6113  // get one from B first (if it has them), then from A (if it has
6114  // them). If we don't have any domain or range Maps, scold the
6115  // user.
6116  RCP<const map_type> A_domainMap = A.getDomainMap ();
6117  RCP<const map_type> A_rangeMap = A.getRangeMap ();
6118  RCP<const map_type> B_domainMap = B.getDomainMap ();
6119  RCP<const map_type> B_rangeMap = B.getRangeMap ();
6120 
6121  RCP<const map_type> theDomainMap = domainMap;
6122  RCP<const map_type> theRangeMap = rangeMap;
6123 
6124  if (domainMap.is_null ()) {
6125  if (B_domainMap.is_null ()) {
6126  TEUCHOS_TEST_FOR_EXCEPTION(
6127  A_domainMap.is_null (), std::invalid_argument,
6128  "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
6129  "then you must supply a nonnull domain Map to this method.");
6130  theDomainMap = A_domainMap;
6131  } else {
6132  theDomainMap = B_domainMap;
6133  }
6134  }
6135  if (rangeMap.is_null ()) {
6136  if (B_rangeMap.is_null ()) {
6137  TEUCHOS_TEST_FOR_EXCEPTION(
6138  A_rangeMap.is_null (), std::invalid_argument,
6139  "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
6140  "then you must supply a nonnull range Map to this method.");
6141  theRangeMap = A_rangeMap;
6142  } else {
6143  theRangeMap = B_rangeMap;
6144  }
6145  }
6146 
6147 #ifdef HAVE_TPETRA_DEBUG
6148  // In a debug build, check that A and B have matching domain and
6149  // range Maps, if they have domain and range Maps at all. (If
6150  // they aren't fill complete, then they may not yet have them.)
6151  if (! A_domainMap.is_null () && ! A_rangeMap.is_null ()) {
6152  if (! B_domainMap.is_null () && ! B_rangeMap.is_null ()) {
6153  TEUCHOS_TEST_FOR_EXCEPTION(
6154  ! B_domainMap->isSameAs (*A_domainMap), std::invalid_argument,
6155  "Tpetra::CrsMatrix::add: The input RowMatrix A must have a domain Map "
6156  "which is the same as (isSameAs) this RowMatrix's domain Map.");
6157  TEUCHOS_TEST_FOR_EXCEPTION(
6158  ! B_rangeMap->isSameAs (*A_rangeMap), std::invalid_argument,
6159  "Tpetra::CrsMatrix::add: The input RowMatrix A must have a range Map "
6160  "which is the same as (isSameAs) this RowMatrix's range Map.");
6161  TEUCHOS_TEST_FOR_EXCEPTION(
6162  ! domainMap.is_null () && ! domainMap->isSameAs (*B_domainMap),
6163  std::invalid_argument,
6164  "Tpetra::CrsMatrix::add: The input domain Map must be the same as "
6165  "(isSameAs) this RowMatrix's domain Map.");
6166  TEUCHOS_TEST_FOR_EXCEPTION(
6167  ! rangeMap.is_null () && ! rangeMap->isSameAs (*B_rangeMap),
6168  std::invalid_argument,
6169  "Tpetra::CrsMatrix::add: The input range Map must be the same as "
6170  "(isSameAs) this RowMatrix's range Map.");
6171  }
6172  }
6173  else if (! B_domainMap.is_null () && ! B_rangeMap.is_null ()) {
6174  TEUCHOS_TEST_FOR_EXCEPTION(
6175  ! domainMap.is_null () && ! domainMap->isSameAs (*B_domainMap),
6176  std::invalid_argument,
6177  "Tpetra::CrsMatrix::add: The input domain Map must be the same as "
6178  "(isSameAs) this RowMatrix's domain Map.");
6179  TEUCHOS_TEST_FOR_EXCEPTION(
6180  ! rangeMap.is_null () && ! rangeMap->isSameAs (*B_rangeMap),
6181  std::invalid_argument,
6182  "Tpetra::CrsMatrix::add: The input range Map must be the same as "
6183  "(isSameAs) this RowMatrix's range Map.");
6184  }
6185  else {
6186  TEUCHOS_TEST_FOR_EXCEPTION(
6187  domainMap.is_null () || rangeMap.is_null (), std::invalid_argument,
6188  "Tpetra::CrsMatrix::add: If neither A nor B have a domain and range "
6189  "Map, then you must supply a nonnull domain and range Map to this "
6190  "method.");
6191  }
6192 #endif // HAVE_TPETRA_DEBUG
6193 
6194  // What parameters do we pass to C's constructor? Do we call
6195  // fillComplete on C after filling it? And if so, what parameters
6196  // do we pass to C's fillComplete call?
6197  bool callFillComplete = true;
6198  RCP<ParameterList> constructorSublist;
6199  RCP<ParameterList> fillCompleteSublist;
6200  if (! params.is_null ()) {
6201  callFillComplete = params->get ("Call fillComplete", callFillComplete);
6202  constructorSublist = sublist (params, "Constructor parameters");
6203  fillCompleteSublist = sublist (params, "fillComplete parameters");
6204  }
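  // A sketch of the parameters recognized here (all optional; hypothetical
  // values):
  //
  //   Teuchos::ParameterList p;
  //   p.set ("Call fillComplete", true);      // fill-complete C before returning?
  //   p.sublist ("Constructor parameters");   // forwarded to C's constructor
  //   p.sublist ("fillComplete parameters");  // forwarded to C->fillComplete()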
6205 
6206  RCP<const map_type> A_rowMap = A.getRowMap ();
6207  RCP<const map_type> B_rowMap = B.getRowMap ();
6208  RCP<const map_type> C_rowMap = B_rowMap; // see discussion in documentation
6209  RCP<crs_matrix_type> C; // The result matrix.
6210 
6211  // If A and B's row Maps are the same, we can compute an upper
6212  // bound on the number of entries in each row of C, before
6213  // actually computing the sum. A reasonable upper bound is the
6214  // sum of the two entry counts in each row. If we choose this as
6215  // the actual per-row upper bound, we can use static profile.
6216  if (A_rowMap->isSameAs (*B_rowMap)) {
6217  const LO localNumRows = static_cast<LO> (A_rowMap->getNodeNumElements ());
6218  ArrayRCP<size_t> C_maxNumEntriesPerRow (localNumRows, 0);
6219 
6220  // Get the number of entries in each row of A.
6221  if (alpha != ZERO) {
6222  for (LO localRow = 0; localRow < localNumRows; ++localRow) {
6223  const size_t A_numEntries = A.getNumEntriesInLocalRow (localRow);
6224  C_maxNumEntriesPerRow[localRow] += A_numEntries;
6225  }
6226  }
6227  // Get the number of entries in each row of B.
6228  if (beta != ZERO) {
6229  for (LO localRow = 0; localRow < localNumRows; ++localRow) {
6230  const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
6231  C_maxNumEntriesPerRow[localRow] += B_numEntries;
6232  }
6233  }
6234  // Construct the result matrix C.
6235  if (constructorSublist.is_null ()) {
6236  C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow,
6237  StaticProfile));
6238  } else {
6239  C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow,
6240  StaticProfile, constructorSublist));
6241  }
6242  // Since A and B have the same row Maps, we could add them
6243  // together all at once and merge values before we call
6244  // insertGlobalValues. However, we don't really need to, since
6245  // we've already allocated enough space in each row of C for C
6246  // to do the merge itself.
6247  }
6248  else { // the row Maps of A and B are not the same
6249  // Construct the result matrix C.
6250  if (constructorSublist.is_null ()) {
6251  C = rcp (new crs_matrix_type (C_rowMap, 0, DynamicProfile));
6252  } else {
6253  C = rcp (new crs_matrix_type (C_rowMap, 0, DynamicProfile,
6254  constructorSublist));
6255  }
6256  }
6257 
6258 #ifdef HAVE_TPETRA_DEBUG
6259  TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
6260  "Tpetra::RowMatrix::add: C should not be null at this point. "
6261  "Please report this bug to the Tpetra developers.");
6262 #endif // HAVE_TPETRA_DEBUG
6263  //
6264  // Compute C = alpha*A + beta*B.
6265  //
6266  Array<GO> ind;
6267  Array<Scalar> val;
6268 
6269  if (alpha != ZERO) {
6270  const LO A_localNumRows = static_cast<LO> (A_rowMap->getNodeNumElements ());
6271  for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
6272  size_t A_numEntries = A.getNumEntriesInLocalRow (localRow);
6273  const GO globalRow = A_rowMap->getGlobalElement (localRow);
6274  if (A_numEntries > static_cast<size_t> (ind.size ())) {
6275  ind.resize (A_numEntries);
6276  val.resize (A_numEntries);
6277  }
6278  ArrayView<GO> indView = ind (0, A_numEntries);
6279  ArrayView<Scalar> valView = val (0, A_numEntries);
6280  A.getGlobalRowCopy (globalRow, indView, valView, A_numEntries);
6281 
6282  if (alpha != ONE) {
6283  for (size_t k = 0; k < A_numEntries; ++k) {
6284  valView[k] *= alpha;
6285  }
6286  }
6287  C->insertGlobalValues (globalRow, indView, valView);
6288  }
6289  }
6290 
6291  if (beta != ZERO) {
6292  const LO B_localNumRows = static_cast<LO> (B_rowMap->getNodeNumElements ());
6293  for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
6294  size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
6295  const GO globalRow = B_rowMap->getGlobalElement (localRow);
6296  if (B_numEntries > static_cast<size_t> (ind.size ())) {
6297  ind.resize (B_numEntries);
6298  val.resize (B_numEntries);
6299  }
6300  ArrayView<GO> indView = ind (0, B_numEntries);
6301  ArrayView<Scalar> valView = val (0, B_numEntries);
6302  B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries);
6303 
6304  if (beta != ONE) {
6305  for (size_t k = 0; k < B_numEntries; ++k) {
6306  valView[k] *= beta;
6307  }
6308  }
6309  C->insertGlobalValues (globalRow, indView, valView);
6310  }
6311  }
6312 
6313  if (callFillComplete) {
6314  if (fillCompleteSublist.is_null ()) {
6315  C->fillComplete (theDomainMap, theRangeMap);
6316  } else {
6317  C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist);
6318  }
6319  }
6320  return rcp_implicit_cast<row_matrix_type> (C);
6321  }
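  // A minimal usage sketch (hypothetical names, assuming Scalar = double):
  // given fill-complete matrices A and B with the same row, domain, and
  // range Maps, compute C = 2*A + 3*B and let add() fill-complete C:
  //
  //   Teuchos::RCP<Tpetra::RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > C =
  //     B->add (2.0, *A, 3.0, Teuchos::null, Teuchos::null, Teuchos::null);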
6322 
6323  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6324  void
6325  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
6326  transferAndFillComplete (Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >& destMat,
6327  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
6328  const Teuchos::RCP<const map_type>& domainMap,
6329  const Teuchos::RCP<const map_type>& rangeMap,
6330  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6331  {
6332  using Teuchos::ArrayView;
6333  using Teuchos::Comm;
6334  using Teuchos::ParameterList;
6335  using Teuchos::RCP;
6336  typedef LocalOrdinal LO;
6337  typedef GlobalOrdinal GO;
6338  typedef node_type NT;
6339  typedef CrsMatrix<Scalar, LO, GO, NT, classic> this_type;
6340  typedef Vector<int, LO, GO, NT> IntVectorType;
6341 
6342 #ifdef HAVE_TPETRA_MMM_TIMINGS
6343  std::string label;
6344  if(!params.is_null())
6345  label = params->get("Timer Label",label);
6346  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
6347  using Teuchos::TimeMonitor;
6348  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Pack-1"))));
6349 #endif
6350 
6351  // Make sure that the input argument rowTransfer is either an
6352  // Import or an Export. Import and Export are the only two
6353  // subclasses of Transfer that we defined, but users might
6354  // (unwisely, for now at least) decide to implement their own
6355  // subclasses. Exclude this possibility.
6356  const import_type* xferAsImport = dynamic_cast<const import_type*> (&rowTransfer);
6357  const export_type* xferAsExport = dynamic_cast<const export_type*> (&rowTransfer);
6358  TEUCHOS_TEST_FOR_EXCEPTION(
6359  xferAsImport == NULL && xferAsExport == NULL, std::invalid_argument,
6360  "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
6361  "argument must be either an Import or an Export, and its template "
6362  "parameters must match the corresponding template parameters of the "
6363  "CrsMatrix.");
6364 
6365  // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
6366  // if the source Map is not distributed but the target Map is?
6367  const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed ();
6368 
6369  //
6370  // Get the caller's parameters
6371  //
6372 
6373  bool reverseMode = false; // Are we in reverse mode?
6374  bool restrictComm = false; // Do we need to restrict the communicator?
6375  RCP<ParameterList> matrixparams; // parameters for the destination matrix
6376  if (! params.is_null ()) {
6377  reverseMode = params->get ("Reverse Mode", reverseMode);
6378  restrictComm = params->get ("Restrict Communicator", restrictComm);
6379  matrixparams = sublist (params, "CrsMatrix");
6380  }
6381 
6382  // Get the new domain and range Maps. We need some of them for
6383  // error checking, now that we have the reverseMode parameter.
6384  RCP<const map_type> MyRowMap = reverseMode ?
6385  rowTransfer.getSourceMap () : rowTransfer.getTargetMap ();
6386  RCP<const map_type> MyColMap; // create this below
6387  RCP<const map_type> MyDomainMap = ! domainMap.is_null () ?
6388  domainMap : getDomainMap ();
6389  RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ?
6390  rangeMap : getRangeMap ();
6391  RCP<const map_type> BaseRowMap = MyRowMap;
6392  RCP<const map_type> BaseDomainMap = MyDomainMap;
6393 
6394  // If the user gave us a nonnull destMat, then check whether it's
6395  // "pristine." That means that it has no entries.
6396  //
6397  // FIXME (mfh 15 May 2014) If this is not true on all processes,
6398  // then this exception test may hang. It would be better to
6399  // forward an error flag to the next communication phase.
6400  if (! destMat.is_null ()) {
6401  // FIXME (mfh 15 May 2014): The classic Petra idiom for checking
6402  // whether a graph or matrix has no entries on the calling
6403  // process, is that it is neither locally nor globally indexed.
6404  // This may change eventually with the Kokkos refactor version
6405  // of Tpetra, so it would be better just to check the quantity
6406  // of interest directly. Note that with the Kokkos refactor
6407  // version of Tpetra, asking for the total number of entries in
6408  // a graph or matrix that is not fill complete might require
6409  // computation (kernel launch), since it is not thread scalable
6410  // to update a count every time an entry is inserted.
6411  const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () &&
6412  ! destMat->getGraph ()->isGloballyIndexed ();
6413  TEUCHOS_TEST_FOR_EXCEPTION(
6414  ! NewFlag, std::invalid_argument, "Tpetra::CrsMatrix::"
6415  "transferAndFillComplete: The input argument 'destMat' is only allowed "
6416  "to be nonnull if its graph is empty (neither locally nor globally "
6417  "indexed).");
6418  // FIXME (mfh 15 May 2014) At some point, we want to change
6419  // graphs and matrices so that their DistObject Map
6420  // (this->getMap()) may differ from their row Map. This will
6421  // make redistribution for 2-D distributions more efficient. I
6422  // hesitate to change this check, because I'm not sure how much
6423  // the code here depends on getMap() and getRowMap() being the
6424  // same.
6425  TEUCHOS_TEST_FOR_EXCEPTION(
6426  ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument,
6427  "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
6428  "input argument 'destMat' is not the same as the (row) Map specified "
6429  "by the input argument 'rowTransfer'.");
6430  TEUCHOS_TEST_FOR_EXCEPTION(
6431  ! destMat->checkSizes (*this), std::invalid_argument,
6432  "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
6433  "destination matrix, but checkSizes() indicates that it is not a legal "
6434  "target for redistribution from the source matrix (*this). This "
6435  "may mean that they do not have the same dimensions.");
6436  }
6437 
6438  // If forward mode (the default), then *this's (row) Map must be
6439  // the same as the source Map of the Transfer. If reverse mode,
6440  // then *this's (row) Map must be the same as the target Map of
6441  // the Transfer.
6442  //
6443  // FIXME (mfh 15 May 2014) At some point, we want to change graphs
6444  // and matrices so that their DistObject Map (this->getMap()) may
6445  // differ from their row Map. This will make redistribution for
6446  // 2-D distributions more efficient. I hesitate to change this
6447  // check, because I'm not sure how much the code here depends on
6448  // getMap() and getRowMap() being the same.
6449  TEUCHOS_TEST_FOR_EXCEPTION(
6450  ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())),
6451  std::invalid_argument, "Tpetra::CrsMatrix::transferAndFillComplete: "
6452  "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
6453  TEUCHOS_TEST_FOR_EXCEPTION(
6454  ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())),
6455  std::invalid_argument, "Tpetra::CrsMatrix::transferAndFillComplete: "
6456  "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
6457 
6458  // The basic algorithm here is:
6459  //
6460  // 1. Call the moral equivalent of "distor.do" to handle the import.
6461  // 2. Copy all the Imported and Copy/Permuted data into the raw
6462  // CrsMatrix / CrsGraphData pointers, still using GIDs.
6463  // 3. Call an optimized version of MakeColMap that avoids the
6464  // Directory lookups (since the importer knows who owns all the
6465  // GIDs) AND reindexes to LIDs.
6466  // 4. Call expertStaticFillComplete()
6467 
6468  // Get information from the Importer
6469  const size_t NumSameIDs = rowTransfer.getNumSameIDs();
6470  ArrayView<const LO> ExportLIDs = reverseMode ?
6471  rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
6472  ArrayView<const LO> RemoteLIDs = reverseMode ?
6473  rowTransfer.getExportLIDs () : rowTransfer.getRemoteLIDs ();
6474  ArrayView<const LO> PermuteToLIDs = reverseMode ?
6475  rowTransfer.getPermuteFromLIDs () : rowTransfer.getPermuteToLIDs ();
6476  ArrayView<const LO> PermuteFromLIDs = reverseMode ?
6477  rowTransfer.getPermuteToLIDs () : rowTransfer.getPermuteFromLIDs ();
6478  Distributor& Distor = rowTransfer.getDistributor ();
6479 
6480  // Owning PIDs
6481  Teuchos::Array<int> SourcePids;
6482  Teuchos::Array<int> TargetPids;
6483  int MyPID = getComm ()->getRank ();
6484 
6485  // Temp variables for sub-communicators
6486  RCP<const map_type> ReducedRowMap, ReducedColMap,
6487  ReducedDomainMap, ReducedRangeMap;
6488  RCP<const Comm<int> > ReducedComm;
6489 
6490  // If the user gave us a null destMat, then construct the new
6491  // destination matrix. We will replace its column Map later.
6492  if (destMat.is_null ()) {
6493  destMat = rcp (new this_type (MyRowMap, 0, StaticProfile, matrixparams));
6494  }
6495 
6496  /***************************************************/
6497  /***** 1) First communicator restriction phase ****/
6498  /***************************************************/
6499  if (restrictComm) {
6500  ReducedRowMap = MyRowMap->removeEmptyProcesses ();
6501  ReducedComm = ReducedRowMap.is_null () ?
6502  Teuchos::null :
6503  ReducedRowMap->getComm ();
6504  destMat->removeEmptyProcessesInPlace (ReducedRowMap);
6505 
6506  ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ?
6507  ReducedRowMap :
6508  MyDomainMap->replaceCommWithSubset (ReducedComm);
6509  ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ?
6510  ReducedRowMap :
6511  MyRangeMap->replaceCommWithSubset (ReducedComm);
6512 
6513  // Reset the "my" maps
6514  MyRowMap = ReducedRowMap;
6515  MyDomainMap = ReducedDomainMap;
6516  MyRangeMap = ReducedRangeMap;
6517 
6518  // Update my PID, if we've restricted the communicator
6519  if (! ReducedComm.is_null ()) {
6520  MyPID = ReducedComm->getRank ();
6521  }
6522  else {
6523  MyPID = -2; // For debugging
6524  }
6525  }
6526  else {
6527  ReducedComm = MyRowMap->getComm ();
6528  }
6529 
6530  /***************************************************/
6531  /***** 2) From Tpetra::DistObject::doTransfer() ****/
6532  /***************************************************/
6533 #ifdef HAVE_TPETRA_MMM_TIMINGS
6534  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC ImportSetup"))));
6535 #endif
6536  // Get the owning PIDs
6537  RCP<const import_type> MyImporter = getGraph ()->getImporter ();
6538 
6539  if (! restrictComm && ! MyImporter.is_null () &&
6540  BaseDomainMap->isSameAs (*getDomainMap ())) {
6541  // Same domain map as source matrix
6542  //
6543  // NOTE: This won't work for restrictComm (because the Import
6544  // doesn't know the restricted PIDs), though writing an
6545  // optimized version for that case would be easy (Import an
6546  // IntVector of the new PIDs). Might want to add this later.
6547  Import_Util::getPids (*MyImporter, SourcePids, false);
6548  }
6549  else if (MyImporter.is_null () && BaseDomainMap->isSameAs (*getDomainMap ())) {
6550  // Matrix has no off-process entries
6551  SourcePids.resize (getColMap ()->getNodeNumElements ());
6552  SourcePids.assign (getColMap ()->getNodeNumElements (), MyPID);
6553  }
6554  else if (BaseDomainMap->isSameAs (*BaseRowMap) &&
6555  getDomainMap ()->isSameAs (*getRowMap ())) {
6556  // We can use the rowTransfer + SourceMatrix's Import to find out who owns what.
6557  IntVectorType TargetRow_pids (domainMap);
6558  IntVectorType SourceRow_pids (getRowMap ());
6559  IntVectorType SourceCol_pids (getColMap ());
6560 
6561  TargetRow_pids.putScalar (MyPID);
6562  if (! reverseMode && xferAsImport != NULL) {
6563  SourceRow_pids.doExport (TargetRow_pids, *xferAsImport, INSERT);
6564  }
6565  else if (reverseMode && xferAsExport != NULL) {
6566  SourceRow_pids.doExport (TargetRow_pids, *xferAsExport, INSERT);
6567  }
6568  else if (! reverseMode && xferAsExport != NULL) {
6569  SourceRow_pids.doImport (TargetRow_pids, *xferAsExport, INSERT);
6570  }
6571  else if (reverseMode && xferAsImport != NULL) {
6572  SourceRow_pids.doImport (TargetRow_pids, *xferAsImport, INSERT);
6573  }
6574  else {
6575  TEUCHOS_TEST_FOR_EXCEPTION(
6576  true, std::logic_error, "Tpetra::CrsMatrix::"
6577  "transferAndFillComplete: Should never get here! "
6578  "Please report this bug to a Tpetra developer.");
6579  }
6580  SourceCol_pids.doImport (SourceRow_pids, *MyImporter, INSERT);
6581  SourcePids.resize (getColMap ()->getNodeNumElements ());
6582  SourceCol_pids.get1dCopy (SourcePids ());
6583  }
6584  else {
6585  TEUCHOS_TEST_FOR_EXCEPTION(
6586  true, std::invalid_argument, "Tpetra::CrsMatrix::"
6587  "transferAndFillComplete: This method only allows either domainMap == "
6588  "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
6589  "getDomainMap () == getRowMap ()).");
6590  }
6591 #ifdef HAVE_TPETRA_MMM_TIMINGS
6592  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Pack-2"))));
6593 #endif
6594 
6595  // Tpetra-specific stuff
6596  //
6597  // FIXME (mfh 15 May 2014) This should work fine if CrsMatrix
6598  // inherits from DistObject (in which case all arrays that get
6599  // resized here are Teuchos::Array), but it won't work if
6600  // CrsMatrix inherits from DistObjectKA (in which case all arrays
6601  // that get resized here are Kokkos::View). In the latter case,
6602  // imports_ and numExportPacketsPerLID_ each have only a device
6603  // view, but numImportPacketsPerLID_ has a device view and a host
6604  // view (host_numImportPacketsPerLID_).
6605  //
6606  // Currently, CrsMatrix inherits from DistObject, not
6607  // DistObjectKA, so the code below should be fine for the Kokkos
6608  // refactor version of CrsMatrix.
6609  //
6610  // For this and for all other cases in this function that want to
6611  // resize the DistObject's communication arrays, it would make
6612  // sense to give DistObject (and DistObjectKA) methods for
6613  // resizing that don't expose the details of whether these are
6614  // Teuchos::Array or Kokkos::View.
6615  size_t constantNumPackets = destMat->constantNumberOfPackets ();
6616  if (constantNumPackets == 0) {
6617  destMat->numExportPacketsPerLID_old_.resize (ExportLIDs.size ());
6618  destMat->numImportPacketsPerLID_old_.resize (RemoteLIDs.size ());
6619  }
6620  else {
6621  // There are a constant number of packets per element. We
6622  // already know (from the number of "remote" (incoming)
6623  // elements) how many incoming elements we expect, so we can
6624  // resize the buffer accordingly.
6625  const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
6626  if (static_cast<size_t> (destMat->imports_old_.size ()) != rbufLen) {
6627  destMat->imports_old_.resize (rbufLen);
6628  }
6629  }
6630 
6631  // Pack & Prepare w/ owning PIDs
6632  //
6633  // FIXME (mfh 15 May 2014) This should work fine if CrsMatrix
6634  // inherits from DistObject (in which case all arrays that get
6635  // passed in here are Teuchos::Array), but it won't work if
6636  // CrsMatrix inherits from DistObjectKA (in which case all arrays
6637  // that get passed in here are Kokkos::View). In the latter case,
6638  // exports_ and numExportPacketsPerLID_ each have only a device
6639  // view.
6640  //
6641  // Currently, CrsMatrix inherits from DistObject, not
6642  // DistObjectKA, so the code below should be fine for the Kokkos
6643  // refactor version of CrsMatrix.
6644 #ifdef HAVE_TPETRA_DEBUG
6645  {
6646  using Teuchos::outArg;
6647  using Teuchos::REDUCE_MAX;
6648  using Teuchos::reduceAll;
6649  using std::cerr;
6650  using std::endl;
6651  RCP<const Teuchos::Comm<int> > comm = this->getComm ();
6652  const int myRank = comm->getRank ();
6653  const int numProcs = comm->getSize ();
6654 
6655  std::ostringstream os;
6656  int lclErr = 0;
6657  try {
6658  Import_Util::packAndPrepareWithOwningPIDs (*this, ExportLIDs,
6659  destMat->exports_old_,
6660  destMat->numExportPacketsPerLID_old_ (),
6661  constantNumPackets, Distor,
6662  SourcePids);
6663  }
6664  catch (std::exception& e) {
6665  os << "Proc " << myRank << ": " << e.what ();
6666  lclErr = 1;
6667  }
6668  int gblErr = 0;
6669  if (! comm.is_null ()) {
6670  reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
6671  }
6672  if (gblErr != 0) {
6673  if (myRank == 0) {
6674  cerr << "packAndPrepareWithOwningPIDs threw an exception: " << endl;
6675  }
6676  std::ostringstream err;
6677  for (int r = 0; r < numProcs; ++r) {
6678  if (r == myRank && lclErr != 0) {
6679  cerr << os.str () << endl;
6680  }
6681  comm->barrier ();
6682  comm->barrier ();
6683  comm->barrier ();
6684  }
6685 
6686  TEUCHOS_TEST_FOR_EXCEPTION(
6687  true, std::logic_error, "packAndPrepareWithOwningPIDs threw an "
6688  "exception.");
6689  }
6690  }
6691 
6692 #else
6693  Import_Util::packAndPrepareWithOwningPIDs (*this, ExportLIDs,
6694  destMat->exports_old_,
6695  destMat->numExportPacketsPerLID_old_ (),
6696  constantNumPackets, Distor,
6697  SourcePids);
6698 #endif // HAVE_TPETRA_DEBUG
6699 
6700  // Do the exchange of remote data.
6701  //
6702  // FIXME (mfh 15 May 2014) This should work fine if CrsMatrix
6703  // inherits from DistObject (in which case all arrays that get
6704  // passed in here are Teuchos::Array), but it won't work if
6705  // CrsMatrix inherits from DistObjectKA (in which case all arrays
6706  // that get passed in here are Kokkos::View).
6707  //
6708  // In the latter case, imports_, exports_, and
6709  // numExportPacketsPerLID_ each have only a device view.
6710  // numImportPacketsPerLIDs_ is a device view, and also has a host
6711  // view (host_numImportPacketsPerLID_).
6712 #ifdef HAVE_TPETRA_MMM_TIMINGS
6713  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Transfer"))));
6714 #endif
6715 
6716  if (communication_needed) {
6717  if (reverseMode) {
6718  if (constantNumPackets == 0) { // variable number of packets per LID
6719  Distor.doReversePostsAndWaits (destMat->numExportPacketsPerLID_old_ ().getConst (), 1,
6720  destMat->numImportPacketsPerLID_old_ ());
6721  size_t totalImportPackets = 0;
6722  for (Array_size_type i = 0; i < destMat->numImportPacketsPerLID_old_.size (); ++i) {
6723  totalImportPackets += destMat->numImportPacketsPerLID_old_[i];
6724  }
6725  destMat->imports_old_.resize (totalImportPackets);
6726  Distor.doReversePostsAndWaits (destMat->exports_old_ ().getConst (),
6727  destMat->numExportPacketsPerLID_old_ (),
6728  destMat->imports_old_ (),
6729  destMat->numImportPacketsPerLID_old_ ());
6730  }
6731  else { // constant number of packets per LID
6732  Distor.doReversePostsAndWaits (destMat->exports_old_ ().getConst (),
6733  constantNumPackets,
6734  destMat->imports_old_ ());
6735  }
6736  }
6737  else { // forward mode (the default)
6738  if (constantNumPackets == 0) { // variable number of packets per LID
6739  Distor.doPostsAndWaits (destMat->numExportPacketsPerLID_old_ ().getConst (), 1,
6740  destMat->numImportPacketsPerLID_old_ ());
6741  size_t totalImportPackets = 0;
6742  for (Array_size_type i = 0; i < destMat->numImportPacketsPerLID_old_.size (); ++i) {
6743  totalImportPackets += destMat->numImportPacketsPerLID_old_[i];
6744  }
6745  destMat->imports_old_.resize (totalImportPackets);
6746  Distor.doPostsAndWaits (destMat->exports_old_ ().getConst (),
6747  destMat->numExportPacketsPerLID_old_ (),
6748  destMat->imports_old_ (),
6749  destMat->numImportPacketsPerLID_old_ ());
6750  }
6751  else { // constant number of packets per LID
6752  Distor.doPostsAndWaits (destMat->exports_old_ ().getConst (),
6753  constantNumPackets,
6754  destMat->imports_old_ ());
6755  }
6756  }
6757  }
6758 
6759  /*********************************************************************/
6760  /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
6761  /*********************************************************************/
6762 
6763  // FIXME (mfh 15 May 2014) This should work fine if CrsMatrix
6764  // inherits from DistObject (in which case all arrays that get
6765  // passed in here are Teuchos::Array), but it won't work if
6766  // CrsMatrix inherits from DistObjectKA (in which case all arrays
6767  // that get passed in here are Kokkos::View).
6768  //
6769  // In the latter case, imports_ only has a device view.
6770  // numImportPacketsPerLIDs_ is a device view, and also has a host
6771  // view (host_numImportPacketsPerLID_).
6772 #ifdef HAVE_TPETRA_MMM_TIMINGS
6773  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Unpack-1"))));
6774 #endif
6775  size_t mynnz =
6776  Import_Util::unpackAndCombineWithOwningPIDsCount (*this, RemoteLIDs,
6777  destMat->imports_old_ (),
6778  destMat->numImportPacketsPerLID_old_ (),
6779  constantNumPackets, Distor, INSERT,
6780  NumSameIDs, PermuteToLIDs,
6781  PermuteFromLIDs);
6782  size_t N = BaseRowMap->getNodeNumElements ();
6783 
6784  // Allocations
6785  ArrayRCP<size_t> CSR_rowptr(N+1);
6786  ArrayRCP<GO> CSR_colind_GID;
6787  ArrayRCP<LO> CSR_colind_LID;
6788  ArrayRCP<Scalar> CSR_vals;
6789  CSR_colind_GID.resize (mynnz);
6790  CSR_vals.resize (mynnz);
6791 
6792  // If LO and GO are the same, we can reuse memory when
6793  // converting the column indices from global to local indices.
6794  if (typeid (LO) == typeid (GO)) {
6795  CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID);
6796  }
6797  else {
6798  CSR_colind_LID.resize (mynnz);
6799  }
6800 
6801  // FIXME (mfh 15 May 2014) This should work fine if CrsMatrix
6802  // inherits from DistObject (in which case all arrays that get
6803  // passed in here are Teuchos::Array), but it won't work if
6804  // CrsMatrix inherits from DistObjectKA (in which case all arrays
6805  // that get passed in here are Kokkos::View).
6806  //
6807  // In the latter case, imports_ only has a device view.
6808  // numImportPacketsPerLIDs_ is a device view, and also has a host
6809  // view (host_numImportPacketsPerLID_).
6810  //
6811  // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
6812  // unpackAndCombine method on a "CrsArrays" object? This passing
6813  // in a huge list of arrays is icky. Can't we have a bit of an
6814  // abstraction? Implementing a concrete DistObject subclass only
6815  // takes five methods.
6816  Import_Util::unpackAndCombineIntoCrsArrays (*this, RemoteLIDs, destMat->imports_old_ (),
6817  destMat->numImportPacketsPerLID_old_ (),
6818  constantNumPackets, Distor, INSERT, NumSameIDs,
6819  PermuteToLIDs, PermuteFromLIDs, N, mynnz, MyPID,
6820  CSR_rowptr (), CSR_colind_GID (), CSR_vals (),
6821  SourcePids (), TargetPids);
6822 
6823  /**************************************************************/
6824  /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
6825  /**************************************************************/
6826 #ifdef HAVE_TPETRA_MMM_TIMINGS
6827  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Unpack-2"))));
6828 #endif
6829  // Call an optimized version of makeColMap that avoids the
6830  // Directory lookups (since the Import object knows who owns all
6831  // the GIDs).
6832  Teuchos::Array<int> RemotePids;
6833  Import_Util::lowCommunicationMakeColMapAndReindex (CSR_rowptr (),
6834  CSR_colind_LID (),
6835  CSR_colind_GID (),
6836  BaseDomainMap,
6837  TargetPids, RemotePids,
6838  MyColMap);
6839 
6840  /*******************************************************/
6841  /**** 4) Second communicator restriction phase ****/
6842  /*******************************************************/
6843  if (restrictComm) {
6844  ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
6845  ReducedRowMap :
6846  MyColMap->replaceCommWithSubset (ReducedComm);
6847  MyColMap = ReducedColMap; // Reset the "my" maps
6848  }
6849 
6850  // Replace the col map
6851  destMat->replaceColMap (MyColMap);
6852 
6853  // Short circuit if the processor is no longer in the communicator
6854  //
6855  // NOTE: Epetra modifies all "removed" processes so they
6856  // have a dummy (serial) Map that doesn't touch the original
6857  // communicator. Duplicating that here might be a good idea.
6858  if (ReducedComm.is_null ()) {
6859  return;
6860  }
6861 
6862  /***************************************************/
6863  /**** 5) Sort ****/
6864  /***************************************************/
6865 #ifdef HAVE_TPETRA_MMM_TIMINGS
6866  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Unpack-3"))));
6867 #endif
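  // In a forward-mode Import (or reverse-mode Export), each target entry
  // comes from exactly one source process, so sorting each row by column
  // index is enough.  In a forward-mode Export (or reverse-mode Import),
  // several processes may contribute entries with the same column index in
  // a row; those duplicates must also be merged, which can shrink the
  // arrays below mynnz (hence the resize in that branch).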
6871  if ((! reverseMode && xferAsImport != NULL) ||
6872  (reverseMode && xferAsExport != NULL)) {
6873  Import_Util::sortCrsEntries (CSR_rowptr (),
6874  CSR_colind_LID (),
6875  CSR_vals ());
6876  }
6877  else if ((! reverseMode && xferAsExport != NULL) ||
6878  (reverseMode && xferAsImport != NULL)) {
6879  Import_Util::sortAndMergeCrsEntries (CSR_rowptr (),
6880  CSR_colind_LID (),
6881  CSR_vals ());
6882  if (CSR_rowptr[N] != mynnz) {
6883  CSR_colind_LID.resize (CSR_rowptr[N]);
6884  CSR_vals.resize (CSR_rowptr[N]);
6885  }
6886  }
6887  else {
6888  TEUCHOS_TEST_FOR_EXCEPTION(
6889  true, std::logic_error, "Tpetra::CrsMatrix::"
6890  "transferAndFillComplete: Should never get here! "
6891  "Please report this bug to a Tpetra developer.");
6892  }
6893  /***************************************************/
6894  /**** 7) Reset the colmap and the arrays ****/
6895  /***************************************************/
6896 
6897  // Hand the assembled CSR arrays to the new matrix (restricted as needed)
6898  //
6899  // NOTE (mfh 15 May 2014) This should work fine for the Kokkos
6900  // refactor version of CrsMatrix, though it reserves the right to
6901  // make a deep copy of the arrays.
6902  destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals);
6903 
6904  /***************************************************/
6905  /**** 8) Build Importer & Call ESFC ****/
6906  /***************************************************/
6907  // Pre-build the importer using the existing PIDs
6908  Teuchos::ParameterList esfc_params;
6909 #ifdef HAVE_TPETRA_MMM_TIMINGS
6910  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC CreateImporter"))));
6911 #endif
6912  RCP<import_type> MyImport = rcp (new import_type (MyDomainMap, MyColMap, RemotePids));
6913 #ifdef HAVE_TPETRA_MMM_TIMINGS
6914  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC ESFC"))));
6915 
6916  esfc_params.set("Timer Label",prefix + std::string("TAFC"));
6917 #endif
6918 
6919  destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport, Teuchos::null, rcp (&esfc_params, false));
6920  }
6921 
6922  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6923  void
6924  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
6925  importAndFillComplete (Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >& destMatrix,
6926  const import_type& importer,
6927  const Teuchos::RCP<const map_type>& domainMap,
6928  const Teuchos::RCP<const map_type>& rangeMap,
6929  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6930  {
6931  transferAndFillComplete (destMatrix, importer, domainMap, rangeMap, params);
6932  }
6933 
6934 
6935  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6936  void
6937  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
6938  exportAndFillComplete (Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >& destMatrix,
6939  const export_type& exporter,
6940  const Teuchos::RCP<const map_type>& domainMap,
6941  const Teuchos::RCP<const map_type>& rangeMap,
6942  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6943  {
6944  transferAndFillComplete (destMatrix, exporter, domainMap, rangeMap, params);
6945  }
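  // importAndFillComplete() and exportAndFillComplete() are thin wrappers:
  // each simply forwards to transferAndFillComplete() above, which performs
  // the fused communication, column Map construction, sort/merge, and
  // expertStaticFillComplete() in a single pass.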
6946 
6947 } // namespace Tpetra
6948 
6949 //
6950 // Explicit instantiation macro
6951 //
6952 // Must be expanded from within the Tpetra namespace!
6953 //
6954 
6955 #define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE) \
6956  \
6957  template class CrsMatrix< SCALAR , LO , GO , NODE >; \
6958  template RCP< CrsMatrix< SCALAR , LO , GO , NODE > > \
6959  CrsMatrix< SCALAR , LO , GO , NODE >::convert< SCALAR > () const;
6960 
6961 #define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
6962  \
6963  template RCP< CrsMatrix< SO , LO , GO , NODE > > \
6964  CrsMatrix< SI , LO , GO , NODE >::convert< SO > () const;
6965 
6966 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
6967  template<> \
6968  RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
6969  importAndFillCompleteCrsMatrix (const RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
6970  const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
6971  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
6972  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
6973  const RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
6974  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
6975  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
6976  const RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
6977  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
6978  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
6979  const RCP<Teuchos::ParameterList>& params);
6980 
6981 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
6982  template<> \
6983  RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
6984  exportAndFillCompleteCrsMatrix (const RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
6985  const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
6986  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
6987  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
6988  const RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
6989  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
6990  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
6991  const RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
6992  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
6993  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
6994  const RCP<Teuchos::ParameterList>& params);
6995 
6996 #define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO ,NODE) \
6997  TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
6998  TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
6999  TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE)
7000 
7001 #endif // TPETRA_CRSMATRIX_DEF_HPP
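The following is a minimal, illustrative sketch (not part of Tpetra_CrsMatrix_def.hpp) of how user code typically reaches the fused transfer-and-fillComplete path defined above, through the nonmember importAndFillCompleteCrsMatrix() that the instantiation macros cover. The helper name redistributeMatrix and the targetRowMap argument are hypothetical; only the Tpetra types and calls come from the library, and the source matrix is assumed to be fill complete.

#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_Map.hpp"
#include "Tpetra_Import.hpp"
#include "Teuchos_RCP.hpp"

// Redistribute the sparse matrix A onto targetRowMap, returning a new,
// fill-complete matrix.  (Hypothetical helper; assumes A is fill complete.)
template<class Scalar, class LO, class GO, class Node>
Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LO, GO, Node> >
redistributeMatrix (const Teuchos::RCP<const Tpetra::CrsMatrix<Scalar, LO, GO, Node> >& A,
                    const Teuchos::RCP<const Tpetra::Map<LO, GO, Node> >& targetRowMap)
{
  typedef Tpetra::Import<LO, GO, Node> import_type;

  // Communication plan from A's (uniquely owned) row Map to the new row Map.
  Teuchos::RCP<const import_type> importer =
    Teuchos::rcp (new import_type (A->getRowMap (), targetRowMap));

  // One fused call replaces the usual doImport() + fillComplete() pair; it
  // invokes CrsMatrix::importAndFillComplete(), which in turn calls the
  // transferAndFillComplete() routine whose tail appears in the listing above.
  return Tpetra::importAndFillCompleteCrsMatrix (A, *importer,
                                                 A->getDomainMap (),
                                                 A->getRangeMap (),
                                                 Teuchos::null);
}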