Tpetra parallel linear algebra  Version of the Day
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_CRSGRAPH_DEF_HPP
43 #define TPETRA_CRSGRAPH_DEF_HPP
44 
52 
53 #include "Tpetra_Distributor.hpp"
54 #include "Teuchos_Assert.hpp"
55 #include "Teuchos_NullIteratorTraits.hpp"
56 #include "Teuchos_as.hpp"
57 #include "Teuchos_SerialDenseMatrix.hpp"
58 #include "KokkosCompat_ClassicNodeAPI_Wrapper.hpp"
59 
60 #include <algorithm>
61 #include <string>
62 #include <utility>
63 
64 
65 namespace Tpetra {
66 
67  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
68  CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::
69  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
70  size_t maxNumEntriesPerRow,
71  ProfileType pftype,
72  const Teuchos::RCP<Teuchos::ParameterList>& params) :
73  dist_object_type (rowMap)
74  , rowMap_ (rowMap)
75  , nodeNumEntries_ (0)
76  , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ())
77  , pftype_ (pftype)
78  , numAllocForAllRows_ (maxNumEntriesPerRow)
79  , storageStatus_ (pftype == StaticProfile ?
80  Details::STORAGE_1D_UNPACKED :
81  Details::STORAGE_2D)
82  , indicesAreAllocated_ (false)
83  , indicesAreLocal_ (false)
84  , indicesAreGlobal_ (false)
85  , fillComplete_ (false)
86  , indicesAreSorted_ (true)
87  , noRedundancies_ (true)
88  , haveLocalConstants_ (false)
89  , haveGlobalConstants_ (false)
90  , sortGhostsAssociatedWithEachProcessor_ (true)
91  {
92  const char tfecfFuncName[] = "CrsGraph(rowMap,maxNumEntriesPerRow,"
93  "pftype,params): ";
94  staticAssertions ();
95  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
96  maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
97  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
98  "a valid size_t value, which in this case means it must not be "
99  "Teuchos::OrdinalTraits<size_t>::invalid().");
100  resumeFill (params);
102  }
103 
104  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
106  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
107  const Teuchos::RCP<const map_type>& colMap,
108  const size_t maxNumEntriesPerRow,
109  const ProfileType pftype,
110  const Teuchos::RCP<Teuchos::ParameterList>& params) :
111  dist_object_type (rowMap)
112  , rowMap_ (rowMap)
113  , colMap_ (colMap)
114  , nodeNumEntries_ (0)
115  , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ())
116  , pftype_ (pftype)
117  , numAllocForAllRows_ (maxNumEntriesPerRow)
118  , storageStatus_ (pftype == StaticProfile ?
119  Details::STORAGE_1D_UNPACKED :
120  Details::STORAGE_2D)
121  , indicesAreAllocated_ (false)
122  , indicesAreLocal_ (false)
123  , indicesAreGlobal_ (false)
124  , fillComplete_ (false)
125  , indicesAreSorted_ (true)
126  , noRedundancies_ (true)
127  , haveLocalConstants_ (false)
128  , haveGlobalConstants_ (false)
130  {
131  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,"
132  "pftype,params): ";
133  staticAssertions ();
134  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
135  maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
136  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
137  "a valid size_t value, which in this case means it must not be "
138  "Teuchos::OrdinalTraits<size_t>::invalid().");
139  resumeFill (params);
141  }
142 
143  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
145  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
146  const Teuchos::ArrayRCP<const size_t>& numEntPerRow,
147  const ProfileType pftype,
148  const Teuchos::RCP<Teuchos::ParameterList>& params) :
149  dist_object_type (rowMap)
150  , rowMap_ (rowMap)
151  , nodeNumEntries_ (0)
152  , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ())
153  , pftype_ (pftype)
154  , numAllocForAllRows_ (0)
155  , storageStatus_ (pftype == StaticProfile ?
156  Details::STORAGE_1D_UNPACKED :
157  Details::STORAGE_2D)
158  , indicesAreAllocated_ (false)
159  , indicesAreLocal_ (false)
160  , indicesAreGlobal_ (false)
161  , fillComplete_ (false)
162  , indicesAreSorted_ (true)
163  , noRedundancies_ (true)
164  , haveLocalConstants_ (false)
165  , haveGlobalConstants_ (false)
167  {
168  const char tfecfFuncName[] = "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
169  staticAssertions ();
170 
171  const size_t lclNumRows = rowMap.is_null () ?
172  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
173  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
174  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
175  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
176  << " != the local number of rows " << lclNumRows << " as specified by "
177  "the input row Map.");
178 
179 #ifdef HAVE_TPETRA_DEBUG
180  for (size_t r = 0; r < lclNumRows; ++r) {
181  const size_t curRowCount = numEntPerRow[r];
182  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
183  curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
184  std::invalid_argument, "numEntPerRow(" << r << ") specifies an invalid "
185  "number of entries (Teuchos::OrdinalTraits<size_t>::invalid()).");
186  }
187 #endif // HAVE_TPETRA_DEBUG
188 
189  // Allocate k_numAllocPerRow_ (upper bound on number of entries
190  // per row). We get a host view of the data, as an ArrayRCP.
191  // Create a nonowning Kokkos::View of it, copy into
192  // k_numAllocPerRow, and sync. Then assign to k_numAllocPerRow_
193  // (which is a const view, so we can't copy into it directly).
194  typedef Kokkos::DualView<size_t*, Kokkos::LayoutLeft, execution_space>
195  dual_view_type;
196  typedef typename dual_view_type::host_mirror_space host_type;
197  typedef Kokkos::View<const size_t*, Kokkos::LayoutLeft, host_type,
198  Kokkos::MemoryUnmanaged> in_view_type;
199  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
200  dual_view_type k_numAllocPerRow ("Tpetra::CrsGraph::numAllocPerRow",
201  lclNumRows);
202  k_numAllocPerRow.template modify<host_type> ();
203  Kokkos::deep_copy (k_numAllocPerRow.h_view, numAllocPerRowIn);
204  k_numAllocPerRow.template sync<execution_space> ();
205  k_numAllocPerRow_ = k_numAllocPerRow;
206 
207  resumeFill (params);
209  }
210 
211  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
213  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
214  const Kokkos::DualView<const size_t*, execution_space>& numEntPerRow,
215  const ProfileType pftype,
216  const Teuchos::RCP<Teuchos::ParameterList>& params) :
217  dist_object_type (rowMap)
218  , rowMap_ (rowMap)
219  , nodeNumEntries_ (0)
220  , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ())
221  , pftype_ (pftype)
222  , k_numAllocPerRow_ (numEntPerRow)
223  , numAllocForAllRows_ (0)
224  , storageStatus_ (pftype == StaticProfile ?
225  Details::STORAGE_1D_UNPACKED :
226  Details::STORAGE_2D)
227  , indicesAreAllocated_ (false)
228  , indicesAreLocal_ (false)
229  , indicesAreGlobal_ (false)
230  , fillComplete_ (false)
231  , indicesAreSorted_ (true)
232  , noRedundancies_ (true)
233  , haveLocalConstants_ (false)
234  , haveGlobalConstants_ (false)
236  {
237  const char tfecfFuncName[] = "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
238  staticAssertions ();
239 
240  const size_t lclNumRows = rowMap.is_null () ?
241  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
242  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
243  static_cast<size_t> (numEntPerRow.dimension_0 ()) != lclNumRows,
244  std::invalid_argument, "numEntPerRow has length " <<
245  numEntPerRow.dimension_0 () << " != the local number of rows " <<
246  lclNumRows << " as specified by " "the input row Map.");
247 
248 #ifdef HAVE_TPETRA_DEBUG
249  for (size_t r = 0; r < lclNumRows; ++r) {
250  const size_t curRowCount = numEntPerRow.h_view(r);
251  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
252  curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
253  std::invalid_argument, "numEntPerRow(" << r << ") specifies an invalid "
254  "number of entries (Teuchos::OrdinalTraits<size_t>::invalid()).");
255  }
256 #endif // HAVE_TPETRA_DEBUG
257 
258  resumeFill (params);
260  }
261 
262 
263  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
265  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
266  const Teuchos::RCP<const map_type>& colMap,
267  const Kokkos::DualView<const size_t*, execution_space>& numEntPerRow,
268  const ProfileType pftype,
269  const Teuchos::RCP<Teuchos::ParameterList>& params) :
270  dist_object_type (rowMap)
271  , rowMap_ (rowMap)
272  , colMap_ (colMap)
273  , nodeNumEntries_ (0)
274  , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ())
275  , pftype_ (pftype)
276  , k_numAllocPerRow_ (numEntPerRow)
277  , numAllocForAllRows_ (0)
278  , storageStatus_ (pftype == StaticProfile ?
279  Details::STORAGE_1D_UNPACKED :
280  Details::STORAGE_2D)
281  , indicesAreAllocated_ (false)
282  , indicesAreLocal_ (false)
283  , indicesAreGlobal_ (false)
284  , fillComplete_ (false)
285  , indicesAreSorted_ (true)
286  , noRedundancies_ (true)
287  , haveLocalConstants_ (false)
288  , haveGlobalConstants_ (false)
290  {
291  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,numEntPerRow,pftype,params): ";
292  staticAssertions ();
293 
294  const size_t lclNumRows = rowMap.is_null () ?
295  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
296  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
297  static_cast<size_t> (numEntPerRow.dimension_0 ()) != lclNumRows,
298  std::invalid_argument, "numEntPerRow has length " <<
299  numEntPerRow.dimension_0 () << " != the local number of rows " <<
300  lclNumRows << " as specified by " "the input row Map.");
301 
302 #ifdef HAVE_TPETRA_DEBUG
303  for (size_t r = 0; r < lclNumRows; ++r) {
304  const size_t curRowCount = numEntPerRow.h_view(r);
305  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
306  curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
307  std::invalid_argument, "numEntPerRow(" << r << ") specifies an invalid "
308  "number of entries (Teuchos::OrdinalTraits<size_t>::invalid()).");
309  }
310 #endif // HAVE_TPETRA_DEBUG
311 
312  resumeFill (params);
314  }
315 
316 
317  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
319  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
320  const Teuchos::RCP<const map_type>& colMap,
321  const Teuchos::ArrayRCP<const size_t>& numEntPerRow,
322  ProfileType pftype,
323  const Teuchos::RCP<Teuchos::ParameterList>& params) :
324  dist_object_type (rowMap)
325  , rowMap_ (rowMap)
326  , colMap_ (colMap)
327  , nodeNumEntries_ (0)
328  , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ())
329  , pftype_ (pftype)
330  , numAllocForAllRows_ (0)
331  , storageStatus_ (pftype == StaticProfile ?
332  Details::STORAGE_1D_UNPACKED :
333  Details::STORAGE_2D)
334  , indicesAreAllocated_ (false)
335  , indicesAreLocal_ (false)
336  , indicesAreGlobal_ (false)
337  , fillComplete_ (false)
338  , indicesAreSorted_ (true)
339  , noRedundancies_ (true)
340  , haveLocalConstants_ (false)
341  , haveGlobalConstants_ (false)
343  {
344  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,numEntPerRow,pftype,"
345  "params): ";
346  staticAssertions ();
347 
348  const size_t lclNumRows = rowMap.is_null () ?
349  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
350  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
351  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
352  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
353  << " != the local number of rows " << lclNumRows << " as specified by "
354  "the input row Map.");
355 
356 #ifdef HAVE_TPETRA_DEBUG
357  for (size_t r = 0; r < lclNumRows; ++r) {
358  const size_t curRowCount = numEntPerRow[r];
359  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
360  curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
361  std::invalid_argument, "numEntPerRow(" << r << ") specifies an invalid "
362  "number of entries (Teuchos::OrdinalTraits<size_t>::invalid()).");
363  }
364 #endif // HAVE_TPETRA_DEBUG
365 
366  // Allocate k_numAllocPerRow_ (upper bound on number of entries
367  // per row). We get a host view of the data, as an ArrayRCP.
368  // Create a nonowning Kokkos::View of it, copy into
369  // k_numAllocPerRow, and sync. Then assign to k_numAllocPerRow_
370  // (which is a const view, so we can't copy into it directly).
371  typedef Kokkos::DualView<size_t*, Kokkos::LayoutLeft, execution_space>
372  dual_view_type;
373  typedef typename dual_view_type::host_mirror_space host_type;
374  typedef Kokkos::View<const size_t*, Kokkos::LayoutLeft, host_type,
375  Kokkos::MemoryUnmanaged> in_view_type;
376  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
377  dual_view_type k_numAllocPerRow ("Tpetra::CrsGraph::numAllocPerRow",
378  lclNumRows);
379  k_numAllocPerRow.template modify<host_type> ();
380  Kokkos::deep_copy (k_numAllocPerRow.h_view, numAllocPerRowIn);
381  k_numAllocPerRow.template sync<execution_space> ();
382  k_numAllocPerRow_ = k_numAllocPerRow;
383 
384  resumeFill (params);
386  }
387 
388 
389  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
391  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
392  const Teuchos::RCP<const map_type>& colMap,
393  const typename local_graph_type::row_map_type& rowPointers,
394  const typename local_graph_type::entries_type::non_const_type& columnIndices,
395  const Teuchos::RCP<Teuchos::ParameterList>& params) :
396  dist_object_type (rowMap)
397  , rowMap_(rowMap)
398  , colMap_(colMap)
399  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
400  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
401  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
402  , nodeNumEntries_(0)
403  , nodeNumAllocated_(OrdinalTraits<size_t>::invalid())
406  , storageStatus_ (Details::STORAGE_1D_PACKED)
407  , indicesAreAllocated_(true)
408  , indicesAreLocal_(true)
409  , indicesAreGlobal_(false)
410  , fillComplete_(false)
411  , indicesAreSorted_(true)
412  , noRedundancies_(true)
413  , haveLocalConstants_ (false)
414  , haveGlobalConstants_ (false)
416  {
417  staticAssertions ();
418  setAllIndices (rowPointers, columnIndices);
420  }
421 
422 
423  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
425  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
426  const Teuchos::RCP<const map_type>& colMap,
427  const Teuchos::ArrayRCP<size_t>& rowPointers,
428  const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
429  const Teuchos::RCP<Teuchos::ParameterList>& params) :
430  dist_object_type (rowMap)
431  , rowMap_ (rowMap)
432  , colMap_ (colMap)
433  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
434  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
435  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
436  , nodeNumEntries_ (0)
437  , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ())
439  , numAllocForAllRows_ (0)
440  , storageStatus_ (Details::STORAGE_1D_PACKED)
441  , indicesAreAllocated_ (true)
442  , indicesAreLocal_ (true)
443  , indicesAreGlobal_ (false)
444  , fillComplete_ (false)
445  , indicesAreSorted_ (true)
446  , noRedundancies_ (true)
447  , haveLocalConstants_ (false)
448  , haveGlobalConstants_ (false)
450  {
451  staticAssertions ();
452  setAllIndices (rowPointers, columnIndices);
454  }
455 
456 
457  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
459  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
460  const Teuchos::RCP<const map_type>& colMap,
461  const local_graph_type& k_local_graph_,
462  const Teuchos::RCP<Teuchos::ParameterList>& params)
463  : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap)
464  , rowMap_ (rowMap)
465  , colMap_ (colMap)
466  , lclGraph_ (k_local_graph_)
467  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
468  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
469  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
470  , nodeNumEntries_ (0) // FIXME (mfh 17 Mar 2014) should get from lclGraph_ right now
471  , nodeNumAllocated_ (Teuchos::OrdinalTraits<size_t>::invalid ())
473  , numAllocForAllRows_ (0)
474  , storageStatus_ (Details::STORAGE_1D_PACKED)
475  , indicesAreAllocated_ (true)
476  , indicesAreLocal_ (true)
477  , indicesAreGlobal_ (false)
478  , fillComplete_ (false)
479  , indicesAreSorted_ (true)
480  , noRedundancies_ (true)
481  , haveLocalConstants_ (false)
482  , haveGlobalConstants_ (false)
484  {
485  using Teuchos::arcp;
486  using Teuchos::ArrayRCP;
487  using Teuchos::as;
488  using Teuchos::ParameterList;
489  using Teuchos::parameterList;
490  using Teuchos::rcp;
491  typedef GlobalOrdinal GO;
492  typedef LocalOrdinal LO;
493 
494  staticAssertions();
495  const char tfecfFuncName[] = "CrsGraph(Map,Map,Kokkos::LocalStaticCrsGraph)";
496 
497  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
498  colMap.is_null (), std::runtime_error,
499  ": The input column Map must be nonnull.");
500  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
501  k_local_graph_.numRows () != rowMap->getNodeNumElements (),
502  std::runtime_error,
503  ": The input row Map and the input local graph need to have the same "
504  "number of rows. The row Map claims " << rowMap->getNodeNumElements ()
505  << " row(s), but the local graph claims " << k_local_graph_.numRows ()
506  << " row(s).");
507  // NOTE (mfh 17 Mar 2014) getNodeNumRows() returns
508  // rowMap_->getNodeNumElements(), but it doesn't have to.
509  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
510  // k_local_graph_.numRows () != getNodeNumRows (), std::runtime_error,
511  // ": The input row Map and the input local graph need to have the same "
512  // "number of rows. The row Map claims " << getNodeNumRows () << " row(s), "
513  // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
514  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
515  k_lclInds1D_.dimension_0 () != 0 || k_gblInds1D_.dimension_0 () != 0, std::logic_error,
516  ": cannot have 1D data structures allocated.");
517  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
518  ! lclInds2D_.is_null () || ! gblInds2D_.is_null (), std::logic_error,
519  ": cannot have 2D data structures allocated.");
520 
521  nodeNumAllocated_ = k_local_graph_.row_map (getNodeNumRows ());
522  nodeNumEntries_ = k_local_graph_.row_map (getNodeNumRows ());
523 
524  // NOTE (mfh 17 Mar 2014) We also need a version of this CrsGraph
525  // constructor that takes a domain and range Map, as well as a row
526  // and column Map. In that case, we must pass the domain and
527  // range Map into the following method.
529  makeImportExport ();
530 
531  k_lclInds1D_ = lclGraph_.entries;
532  k_rowPtrs_ = lclGraph_.row_map;
533 
534  typename local_graph_type::row_map_type d_ptrs = lclGraph_.row_map;
535  typename local_graph_type::entries_type d_inds = lclGraph_.entries;
536 
537  // Reset local properties
538  upperTriangular_ = true;
539  lowerTriangular_ = true;
540  nodeMaxNumRowEntries_ = 0;
541  nodeNumDiags_ = 0;
542 
543  // Compute triangular properties
544  const size_t numLocalRows = getNodeNumRows ();
545  for (size_t localRow = 0; localRow < numLocalRows; ++localRow) {
546  const GO globalRow = rowMap_->getGlobalElement (localRow);
547  const LO rlcid = colMap_->getLocalElement (globalRow);
548 
549  // It's entirely possible that the local matrix has no entries
550  // in the column corresponding to the current row. In that
551  // case, the column Map may not necessarily contain that GID.
552  // This is why we check whether rlcid is "invalid" (which means
553  // that globalRow is not a GID in the column Map).
554  if (rlcid != Teuchos::OrdinalTraits<LO>::invalid ()) {
555  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
556  rlcid + 1 >= static_cast<LO> (d_ptrs.dimension_0 ()),
557  std::runtime_error, ": The given row Map and/or column Map is/are "
558  "not compatible with the provided local graph.");
559  if (d_ptrs(rlcid) != d_ptrs(rlcid + 1)) {
560  const size_t smallestCol =
561  static_cast<size_t> (d_inds(d_ptrs(rlcid)));
562  const size_t largestCol =
563  static_cast<size_t> (d_inds(d_ptrs(rlcid + 1)-1));
564  if (smallestCol < localRow) {
565  upperTriangular_ = false;
566  }
567  if (localRow < largestCol) {
568  lowerTriangular_ = false;
569  }
570  for (size_t i = d_ptrs(rlcid); i < d_ptrs(rlcid + 1); ++i) {
571  if (d_inds(i) == rlcid) {
572  ++nodeNumDiags_;
573  }
574  }
575  }
576  nodeMaxNumRowEntries_ =
577  std::max (static_cast<size_t> (d_ptrs(rlcid + 1) - d_ptrs(rlcid)),
578  nodeMaxNumRowEntries_);
579  }
580  }
581 
582  haveLocalConstants_ = true;
583  computeGlobalConstants ();
584 
585  fillComplete_ = true;
587  }
588 
589 
590  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
593  {}
594 
595 
596  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
597  Teuchos::RCP<const Teuchos::ParameterList>
600  {
601  using Teuchos::RCP;
602  using Teuchos::ParameterList;
603  using Teuchos::parameterList;
604 
605  RCP<ParameterList> params = parameterList ("Tpetra::CrsGraph");
606 
607  // Make a sublist for the Import.
608  RCP<ParameterList> importSublist = parameterList ("Import");
609 
610  // FIXME (mfh 02 Apr 2012) We should really have the Import and
611  // Export objects fill in these lists. However, we don't want to
612  // create an Import or Export unless we need them. For now, we
613  // know that the Import and Export just pass the list directly to
614  // their Distributor, so we can create a Distributor here
615  // (Distributor's constructor is a lightweight operation) and have
616  // it fill in the list.
617 
618  // Fill in Distributor default parameters by creating a
619  // Distributor and asking it to do the work.
620  Distributor distributor (rowMap_->getComm (), importSublist);
621  params->set ("Import", *importSublist, "How the Import performs communication.");
622 
623  // Make a sublist for the Export. For now, it's a clone of the
624  // Import sublist. It's not a shallow copy, though, since we
625  // might like the Import to do communication differently than the
626  // Export.
627  params->set ("Export", *importSublist, "How the Export performs communication.");
628 
629  return params;
630  }
631 
632 
633  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
634  void
636  setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
637  {
638  Teuchos::RCP<const Teuchos::ParameterList> validParams =
640  params->validateParametersAndSetDefaults (*validParams);
641  this->setMyParamList (params);
642  }
643 
644 
645  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
649  {
650  return rowMap_->getGlobalNumElements ();
651  }
652 
653 
654  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
658  {
659  const char tfecfFuncName[] = "getGlobalNumCols: ";
660  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
661  ! isFillComplete () || getDomainMap ().is_null (), std::runtime_error,
662  "The graph does not have a domain Map. You may not call this method in "
663  "that case.");
664  return getDomainMap ()->getGlobalNumElements ();
665  }
666 
667 
668  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
669  size_t
672  {
673  return rowMap_.is_null () ? static_cast<size_t> (0) :
674  rowMap_->getNodeNumElements ();
675  }
676 
677 
678  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
679  size_t
682  {
683  const char tfecfFuncName[] = "getNodeNumCols: ";
684  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
685  ! hasColMap (), std::runtime_error,
686  "The graph does not have a column Map. You may not call this method "
687  "unless the graph has a column Map. This requires either that a custom "
688  "column Map was given to the constructor, or that fillComplete() has "
689  "been called.");
690  return colMap_.is_null () ? static_cast<size_t> (0) :
691  colMap_->getNodeNumElements ();
692  }
693 
694 
695  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
696  size_t
699  {
700  return nodeNumDiags_;
701  }
702 
703 
704  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
708  {
709  return globalNumDiags_;
710  }
711 
712 
713  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
714  Teuchos::RCP<Node>
716  getNode () const
717  {
718  return rowMap_.is_null () ? Teuchos::null : rowMap_->getNode ();
719  }
720 
721 
722  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
723  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
725  getRowMap () const
726  {
727  return rowMap_;
728  }
729 
730 
731  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
732  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
734  getColMap () const
735  {
736  return colMap_;
737  }
738 
739 
740  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
741  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
744  {
745  return domainMap_;
746  }
747 
748 
749  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
750  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
752  getRangeMap () const
753  {
754  return rangeMap_;
755  }
756 
757  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
758  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> >
760  getImporter () const
761  {
762  return importer_;
763  }
764 
765 
766  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
767  Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Node> >
769  getExporter () const
770  {
771  return exporter_;
772  }
773 
774 
775  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
776  bool
778  hasColMap () const
779  {
780  return ! colMap_.is_null ();
781  }
782 
783 
784  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
785  bool
788  {
789  // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
790  // getNodeNumRows() is zero?
791 
792  const bool isOpt = indicesAreAllocated_ &&
793  k_numRowEntries_.dimension_0 () == 0 &&
794  getNodeNumRows () > 0;
795 
796 #ifdef HAVE_TPETRA_DEBUG
797  const char tfecfFuncName[] = "isStorageOptimized";
798  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
799  isOpt && getProfileType () == DynamicProfile, std::logic_error,
800  ": The matrix claims to have optimized storage, but getProfileType() "
801  "returns DynamicProfile. This should never happen. Please report this "
802  "bug to the Tpetra developers.");
803 #endif // HAVE_TPETRA_DEBUG
804 
805  return isOpt;
806  }
807 
808 
809  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
813  {
814  return pftype_;
815  }
816 
817 
818  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
822  {
823  return globalNumEntries_;
824  }
825 
826 
827  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
828  size_t
831  {
832  return nodeNumEntries_;
833  }
834 
835 
836  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
840  {
841  return globalMaxNumRowEntries_;
842  }
843 
844 
845  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
846  size_t
849  {
850  return nodeMaxNumRowEntries_;
851  }
852 
853 
854  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
855  bool
858  {
859  return fillComplete_;
860  }
861 
862 
863  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
864  bool
867  {
868  return ! fillComplete_;
869  }
870 
871 
872  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
873  bool
876  {
877  return upperTriangular_;
878  }
879 
880 
881  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
882  bool
885  {
886  return lowerTriangular_;
887  }
888 
889 
890  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
891  bool
894  {
895  return indicesAreLocal_;
896  }
897 
898 
899  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
900  bool
903  {
904  return indicesAreGlobal_;
905  }
906 
907 
908  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
909  size_t
912  {
913  return nodeNumAllocated_;
914  }
915 
916 
917  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
918  Teuchos::RCP<const Teuchos::Comm<int> >
920  getComm () const
921  {
922  return rowMap_.is_null () ? Teuchos::null : rowMap_->getComm ();
923  }
924 
925 
926  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
927  GlobalOrdinal
930  {
931  return rowMap_->getIndexBase ();
932  }
933 
934 
935  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
936  bool
938  indicesAreAllocated () const
939  {
940  return indicesAreAllocated_;
941  }
942 
943 
944  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
945  bool
947  isSorted () const
948  {
949  return indicesAreSorted_;
950  }
951 
952 
953  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
954  bool
956  isMerged () const
957  {
958  return noRedundancies_;
959  }
960 
961 
962  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
963  void
966  {
967  // FIXME (mfh 07 May 2013) How do we know that the change
968  // introduced a redundancy, or even that it invalidated the sorted
969  // order of indices? CrsGraph has always made this conservative
970  // guess. It could be a bit costly to check at insertion time,
971  // though.
972  indicesAreSorted_ = false;
973  noRedundancies_ = false;
974 
975  // We've modified the graph, so we'll have to recompute local
976  // constants like the number of diagonal entries on this process.
977  haveLocalConstants_ = false;
978  }
979 
980 
981  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
982  void
984  allocateIndices (const ELocalGlobal lg)
985  {
986  using Teuchos::arcp;
987  using Teuchos::Array;
988  using Teuchos::ArrayRCP;
989  typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
990  typedef typename local_graph_type::row_map_type::non_const_type
991  non_const_row_map_type;
992  typedef typename local_graph_type::entries_type::non_const_type
993  lcl_col_inds_type;
994  typedef Kokkos::View<GlobalOrdinal*,
995  typename lcl_col_inds_type::array_layout,
996  device_type> gbl_col_inds_type;
997  const char tfecfFuncName[] = "allocateIndices: ";
998 
999  // This is a protected function, only callable by us. If it was
1000  // called incorrectly, it is our fault. That's why the tests
1001  // below throw std::logic_error instead of std::invalid_argument.
1002  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1003  isLocallyIndexed () && lg == GlobalIndices, std::logic_error,
1004  "The graph is locally indexed, but Tpetra code is calling this method "
1005  "with lg=GlobalIndices. Please report this bug to the Tpetra developers.");
1006  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1007  isGloballyIndexed () && lg == LocalIndices, std::logic_error,
1008  "The graph is globally indexed, but Tpetra code is calling this method "
1009  "with lg=LocalIndices. Please report this bug to the Tpetra developers.");
1010  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1011  indicesAreAllocated (), std::logic_error, "The graph's indices are "
1012  "already allocated, but Tpetra code is calling allocateIndices) again. "
1013  "Please report this bug to the Tpetra developers.");
1014 
1015  const size_t numRows = getNodeNumRows ();
1016 
1017  if (getProfileType () == StaticProfile) {
1018  //
1019  // STATIC ALLOCATION PROFILE
1020  //
1021  non_const_row_map_type k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1);
1022 
1023  if (k_numAllocPerRow_.dimension_0 () != 0) {
1024  // It's OK to throw std::invalid_argument here, because we
1025  // haven't incurred any side effects yet. Throwing that
1026  // exception (and not, say, std::logic_error) implies that the
1027  // instance can recover.
1028  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1029  k_numAllocPerRow_.dimension_0 () != numRows, std::invalid_argument,
1030  "k_numAllocPerRow_ is allocated (has length != 0), but its length = "
1031  << k_numAllocPerRow_.dimension_0 () << " != numRows = " << numRows
1032  << ".");
1033  // FIXME hack until we get parallel_scan in kokkos
1034  //
1035  // FIXME (mfh 11 Aug 2014) This assumes UVM, since k_rowPtrs_
1036  // is currently a device View. Should instead use a DualView.
1037  typename Kokkos::DualView<const size_t*, execution_space>::t_host h_numAllocPerRow =
1038  k_numAllocPerRow_.h_view; // use a host view for now, since we compute on host
1039  bool anyInvalidAllocSizes = false;
1040  for (size_t i = 0; i < numRows; ++i) {
1041  size_t allocSize = h_numAllocPerRow(i);
1042  if (allocSize == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1043  anyInvalidAllocSizes = true;
1044  allocSize = 0;
1045  }
1046  k_rowPtrs(i+1) = k_rowPtrs(i) + allocSize;
1047  }
1048  // It's OK to throw std::invalid_argument here, because we
1049  // haven't incurred any side effects yet. Throwing that
1050  // exception (and not, say, std::logic_error) implies that the
1051  // instance can recover.
1052  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1053  anyInvalidAllocSizes, std::invalid_argument, "The input array of "
1054  "allocation sizes per row had at least one invalid (== "
1055  "Teuchos::OrdinalTraits<size_t>::invalid()) entry.");
1056  }
1057  else {
1058  // It's OK to throw std::invalid_argument here, because we
1059  // haven't incurred any side effects yet. Throwing that
1060  // exception (and not, say, std::logic_error) implies that the
1061  // instance can recover.
1062  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1063  numAllocForAllRows_ == Teuchos::OrdinalTraits<size_t>::invalid (),
1064  std::invalid_argument, "numAllocForAllRows_ has an invalid value, "
1065  "namely Teuchos::OrdinalTraits<size_t>::invalid() = " <<
1066  Teuchos::OrdinalTraits<size_t>::invalid () << ".");
1067  // FIXME hack until we get parallel_scan in kokkos
1068  //
1069  // FIXME (mfh 11 Aug 2014) This assumes UVM, since k_rowPtrs_
1070  // is currently a device View. Should instead use a DualView.
1071  for (size_t i = 0; i < numRows; ++i) {
1072  k_rowPtrs(i+1) = k_rowPtrs(i) + numAllocForAllRows_;
1073  }
1074  }
1075 
1076  // "Commit" the resulting row offsets.
1077  k_rowPtrs_ = k_rowPtrs;
1078 
1079  // FIXME (mfh 05,11 Aug 2014) This assumes UVM, since k_rowPtrs_
1080  // is currently a device View. Should instead use a DualView.
1081  const size_type numInds = static_cast<size_type> (k_rowPtrs_(numRows));
1082  if (lg == LocalIndices) {
1083  k_lclInds1D_ = lcl_col_inds_type ("Tpetra::CrsGraph::ind", numInds);
1084  }
1085  else {
1086  k_gblInds1D_ = gbl_col_inds_type ("Tpetra::CrsGraph::ind", numInds);
1087  }
1088  nodeNumAllocated_ = numInds;
1089  storageStatus_ = Details::STORAGE_1D_UNPACKED;
1090  }
1091  else {
1092  //
1093  // DYNAMIC ALLOCATION PROFILE
1094  //
1095 
1096  // Use the host view of k_numAllocPerRow_, since we have to
1097  // allocate 2-D storage on the host.
1098  typename Kokkos::DualView<const size_t*, execution_space>::t_host h_numAllocPerRow =
1099  k_numAllocPerRow_.h_view;
1100  const bool useNumAllocPerRow = (k_numAllocPerRow_.dimension_0 () != 0);
1101 
1102  if (lg == LocalIndices) {
1103  lclInds2D_ = arcp<Array<LocalOrdinal> > (numRows);
1104  nodeNumAllocated_ = 0;
1105  for (size_t i = 0; i < numRows; ++i) {
1106  const size_t howMany = useNumAllocPerRow ?
1107  h_numAllocPerRow(i) : numAllocForAllRows_;
1108  nodeNumAllocated_ += howMany;
1109  if (howMany > 0) {
1110  lclInds2D_[i].resize (howMany);
1111  }
1112  }
1113  }
1114  else { // allocate global indices
1115  gblInds2D_ = arcp<Array<GlobalOrdinal> > (numRows);
1116  nodeNumAllocated_ = 0;
1117  for (size_t i = 0; i < numRows; ++i) {
1118  const size_t howMany = useNumAllocPerRow ?
1119  h_numAllocPerRow(i) : numAllocForAllRows_;
1120  nodeNumAllocated_ += howMany;
1121  if (howMany > 0) {
1122  gblInds2D_[i].resize (howMany);
1123  }
1124  }
1125  }
1126  storageStatus_ = Details::STORAGE_2D;
1127  }
1128 
1129  indicesAreLocal_ = (lg == LocalIndices);
1130  indicesAreGlobal_ = (lg == GlobalIndices);
1131 
1132  if (numRows > 0) {
1134  t_numRowEntries_ ("Tpetra::CrsGraph::numRowEntries", numRows);
1135 
1136  // Fill with zeros on the host. k_numRowEntries_ is a DualView.
1137  //
1138  // TODO (mfh 05 Aug 2014) Write a device kernel for this.
1139  k_numRowEntries_.template modify<typename t_numRowEntries_::host_mirror_space> ();
1140  size_t* const hostPtr = k_numRowEntries_.h_view.ptr_on_device ();
1141  std::fill (hostPtr, hostPtr + numRows, static_cast<size_t> (0));
1142  k_numRowEntries_.template sync<execution_space> ();
1143  }
1144 
1145  // done with these
1146  numAllocForAllRows_ = 0;
1147  k_numAllocPerRow_ = Kokkos::DualView<const size_t*, execution_space> ();
1148  indicesAreAllocated_ = true;
1149  checkInternalState ();
1150  }
1151 
1152 
1153  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1154  template <class T>
1155  Teuchos::ArrayRCP<Teuchos::Array<T> >
1157  allocateValues2D () const
1158  {
1159  using Teuchos::arcp;
1160  using Teuchos::Array;
1161  using Teuchos::ArrayRCP;
1162  const char tfecfFuncName[] = "allocateValues2D: ";
1163  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1164  ! indicesAreAllocated (), std::runtime_error,
1165  "Graph indices must be allocated before values.");
1166  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1167  getProfileType () != DynamicProfile, std::runtime_error,
1168  "Graph indices must be allocated in a dynamic profile.");
1169 
1170  ArrayRCP<Array<T> > values2D;
1171  values2D = arcp<Array<T> > (getNodeNumRows ());
1172  if (lclInds2D_ != null) {
1173  const size_t numRows = lclInds2D_.size ();
1174  for (size_t r = 0; r < numRows; ++r) {
1175  values2D[r].resize (lclInds2D_[r].size ());
1176  }
1177  }
1178  else if (gblInds2D_ != null) {
1179  const size_t numRows = gblInds2D_.size ();
1180  for (size_t r = 0; r < numRows; ++r) {
1181  values2D[r].resize (gblInds2D_[r].size ());
1182  }
1183  }
1184  return values2D;
1185  }
1186 
1187 
1188  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1189  Teuchos::ArrayView<const LocalOrdinal>
1191  getLocalView (const RowInfo rowinfo) const
1192  {
1193  using Kokkos::subview;
1194  typedef LocalOrdinal LO;
1195  typedef Kokkos::View<const LO*, execution_space,
1196  Kokkos::MemoryUnmanaged> row_view_type;
1197 
1198  if (rowinfo.allocSize == 0) {
1199  return Teuchos::ArrayView<const LO> ();
1200  }
1201  else { // nothing in the row to view
1202  if (k_lclInds1D_.dimension_0 () != 0) { // 1-D storage
1203  const size_t start = rowinfo.offset1D;
1204  const size_t len = rowinfo.allocSize;
1205  const std::pair<size_t, size_t> rng (start, start + len);
1206  // mfh 23 Nov 2015: Don't just create a subview of
1207  // k_lclInds1D_ directly, because that first creates a
1208  // _managed_ subview, then returns an unmanaged version of
1209  // that. That touches the reference count, which costs
1210  // performance in a measurable way.
1211  row_view_type rowView = subview (row_view_type (k_lclInds1D_), rng);
1212  const LO* const rowViewRaw = (len == 0) ? NULL : rowView.ptr_on_device ();
1213  return Teuchos::ArrayView<const LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1214  }
1215  else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1216  return lclInds2D_[rowinfo.localRow] ();
1217  }
1218  else {
1219  return Teuchos::ArrayView<const LO> (); // nothing in the row to view
1220  }
1221  }
1222  }
1223 
1224 
1225  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1226  Teuchos::ArrayView<LocalOrdinal>
1229  {
1230  using Kokkos::subview;
1231  typedef LocalOrdinal LO;
1232  typedef Kokkos::View<LO*, execution_space,
1233  Kokkos::MemoryUnmanaged> row_view_type;
1234 
1235  if (rowinfo.allocSize == 0) { // nothing in the row to view
1236  return Teuchos::ArrayView<LO> ();
1237  }
1238  else {
1239  if (k_lclInds1D_.dimension_0 () != 0) { // 1-D storage
1240  const size_t start = rowinfo.offset1D;
1241  const size_t len = rowinfo.allocSize;
1242  const std::pair<size_t, size_t> rng (start, start + len);
1243  // mfh 23 Nov 2015: Don't just create a subview of
1244  // k_lclInds1D_ directly, because that first creates a
1245  // _managed_ subview, then returns an unmanaged version of
1246  // that. That touches the reference count, which costs
1247  // performance in a measurable way.
1248  row_view_type rowView = subview (row_view_type (k_lclInds1D_), rng);
1249  LO* const rowViewRaw = (len == 0) ? NULL : rowView.ptr_on_device ();
1250  return Teuchos::ArrayView<LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1251  }
1252  else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1253  return lclInds2D_[rowinfo.localRow] ();
1254  }
1255  else {
1256  return Teuchos::ArrayView<LO> (); // nothing in the row to view
1257  }
1258  }
1259  }
1260 
1261 
1262  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1263  Kokkos::View<const LocalOrdinal*,
1265  Kokkos::MemoryUnmanaged>
1267  getLocalKokkosRowView (const RowInfo& rowinfo) const
1268  {
1269  typedef LocalOrdinal LO;
1270  typedef Kokkos::View<const LO*, execution_space,
1271  Kokkos::MemoryUnmanaged> row_view_type;
1272 
1273  if (rowinfo.allocSize == 0) {
1274  return row_view_type ();
1275  }
1276  else { // nothing in the row to view
1277  if (k_lclInds1D_.dimension_0 () != 0) { // 1-D storage
1278  const size_t start = rowinfo.offset1D;
1279  const size_t len = rowinfo.allocSize;
1280  const std::pair<size_t, size_t> rng (start, start + len);
1281  // mfh 23 Nov 2015: Don't just create a subview of
1282  // k_lclInds1D_ directly, because that first creates a
1283  // _managed_ subview, then returns an unmanaged version of
1284  // that. That touches the reference count, which costs
1285  // performance in a measurable way.
1286  return Kokkos::subview (row_view_type (k_lclInds1D_), rng);
1287  }
1288  else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1289  Teuchos::ArrayView<const LO> rowAv = lclInds2D_[rowinfo.localRow] ();
1290  return row_view_type (rowAv.getRawPtr (), rowAv.size ());
1291  }
1292  else {
1293  return row_view_type (); // nothing in the row to view
1294  }
1295  }
1296  }
1297 
1298 
1299  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1300  Kokkos::View<LocalOrdinal*,
1301  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::execution_space,
1302  Kokkos::MemoryUnmanaged>
1304  getLocalKokkosRowViewNonConst (const RowInfo& rowinfo)
1305  {
1306  typedef LocalOrdinal LO;
1307  typedef Kokkos::View<LO*, execution_space,
1308  Kokkos::MemoryUnmanaged> row_view_type;
1309 
1310  if (rowinfo.allocSize == 0) {
1311  return row_view_type ();
1312  }
1313  else { // nothing in the row to view
1314  if (k_lclInds1D_.dimension_0 () != 0) { // 1-D storage
1315  const size_t start = rowinfo.offset1D;
1316  const size_t len = rowinfo.allocSize;
1317  const std::pair<size_t, size_t> rng (start, start + len);
1318  // mfh 23 Nov 2015: Don't just create a subview of
1319  // k_lclInds1D_ directly, because that first creates a
1320  // _managed_ subview, then returns an unmanaged version of
1321  // that. That touches the reference count, which costs
1322  // performance in a measurable way.
1323  return Kokkos::subview (row_view_type (k_lclInds1D_), rng);
1324  }
1325  else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1326  Teuchos::ArrayView<LO> rowAv = lclInds2D_[rowinfo.localRow] ();
1327  return row_view_type (rowAv.getRawPtr (), rowAv.size ());
1328  }
1329  else {
1330  return row_view_type (); // nothing in the row to view
1331  }
1332  }
1333  }
1334 
1335 
1336  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1337  Kokkos::View<const GlobalOrdinal*,
1338  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::execution_space,
1339  Kokkos::MemoryUnmanaged>
1341  getGlobalKokkosRowView (const RowInfo& rowinfo) const
1342  {
1343  typedef GlobalOrdinal GO;
1344  typedef Kokkos::View<const GO*, execution_space,
1345  Kokkos::MemoryUnmanaged> row_view_type;
1346 
1347  if (rowinfo.allocSize == 0) {
1348  return row_view_type ();
1349  }
1350  else { // nothing in the row to view
1351  if (this->k_gblInds1D_.dimension_0 () != 0) { // 1-D storage
1352  const size_t start = rowinfo.offset1D;
1353  const size_t len = rowinfo.allocSize;
1354  const std::pair<size_t, size_t> rng (start, start + len);
1355  // mfh 23 Nov 2015: Don't just create a subview of
1356  // k_gblInds1D_ directly, because that first creates a
1357  // _managed_ subview, then returns an unmanaged version of
1358  // that. That touches the reference count, which costs
1359  // performance in a measurable way.
1360  return Kokkos::subview (row_view_type (this->k_gblInds1D_), rng);
1361  }
1362  else if (! this->gblInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1363  Teuchos::ArrayView<const GO> rowAv = this->gblInds2D_[rowinfo.localRow] ();
1364  // FIXME (mfh 26 Nov 2015) This assumes UVM, because it
1365  // assumes that host code can access device memory through
1366  // Teuchos::ArrayView.
1367  return row_view_type (rowAv.getRawPtr (), rowAv.size ());
1368  }
1369  else {
1370  return row_view_type (); // nothing in the row to view
1371  }
1372  }
1373  }
1374 
1375 
1376  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1377  Teuchos::ArrayView<const GlobalOrdinal>
1379  getGlobalView (const RowInfo rowinfo) const
1380  {
1381  Teuchos::ArrayView<const GlobalOrdinal> view;
1382  if (rowinfo.allocSize > 0) {
1383  if (k_gblInds1D_.dimension_0 () != 0) {
1384  auto rng = std::make_pair (rowinfo.offset1D,
1385  rowinfo.offset1D + rowinfo.allocSize);
1386  // mfh 23 Nov 2015: Don't just create a subview of
1387  // k_gblInds1D_ directly, because that first creates a
1388  // _managed_ subview, then returns an unmanaged version of
1389  // that. That touches the reference count, which costs
1390  // performance in a measurable way.
1391  Kokkos::View<const GlobalOrdinal*, execution_space,
1392  Kokkos::MemoryUnmanaged> k_gblInds1D_unmanaged = k_gblInds1D_;
1393  view = Kokkos::Compat::getConstArrayView (Kokkos::subview (k_gblInds1D_unmanaged, rng));
1394  }
1395  else if (! gblInds2D_[rowinfo.localRow].empty()) {
1396  view = gblInds2D_[rowinfo.localRow] ();
1397  }
1398  }
1399  return view;
1400  }
1401 
1402 
1403  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1404  Teuchos::ArrayView<GlobalOrdinal>
1407  {
1408  Teuchos::ArrayView<GlobalOrdinal> view;
1409  if (rowinfo.allocSize > 0) {
1410  if (k_gblInds1D_.dimension_0 () != 0) {
1411  auto rng = std::make_pair (rowinfo.offset1D,
1412  rowinfo.offset1D + rowinfo.allocSize);
1413  // mfh 23 Nov 2015: Don't just create a subview of
1414  // k_gblInds1D_ directly, because that first creates a
1415  // _managed_ subview, then returns an unmanaged version of
1416  // that. That touches the reference count, which costs
1417  // performance in a measurable way.
1418  Kokkos::View<GlobalOrdinal*, execution_space,
1419  Kokkos::MemoryUnmanaged> k_gblInds1D_unmanaged = k_gblInds1D_;
1420  view = Kokkos::Compat::getArrayView (Kokkos::subview (k_gblInds1D_unmanaged, rng));
1421  }
1422  else if (! gblInds2D_[rowinfo.localRow].empty()) {
1423  view = gblInds2D_[rowinfo.localRow] ();
1424  }
1425  }
1426  return view;
1427  }
1428 
1429 
1430  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1431  RowInfo
1433  getRowInfo (const LocalOrdinal myRow) const
1434  {
1435 #ifdef HAVE_TPETRA_DEBUG
1436  const char tfecfFuncName[] = "getRowInfo";
1437  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1438  ! rowMap_->isNodeLocalElement (myRow), std::logic_error,
1439  ": The given (local) row index myRow = " << myRow
1440  << " does not belong to the graph's row Map. "
1441  "This probably indicates a bug in Tpetra::CrsGraph or Tpetra::CrsMatrix. "
1442  "Please report this bug to the Tpetra developers.");
1443  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1444  ! hasRowInfo (), std::logic_error,
1445  ": Late catch! Graph does not have row info anymore. "
1446  "Error should have been caught earlier. "
1447  "Please report this bug to the Tpetra developers.");
1448 #endif // HAVE_TPETRA_DEBUG
1449 
1450  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1451  RowInfo ret;
1452  if (! hasRowInfo () || rowMap_.is_null () || ! rowMap_->isNodeLocalElement (myRow)) {
1453  ret.localRow = STINV;
1454  ret.allocSize = 0;
1455  ret.numEntries = 0;
1456  ret.offset1D = STINV;
1457  return ret;
1458  }
1459 
1460  ret.localRow = static_cast<size_t> (myRow);
1461  if (nodeNumAllocated_ != 0 && nodeNumAllocated_ != STINV) {
1462  // graph data structures have the info that we need
1463  //
1464  // if static graph, offsets tell us the allocation size
1465  if (getProfileType() == StaticProfile) {
1466  ret.offset1D = k_rowPtrs_(myRow);
1467  ret.allocSize = k_rowPtrs_(myRow+1) - k_rowPtrs_(myRow);
1468  if (k_numRowEntries_.dimension_0 () == 0) {
1469  ret.numEntries = ret.allocSize;
1470  } else {
1471  ret.numEntries = k_numRowEntries_.h_view(myRow);
1472  }
1473  }
1474  else {
1475  ret.offset1D = STINV;
1476  if (isLocallyIndexed ()) {
1477  ret.allocSize = lclInds2D_[myRow].size ();
1478  }
1479  else {
1480  ret.allocSize = gblInds2D_[myRow].size ();
1481  }
1482  ret.numEntries = k_numRowEntries_.h_view(myRow);
1483  }
1484  }
1485  else if (nodeNumAllocated_ == 0) {
1486  // have performed allocation, but the graph has no allocation or entries
1487  ret.allocSize = 0;
1488  ret.numEntries = 0;
1489  ret.offset1D = STINV;
1490  }
1491  else if (! indicesAreAllocated ()) {
1492  // haven't performed allocation yet; probably won't hit this code
1493  //
1494  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1495  // allocate, rather than doing lazy allocation at first insert.
1496  // This will make k_numAllocPerRow_ obsolete.
1497  const bool useNumAllocPerRow = (k_numAllocPerRow_.dimension_0 () != 0);
1498  if (useNumAllocPerRow) {
1499  ret.allocSize = k_numAllocPerRow_.h_view(myRow);
1500  } else {
1501  ret.allocSize = numAllocForAllRows_;
1502  }
1503  ret.numEntries = 0;
1504  ret.offset1D = STINV;
1505  }
1506  else {
1507  // don't know how we ended up here...
1508  TEUCHOS_TEST_FOR_EXCEPT(true);
1509  }
1510  return ret;
1511  }
1512 
1513 
1514  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1515  RowInfo
1517  getRowInfoFromGlobalRowIndex (const GlobalOrdinal gblRow) const
1518  {
1519  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1520  RowInfo ret;
1521  if (! this->hasRowInfo () || this->rowMap_.is_null ()) {
1522  ret.localRow = STINV;
1523  ret.allocSize = 0;
1524  ret.numEntries = 0;
1525  ret.offset1D = STINV;
1526  return ret;
1527  }
1528  const LocalOrdinal myRow = this->rowMap_->getLocalElement (gblRow);
1529  if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
1530  ret.localRow = STINV;
1531  ret.allocSize = 0;
1532  ret.numEntries = 0;
1533  ret.offset1D = STINV;
1534  return ret;
1535  }
1536 
1537  ret.localRow = static_cast<size_t> (myRow);
1538  if (nodeNumAllocated_ != 0 && nodeNumAllocated_ != STINV) {
1539  // graph data structures have the info that we need
1540  //
1541  // if static graph, offsets tell us the allocation size
1542  if (getProfileType() == StaticProfile) {
1543  ret.offset1D = k_rowPtrs_(myRow);
1544  ret.allocSize = k_rowPtrs_(myRow+1) - k_rowPtrs_(myRow);
1545  if (k_numRowEntries_.dimension_0 () == 0) {
1546  ret.numEntries = ret.allocSize;
1547  } else {
1548  ret.numEntries = k_numRowEntries_.h_view(myRow);
1549  }
1550  }
1551  else {
1552  ret.offset1D = STINV;
1553  if (isLocallyIndexed ()) {
1554  ret.allocSize = lclInds2D_[myRow].size ();
1555  }
1556  else {
1557  ret.allocSize = gblInds2D_[myRow].size ();
1558  }
1559  ret.numEntries = k_numRowEntries_.h_view(myRow);
1560  }
1561  }
1562  else if (nodeNumAllocated_ == 0) {
1563  // have performed allocation, but the graph has no allocation or entries
1564  ret.allocSize = 0;
1565  ret.numEntries = 0;
1566  ret.offset1D = STINV;
1567  }
1568  else if (! indicesAreAllocated ()) {
1569  // haven't performed allocation yet; probably won't hit this code
1570  //
1571  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1572  // allocate, rather than doing lazy allocation at first insert.
1573  // This will make k_numAllocPerRow_ obsolete.
1574  const bool useNumAllocPerRow = (k_numAllocPerRow_.dimension_0 () != 0);
1575  if (useNumAllocPerRow) {
1576  ret.allocSize = k_numAllocPerRow_.h_view(myRow);
1577  } else {
1578  ret.allocSize = numAllocForAllRows_;
1579  }
1580  ret.numEntries = 0;
1581  ret.offset1D = STINV;
1582  }
1583  else {
1584  // don't know how we ended up here...
1585  TEUCHOS_TEST_FOR_EXCEPT(true);
1586  }
1587  return ret;
1588  }
1589 
1590 
1591  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1592  void
1594  staticAssertions () const
1595  {
1596  using Teuchos::OrdinalTraits;
1597  typedef LocalOrdinal LO;
1598  typedef GlobalOrdinal GO;
1599  typedef global_size_t GST;
1600 
1601  // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
1602  // This is so that we can store local indices in the memory
1603  // formerly occupied by global indices.
1604  static_assert (sizeof (GlobalOrdinal) >= sizeof (LocalOrdinal),
1605  "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
1606  // Assumption: max(size_t) >= max(LocalOrdinal)
1607  // This is so that we can represent any LocalOrdinal as a size_t.
1608  static_assert (sizeof (size_t) >= sizeof (LocalOrdinal),
1609  "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
1610  static_assert (sizeof(GST) >= sizeof(size_t),
1611  "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
1612 
1613  // FIXME (mfh 30 Sep 2015) We're not using
1614  // Teuchos::CompileTimeAssert any more. Can we do these checks
1615  // with static_assert?
1616 
1617  // can't call max() with CompileTimeAssert, because it isn't a
1618  // constant expression; will need to make this a runtime check
1619  const char msg[] = "Tpetra::CrsGraph: Object cannot be created with the "
1620  "given template arguments: size assumptions are not valid.";
1621  TEUCHOS_TEST_FOR_EXCEPTION(
1622  static_cast<size_t> (OrdinalTraits<LO>::max ()) > OrdinalTraits<size_t>::max (),
1623  std::runtime_error, msg);
1624  TEUCHOS_TEST_FOR_EXCEPTION(
1625  static_cast<GST> (OrdinalTraits<LO>::max ()) > static_cast<GST> (OrdinalTraits<GO>::max ()),
1626  std::runtime_error, msg);
1627  TEUCHOS_TEST_FOR_EXCEPTION(
1628  static_cast<size_t> (OrdinalTraits<GO>::max ()) > OrdinalTraits<GST>::max(),
1629  std::runtime_error, msg);
1630  TEUCHOS_TEST_FOR_EXCEPTION(
1631  OrdinalTraits<size_t>::max () > OrdinalTraits<GST>::max (),
1632  std::runtime_error, msg);
1633  }
1634 
1635 
1636  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1637  size_t
1639  insertIndices (const RowInfo& rowinfo,
1640  const SLocalGlobalViews &newInds,
1641  const ELocalGlobal lg,
1642  const ELocalGlobal I)
1643  {
1644  using Teuchos::ArrayView;
1645 #ifdef HAVE_TPETRA_DEBUG
1646  TEUCHOS_TEST_FOR_EXCEPTION(
1647  lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
1648  "Tpetra::CrsGraph::insertIndices: lg must be either GlobalIndices or "
1649  "LocalIndices.");
1650 #endif // HAVE_TPETRA_DEBUG
1651  size_t numNewInds = 0;
1652  if (lg == GlobalIndices) { // input indices are global
1653  ArrayView<const GlobalOrdinal> new_ginds = newInds.ginds;
1654  numNewInds = new_ginds.size();
1655  if (I == GlobalIndices) { // store global indices
1656  ArrayView<GlobalOrdinal> gind_view = getGlobalViewNonConst(rowinfo);
1657  std::copy(new_ginds.begin(), new_ginds.end(), gind_view.begin()+rowinfo.numEntries);
1658  }
1659  else if (I == LocalIndices) { // store local indices
1660  ArrayView<LocalOrdinal> lind_view = getLocalViewNonConst(rowinfo);
1661  typename ArrayView<const GlobalOrdinal>::iterator in = new_ginds.begin();
1662  const typename ArrayView<const GlobalOrdinal>::iterator stop = new_ginds.end();
1663  typename ArrayView<LocalOrdinal>::iterator out = lind_view.begin()+rowinfo.numEntries;
1664  while (in != stop) {
1665  *out++ = colMap_->getLocalElement (*in++);
1666  }
1667  }
1668  }
1669  else if (lg == LocalIndices) { // input indices are local
1670  ArrayView<const LocalOrdinal> new_linds = newInds.linds;
1671  numNewInds = new_linds.size();
1672  if (I == LocalIndices) { // store local indices
1673  ArrayView<LocalOrdinal> lind_view = getLocalViewNonConst(rowinfo);
1674  std::copy(new_linds.begin(), new_linds.end(), lind_view.begin()+rowinfo.numEntries);
1675  }
1676  else if (I == GlobalIndices) {
1677  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Tpetra::CrsGraph::"
1678  "insertIndices: the case where the input indices are local and the "
1679  "indices to write are global (lg=LocalIndices, I=GlobalIndices) is "
1680  "not implemented, because it does not make sense." << std::endl <<
1681  "If you have correct local column indices, that means the graph has "
1682  "a column Map. In that case, you should be storing local indices.");
1683  }
1684  }
1685 
1686  // FIXME (mfh 07 Aug 2014) We should just sync at fillComplete,
1687  // but for now, for correctness, do the modify-sync cycle here.
1688  k_numRowEntries_.template modify<typename t_numRowEntries_::host_mirror_space> ();
1689  k_numRowEntries_.h_view(rowinfo.localRow) += numNewInds;
1690  k_numRowEntries_.template sync<execution_space> ();
1691 
1692  nodeNumEntries_ += numNewInds;
1693  setLocallyModified ();
1694  return numNewInds;
1695  }
1696 
1697 
1698  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1699  void
1701  insertGlobalIndicesImpl (const LocalOrdinal myRow,
1702  const Teuchos::ArrayView<const GlobalOrdinal>& indices)
1703  {
1704  const char tfecfFuncName[] = "insertGlobalIndicesImpl";
1705 
1706  RowInfo rowInfo = getRowInfo(myRow);
1707  const size_t numNewInds = indices.size();
1708  const size_t newNumEntries = rowInfo.numEntries + numNewInds;
1709  if (newNumEntries > rowInfo.allocSize) {
1710  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1711  getProfileType() == StaticProfile, std::runtime_error,
1712  ": new indices exceed statically allocated graph structure.");
1713 
1714  // update allocation, doubling size to reduce # reallocations
1715  size_t newAllocSize = 2*rowInfo.allocSize;
1716  if (newAllocSize < newNumEntries) {
1717  newAllocSize = newNumEntries;
1718  }
1719  gblInds2D_[myRow].resize(newAllocSize);
1720  nodeNumAllocated_ += (newAllocSize - rowInfo.allocSize);
1721  }
1722 
1723  // Copy new indices at end of global index array
1724  if (k_gblInds1D_.dimension_0 () != 0) {
1725  const size_t numIndsToCopy = static_cast<size_t> (indices.size ());
1726  const size_t offset = rowInfo.offset1D + rowInfo.numEntries;
1727  for (size_t k = 0; k < numIndsToCopy; ++k) {
1728  k_gblInds1D_[offset + k] = indices[k];
1729  }
1730  }
1731  else {
1732  std::copy(indices.begin(), indices.end(),
1733  gblInds2D_[myRow].begin()+rowInfo.numEntries);
1734  }
1735 
1736  // FIXME (mfh 07 Aug 2014) We should just sync at fillComplete,
1737  // but for now, for correctness, do the modify-sync cycle here.
1738  k_numRowEntries_.template modify<typename t_numRowEntries_::host_mirror_space> ();
1739  k_numRowEntries_.h_view(myRow) += numNewInds;
1740  k_numRowEntries_.template sync<execution_space> ();
1741 
1742  nodeNumEntries_ += numNewInds;
1743  setLocallyModified ();
1744 
1745 #ifdef HAVE_TPETRA_DEBUG
1746  {
1747  const size_t chkNewNumEntries = getNumEntriesInLocalRow (myRow);
1748  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1749  chkNewNumEntries != newNumEntries, std::logic_error,
1750  ": Internal logic error. Please contact Tpetra team.");
1751  }
1752 #endif
1753  }
1754 
1755 
1756  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1757  void
1759  insertLocalIndicesImpl (const LocalOrdinal myRow,
1760  const Teuchos::ArrayView<const LocalOrdinal>& indices)
1761  {
1762  using Kokkos::MemoryUnmanaged;
1763  using Kokkos::subview;
1764  using Kokkos::View;
1765  typedef LocalOrdinal LO;
1766  const char* tfecfFuncName ("insertLocallIndicesImpl");
1767 
1768  const RowInfo rowInfo = this->getRowInfo(myRow);
1769  const size_t numNewInds = indices.size();
1770  const size_t newNumEntries = rowInfo.numEntries + numNewInds;
1771  if (newNumEntries > rowInfo.allocSize) {
1772  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1773  getProfileType() == StaticProfile, std::runtime_error,
1774  ": new indices exceed statically allocated graph structure.");
1775 
1776  // update allocation, doubling size to reduce number of reallocations
1777  size_t newAllocSize = 2*rowInfo.allocSize;
1778  if (newAllocSize < newNumEntries)
1779  newAllocSize = newNumEntries;
1780  lclInds2D_[myRow].resize(newAllocSize);
1781  nodeNumAllocated_ += (newAllocSize - rowInfo.allocSize);
1782  }
1783 
1784  // Store the new indices at the end of row myRow.
1785  if (k_lclInds1D_.dimension_0 () != 0) {
1786  typedef View<const LO*, execution_space, MemoryUnmanaged> input_view_type;
1787  typedef View<LO*, execution_space, MemoryUnmanaged> row_view_type;
1788 
1789  input_view_type inputInds (indices.getRawPtr (), indices.size ());
1790  const size_t start = rowInfo.offset1D + rowInfo.numEntries; // end of row
1791  const std::pair<size_t, size_t> rng (start, start + newNumEntries);
1792  // mfh 23 Nov 2015: Don't just create a subview of k_lclInds1D_
1793  // directly, because that first creates a _managed_ subview,
1794  // then returns an unmanaged version of that. That touches the
1795  // reference count, which costs performance in a measurable way.
1796  row_view_type myInds = subview (row_view_type (k_lclInds1D_), rng);
1797  Kokkos::deep_copy (myInds, inputInds);
1798  }
1799  else {
1800  std::copy (indices.begin (), indices.end (),
1801  lclInds2D_[myRow].begin () + rowInfo.numEntries);
1802  }
1803 
1804  // FIXME (mfh 07 Aug 2014) We should just sync at fillComplete,
1805  // but for now, for correctness, do the modify-sync cycle here.
1806  k_numRowEntries_.template modify<typename t_numRowEntries_::host_mirror_space> ();
1807  k_numRowEntries_.h_view(myRow) += numNewInds;
1808  k_numRowEntries_.template sync<execution_space> ();
1809 
1810  nodeNumEntries_ += numNewInds;
1811  setLocallyModified ();
1812 #ifdef HAVE_TPETRA_DEBUG
1813  {
1814  const size_t chkNewNumEntries = getNumEntriesInLocalRow (myRow);
1815  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1816  chkNewNumEntries != newNumEntries, std::logic_error,
1817  ": Internal logic error. Please contact Tpetra team.");
1818  }
1819 #endif
1820  }
1821 
1822 
1823  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1824  template <class Scalar>
1825  void
1828  const SLocalGlobalViews& newInds,
1829  const Teuchos::ArrayView<Scalar>& oldRowVals,
1830  const Teuchos::ArrayView<const Scalar>& newRowVals,
1831  const ELocalGlobal lg,
1832  const ELocalGlobal I)
1833  {
1834 #ifdef HAVE_TPETRA_DEBUG
1835  size_t numNewInds = 0;
1836  try {
1837  numNewInds = insertIndices (rowInfo, newInds, lg, I);
1838  } catch (std::exception& e) {
1839  TEUCHOS_TEST_FOR_EXCEPTION(
1840  true, std::runtime_error, "Tpetra::CrsGraph::insertIndicesAndValues: "
1841  "insertIndices threw an exception: " << e.what ());
1842  }
1843  TEUCHOS_TEST_FOR_EXCEPTION(
1844  numNewInds > static_cast<size_t> (oldRowVals.size ()), std::runtime_error,
1845  "Tpetra::CrsGraph::insertIndicesAndValues: numNewInds (" << numNewInds
1846  << ") > oldRowVals.size() (" << oldRowVals.size () << ".");
1847 #else
1848  const size_t numNewInds = insertIndices (rowInfo, newInds, lg, I);
1849 #endif // HAVE_TPETRA_DEBUG
1850 
1851  typedef typename Teuchos::ArrayView<Scalar>::size_type size_type;
1852 
1853 #ifdef HAVE_TPETRA_DEBUG
1854  TEUCHOS_TEST_FOR_EXCEPTION(
1855  rowInfo.numEntries + numNewInds > static_cast<size_t> (oldRowVals.size ()),
1856  std::runtime_error, "Tpetra::CrsGraph::insertIndicesAndValues: rowInfo."
1857  "numEntries (" << rowInfo.numEntries << ") + numNewInds (" << numNewInds
1858  << ") > oldRowVals.size() (" << oldRowVals.size () << ").");
1859  TEUCHOS_TEST_FOR_EXCEPTION(
1860  static_cast<size_type> (numNewInds) > newRowVals.size (),
1861  std::runtime_error, "Tpetra::CrsGraph::insertIndicesAndValues: "
1862  "numNewInds (" << numNewInds << ") > newRowVals.size() ("
1863  << newRowVals.size () << ").");
1864 #endif // HAVE_TPETRA_DEBUG
1865 
1866  size_type oldInd = static_cast<size_type> (rowInfo.numEntries);
1867 
1868 #ifdef HAVE_TPETRA_DEBUG
1869  try {
1870 #endif // HAVE_TPETRA_DEBUG
1871  //NOTE: The code in the else branch fails on GCC 4.9 and newer in the assignement oldRowVals[oldInd] = newRowVals[newInd];
1872  //We supply a workaround n as well as other code variants which produce or not produce the error
1873  #if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
1874  #define GCC_VERSION __GNUC__*100+__GNUC_MINOR__*10+__GNUC_PATCHLEVEL__
1875  #if GCC_VERSION >= 490
1876  #define GCC_WORKAROUND
1877  #endif
1878  #endif
1879  #ifdef GCC_WORKAROUND
1880  size_type nNI = static_cast<size_type>(numNewInds);
1881  if (nNI > 0)
1882  memcpy(&oldRowVals[oldInd], &newRowVals[0], nNI*sizeof(Scalar));
1883  /*
1884  //Original Code Fails
1885  for (size_type newInd = 0; newInd < static_cast<size_type> (numNewInds);
1886  ++newInd, ++oldInd) {
1887  oldRowVals[oldInd] = newRowVals[newInd];
1888  }
1889 
1890 
1891  //char cast variant fails
1892  char* oldRowValPtr = (char*)&oldRowVals[oldInd];
1893  const char* newRowValPtr = (const char*) &newRowVals[0];
1894 
1895  for(size_type newInd = 0; newInd < (nNI * sizeof(Scalar)); newInd++) {
1896  oldRowValPtr[newInd] = newRowValPtr[newInd];
1897  }
1898 
1899  //Raw ptr variant fails
1900  Scalar* oldRowValPtr = &oldRowVals[oldInd];
1901  Scalar* newRowValPtr = const_cast<Scalar*>(&newRowVals[0]);
1902 
1903  for(size_type newInd = 0; newInd < nNI; newInd++) {
1904  oldRowValPtr[newInd] = newRowValPtr[newInd];
1905  }
1906 
1907  //memcpy works
1908  for (size_type newInd = 0; newInd < nNI; newInd++) {
1909  memcpy( &oldRowVals[oldInd+newInd], &newRowVals[newInd], sizeof(Scalar));
1910  }
1911 
1912  //just one loop index fails
1913  for (size_type newInd = 0; newInd < nNI; newInd++) {
1914  oldRowVals[oldInd+newInd] = newRowVals[newInd];
1915  }
1916 
1917  //inline increment fails
1918  for (size_type newInd = 0; newInd < numNewInds;) {
1919  oldRowVals[oldInd++] = newRowVals[newInd++];
1920  }
1921 
1922  */
1923 
1924  #else // GCC Workaround above
1925  for (size_type newInd = 0; newInd < static_cast<size_type> (numNewInds);
1926  ++newInd, ++oldInd) {
1927  oldRowVals[oldInd] = newRowVals[newInd];
1928  }
1929  #endif // GCC Workaround
1930 #ifdef HAVE_TPETRA_DEBUG
1931  }
1932  catch (std::exception& e) {
1933  TEUCHOS_TEST_FOR_EXCEPTION(
1934  true, std::runtime_error, "Tpetra::CrsGraph::insertIndicesAndValues: "
1935  "for loop for copying values threw an exception: " << e.what ());
1936  }
1937 #endif // HAVE_TPETRA_DEBUG
1938  }
1939 
1940 
1941  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1942  void
1944  sortRowIndices (const RowInfo rowinfo)
1945  {
1946  if (rowinfo.numEntries > 0) {
1947  Teuchos::ArrayView<LocalOrdinal> inds_view =
1948  this->getLocalViewNonConst (rowinfo);
1949  std::sort (inds_view.begin (), inds_view.begin () + rowinfo.numEntries);
1950  }
1951  }
1952 
1953 
1954  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1955  template <class Scalar>
1956  void
1959  const Teuchos::ArrayView<Scalar>& values)
1960  {
1961  if (rowinfo.numEntries > 0) {
1962  Teuchos::ArrayView<LocalOrdinal> inds_view =
1963  this->getLocalViewNonConst (rowinfo);
1964  sort2 (inds_view.begin (), inds_view.begin () + rowinfo.numEntries,
1965  values.begin ());
1966  }
1967  }
1968 
1969 
1970  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1971  void
1974  {
1975  using Teuchos::ArrayView;
1976  const char tfecfFuncName[] = "mergeRowIndices: ";
1977  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1978  isStorageOptimized (), std::logic_error, "The graph is already storage "
1979  "optimized, so we shouldn't be merging any indices. "
1980  "Please report this bug to the Tpetra developers.");
1981 
1982  ArrayView<LocalOrdinal> inds_view = this->getLocalViewNonConst (rowinfo);
1983  typename ArrayView<LocalOrdinal>::iterator beg, end, newend;
1984  beg = inds_view.begin();
1985  end = inds_view.begin() + rowinfo.numEntries;
1986  newend = std::unique(beg,end);
1987  const size_t mergedEntries = newend - beg;
1988 #ifdef HAVE_TPETRA_DEBUG
1989  // merge should not have eliminated any entries; if so, the
1990  // assignment below will destroy the packed structure
1991  TEUCHOS_TEST_FOR_EXCEPT( isStorageOptimized () && mergedEntries != rowinfo.numEntries );
1992 #endif // HAVE_TPETRA_DEBUG
1993 
1994  // FIXME (mfh 07 Aug 2014) We should just sync at fillComplete,
1995  // but for now, for correctness, do the modify-sync cycle here.
1996  k_numRowEntries_.template modify<typename t_numRowEntries_::host_mirror_space> ();
1997  k_numRowEntries_.h_view(rowinfo.localRow) = mergedEntries;
1998  k_numRowEntries_.template sync<execution_space> ();
1999 
2000  nodeNumEntries_ -= (rowinfo.numEntries - mergedEntries);
2001  }
2002 
2003 
2004  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2005  template<class Scalar>
2006  void
2009  const Teuchos::ArrayView<Scalar>& rowValues)
2010  {
2011  using Teuchos::ArrayView;
2012  const char tfecfFuncName[] = "mergeRowIndicesAndValues: ";
2013  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2014  isStorageOptimized(), std::logic_error, "It is invalid to call this "
2015  "method if the graph's storage has already been optimized. Please "
2016  "report this bug to the Tpetra developers.");
2017 
2018  typedef typename ArrayView<Scalar>::iterator Iter;
2019  Iter rowValueIter = rowValues.begin ();
2020  ArrayView<LocalOrdinal> inds_view = getLocalViewNonConst (rowinfo);
2021  typename ArrayView<LocalOrdinal>::iterator beg, end, newend;
2022 
2023  // beg,end define a half-exclusive interval over which to iterate.
2024  beg = inds_view.begin();
2025  end = inds_view.begin() + rowinfo.numEntries;
2026  newend = beg;
2027  if (beg != end) {
2028  typename ArrayView<LocalOrdinal>::iterator cur = beg + 1;
2029  Iter vcur = rowValueIter + 1;
2030  Iter vend = rowValueIter;
2031  cur = beg+1;
2032  while (cur != end) {
2033  if (*cur != *newend) {
2034  // new entry; save it
2035  ++newend;
2036  ++vend;
2037  (*newend) = (*cur);
2038  (*vend) = (*vcur);
2039  }
2040  else {
2041  // old entry; merge it
2042  //(*vend) = f (*vend, *vcur);
2043  (*vend) += *vcur;
2044  }
2045  ++cur;
2046  ++vcur;
2047  }
2048  ++newend; // one past the last entry, per typical [beg,end) semantics
2049  }
2050  const size_t mergedEntries = newend - beg;
2051 #ifdef HAVE_TPETRA_DEBUG
2052  // merge should not have eliminated any entries; if so, the
2053  // assignment below will destroy the packed structure
2054  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2055  isStorageOptimized() && mergedEntries != rowinfo.numEntries,
2056  std::logic_error,
2057  ": Merge was incorrect; it eliminated entries from the graph. "
2058  << std::endl << "Please report this bug to the Tpetra developers.");
2059 #endif // HAVE_TPETRA_DEBUG
2060 
2061  // FIXME (mfh 07 Aug 2014) We should just sync at fillComplete,
2062  // but for now, for correctness, do the modify-sync cycle here.
2063  k_numRowEntries_.template modify<typename t_numRowEntries_::host_mirror_space> ();
2064  k_numRowEntries_.h_view(rowinfo.localRow) = mergedEntries;
2065  k_numRowEntries_.template sync<execution_space> ();
2066 
2067  nodeNumEntries_ -= (rowinfo.numEntries - mergedEntries);
2068  }
2069 
2070 
2071  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2072  void
2074  setDomainRangeMaps (const Teuchos::RCP<const map_type>& domainMap,
2075  const Teuchos::RCP<const map_type>& rangeMap)
2076  {
2077  // simple pointer comparison for equality
2078  if (domainMap_ != domainMap) {
2079  domainMap_ = domainMap;
2080  importer_ = null;
2081  }
2082  if (rangeMap_ != rangeMap) {
2083  rangeMap_ = rangeMap;
2084  exporter_ = null;
2085  }
2086  }
2087 
2088 
2089  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2090  size_t
2092  findLocalIndex (const RowInfo& rowinfo,
2093  const LocalOrdinal ind,
2094  const size_t hint) const
2095  {
2096  auto colInds = this->getLocalKokkosRowView (rowinfo);
2097  return this->findLocalIndex (rowinfo, ind, colInds, hint);
2098  }
2099 
2100 
2101  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2102  size_t
2104  findLocalIndex (const RowInfo& rowinfo,
2105  const LocalOrdinal ind,
2106  const Kokkos::View<const LocalOrdinal*, device_type,
2107  Kokkos::MemoryUnmanaged>& colInds,
2108  const size_t hint) const
2109  {
2110  typedef const LocalOrdinal* IT;
2111 
2112  // NOTE (mfh 11 Oct 2015) This method assumes UVM. We could
2113  // imagine templating this method on the memory space, but makes
2114  // more sense to let UVM work.
2115 
2116  // If the hint was correct, then the hint is the offset to return.
2117  if (hint < rowinfo.numEntries && colInds(hint) == ind) {
2118  return hint;
2119  }
2120 
2121  // The hint was wrong, so we must search for the given column
2122  // index in the column indices for the given row. How we do the
2123  // search depends on whether the graph's column indices are
2124  // sorted.
2125  IT beg = colInds.ptr_on_device ();
2126  IT end = beg + rowinfo.numEntries;
2127  IT ptr = beg + rowinfo.numEntries; // "null"
2128  bool found = true;
2129 
2130  if (isSorted ()) {
2131  std::pair<IT,IT> p = std::equal_range (beg, end, ind); // binary search
2132  if (p.first == p.second) {
2133  found = false;
2134  } else {
2135  ptr = p.first;
2136  }
2137  }
2138  else {
2139  ptr = std::find (beg, end, ind); // direct search
2140  if (ptr == end) {
2141  found = false;
2142  }
2143  }
2144 
2145  if (found) {
2146  return static_cast<size_t> (ptr - beg);
2147  }
2148  else {
2149  return Teuchos::OrdinalTraits<size_t>::invalid ();
2150  }
2151  }
2152 
2153 
2154  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2155  size_t
2157  findGlobalIndex (const RowInfo& rowinfo,
2158  const GlobalOrdinal ind,
2159  const Kokkos::View<const GlobalOrdinal*,
2160  device_type, Kokkos::MemoryUnmanaged>& colInds,
2161  const size_t hint) const
2162  {
2163  typedef const GlobalOrdinal* IT;
2164 
2165  // NOTE (mfh 26 Nov 2015) This method assumes UVM. We could
2166  // imagine templating this method on the memory space, but makes
2167  // more sense to let UVM work.
2168 
2169  // If the hint was correct, then the hint is the offset to return.
2170  if (hint < rowinfo.numEntries && colInds(hint) == ind) {
2171  return hint;
2172  }
2173 
2174  // The hint was wrong, so we must search for the given column
2175  // index in the column indices for the given row. How we do the
2176  // search depends on whether the graph's column indices are
2177  // sorted.
2178  IT beg = colInds.ptr_on_device ();
2179  IT end = beg + rowinfo.numEntries;
2180  IT ptr = beg + rowinfo.numEntries; // "null"
2181  bool found = true;
2182 
2183  if (isSorted ()) {
2184  std::pair<IT,IT> p = std::equal_range (beg, end, ind); // binary search
2185  if (p.first == p.second) {
2186  found = false;
2187  } else {
2188  ptr = p.first;
2189  }
2190  }
2191  else {
2192  ptr = std::find (beg, end, ind); // direct search
2193  if (ptr == end) {
2194  found = false;
2195  }
2196  }
2197 
2198  if (found) {
2199  return static_cast<size_t> (ptr - beg);
2200  }
2201  else {
2202  return Teuchos::OrdinalTraits<size_t>::invalid ();
2203  }
2204  }
2205 
2206 
2207  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2208  size_t
2210  findGlobalIndex (const RowInfo& rowinfo,
2211  const GlobalOrdinal ind,
2212  const size_t hint) const
2213  {
2214  auto colInds = this->getGlobalKokkosRowView (rowinfo);
2215  return this->findGlobalIndex (rowinfo, ind, colInds, hint);
2216  }
2217 
2218 
2219  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2220  void
2223  {
2224  globalNumEntries_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2225  globalNumDiags_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2226  globalMaxNumRowEntries_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2227  haveGlobalConstants_ = false;
2228  }
2229 
2230 
2231  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2232  void
2235  {
2236 #ifdef HAVE_TPETRA_DEBUG
2237  const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2238  const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid ();
2239  const char err[] = "Tpetra::CrsGraph::checkInternalState: Likely internal "
2240  "logic error. Please contact Tpetra team.";
2241  // check the internal state of this data structure
2242  // this is called by numerous state-changing methods, in a debug build, to ensure that the object
2243  // always remains in a valid state
2244  // the graph should have been allocated with a row map
2245  TEUCHOS_TEST_FOR_EXCEPTION( rowMap_ == null, std::logic_error, err );
2246  // am either complete or active
2247  TEUCHOS_TEST_FOR_EXCEPTION( isFillActive() == isFillComplete(), std::logic_error, err );
2248  // if the graph has been fill completed, then all maps should be present
2249  TEUCHOS_TEST_FOR_EXCEPTION( isFillComplete() == true && (colMap_ == null || rangeMap_ == null || domainMap_ == null), std::logic_error, err );
2250  // if storage has been optimized, then indices should have been allocated (even if trivially so)
2251  TEUCHOS_TEST_FOR_EXCEPTION( isStorageOptimized() == true && indicesAreAllocated() == false, std::logic_error, err );
2252  // if storage has been optimized, then number of allocated is now the number of entries
2253  TEUCHOS_TEST_FOR_EXCEPTION( isStorageOptimized() == true && getNodeAllocationSize() != getNodeNumEntries(), std::logic_error, err );
2254  // if graph doesn't have the global constants, then they should all be marked as invalid
2255  TEUCHOS_TEST_FOR_EXCEPTION( haveGlobalConstants_ == false && ( globalNumEntries_ != GSTI || globalNumDiags_ != GSTI || globalMaxNumRowEntries_ != GSTI ), std::logic_error, err );
2256  // if the graph has global cosntants, then they should be valid.
2257  TEUCHOS_TEST_FOR_EXCEPTION( haveGlobalConstants_ == true && ( globalNumEntries_ == GSTI || globalNumDiags_ == GSTI || globalMaxNumRowEntries_ == GSTI ), std::logic_error, err );
2258  TEUCHOS_TEST_FOR_EXCEPTION( haveGlobalConstants_ == true && ( globalNumEntries_ < nodeNumEntries_ || globalNumDiags_ < nodeNumDiags_ || globalMaxNumRowEntries_ < nodeMaxNumRowEntries_ ),
2259  std::logic_error, err );
2260  // if indices are allocated, then the allocation specifications should have been released
2261  TEUCHOS_TEST_FOR_EXCEPTION(
2262  indicesAreAllocated () && (numAllocForAllRows_ != 0 ||
2263  k_numAllocPerRow_.dimension_0 () != 0),
2264  std::logic_error, err );
2265  // if indices are not allocated, then information dictating allocation quantities should be present
2266  TEUCHOS_TEST_FOR_EXCEPTION(
2267  ! indicesAreAllocated () && (nodeNumAllocated_ != STI ||
2268  nodeNumEntries_ != 0),
2269  std::logic_error, err );
2270  // if storage is optimized, then profile should be static
2271  TEUCHOS_TEST_FOR_EXCEPTION( isStorageOptimized() && pftype_ != StaticProfile, std::logic_error, err );
2272 
2273  // If k_rowPtrs_ exists (has nonzero size), it must have N+1 rows,
2274  // and k_rowPtrs_(N) must equal k_gblInds1D_.dimension_0() (if
2275  // globally indexed) or k_lclInds1D_.dimension_0() (if locally
2276  // indexed).
2277 
2278  TEUCHOS_TEST_FOR_EXCEPTION(
2279  isGloballyIndexed () && k_rowPtrs_.dimension_0 () != 0 &&
2280  (static_cast<size_t> (k_rowPtrs_.dimension_0 ()) != getNodeNumRows () + 1 ||
2281  k_rowPtrs_(getNodeNumRows ()) != static_cast<size_t> (k_gblInds1D_.dimension_0 ())),
2282  std::logic_error, err );
2283 
2284  TEUCHOS_TEST_FOR_EXCEPTION(
2285  isLocallyIndexed () && k_rowPtrs_.dimension_0 () != 0 &&
2286  (static_cast<size_t> (k_rowPtrs_.dimension_0 ()) != getNodeNumRows () + 1 ||
2287  k_rowPtrs_(getNodeNumRows ()) != static_cast<size_t> (k_lclInds1D_.dimension_0 ())),
2288  std::logic_error, err );
2289 
2290  // If profile is dynamic and indices are allocated, then 2-D
2291  // allocations of column index storage (either local or global)
2292  // must be present.
2293  TEUCHOS_TEST_FOR_EXCEPTION(
2294  pftype_ == DynamicProfile &&
2295  indicesAreAllocated () &&
2296  getNodeNumRows () > 0 &&
2297  lclInds2D_.is_null () && gblInds2D_.is_null (),
2298  std::logic_error, err );
2299 
2300  // If profile is dynamic and the calling process owns nonzero
2301  // rows, then k_numRowEntries_ and 2-D storage of column indices
2302  // (whether local or global) must be present.
2303  TEUCHOS_TEST_FOR_EXCEPTION(
2304  pftype_ == DynamicProfile &&
2305  indicesAreAllocated () &&
2306  getNodeNumRows () > 0 &&
2307  (k_numRowEntries_.dimension_0 () == 0 || (lclInds2D_.is_null () && gblInds2D_.is_null ())),
2308  std::logic_error, err );
2309 
2310  // if profile is dynamic, then 1D allocations should not be present
2311  TEUCHOS_TEST_FOR_EXCEPTION(
2312  pftype_ == DynamicProfile &&
2313  (k_lclInds1D_.dimension_0 () != 0 || k_gblInds1D_.dimension_0 () != 0),
2314  std::logic_error, err );
2315  // if profile is dynamic, then row offsets should not be present
2316  TEUCHOS_TEST_FOR_EXCEPTION(
2317  pftype_ == DynamicProfile && k_rowPtrs_.dimension_0 () != 0,
2318  std::logic_error, err );
2319  // if profile is static and we have allocated non-trivially, then
2320  // 1D allocations should be present
2321  TEUCHOS_TEST_FOR_EXCEPTION(
2322  pftype_ == StaticProfile && indicesAreAllocated () &&
2323  getNodeAllocationSize () > 0 && k_lclInds1D_.dimension_0 () == 0 &&
2324  k_gblInds1D_.dimension_0 () == 0,
2325  std::logic_error, err);
2326  // if profile is static, then 2D allocations should not be present
2327  TEUCHOS_TEST_FOR_EXCEPTION(
2328  pftype_ == StaticProfile && (lclInds2D_ != null || gblInds2D_ != null),
2329  std::logic_error, err );
2330 
2331  // if indices are not allocated, then none of the buffers should be.
2332  TEUCHOS_TEST_FOR_EXCEPTION(
2333  ! indicesAreAllocated () &&
2334  ((k_rowPtrs_.dimension_0 () != 0 || k_numRowEntries_.dimension_0 () != 0) ||
2335  k_lclInds1D_.dimension_0 () != 0 || lclInds2D_ != null ||
2336  k_gblInds1D_.dimension_0 () != 0 || gblInds2D_ != null),
2337  std::logic_error, err );
2338 
2339  // indices may be local or global only if they are allocated
2340  // (numAllocated is redundant; could simply be indicesAreLocal_ ||
2341  // indicesAreGlobal_)
2342  TEUCHOS_TEST_FOR_EXCEPTION( (indicesAreLocal_ || indicesAreGlobal_) && ! indicesAreAllocated_, std::logic_error, err );
2343  // indices may be local or global, but not both
2344  TEUCHOS_TEST_FOR_EXCEPTION( indicesAreLocal_ == true && indicesAreGlobal_ == true, std::logic_error, err );
2345  // if indices are local, then global allocations should not be present
2346  TEUCHOS_TEST_FOR_EXCEPTION(
2347  indicesAreLocal_ && (k_gblInds1D_.dimension_0 () != 0 || gblInds2D_ != null),
2348  std::logic_error, err );
2349  // if indices are global, then local allocations should not be present
2350  TEUCHOS_TEST_FOR_EXCEPTION(
2351  indicesAreGlobal_ && (k_lclInds1D_.dimension_0 () != 0 || lclInds2D_ != null),
2352  std::logic_error, err );
2353  // if indices are local, then local allocations should be present
2354  TEUCHOS_TEST_FOR_EXCEPTION(
2355  indicesAreLocal_ && getNodeAllocationSize () > 0 &&
2356  k_lclInds1D_.dimension_0 () == 0 && getNodeNumRows () > 0 &&
2357  lclInds2D_.is_null (),
2358  std::logic_error, err);
2359  // if indices are global, then global allocations should be present
2360  TEUCHOS_TEST_FOR_EXCEPTION(
2361  indicesAreGlobal_ && getNodeAllocationSize () > 0 &&
2362  k_gblInds1D_.dimension_0 () == 0 && getNodeNumRows () > 0 &&
2363  gblInds2D_.is_null (),
2364  std::logic_error, err);
2365  // if indices are allocated, then we should have recorded how many were allocated
2366  TEUCHOS_TEST_FOR_EXCEPTION( indicesAreAllocated() == true && nodeNumAllocated_ == STI, std::logic_error, err );
2367  // if indices are not allocated, then the allocation size should be marked invalid
2368  TEUCHOS_TEST_FOR_EXCEPTION( indicesAreAllocated() == false && nodeNumAllocated_ != STI, std::logic_error, err );
2369  // check the actual allocations
2370  if (indicesAreAllocated()) {
2371  size_t actualNumAllocated = 0;
2372  if (pftype_ == DynamicProfile) {
2373  if (isGloballyIndexed() && gblInds2D_ != null) {
2374  for (size_t r = 0; r < getNodeNumRows(); ++r) {
2375  actualNumAllocated += gblInds2D_[r].size();
2376  }
2377  }
2378  else if (isLocallyIndexed() && lclInds2D_ != null) {
2379  for (size_t r = 0; r < getNodeNumRows(); ++r) {
2380  actualNumAllocated += lclInds2D_[r].size();
2381  }
2382  }
2383  TEUCHOS_TEST_FOR_EXCEPTION(actualNumAllocated != nodeNumAllocated_, std::logic_error, err );
2384  }
2385  else if (k_rowPtrs_.dimension_0 () != 0) { // pftype_ == StaticProfile
2386  TEUCHOS_TEST_FOR_EXCEPTION(
2387  static_cast<size_t> (k_rowPtrs_.dimension_0 ()) != getNodeNumRows () + 1,
2388  std::logic_error, err);
2389 
2390  actualNumAllocated = k_rowPtrs_(getNodeNumRows ());
2391  TEUCHOS_TEST_FOR_EXCEPTION(
2392  isLocallyIndexed () &&
2393  static_cast<size_t> (k_lclInds1D_.dimension_0 ()) != actualNumAllocated,
2394  std::logic_error, err );
2395  TEUCHOS_TEST_FOR_EXCEPTION(
2396  isGloballyIndexed () &&
2397  static_cast<size_t> (k_gblInds1D_.dimension_0 ()) != actualNumAllocated,
2398  std::logic_error, err );
2399  TEUCHOS_TEST_FOR_EXCEPTION(actualNumAllocated != nodeNumAllocated_, std::logic_error, err );
2400  }
2401  }
2402 #endif // HAVE_TPETRA_DEBUG
2403  }
2404 
2405 
2406  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2407  size_t
2409  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const
2410  {
2411  using Teuchos::OrdinalTraits;
2412  const LocalOrdinal lrow = rowMap_->getLocalElement (globalRow);
2413  if (hasRowInfo () && lrow != OrdinalTraits<LocalOrdinal>::invalid ()) {
2414  const RowInfo rowinfo = this->getRowInfo (lrow);
2415  return rowinfo.numEntries;
2416  } else {
2417  return OrdinalTraits<size_t>::invalid ();
2418  }
2419  }
2420 
2421 
2422  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2423  size_t
2425  getNumEntriesInLocalRow (LocalOrdinal localRow) const
2426  {
2427  if (hasRowInfo () && rowMap_->isNodeLocalElement (localRow)) {
2428  const RowInfo rowinfo = this->getRowInfo (localRow);
2429  return rowinfo.numEntries;
2430  } else {
2431  return Teuchos::OrdinalTraits<size_t>::invalid ();
2432  }
2433  }
2434 
2435 
2436  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2437  size_t
2439  getNumAllocatedEntriesInGlobalRow (GlobalOrdinal globalRow) const
2440  {
2441  const LocalOrdinal lrow = rowMap_->getLocalElement (globalRow);
2442  if (hasRowInfo () && lrow != Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
2443  const RowInfo rowinfo = this->getRowInfo (lrow);
2444  return rowinfo.allocSize;
2445  } else {
2446  return Teuchos::OrdinalTraits<size_t>::invalid ();
2447  }
2448  }
2449 
2450 
2451  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2452  size_t
2454  getNumAllocatedEntriesInLocalRow (LocalOrdinal localRow) const
2455  {
2456  if (hasRowInfo () && rowMap_->isNodeLocalElement (localRow)) {
2457  const RowInfo rowinfo = this->getRowInfo (localRow);
2458  return rowinfo.allocSize;
2459  } else {
2460  return Teuchos::OrdinalTraits<size_t>::invalid();
2461  }
2462  }
2463 
2464 
2465  namespace { // (anonymous)
2466  template<class OutputViewType, class InputViewType>
2467  class CopyOffsets {
2468  public:
2469  typedef typename OutputViewType::execution_space execution_space;
2470 
2471  CopyOffsets (const OutputViewType& ptr_out,
2472  const InputViewType& ptr_in) :
2473  ptr_out_ (ptr_out),
2474  ptr_in_ (ptr_in)
2475  {}
2476 
2477  KOKKOS_INLINE_FUNCTION void operator () (const ptrdiff_t& i) const {
2478  typedef typename OutputViewType::non_const_value_type value_type;
2479  // FIXME (mfh 22 Mar 2015) Change this into parallel_reduce
2480  // and check for overflow.
2481  ptr_out_(i) = static_cast<value_type> (ptr_in_(i));
2482  }
2483 
2484  private:
2485  OutputViewType ptr_out_;
2486  InputViewType ptr_in_;
2487  };
2488  } // namespace (anonymous)
2489 
2490 
2491  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2492  Teuchos::ArrayRCP<const size_t>
2495  {
2496  using Kokkos::ViewAllocateWithoutInitializing;
2497  using Kokkos::create_mirror_view;
2498  using Teuchos::ArrayRCP;
2499  typedef typename local_graph_type::row_map_type row_map_type;
2500  typedef typename row_map_type::non_const_value_type row_offset_type;
2501 #ifdef HAVE_TPETRA_DEBUG
2502  const char prefix[] = "Tpetra::CrsGraph::getNodeRowPtrs: ";
2503  const char suffix[] = " Please report this bug to the Tpetra developers.";
2504 #endif // HAVE_TPETRA_DEBUG
2505  const size_t size = k_rowPtrs_.dimension_0 ();
2506  const bool same = Kokkos::Impl::is_same<size_t, row_offset_type>::value;
2507 
2508  if (size == 0) {
2509  return ArrayRCP<const size_t> ();
2510  }
2511 
2512  ArrayRCP<const row_offset_type> ptr_rot;
2513  ArrayRCP<const size_t> ptr_st;
2514  if (same) { // size_t == row_offset_type
2515  // NOTE (mfh 22 Mar 2015) In a debug build of Kokkos, the result
2516  // of create_mirror_view might actually be a new allocation.
2517  // This helps with debugging when there are two memory spaces.
2518  typename row_map_type::HostMirror ptr_h = create_mirror_view (k_rowPtrs_);
2519  Kokkos::deep_copy (ptr_h, k_rowPtrs_);
2520 #ifdef HAVE_TPETRA_DEBUG
2521  TEUCHOS_TEST_FOR_EXCEPTION(
2522  ptr_h.dimension_0 () != k_rowPtrs_.dimension_0 (), std::logic_error,
2523  prefix << "size_t == row_offset_type, but ptr_h.dimension_0() = "
2524  << ptr_h.dimension_0 () << " != k_rowPtrs_.dimension_0() = "
2525  << k_rowPtrs_.dimension_0 () << ".");
2526  TEUCHOS_TEST_FOR_EXCEPTION(
2527  same && size != 0 && k_rowPtrs_.ptr_on_device () == NULL, std::logic_error,
2528  prefix << "size_t == row_offset_type and k_rowPtrs_.dimension_0() = "
2529  << size << " != 0, but k_rowPtrs_.ptr_on_device() == NULL." << suffix);
2530  TEUCHOS_TEST_FOR_EXCEPTION(
2531  same && size != 0 && ptr_h.ptr_on_device () == NULL, std::logic_error,
2532  prefix << "size_t == row_offset_type and k_rowPtrs_.dimension_0() = "
2533  << size << " != 0, but create_mirror_view(k_rowPtrs_).ptr_on_device() "
2534  "== NULL." << suffix);
2535 #endif // HAVE_TPETRA_DEBUG
2536  ptr_rot = Kokkos::Compat::persistingView (ptr_h);
2537  }
2538  else { // size_t != row_offset_type
2539  typedef Kokkos::View<size_t*, device_type> ret_view_type;
2540  ret_view_type ptr_d (ViewAllocateWithoutInitializing ("ptr"), size);
2541  CopyOffsets<ret_view_type, row_map_type> functor (ptr_d, k_rowPtrs_);
2542  Kokkos::parallel_for (size, functor);
2543  typename ret_view_type::HostMirror ptr_h = create_mirror_view (ptr_d);
2544  Kokkos::deep_copy (ptr_h, ptr_d);
2545  ptr_st = Kokkos::Compat::persistingView (ptr_h);
2546  }
2547 #ifdef HAVE_TPETRA_DEBUG
2548  TEUCHOS_TEST_FOR_EXCEPTION(
2549  same && size != 0 && ptr_rot.is_null (), std::logic_error,
2550  prefix << "size_t == row_offset_type and size = " << size
2551  << " != 0, but ptr_rot is null." << suffix);
2552  TEUCHOS_TEST_FOR_EXCEPTION(
2553  ! same && size != 0 && ptr_st.is_null (), std::logic_error,
2554  prefix << "size_t != row_offset_type and size = " << size
2555  << " != 0, but ptr_st is null." << suffix);
2556 #endif // HAVE_TPETRA_DEBUG
2557 
2558  // If size_t == row_offset_type, return a persisting host view of
2559  // k_rowPtrs_. Otherwise, return a size_t host copy of k_rowPtrs_.
2560 #ifdef HAVE_TPETRA_DEBUG
2561  ArrayRCP<const size_t> retval =
2562  Kokkos::Impl::if_c<same,
2563  ArrayRCP<const row_offset_type>,
2564  ArrayRCP<const size_t> >::select (ptr_rot, ptr_st);
2565  TEUCHOS_TEST_FOR_EXCEPTION(
2566  size != 0 && retval.is_null (), std::logic_error,
2567  prefix << "size = " << size << " != 0, but retval is null." << suffix);
2568  return retval;
2569 #else
2570  return Kokkos::Impl::if_c<same,
2571  ArrayRCP<const row_offset_type>,
2572  ArrayRCP<const size_t> >::select (ptr_rot, ptr_st);
2573 #endif // HAVE_TPETRA_DEBUG
2574  }
2575 
2576 
2577  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2578  Teuchos::ArrayRCP<const LocalOrdinal>
2581  {
2582  return Kokkos::Compat::persistingView (k_lclInds1D_);
2583  }
2584 
2585 
2586  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2587  void
2589  getLocalRowCopy (LocalOrdinal localRow,
2590  const Teuchos::ArrayView<LocalOrdinal>&indices,
2591  size_t& numEntries) const
2592  {
2593  using Teuchos::ArrayView;
2594  typedef LocalOrdinal LO;
2595  typedef GlobalOrdinal GO;
2596 
2597  TEUCHOS_TEST_FOR_EXCEPTION(
2598  isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2599  "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2600  "does not have a column Map yet. That means we don't have local indices "
2601  "for columns yet, so it doesn't make sense to call this method. If the "
2602  "graph doesn't have a column Map yet, you should call fillComplete on "
2603  "it first.");
2604  TEUCHOS_TEST_FOR_EXCEPTION(
2605  ! hasRowInfo(), std::runtime_error,
2606  "Tpetra::CrsGraph::getLocalRowCopy: graph row information was deleted "
2607  "at fillComplete().");
2608 
2609  if (! getRowMap ()->isNodeLocalElement (localRow)) {
2610  numEntries = 0;
2611  return;
2612  }
2613 
2614  const RowInfo rowinfo = this->getRowInfo (localRow);
2615  const size_t theNumEntries = rowinfo.numEntries;
2616 
2617  TEUCHOS_TEST_FOR_EXCEPTION(
2618  static_cast<size_t> (indices.size ()) < theNumEntries,
2619  std::runtime_error,
2620  "Tpetra::CrsGraph::getLocalRowCopy: The given row " << localRow << " has "
2621  << theNumEntries << " entries, but indices.size() = " << indices.size ()
2622  << ", which does not suffice to store the row's indices.");
2623 
2624  numEntries = theNumEntries;
2625 
2626  if (isLocallyIndexed ()) {
2627  ArrayView<const LO> lview = getLocalView (rowinfo);
2628  std::copy (lview.begin (), lview.begin () + numEntries, indices.begin ());
2629  }
2630  else if (isGloballyIndexed ()) {
2631  ArrayView<const GO> gview = getGlobalView (rowinfo);
2632  const map_type& colMap = * (this->getColMap ());
2633  for (size_t j = 0; j < numEntries; ++j) {
2634  indices[j] = colMap.getLocalElement (gview[j]);
2635  }
2636  }
2637  else {
2638  // If the graph on the calling process is neither locally nor
2639  // globally indexed, that means it owns no column indices.
2640  //
2641  // FIXME (mfh 21 Oct 2013) It's not entirely clear to me whether
2642  // we can reach this branch, given the checks above. However,
2643  // if that is the case, it should still be correct to call this
2644  // function if the calling process owns no column indices.
2645  numEntries = 0;
2646  }
2647  }
2648 
2649 
2650  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2651  void
2653  getGlobalRowCopy (GlobalOrdinal globalRow,
2654  const Teuchos::ArrayView<GlobalOrdinal>& indices,
2655  size_t& NumIndices) const
2656  {
2657  using Teuchos::ArrayView;
2658  const char tfecfFuncName[] = "getGlobalRowCopy: ";
2659  // we either currently store global indices, or we have a column
2660  // map with which to transcribe our local indices for the user
2661  const LocalOrdinal lrow = rowMap_->getLocalElement (globalRow);
2662 
2663  // FIXME (mfh 22 Aug 2014) Instead of throwing an exception,
2664  // should just set NumIndices=0 and return. In that case, the
2665  // calling process owns no entries in that row, so the right thing
2666  // to do is to return an empty copy.
2667  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2668  lrow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
2669  std::runtime_error,
2670  "GlobalRow (== " << globalRow << ") does not belong to this process.");
2671  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2672  ! hasRowInfo (), std::runtime_error,
2673  "Graph row information was deleted at fillComplete().");
2674  const RowInfo rowinfo = this->getRowInfo (lrow);
2675  NumIndices = rowinfo.numEntries;
2676  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2677  static_cast<size_t> (indices.size ()) < NumIndices, std::runtime_error,
2678  "Specified storage (size==" << indices.size () << ") does not suffice "
2679  "to hold all entries for this row (NumIndices == " << NumIndices << ").");
2680  if (isLocallyIndexed ()) {
2681  ArrayView<const LocalOrdinal> lview = this->getLocalView (rowinfo);
2682  for (size_t j = 0; j < NumIndices; ++j) {
2683  indices[j] = colMap_->getGlobalElement (lview[j]);
2684  }
2685  }
2686  else if (isGloballyIndexed ()) {
2687  ArrayView<const GlobalOrdinal> gview = this->getGlobalView (rowinfo);
2688  std::copy (gview.begin (), gview.begin () + NumIndices, indices.begin ());
2689  }
2690  }
2691 
2692 
2693  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2694  void
2696  getLocalRowView (LocalOrdinal localRow,
2697  Teuchos::ArrayView<const LocalOrdinal>& indices) const
2698  {
2699  const char tfecfFuncName[] = "getLocalRowView: ";
2700  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2701  isGloballyIndexed (), std::runtime_error, "The graph's indices are "
2702  "currently stored as global indices, so we cannot return a view with "
2703  "local column indices, whether or not the graph has a column Map. If "
2704  "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2705  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2706  ! hasRowInfo (), std::runtime_error, "Graph row information was "
2707  "deleted at fillComplete().");
2708  indices = Teuchos::null;
2709  if (rowMap_->isNodeLocalElement (localRow)) {
2710  const RowInfo rowinfo = this->getRowInfo (localRow);
2711  if (rowinfo.numEntries > 0) {
2712  indices = this->getLocalView (rowinfo);
2713  // getLocalView returns a view of the _entire_ row, including
2714  // any extra space at the end (which 1-D unpacked storage
2715  // might have, for example). That's why we have to take a
2716  // subview of the returned view.
2717  indices = indices (0, rowinfo.numEntries);
2718  }
2719  }
2720 #ifdef HAVE_TPETRA_DEBUG
2721  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2722  static_cast<size_t> (indices.size ()) != this->getNumEntriesInLocalRow (localRow),
2723  std::logic_error, ": Violated stated post-conditions. Please contact Tpetra team.");
2724 #endif // HAVE_TPETRA_DEBUG
2725  }
2726 
2727 
2728  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2729  void
2731  getGlobalRowView (GlobalOrdinal globalRow,
2732  Teuchos::ArrayView<const GlobalOrdinal>& indices) const
2733  {
2734  const char tfecfFuncName[] = "getGlobalRowView: ";
2735  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2736  isLocallyIndexed (), std::runtime_error, "The graph's indices are "
2737  "currently stored as local indices, so we cannot return a view with "
2738  "global column indices. Use getGlobalRowCopy() instead.");
2739  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2740  ! hasRowInfo (), std::runtime_error,
2741  "Graph row information was deleted at fillComplete().");
2742 
2743  // isNodeGlobalElement() requires a global to local lookup anyway,
2744  // and getLocalElement() returns invalid() if the element wasn't found.
2745  const LocalOrdinal localRow = rowMap_->getLocalElement (globalRow);
2746  indices = Teuchos::null;
2747  if (localRow != Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
2748  const RowInfo rowInfo = this->getRowInfo (localRow);
2749  if (rowInfo.numEntries > 0) {
2750  indices = (this->getGlobalView (rowInfo)) (0, rowInfo.numEntries);
2751  }
2752  }
2753 #ifdef HAVE_TPETRA_DEBUG
2754  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2755  static_cast<size_t> (indices.size ()) != this->getNumEntriesInGlobalRow (globalRow),
2756  std::logic_error,
2757  "Violated stated postconditions: indices.size() = " << indices.size ()
2758  << " != getNumEntriesInGlobalRow(globalRow=" << globalRow
2759  << ") = " << this->getNumEntriesInGlobalRow (globalRow)
2760  << ". Please report this bug to the Tpetra developers.");
2761 #endif // HAVE_TPETRA_DEBUG
2762  }
2763 
2764 
2765  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2766  void
2768  insertLocalIndices (const LocalOrdinal localRow,
2769  const Teuchos::ArrayView<const LocalOrdinal>& indices)
2770  {
2771  const char tfecfFuncName[] = "insertLocalIndices";
2772 
2773  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2774  ! isFillActive (), std::runtime_error,
2775  ": requires that fill is active.");
2776  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2777  isGloballyIndexed (), std::runtime_error,
2778  ": graph indices are global; use insertGlobalIndices().");
2779  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2780  ! hasColMap (), std::runtime_error,
2781  ": cannot insert local indices without a column map.");
2782  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2783  ! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
2784  ": row does not belong to this node.");
2785  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2786  ! hasRowInfo (), std::runtime_error,
2787  ": graph row information was deleted at fillComplete().");
2788  if (! indicesAreAllocated ()) {
2789  allocateIndices (LocalIndices);
2790  }
2791 
2792 #ifdef HAVE_TPETRA_DEBUG
2793  // In a debug build, if the graph has a column Map, test whether
2794  // any of the given column indices are not in the column Map.
2795  // Keep track of the invalid column indices so we can tell the
2796  // user about them.
2797  if (hasColMap ()) {
2798  using Teuchos::Array;
2799  using Teuchos::toString;
2800  using std::endl;
2801  typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2802 
2803  const map_type& colMap = * (getColMap ());
2804  Array<LocalOrdinal> badColInds;
2805  bool allInColMap = true;
2806  for (size_type k = 0; k < indices.size (); ++k) {
2807  if (! colMap.isNodeLocalElement (indices[k])) {
2808  allInColMap = false;
2809  badColInds.push_back (indices[k]);
2810  }
2811  }
2812  if (! allInColMap) {
2813  std::ostringstream os;
2814  os << "Tpetra::CrsMatrix::insertLocalIndices: You attempted to insert "
2815  "entries in owned row " << localRow << ", at the following column "
2816  "indices: " << toString (indices) << "." << endl;
2817  os << "Of those, the following indices are not in the column Map on "
2818  "this process: " << toString (badColInds) << "." << endl << "Since "
2819  "the graph has a column Map already, it is invalid to insert entries "
2820  "at those locations.";
2821  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
2822  }
2823  }
2824 #endif // HAVE_TPETRA_DEBUG
2825 
2826  insertLocalIndicesImpl (localRow, indices);
2827 
2828 #ifdef HAVE_TPETRA_DEBUG
2829  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2830  indicesAreAllocated() == false || isLocallyIndexed() == false,
2831  std::logic_error,
2832  ": Violated stated post-conditions. Please contact Tpetra team.");
2833 #endif
2834  }
2835 
2836 
2837  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2838  void
2840  insertLocalIndicesFiltered (const LocalOrdinal localRow,
2841  const Teuchos::ArrayView<const LocalOrdinal>& indices)
2842  {
2843  typedef LocalOrdinal LO;
2844  const char tfecfFuncName[] = "insertLocalIndicesFiltered";
2845 
2846  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2847  isFillActive() == false, std::runtime_error,
2848  ": requires that fill is active.");
2849  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2850  isGloballyIndexed() == true, std::runtime_error,
2851  ": graph indices are global; use insertGlobalIndices().");
2852  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2853  hasColMap() == false, std::runtime_error,
2854  ": cannot insert local indices without a column map.");
2855  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2856  rowMap_->isNodeLocalElement(localRow) == false, std::runtime_error,
2857  ": row does not belong to this node.");
2858  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2859  ! hasRowInfo (), std::runtime_error,
2860  ": graph row information was deleted at fillComplete().");
2861  if (! indicesAreAllocated ()) {
2862  allocateIndices (LocalIndices);
2863  }
2864 
2865  // If we have a column map, use it to filter the entries.
2866  if (hasColMap ()) {
2867  Teuchos::Array<LO> filtered_indices (indices);
2868  SLocalGlobalViews inds_view;
2869  SLocalGlobalNCViews inds_ncview;
2870  inds_ncview.linds = filtered_indices();
2871  const size_t numFilteredEntries =
2872  filterIndices<LocalIndices>(inds_ncview);
2873  inds_view.linds = filtered_indices (0, numFilteredEntries);
2874  insertLocalIndicesImpl(localRow, inds_view.linds);
2875  }
2876  else {
2877  insertLocalIndicesImpl(localRow, indices);
2878  }
2879 #ifdef HAVE_TPETRA_DEBUG
2880  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2881  indicesAreAllocated() == false || isLocallyIndexed() == false,
2882  std::logic_error,
2883  ": Violated stated post-conditions. Please contact Tpetra team.");
2884 #endif
2885  }
2886 
2887 
2888  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2889  void
2891  insertGlobalIndices (const GlobalOrdinal grow,
2892  const Teuchos::ArrayView<const GlobalOrdinal>& indices)
2893  {
2894  using Teuchos::ArrayView;
2895  typedef LocalOrdinal LO;
2896  typedef GlobalOrdinal GO;
2897  typedef typename ArrayView<const GO>::size_type size_type;
2898  const char tfecfFuncName[] = "insertGlobalIndices";
2899 
2900  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2901  isLocallyIndexed() == true, std::runtime_error,
2902  ": graph indices are local; use insertLocalIndices().");
2903  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2904  ! hasRowInfo (), std::runtime_error,
2905  ": graph row information was deleted at fillComplete().");
2906  // This can't really be satisfied for now, because if we are
2907  // fillComplete(), then we are local. In the future, this may
2908  // change. However, the rule that modification require active
2909  // fill will not change.
2910  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2911  isFillActive() == false, std::runtime_error,
2912  ": You are not allowed to call this method if fill is not active. "
2913  "If fillComplete has been called, you must first call resumeFill "
2914  "before you may insert indices.");
2915  if (! indicesAreAllocated ()) {
2916  allocateIndices (GlobalIndices);
2917  }
2918  const LO myRow = rowMap_->getLocalElement (grow);
2919  if (myRow != Teuchos::OrdinalTraits<LO>::invalid ()) {
2920 #ifdef HAVE_TPETRA_DEBUG
2921  if (hasColMap ()) {
2922  using std::endl;
2923  const map_type& colMap = * (getColMap ());
2924  // In a debug build, keep track of the nonowned ("bad") column
2925  // indices, so that we can display them in the exception
2926  // message. In a release build, just ditch the loop early if
2927  // we encounter a nonowned column index.
2928  Array<GO> badColInds;
2929  bool allInColMap = true;
2930  for (size_type k = 0; k < indices.size (); ++k) {
2931  if (! colMap.isNodeGlobalElement (indices[k])) {
2932  allInColMap = false;
2933  badColInds.push_back (indices[k]);
2934  }
2935  }
2936  if (! allInColMap) {
2937  std::ostringstream os;
2938  os << "Tpetra::CrsGraph::insertGlobalIndices: You attempted to insert "
2939  "entries in owned row " << grow << ", at the following column "
2940  "indices: " << toString (indices) << "." << endl;
2941  os << "Of those, the following indices are not in the column Map on "
2942  "this process: " << toString (badColInds) << "." << endl << "Since "
2943  "the matrix has a column Map already, it is invalid to insert "
2944  "entries at those locations.";
2945  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
2946  }
2947  }
2948 #endif // HAVE_TPETRA_DEBUG
2949  insertGlobalIndicesImpl (myRow, indices);
2950  }
2951  else { // a nonlocal row
2952  const size_type numIndices = indices.size ();
2953  // This creates the Array if it doesn't exist yet. std::map's
2954  // operator[] does a lookup each time, so it's better to pull
2955  // nonlocals_[grow] out of the loop.
2956  std::vector<GO>& nonlocalRow = nonlocals_[grow];
2957  for (size_type k = 0; k < numIndices; ++k) {
2958  nonlocalRow.push_back (indices[k]);
2959  }
2960  }
2961 #ifdef HAVE_TPETRA_DEBUG
2962  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2963  indicesAreAllocated() == false || isGloballyIndexed() == false,
2964  std::logic_error,
2965  ": Violated stated post-conditions. Please contact Tpetra team.");
2966 #endif
2967  }
2968 
2969 
2970  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2971  void
2973  insertGlobalIndicesFiltered (const GlobalOrdinal grow,
2974  const Teuchos::ArrayView<const GlobalOrdinal>& indices)
2975  {
2976  using Teuchos::Array;
2977  using Teuchos::ArrayView;
2978  typedef LocalOrdinal LO;
2979  typedef GlobalOrdinal GO;
2980  const char tfecfFuncName[] = "insertGlobalIndicesFiltered";
2981 
2982  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2983  isLocallyIndexed() == true, std::runtime_error,
2984  ": graph indices are local; use insertLocalIndices().");
2985  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2986  ! hasRowInfo (), std::runtime_error,
2987  ": graph row information was deleted at fillComplete().");
2988  // This can't really be satisfied for now, because if we are
2989  // fillComplete(), then we are local. In the future, this may
2990  // change. However, the rule that modification require active
2991  // fill will not change.
2992  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2993  isFillActive() == false, std::runtime_error,
2994  ": You are not allowed to call this method if fill is not active. "
2995  "If fillComplete has been called, you must first call resumeFill "
2996  "before you may insert indices.");
2997  if (! indicesAreAllocated ()) {
2998  allocateIndices (GlobalIndices);
2999  }
3000  const LO myRow = rowMap_->getLocalElement (grow);
3001  if (myRow != Teuchos::OrdinalTraits<LO>::invalid ()) {
3002  // If we have a column map, use it to filter the entries.
3003  if (hasColMap ()) {
3004  Array<GO> filtered_indices(indices);
3005  SLocalGlobalViews inds_view;
3006  SLocalGlobalNCViews inds_ncview;
3007  inds_ncview.ginds = filtered_indices();
3008  const size_t numFilteredEntries =
3009  filterIndices<GlobalIndices> (inds_ncview);
3010  inds_view.ginds = filtered_indices (0, numFilteredEntries);
3011  insertGlobalIndicesImpl(myRow, inds_view.ginds);
3012  }
3013  else {
3014  insertGlobalIndicesImpl(myRow, indices);
3015  }
3016  }
3017  else { // a nonlocal row
3018  typedef typename ArrayView<const GO>::size_type size_type;
3019  const size_type numIndices = indices.size ();
3020  for (size_type k = 0; k < numIndices; ++k) {
3021  nonlocals_[grow].push_back (indices[k]);
3022  }
3023  }
3024 #ifdef HAVE_TPETRA_DEBUG
3025  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3026  indicesAreAllocated() == false || isGloballyIndexed() == false,
3027  std::logic_error,
3028  ": Violated stated post-conditions. Please contact Tpetra team.");
3029 #endif
3030  }
3031 
3032 
3033  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3034  void
3036  removeLocalIndices (LocalOrdinal lrow)
3037  {
3038  const char tfecfFuncName[] = "removeLocalIndices: ";
3039  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3040  ! isFillActive (), std::runtime_error, "requires that fill is active.");
3041  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3042  isStorageOptimized (), std::runtime_error,
3043  "cannot remove indices after optimizeStorage() has been called.");
3044  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3045  isGloballyIndexed (), std::runtime_error, "graph indices are global.");
3046  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3047  ! rowMap_->isNodeLocalElement (lrow), std::runtime_error,
3048  "Local row " << lrow << " is not in the row Map on the calling process.");
3049  if (! indicesAreAllocated ()) {
3050  allocateIndices (LocalIndices);
3051  }
3052 
3053  // FIXME (mfh 13 Aug 2014) What if they haven't been cleared on
3054  // all processes?
3055  clearGlobalConstants ();
3056 
3057  if (k_numRowEntries_.dimension_0 () != 0) {
3058  const size_t oldNumEntries = k_numRowEntries_.h_view (lrow);
3059  nodeNumEntries_ -= oldNumEntries;
3060 
3061  // FIXME (mfh 07 Aug 2014) We should just sync at fillComplete,
3062  // but for now, for correctness, do the modify-sync cycle here.
3063  k_numRowEntries_.template modify<typename t_numRowEntries_::host_mirror_space> ();
3064  k_numRowEntries_.h_view(lrow) = 0;
3065  k_numRowEntries_.template sync<execution_space> ();
3066  }
3067 #ifdef HAVE_TPETRA_DEBUG
3068  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3069  getNumEntriesInLocalRow (lrow) != 0 ||
3070  ! indicesAreAllocated () ||
3071  ! isLocallyIndexed (), std::logic_error,
3072  ": Violated stated post-conditions. Please contact Tpetra team.");
3073 #endif // HAVE_TPETRA_DEBUG
3074  }
3075 
3076 
3077  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3078  void
3080  setAllIndices (const typename local_graph_type::row_map_type& rowPointers,
3081  const typename local_graph_type::entries_type::non_const_type& columnIndices)
3082  {
3083  const char tfecfFuncName[] = "setAllIndices: ";
3084  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3085  ! hasColMap () || getColMap ().is_null (), std::runtime_error,
3086  "The graph must have a column Map before you may call this method.");
3087  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3088  static_cast<size_t> (rowPointers.size ()) != this->getNodeNumRows () + 1,
3089  std::runtime_error, "rowPointers.size() = " << rowPointers.size () <<
3090  " != this->getNodeNumRows()+1 = " << (this->getNodeNumRows () + 1) <<
3091  ".");
3092 
3093  // FIXME (mfh 07 Aug 2014) We need to relax this restriction,
3094  // since the future model will be allocation at construction, not
3095  // lazy allocation on first insert.
3096  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3097  k_lclInds1D_.dimension_0 () != 0 || k_gblInds1D_.dimension_0 () != 0,
3098  std::runtime_error, "You may not call this method if 1-D data structures "
3099  "are already allocated.");
3100 
3101  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3102  lclInds2D_ != Teuchos::null || gblInds2D_ != Teuchos::null,
3103  std::runtime_error, "You may not call this method if 2-D data structures "
3104  "are already allocated.");
3105 
3106  const size_t localNumEntries = rowPointers(getNodeNumRows ());
3107 
3108  indicesAreAllocated_ = true;
3109  indicesAreLocal_ = true;
3110  pftype_ = StaticProfile; // if the profile wasn't static before, it sure is now.
3111  k_lclInds1D_ = columnIndices;
3112  k_rowPtrs_ = rowPointers;
3113  nodeNumAllocated_ = localNumEntries;
3114  nodeNumEntries_ = localNumEntries;
3115 
3116  // These normally get cleared out at the end of allocateIndices.
3117  // It makes sense to clear them out here, because at the end of
3118  // this method, the graph is allocated on the calling process.
3119  numAllocForAllRows_ = 0;
3120  k_numAllocPerRow_ = Kokkos::DualView<const size_t*, execution_space> ();
3121 
3122  checkInternalState ();
3123  }
3124 
3125 
3126  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3127  void
3129  setAllIndices (const Teuchos::ArrayRCP<size_t>& rowPointers,
3130  const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices)
3131  {
3132  using Kokkos::View;
3133  typedef typename local_graph_type::row_map_type row_map_type;
3134  typedef typename row_map_type::array_layout layout_type;
3135  typedef typename row_map_type::non_const_value_type row_offset_type;
3136  typedef View<size_t*, layout_type , Kokkos::HostSpace,
3137  Kokkos::MemoryUnmanaged> input_view_type;
3138  typedef typename row_map_type::non_const_type nc_row_map_type;
3139 
3140  const size_t size = static_cast<size_t> (rowPointers.size ());
3141  const bool same = Kokkos::Impl::is_same<size_t, row_offset_type>::value;
3142  input_view_type ptr_in (rowPointers.getRawPtr (), size);
3143 
3144  nc_row_map_type ptr_rot ("Tpetra::CrsGraph::ptr", size);
3145 
3146  if (same) { // size_t == row_offset_type
3147  // This compile-time logic ensures that the compiler never sees
3148  // an assignment of View<row_offset_type*, ...> to View<size_t*,
3149  // ...> unless size_t == row_offset_type.
3150  input_view_type ptr_decoy (rowPointers.getRawPtr (), size); // never used
3151  Kokkos::deep_copy (Kokkos::Impl::if_c<same,
3152  nc_row_map_type,
3153  input_view_type>::select (ptr_rot, ptr_decoy),
3154  ptr_in);
3155  }
3156  else { // size_t != row_offset_type
3157  // CudaUvmSpace != HostSpace, so this will be false in that case.
3158  const bool inHostMemory =
3159  Kokkos::Impl::is_same<typename row_map_type::memory_space,
3160  Kokkos::HostSpace>::value;
3161  if (inHostMemory) {
3162  // Copy (with cast from size_t to row_offset_type) to ptr_rot.
3163  typedef CopyOffsets<nc_row_map_type, input_view_type> functor_type;
3164  functor_type functor (ptr_rot, ptr_in);
3165  Kokkos::parallel_for (size, functor);
3166  }
3167  else { // Copy input row offsets to device first.
3168  //
3169  // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
3170  // execution space would avoid the double copy.
3171  //
3172  View<size_t*, layout_type ,execution_space > ptr_st ("Tpetra::CrsGraph::ptr", size);
3173  Kokkos::deep_copy (ptr_st, ptr_in);
3174  // Copy on device (casting from size_t to row_offset_type) to
3175  // ptr_rot. This executes in the output View's execution
3176  // space, which is the same as execution_space.
3177  typedef CopyOffsets<nc_row_map_type,
3178  View<size_t*, layout_type, execution_space> > functor_type;
3179  functor_type functor (ptr_rot, ptr_st);
3180  Kokkos::parallel_for (size, functor);
3181  }
3182  }
3183 
3184  Kokkos::View<LocalOrdinal*, layout_type , execution_space > k_ind =
3185  Kokkos::Compat::getKokkosViewDeepCopy<device_type> (columnIndices ());
3186  setAllIndices (ptr_rot, k_ind);
3187  }
3188 
3189 
3190  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3191  void
3193  getNumEntriesPerLocalRowUpperBound (Teuchos::ArrayRCP<const size_t>& boundPerLocalRow,
3194  size_t& boundForAllLocalRows,
3195  bool& boundSameForAllLocalRows) const
3196  {
3197  // The three output arguments. We assign them to the actual
3198  // output arguments at the end, in order to implement
3199  // transactional semantics.
3200  Teuchos::ArrayRCP<const size_t> numEntriesPerRow;
3201  size_t numEntriesForAll = 0;
3202  bool allRowsSame = true;
3203 
3204  const ptrdiff_t numRows = static_cast<ptrdiff_t> (this->getNodeNumRows ());
3205 
3206  if (! this->indicesAreAllocated ()) {
3207  if (k_numAllocPerRow_.dimension_0 () != 0) {
3208  numEntriesPerRow = Kokkos::Compat::persistingView (k_numAllocPerRow_.h_view);
3209  allRowsSame = false; // conservatively; we don't check the array
3210  }
3211  else {
3212  numEntriesForAll = numAllocForAllRows_;
3213  allRowsSame = true;
3214  }
3215  }
3216  else if (k_numRowEntries_.dimension_0 () != 0) {
3217  numEntriesPerRow = Kokkos::Compat::persistingView (k_numRowEntries_.h_view);
3218  allRowsSame = false; // conservatively; we don't check the array
3219  }
3220  else if (this->nodeNumAllocated_ == 0) {
3221  numEntriesForAll = 0;
3222  allRowsSame = true;
3223  }
3224  else {
3225  // left with the case that we have optimized storage. in this
3226  // case, we have to construct a list of row sizes.
3227  TEUCHOS_TEST_FOR_EXCEPTION(
3228  this->getProfileType () != StaticProfile, std::logic_error,
3229  "Tpetra::CrsGraph::getNumEntriesPerRowUpperBound: "
3230  "The graph is not StaticProfile, but storage appears to be optimized. "
3231  "Please report this bug to the Tpetra developers.");
3232  TEUCHOS_TEST_FOR_EXCEPTION(
3233  numRows != 0 && k_rowPtrs_.dimension_0 () == 0, std::logic_error,
3234  "Tpetra::CrsGraph::getNumEntriesPerRowUpperBound: "
3235  "The graph has " << numRows << " (> 0) row" << (numRows != 1 ? "s" : "")
3236  << " on the calling process, but the k_rowPtrs_ array has zero entries. "
3237  "Please report this bug to the Tpetra developers.");
3238 
3239  Teuchos::ArrayRCP<size_t> numEnt;
3240  if (numRows != 0) {
3241  numEnt = Teuchos::arcp<size_t> (numRows);
3242  }
3243 
3244  // We have to iterate through the row offsets anyway, so we
3245  // might as well check whether all rows' bounds are the same.
3246  bool allRowsReallySame = false;
3247  for (ptrdiff_t i = 0; i < numRows; ++i) {
3248  numEnt[i] = k_rowPtrs_(i+1) - k_rowPtrs_(i);
3249  if (i != 0 && numEnt[i] != numEnt[i-1]) {
3250  allRowsReallySame = false;
3251  }
3252  }
3253  if (allRowsReallySame) {
3254  if (numRows == 0) {
3255  numEntriesForAll = 0;
3256  } else {
3257  numEntriesForAll = numEnt[1] - numEnt[0];
3258  }
3259  allRowsSame = true;
3260  }
3261  else {
3262  numEntriesPerRow = numEnt; // Teuchos::arcp_const_cast<const size_t> (numEnt);
3263  allRowsSame = false; // conservatively; we don't check the array
3264  }
3265  }
3266 
3267  TEUCHOS_TEST_FOR_EXCEPTION(
3268  numEntriesForAll != 0 && numEntriesPerRow.size () != 0, std::logic_error,
3269  "Tpetra::CrsGraph::getNumEntriesPerLocalRowUpperBound: "
3270  "numEntriesForAll and numEntriesPerRow are not consistent. The former "
3271  "is nonzero (" << numEntriesForAll << "), but the latter has nonzero "
3272  "size " << numEntriesPerRow.size () << ". "
3273  "Please report this bug to the Tpetra developers.");
3274  TEUCHOS_TEST_FOR_EXCEPTION(
3275  numEntriesForAll != 0 && ! allRowsSame, std::logic_error,
3276  "Tpetra::CrsGraph::getNumEntriesPerLocalRowUpperBound: "
3277  "numEntriesForAll and allRowsSame are not consistent. The former "
3278  "is nonzero (" << numEntriesForAll << "), but the latter is false. "
3279  "Please report this bug to the Tpetra developers.");
3280  TEUCHOS_TEST_FOR_EXCEPTION(
3281  numEntriesPerRow.size () != 0 && allRowsSame, std::logic_error,
3282  "Tpetra::CrsGraph::getNumEntriesPerLocalRowUpperBound: "
3283  "numEntriesPerRow and allRowsSame are not consistent. The former has "
3284  "nonzero length " << numEntriesForAll << ", but the latter is true. "
3285  "Please report this bug to the Tpetra developers.");
3286 
3287  boundPerLocalRow = numEntriesPerRow;
3288  boundForAllLocalRows = numEntriesForAll;
3289  boundSameForAllLocalRows = allRowsSame;
3290  }
3291 
3292 
3293  // TODO: in the future, globalAssemble() should use import/export functionality
3294  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3295  void
3298  {
3299  using Teuchos::Array;
3300  using Teuchos::as;
3301  using Teuchos::Comm;
3302  using Teuchos::gatherAll;
3303  using Teuchos::ireceive;
3304  using Teuchos::isend;
3305  using Teuchos::outArg;
3306  using Teuchos::REDUCE_MAX;
3307  using Teuchos::reduceAll;
3308  using Teuchos::toString;
3309  using Teuchos::waitAll;
3310  using std::endl;
3311  using std::make_pair;
3312  using std::pair;
3313  typedef GlobalOrdinal GO;
3314  typedef typename std::map<GO, std::vector<GO> >::const_iterator NLITER;
3315  typedef typename Array<GO>::size_type size_type;
3316 
3317  const char tfecfFuncName[] = "globalAssemble"; // for exception macro
3318  RCP<const Comm<int> > comm = getComm();
3319 
3320  const int numImages = comm->getSize();
3321  const int myImageID = comm->getRank();
3322 #ifdef HAVE_TPETRA_DEBUG
3323  Teuchos::barrier (*comm);
3324 #endif // HAVE_TPETRA_DEBUG
3325 
3326  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive(), std::runtime_error,
3327  ": requires that fill is active.");
3328  // Determine if any nodes have global entries to share
3329  {
3330  size_t MyNonlocals = nonlocals_.size(), MaxGlobalNonlocals;
3331  reduceAll (*comm, REDUCE_MAX, MyNonlocals, outArg (MaxGlobalNonlocals));
3332  if (MaxGlobalNonlocals == 0) {
3333  return; // no entries to share
3334  }
3335  }
3336 
3337  // compute a list of NLRs from nonlocals_ and use it to compute:
3338  // IdsAndRows: a vector of (id,row) pairs
3339  // NLR2Id: a map from NLR to the Id that owns it
3340  // globalNeighbors: a global graph of connectivity between images:
3341  // globalNeighbors(i,j) indicates that j sends to i
3342  // sendIDs: a list of all images I send to
3343  // recvIDs: a list of all images I receive from (constructed later)
3344  Array<pair<int, GO> > IdsAndRows;
3345  std::map<GO, int> NLR2Id;
3346  Teuchos::SerialDenseMatrix<int, char> globalNeighbors;
3347  Array<int> sendIDs, recvIDs;
3348  {
3349  // nonlocals_ contains the entries we are holding for all
3350  // nonowned rows. Compute list of rows for which we have data.
3351  Array<GO> NLRs;
3352  std::set<GO> setOfRows;
3353  for (NLITER iter = nonlocals_.begin (); iter != nonlocals_.end (); ++iter) {
3354  setOfRows.insert (iter->first);
3355  }
3356  // copy the elements in the set into an Array
3357  NLRs.resize (setOfRows.size ());
3358  std::copy (setOfRows.begin (), setOfRows.end (), NLRs.begin ());
3359 
3360  // get a list of ImageIDs for the non-local rows (NLRs)
3361  Array<int> NLRIds(NLRs.size());
3362  {
3363  const LookupStatus stat =
3364  rowMap_->getRemoteIndexList (NLRs (), NLRIds ());
3365  int lclerror = ( stat == IDNotPresent ? 1 : 0 );
3366  int gblerror;
3367  reduceAll<int, int> (*comm, REDUCE_MAX, lclerror, outArg (gblerror));
3368  if (gblerror != 0) {
3369  const int myRank = comm->getRank ();
3370  std::ostringstream os;
3371  os << "On one or more processes in the communicator, "
3372  << "there were insertions into rows of the graph that do not "
3373  << "exist in the row Map on any process in the communicator."
3374  << endl << "This process " << myRank << " is "
3375  << (lclerror == 0 ? "not " : "") << "one of those offending "
3376  << "processes." << endl;
3377  if (lclerror != 0) {
3378  // If NLRIds[k] is -1, then NLRs[k] is a row index not in
3379  // the row Map. Collect this list of invalid row indices
3380  // for display in the exception message.
3381  Array<GO> invalidNonlocalRows;
3382  for (size_type k = 0; k < NLRs.size (); ++k) {
3383  if (NLRIds[k] == -1) {
3384  invalidNonlocalRows.push_back (NLRs[k]);
3385  }
3386  }
3387  const size_type numInvalid = invalidNonlocalRows.size ();
3388  os << "On this process, " << numInvalid << " nonlocal row"
3389  << (numInvalid != 1 ? "s " : " ") << " were inserted that are "
3390  << "not in the row Map on any process." << endl;
3391  // Don't print _too_ many nonlocal rows.
3392  if (numInvalid <= 100) {
3393  os << "Offending row indices: "
3394  << toString (invalidNonlocalRows ()) << endl;
3395  }
3396  }
3397  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3398  gblerror != 0, std::runtime_error,
3399  ": nonlocal entries correspond to invalid rows."
3400  << endl << os.str ());
3401  }
3402  }
3403 
3404  // build up a list of neighbors, as well as a map between NLRs and Ids
3405  // localNeighbors[i] != 0 iff I have data to send to image i
3406  // put NLRs,Ids into an array of pairs
3407  IdsAndRows.reserve(NLRs.size());
3408  Array<char> localNeighbors(numImages, 0);
3409  typename Array<GO>::const_iterator nlr;
3410  typename Array<int>::const_iterator id;
3411  for (nlr = NLRs.begin(), id = NLRIds.begin();
3412  nlr != NLRs.end(); ++nlr, ++id) {
3413  NLR2Id[*nlr] = *id;
3414  localNeighbors[*id] = 1;
3415  // IdsAndRows.push_back(make_pair<int,GlobalOrdinal>(*id,*nlr));
3416  IdsAndRows.push_back(make_pair(*id,*nlr));
3417  }
3418  for (int j=0; j<numImages; ++j) {
3419  if (localNeighbors[j]) {
3420  sendIDs.push_back(j);
3421  }
3422  }
3423  // sort IdsAndRows, by Ids first, then rows
3424  std::sort(IdsAndRows.begin(),IdsAndRows.end());
3425  // gather from other nodes to form the full graph
3426  globalNeighbors.shapeUninitialized(numImages,numImages);
3427  gatherAll (*getComm(), numImages, localNeighbors.getRawPtr(),
3428  numImages * numImages, globalNeighbors.values());
3429  // globalNeighbors at this point contains (on all images) the
3430  // connectivity between the images.
3431  // globalNeighbors(i,j) != 0 means that j sends to i/that i receives from j
3432  }
3433 
3435  // FIGURE OUT WHO IS SENDING TO WHOM AND HOW MUCH
3436  // DO THIS IN THE PROCESS OF PACKING ALL OUTGOING DATA ACCORDING TO DESTINATION ID
3438 
3439  // loop over all columns to know from which images I can expect to receive something
3440  for (int j=0; j<numImages; ++j) {
3441  if (globalNeighbors(myImageID,j)) {
3442  recvIDs.push_back(j);
3443  }
3444  }
3445  const size_t numRecvs = recvIDs.size();
3446 
3447  // we know how many we're sending to already
3448  // form a contiguous list of all data to be sent
3449  // track the number of entries for each ID
3450  Array<pair<GO, GO> > IJSendBuffer;
3451  Array<size_t> sendSizes(sendIDs.size(), 0);
3452  size_t numSends = 0;
3453  for (typename Array<pair<int, GO> >::const_iterator IdAndRow = IdsAndRows.begin();
3454  IdAndRow != IdsAndRows.end(); ++IdAndRow) {
3455  int id = IdAndRow->first;
3456  GO row = IdAndRow->second;
3457  // have we advanced to a new send?
3458  if (sendIDs[numSends] != id) {
3459  numSends++;
3460  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(sendIDs[numSends] != id,
3461  std::logic_error, ": internal logic error. Contact Tpetra team.");
3462  }
3463  // copy data for row into contiguous storage
3464  for (typename std::vector<GO>::const_iterator j = nonlocals_[row].begin ();
3465  j != nonlocals_[row].end (); ++j) {
3466  IJSendBuffer.push_back (pair<GlobalOrdinal, GlobalOrdinal> (row, *j));
3467  sendSizes[numSends]++;
3468  }
3469  }
3470  if (IdsAndRows.size() > 0) {
3471  numSends++; // one last increment, to make it a count instead of an index
3472  }
3473  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3474  as<typename Array<int>::size_type> (numSends) != sendIDs.size (),
3475  std::logic_error, ": internal logic error. Contact Tpetra team.");
3476 
3477  // don't need this data anymore
3478  nonlocals_.clear();
3479 
3481  // TRANSMIT SIZE INFO BETWEEN SENDERS AND RECEIVERS
3483 
3484  // Array of pending nonblocking communication requests. It's OK
3485  // to mix nonblocking send and receive requests in the same
3486  // waitAll() call.
3487  Array<RCP<Teuchos::CommRequest<int> > > requests;
3488 
3489  // perform non-blocking sends: send sizes to our recipients
3490  for (size_t s = 0; s < numSends ; ++s) {
3491  // We're using a nonowning RCP because all communication
3492  // will be local to this method and the scope of our data
3493  requests.push_back (isend<int, size_t> (*comm,
3494  rcp (&sendSizes[s], false),
3495  sendIDs[s]));
3496  }
3497  // perform non-blocking receives: receive sizes from our senders
3498  Array<size_t> recvSizes (numRecvs);
3499  for (size_t r = 0; r < numRecvs; ++r) {
3500  // We're using a nonowning RCP because all communication
3501  // will be local to this method and the scope of our data
3502  requests.push_back (ireceive (*comm, rcp (&recvSizes[r], false), recvIDs[r]));
3503  }
3504  // Wait on all the nonblocking sends and receives.
3505  if (! requests.empty()) {
3506  waitAll (*comm, requests());
3507  }
3508 #ifdef HAVE_TPETRA_DEBUG
3509  Teuchos::barrier (*comm);
3510 #endif // HAVE_TPETRA_DEBUG
3511 
3512  // This doesn't necessarily deallocate the array.
3513  requests.resize (0);
3514 
3516  // NOW SEND/RECEIVE ALL ROW DATA
3518  // from the size info, build the ArrayViews into IJSendBuffer
3519  Array<ArrayView<pair<GO,GO> > > sendBuffers(numSends,null);
3520  {
3521  size_t cur = 0;
3522  for (size_t s=0; s<numSends; ++s) {
3523  sendBuffers[s] = IJSendBuffer(cur,sendSizes[s]);
3524  cur += sendSizes[s];
3525  }
3526  }
3527  // perform non-blocking sends
3528  for (size_t s=0; s < numSends ; ++s)
3529  {
3530  // We're using a nonowning RCP because all communication
3531  // will be local to this method and the scope of our data
3532  ArrayRCP<pair<GO,GO> > tmpSendBuf =
3533  arcp (sendBuffers[s].getRawPtr(), 0, sendBuffers[s].size(), false);
3534  requests.push_back (isend<int, pair<GO,GO> > (*comm, tmpSendBuf, sendIDs[s]));
3535  }
3536  // calculate amount of storage needed for receives
3537  // setup pointers for the receives as well
3538  size_t totalRecvSize = std::accumulate (recvSizes.begin(), recvSizes.end(), 0);
3539  Array<pair<GO,GO> > IJRecvBuffer (totalRecvSize);
3540  // from the size info, build the ArrayViews into IJRecvBuffer
3541  Array<ArrayView<pair<GO,GO> > > recvBuffers (numRecvs, null);
3542  {
3543  size_t cur = 0;
3544  for (size_t r=0; r<numRecvs; ++r) {
3545  recvBuffers[r] = IJRecvBuffer(cur,recvSizes[r]);
3546  cur += recvSizes[r];
3547  }
3548  }
3549  // perform non-blocking recvs
3550  for (size_t r = 0; r < numRecvs; ++r) {
3551  // We're using a nonowning RCP because all communication
3552  // will be local to this method and the scope of our data
3553  ArrayRCP<pair<GO,GO> > tmpRecvBuf =
3554  arcp (recvBuffers[r].getRawPtr(), 0, recvBuffers[r].size(), false);
3555  requests.push_back (ireceive (*comm, tmpRecvBuf, recvIDs[r]));
3556  }
3557  // perform waits
3558  if (! requests.empty()) {
3559  waitAll (*comm, requests());
3560  }
3561 #ifdef HAVE_TPETRA_DEBUG
3562  Teuchos::barrier (*comm);
3563 #endif // HAVE_TPETRA_DEBUG
3564 
3566  // NOW PROCESS THE RECEIVED ROW DATA
3568  // TODO: instead of adding one entry at a time, add one row at a time.
3569  // this requires resorting; they arrived sorted by sending node,
3570  // so that entries could be non-contiguous if we received
3571  // multiple entries for a particular row from different processors.
3572  // it also requires restoring the data, which may make it not worth the trouble.
3573  for (typename Array<pair<GO,GO> >::const_iterator ij = IJRecvBuffer.begin();
3574  ij != IJRecvBuffer.end(); ++ij)
3575  {
3576  insertGlobalIndicesFiltered (ij->first, tuple<GO> (ij->second));
3577  }
3579  }
3580 
3581 
3582  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3583  void
3585  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3586  {
3587  const char tfecfFuncName[] = "resumeFill";
3588  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! hasRowInfo(), std::runtime_error,
3589  ": Sorry, you cannot resume fill of the CrsGraph, since the graph's row "
3590  "information was deleted in fillComplete().");
3591 
3592 #ifdef HAVE_TPETRA_DEBUG
3593  Teuchos::barrier( *rowMap_->getComm() );
3594 #endif // HAVE_TPETRA_DEBUG
3595  clearGlobalConstants();
3596  if (params != null) this->setParameterList (params);
3597  lowerTriangular_ = false;
3598  upperTriangular_ = false;
3599  // either still sorted/merged or initially sorted/merged
3600  indicesAreSorted_ = true;
3601  noRedundancies_ = true;
3602  fillComplete_ = false;
3603 #ifdef HAVE_TPETRA_DEBUG
3604  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3605  ! isFillActive() || isFillComplete(), std::logic_error,
3606  "::resumeFill(): At end of method, either fill is not active or fill is "
3607  "complete. This violates stated post-conditions. Please report this bug "
3608  "to the Tpetra developers.");
3609 #endif // HAVE_TPETRA_DEBUG
3610  }
3611 
3612 
3613  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3614  void
3616  fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3617  {
3618  // If the graph already has domain and range Maps, don't clobber
3619  // them. If it doesn't, use the current row Map for both the
3620  // domain and range Maps.
3621  //
3622  // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
3623  // column Map, and column indices are inserted which are not in
3624  // the row Map on any process, this will cause troubles. However,
3625  // that is not a common case for most applications that we
3626  // encounter, and checking for it might require more
3627  // communication.
3628  Teuchos::RCP<const map_type> domMap = this->getDomainMap ();
3629  if (domMap.is_null ()) {
3630  domMap = this->getRowMap ();
3631  }
3632  Teuchos::RCP<const map_type> ranMap = this->getRangeMap ();
3633  if (ranMap.is_null ()) {
3634  ranMap = this->getRowMap ();
3635  }
3636  this->fillComplete (domMap, ranMap, params);
3637  }
3638 
3639 
3640  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3641  void
3643  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3644  const Teuchos::RCP<const map_type>& rangeMap,
3645  const Teuchos::RCP<Teuchos::ParameterList>& params)
3646  {
3647  const char tfecfFuncName[] = "fillComplete";
3648 
3649 #ifdef HAVE_TPETRA_DEBUG
3650  rowMap_->getComm ()->barrier ();
3651 #endif // HAVE_TPETRA_DEBUG
3652 
3653  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
3654  std::runtime_error, ": Graph fill state must be active (isFillActive() "
3655  "must be true) before calling fillComplete().");
3656 
3657  const int numProcs = getComm ()->getSize ();
3658 
3659  //
3660  // Read and set parameters
3661  //
3662 
3663  // Does the caller want to sort remote GIDs (within those owned by
3664  // the same process) in makeColMap()?
3665  if (! params.is_null ()) {
3666  if (params->isParameter ("sort column map ghost gids")) {
3668  params->get<bool> ("sort column map ghost gids",
3670  }
3671  else if (params->isParameter ("Sort column Map ghost GIDs")) {
3673  params->get<bool> ("Sort column Map ghost GIDs",
3675  }
3676  }
3677 
3678  // If true, the caller promises that no process did nonlocal
3679  // changes since the last call to fillComplete.
3680  bool assertNoNonlocalInserts = false;
3681  if (! params.is_null ()) {
3682  assertNoNonlocalInserts =
3683  params->get<bool> ("No Nonlocal Changes", assertNoNonlocalInserts);
3684  }
3685 
3686  //
3687  // Allocate indices, if they haven't already been allocated
3688  //
3689  if (! indicesAreAllocated ()) {
3690  if (hasColMap ()) {
3691  // We have a column Map, so use local indices.
3692  allocateIndices (LocalIndices);
3693  } else {
3694  // We don't have a column Map, so use global indices.
3695  allocateIndices (GlobalIndices);
3696  }
3697  }
3698 
3699  //
3700  // Do global assembly, if requested and if the communicator
3701  // contains more than one process.
3702  //
3703  const bool mayNeedGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3704  if (mayNeedGlobalAssemble) {
3705  // This first checks if we need to do global assembly.
3706  // The check costs a single all-reduce.
3707  globalAssemble ();
3708  }
3709  else {
3710  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3711  numProcs > 1 && nonlocals_.size() > 0, std::runtime_error,
3712  ":" << std::endl << "The graph's communicator contains only one "
3713  "process, but there are nonlocal entries. " << std::endl <<
3714  "This probably means that invalid entries were added to the graph.");
3715  }
3716 
3717  // Set domain and range Map. This may clear the Import / Export
3718  // objects if the new Maps differ from any old ones.
3719  setDomainRangeMaps (domainMap, rangeMap);
3720 
3721  // If the graph does not already have a column Map (either from
3722  // the user constructor calling the version of the constructor
3723  // that takes a column Map, or from a previous fillComplete call),
3724  // then create it.
3725  if (! hasColMap ()) {
3726  makeColMap ();
3727  }
3728 
3729  // Make indices local, if they aren't already.
3730  // The method doesn't do any work if the indices are already local.
3731  makeIndicesLocal ();
3732 
3733  if (! isSorted ()) {
3734  // If this process has no indices, then CrsGraph considers it
3735  // already trivially sorted. Thus, this method need not be
3736  // called on all processes in the row Map's communicator.
3737  sortAllIndices ();
3738  }
3739 
3740  if (! isMerged()) {
3741  mergeAllIndices ();
3742  }
3743  makeImportExport (); // Make Import and Export objects, if necessary
3744  computeGlobalConstants ();
3745  fillLocalGraph (params);
3746  fillComplete_ = true;
3747 
3748 #ifdef HAVE_TPETRA_DEBUG
3749  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3750  isFillActive() == true || isFillComplete() == false, std::logic_error,
3751  ": Violated stated post-conditions. Please contact Tpetra team.");
3752 #endif
3753 
3754  checkInternalState ();
3755  }
3756 
3757 
3758  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3759  void
3761  expertStaticFillComplete (const Teuchos::RCP<const map_type>& domainMap,
3762  const Teuchos::RCP<const map_type>& rangeMap,
3763  const Teuchos::RCP<const import_type>& importer,
3764  const Teuchos::RCP<const export_type>& exporter,
3765  const Teuchos::RCP<Teuchos::ParameterList>& params)
3766  {
3767  const char tfecfFuncName[] = "expertStaticFillComplete: ";
3768 #ifdef HAVE_TPETRA_MMM_TIMINGS
3769  std::string label;
3770  if(!params.is_null())
3771  label = params->get("Timer Label",label);
3772  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
3773  using Teuchos::TimeMonitor;
3774  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
3775 #endif
3776 
3777 
3778  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3779  domainMap.is_null () || rangeMap.is_null (),
3780  std::runtime_error, "The input domain Map and range Map must be nonnull.");
3781  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3782  pftype_ != StaticProfile, std::runtime_error, "You may not call this "
3783  "method unless the graph is StaticProfile.");
3784  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3785  isFillComplete () || ! hasColMap (), std::runtime_error, "You may not "
3786  "call this method unless the graph has a column Map.");
3787  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3788  getNodeNumRows () > 0 && k_rowPtrs_.dimension_0 () == 0,
3789  std::runtime_error, "The calling process has getNodeNumRows() = "
3790  << getNodeNumRows () << " > 0 rows, but the row offsets array has not "
3791  "been set.");
3792  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3793  static_cast<size_t> (k_rowPtrs_.dimension_0 ()) != getNodeNumRows () + 1,
3794  std::runtime_error, "The row offsets array has length " <<
3795  k_rowPtrs_.dimension_0 () << " != getNodeNumRows()+1 = " <<
3796  (getNodeNumRows () + 1) << ".");
3797 
3798  // Note: We don't need to do the following things which are normally done in fillComplete:
3799  // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAllIndices, mergeAllIndices
3800 
3801  // Note: Need to do this so computeGlobalConstants & fillLocalGraph work
3802  //
3803  // The first assignment is always true if the graph has 1-D
3804  // storage (StaticProfile). The second assignment is only true if
3805  // storage is packed.
3806  nodeNumAllocated_ = k_rowPtrs_(getNodeNumRows ());
3807  nodeNumEntries_ = nodeNumAllocated_;
3808 
3809  // Constants from allocateIndices
3810  //
3811  // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
3812  // away once the graph is allocated. expertStaticFillComplete
3813  // either presumes that the graph is allocated, or "allocates" it.
3814  //
3815  // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
3816  // version of CrsGraph is to allocate in the constructor, not
3817  // lazily on first insert. That will make both
3818  // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
3819  numAllocForAllRows_ = 0;
3820  k_numAllocPerRow_ = Kokkos::DualView<const size_t*, execution_space> ();
3821  indicesAreAllocated_ = true;
3822 
3823  // Constants from makeIndicesLocal
3824  //
3825  // The graph has a column Map, so its indices had better be local.
3826  indicesAreLocal_ = true;
3827  indicesAreGlobal_ = false;
3828 
3829  // set domain/range map: may clear the import/export objects
3830 #ifdef HAVE_TPETRA_MMM_TIMINGS
3831  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
3832 #endif
3833  setDomainRangeMaps (domainMap, rangeMap);
3834 
3835  // Presume the user sorted and merged the arrays first
3836  indicesAreSorted_ = true;
3837  noRedundancies_ = true;
3838 
3839  // makeImportExport won't create a new importer/exporter if I set one here first.
3840 #ifdef HAVE_TPETRA_MMM_TIMINGS
3841  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
3842 #endif
3843 
3844  importer_ = Teuchos::null;
3845  exporter_ = Teuchos::null;
3846  if (importer != Teuchos::null) {
3847  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3848  ! importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
3849  ! importer->getTargetMap ()->isSameAs (*getColMap ()),
3850  std::invalid_argument,": importer does not match matrix maps.");
3851  importer_ = importer;
3852 
3853  }
3854 
3855 #ifdef HAVE_TPETRA_MMM_TIMINGS
3856  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
3857 #endif
3858 
3859  if (exporter != Teuchos::null) {
3860  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3861  ! exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
3862  ! exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
3863  std::invalid_argument,": exporter does not match matrix maps.");
3864  exporter_ = exporter;
3865  }
3866 
3867 #ifdef HAVE_TPETRA_MMM_TIMINGS
3868  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
3869 #endif
3870 
3871  makeImportExport ();
3872 
3873  // Compute the constants
3874 #ifdef HAVE_TPETRA_MMM_TIMINGS
3875  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC"))));
3876 #endif
3877  computeGlobalConstants ();
3878 
3879  // Since we have a StaticProfile, fillLocalGraph will do the right thing...
3880 #ifdef HAVE_TPETRA_MMM_TIMINGS
3881  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
3882 #endif
3883  fillLocalGraph (params);
3884  fillComplete_ = true;
3885 
3886 #ifdef HAVE_TPETRA_MMM_TIMINGS
3887  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
3888 #endif
3889  checkInternalState ();
3890  }
3891 
3892 
3893  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3894  void
3896  fillLocalGraph (const Teuchos::RCP<Teuchos::ParameterList>& params)
3897  {
3898  using Kokkos::create_mirror_view;
3899  using Teuchos::ArrayRCP;
3900  using Teuchos::null;
3901  using Teuchos::RCP;
3902  using Teuchos::rcp;
3903  typedef ArrayRCP<size_t>::size_type size_type;
3904  typedef t_numRowEntries_ row_entries_type;
3905  typedef typename local_graph_type::row_map_type row_map_type;
3906  typedef typename row_map_type::non_const_type non_const_row_map_type;
3907  typedef typename local_graph_type::entries_type::non_const_type lclinds_1d_type;
3908 
3909  const size_t lclNumRows = this->getNodeNumRows ();
3910 
3911  // This method's goal is to fill in the two arrays (compressed
3912  // sparse row format) that define the sparse graph's structure.
3913  //
3914  // Use the nonconst version of row_map_type for k_ptrs, because
3915  // the latter is const and we need to modify k_ptrs here.
3916  non_const_row_map_type k_ptrs;
3917  row_map_type k_ptrs_const;
3918  lclinds_1d_type k_inds;
3919 
3920  // The number of entries in each locally owned row. This is a
3921  // DualView. 2-D storage lives on host and is currently not
3922  // thread-safe for parallel kernels even on host, so we have to
3923  // work sequentially with host storage in that case.
3924  row_entries_type k_numRowEnt = k_numRowEntries_;
3925  typename row_entries_type::t_host h_numRowEnt = k_numRowEnt.h_view;
3926 
3927  bool requestOptimizedStorage = true;
3928  if (! params.is_null () && ! params->get ("Optimize Storage", true)) {
3929  requestOptimizedStorage = false;
3930  }
3931  if (getProfileType () == DynamicProfile) {
3932  // Pack 2-D storage (DynamicProfile) into 1-D packed storage.
3933  //
3934  // DynamicProfile means that the graph's column indices are
3935  // currently stored in a 2-D "unpacked" format, in the
3936  // arrays-of-arrays lclInds2D_. We allocate 1-D storage
3937  // (k_inds) and then copy from 2-D storage (lclInds2D_) into 1-D
3938  // storage (k_inds).
3939  TEUCHOS_TEST_FOR_EXCEPTION(
3940  static_cast<size_t> (k_numRowEnt.dimension_0 ()) != lclNumRows,
3941  std::logic_error, "Tpetra::CrsGraph::fillLocalGraph (called from "
3942  "fillComplete or expertStaticFillComplete): For the DynamicProfile "
3943  "branch, k_numRowEnt has the wrong length. "
3944  "k_numRowEnt.dimension_0() = " << k_numRowEnt.dimension_0 ()
3945  << " != getNodeNumRows() = " << lclNumRows << "");
3946 
3947  // Pack the row offsets into k_ptrs, by doing a sum-scan of
3948  // the array of valid entry counts per row (h_numRowEnt).
3949  //
3950  // Total number of entries in the matrix on the calling
3951  // process. We will compute this in the loop below. It's
3952  // cheap to compute and useful as a sanity check.
3953  size_t lclTotalNumEntries = 0;
3954  // This will be a host view of packed row offsets.
3955  typename non_const_row_map_type::HostMirror h_ptrs;
3956  {
3957  // Allocate the packed row offsets array.
3958  k_ptrs = non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows+1);
3959  k_ptrs_const = k_ptrs;
3960  //
3961  // FIXME hack until we get parallel_scan in kokkos
3962  //
3963  h_ptrs = create_mirror_view (k_ptrs);
3964  h_ptrs(0) = 0;
3965  for (size_type i = 0; i < static_cast<size_type> (lclNumRows); ++i) {
3966  const size_t numEnt = h_numRowEnt(i);
3967  lclTotalNumEntries += numEnt;
3968  h_ptrs(i+1) = h_ptrs(i) + numEnt;
3969  }
3970  Kokkos::deep_copy (k_ptrs, h_ptrs);
3971  }
3972 
3973  TEUCHOS_TEST_FOR_EXCEPTION(
3974  static_cast<size_t> (k_ptrs.dimension_0 ()) != lclNumRows + 1,
3975  std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: In DynamicProfile "
3976  "branch, after packing k_ptrs, k_ptrs.dimension_0() = "
3977  << k_ptrs.dimension_0 () << " != (lclNumRows+1) = "
3978  << (lclNumRows+1) << ".");
3979  TEUCHOS_TEST_FOR_EXCEPTION(
3980  static_cast<size_t> (h_ptrs.dimension_0 ()) != lclNumRows + 1,
3981  std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: In DynamicProfile "
3982  "branch, after packing h_ptrs, h_ptrs.dimension_0() = "
3983  << h_ptrs.dimension_0 () << " != (lclNumRows+1) = "
3984  << (lclNumRows+1) << ".");
3985  // FIXME (mfh 08 Aug 2014) This assumes UVM.
3986  TEUCHOS_TEST_FOR_EXCEPTION(
3987  k_ptrs(lclNumRows) != lclTotalNumEntries, std::logic_error,
3988  "Tpetra::CrsGraph::fillLocalGraph: In DynamicProfile branch, after "
3989  "packing k_ptrs, k_ptrs(lclNumRows = " << lclNumRows << ") = " <<
3990  k_ptrs(lclNumRows) << " != total number of entries on the calling "
3991  "process = " << lclTotalNumEntries << ".");
3992 
3993  // Allocate the array of packed column indices.
3994  k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
3995  // We need a host view of the above, since 2-D storage lives on host.
3996  typename lclinds_1d_type::HostMirror h_inds = create_mirror_view (k_inds);
3997  // Pack the column indices.
3998  for (size_t row = 0; row < lclNumRows; ++row) {
3999  const size_t numEnt = h_numRowEnt(row);
4000  std::copy (lclInds2D_[row].begin (),
4001  lclInds2D_[row].begin () + numEnt,
4002  h_inds.ptr_on_device () + h_ptrs(row));
4003  }
4004  Kokkos::deep_copy (k_inds, h_inds);
4005 
4006  // Sanity check of packed row offsets.
4007  if (k_ptrs.dimension_0 () != 0) {
4008  const size_t numOffsets = static_cast<size_t> (k_ptrs.dimension_0 ());
4009  TEUCHOS_TEST_FOR_EXCEPTION(
4010  static_cast<size_t> (k_ptrs(numOffsets-1)) != k_inds.dimension_0 (),
4011  std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: "
4012  "In DynamicProfile branch, after packing, k_ptrs(" << (numOffsets-1)
4013  << ") = " << k_ptrs(numOffsets-1) << " != k_inds.dimension_0() = "
4014  << k_inds.dimension_0 () << ".");
4015  }
4016  }
4017  else if (getProfileType () == StaticProfile) {
4018  // StaticProfile means that the graph's column indices are
4019  // currently stored in a 1-D format, with row offsets in
4020  // k_rowPtrs_ and local column indices in k_lclInds1D_.
4021 
4022  // StaticProfile also means that the graph's array of row
4023  // offsets must already be allocated.
4024  TEUCHOS_TEST_FOR_EXCEPTION(
4025  k_rowPtrs_.dimension_0 () == 0, std::logic_error,
4026  "k_rowPtrs_ has size zero, but shouldn't");
4027  TEUCHOS_TEST_FOR_EXCEPTION(
4028  k_rowPtrs_.dimension_0 () != lclNumRows + 1, std::logic_error,
4029  "Tpetra::CrsGraph::fillLocalGraph: k_rowPtrs_ has size "
4030  << k_rowPtrs_.dimension_0 () << " != (lclNumRows + 1) = "
4031  << (lclNumRows + 1) << ".")
4032  {
4033  const size_t numOffsets = k_rowPtrs_.dimension_0 ();
4034  // FIXME (mfh 08 Aug 2014) This relies on UVM.
4035  TEUCHOS_TEST_FOR_EXCEPTION(
4036  numOffsets != 0 &&
4037  k_lclInds1D_.dimension_0 () != k_rowPtrs_(numOffsets-1),
4038  std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: "
4039  "numOffsets = " << numOffsets << " != 0 and "
4040  "k_lclInds1D_.dimension_0() = " << k_lclInds1D_.dimension_0 ()
4041  << " != k_rowPtrs_(" << numOffsets << ") = "
4042  << k_rowPtrs_(numOffsets-1) << ".");
4043  }
4044 
4045  if (nodeNumEntries_ != nodeNumAllocated_) {
4046  // The graph's current 1-D storage is "unpacked." This means
4047  // the row offsets may differ from what the final row offsets
4048  // should be. This could happen, for example, if the user
4049  // specified StaticProfile in the constructor and set an upper
4050  // bound on the number of entries in each row, but didn't fill
4051  // all those entries.
4052  TEUCHOS_TEST_FOR_EXCEPTION(
4053  static_cast<size_t> (k_numRowEnt.dimension_0 ()) != lclNumRows,
4054  std::logic_error, "Tpetra::CrsGraph::fillLocalGraph (called from "
4055  "fillComplete or expertStaticFillComplete): In StaticProfile "
4056  "unpacked branch, k_numRowEnt has the wrong length. "
4057  "k_numRowEnt.dimension_0() = " << k_numRowEnt.dimension_0 ()
4058  << " != getNodeNumRows() = " << lclNumRows << "");
4059 
4060  if (k_rowPtrs_.dimension_0 () != 0) {
4061  const size_t numOffsets =
4062  static_cast<size_t> (k_rowPtrs_.dimension_0 ());
4063  TEUCHOS_TEST_FOR_EXCEPTION(
4064  k_rowPtrs_(numOffsets-1) != static_cast<size_t> (k_lclInds1D_.dimension_0 ()),
4065  std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: "
4066  "In StaticProfile branch, before allocating or packing, "
4067  "k_rowPtrs_(" << (numOffsets-1) << ") = "
4068  << k_rowPtrs_(numOffsets-1) << " != k_lclInds1D_.dimension_0() = "
4069  << k_lclInds1D_.dimension_0 () << ".");
4070  }
4071 
4072  // Pack the row offsets into k_ptrs, by doing a sum-scan of
4073  // the array of valid entry counts per row (h_numRowEnt).
4074 
4075  // Total number of entries in the matrix on the calling
4076  // process. We will compute this in the loop below. It's
4077  // cheap to compute and useful as a sanity check.
4078  size_t lclTotalNumEntries = 0;
4079  {
4080  // Allocate the packed row offsets array.
4081  k_ptrs = non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
4082  k_ptrs_const = k_ptrs;
4083  //
4084  // FIXME hack until we get parallel_scan in kokkos
4085  //
4086  // Unlike in the 2-D storage case above, we don't need the
4087  // host view of the packed row offsets array after packing
4088  // the row offsets.
4089  typename non_const_row_map_type::HostMirror h_k_ptrs =
4090  create_mirror_view (k_ptrs);
4091  h_k_ptrs(0) = 0;
4092  for (size_t i = 0; i < lclNumRows; ++i) {
4093  const size_t numEnt = h_numRowEnt(i);
4094  lclTotalNumEntries += numEnt;
4095  h_k_ptrs(i+1) = h_k_ptrs(i) + numEnt;
4096  }
4097  Kokkos::deep_copy (k_ptrs, h_k_ptrs);
4098  }
4099 
4100  TEUCHOS_TEST_FOR_EXCEPTION(
4101  static_cast<size_t> (k_ptrs.dimension_0 ()) != lclNumRows + 1,
4102  std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: In "
4103  "StaticProfile unpacked branch, after allocating k_ptrs, "
4104  "k_ptrs.dimension_0() = " << k_ptrs.dimension_0 () << " != "
4105  "lclNumRows+1 = " << (lclNumRows+1) << ".");
4106  // FIXME (mfh 08 Aug 2014) This assumes UVM.
4107  TEUCHOS_TEST_FOR_EXCEPTION(
4108  k_ptrs(lclNumRows) != lclTotalNumEntries, std::logic_error,
4109  "Tpetra::CrsGraph::fillLocalGraph: In StaticProfile unpacked "
4110  "branch, after filling k_ptrs, k_ptrs(lclNumRows=" << lclNumRows
4111  << ") = " << k_ptrs(lclNumRows) << " != total number of entries on "
4112  "the calling process = " << lclTotalNumEntries << ".");
4113 
4114  // Allocate the array of packed column indices.
4115  k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
4116 
4117  // k_rowPtrs_ and k_lclInds1D_ are currently unpacked. Pack
4118  // them, using the packed row offsets array k_ptrs that we
4119  // created above.
4120  //
4121  // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
4122  // CrsMatrix?), we need to keep around the unpacked row
4123  // offsets and column indices.
4124 
4125  // Pack the column indices from unpacked k_lclInds1D_ into
4126  // packed k_inds. We will replace k_lclInds1D_ below.
4127  typedef pack_functor<typename local_graph_type::entries_type::non_const_type, row_map_type> inds_packer_type;
4128  inds_packer_type f (k_inds, k_lclInds1D_, k_ptrs, k_rowPtrs_);
4129  Kokkos::parallel_for (lclNumRows, f);
4130 
4131  TEUCHOS_TEST_FOR_EXCEPTION(
4132  k_ptrs.dimension_0 () == 0, std::logic_error, "Tpetra::CrsGraph::"
4133  "fillLocalGraph: In StaticProfile \"Optimize Storage\" = true branch,"
4134  " after packing, k_ptrs.dimension_0() = 0. This probably means that "
4135  "k_rowPtrs_ was never allocated.");
4136  if (k_ptrs.dimension_0 () != 0) {
4137  const size_t numOffsets = static_cast<size_t> (k_ptrs.dimension_0 ());
4138  TEUCHOS_TEST_FOR_EXCEPTION(
4139  static_cast<size_t> (k_ptrs(numOffsets - 1)) != k_inds.dimension_0 (),
4140  std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: "
4141  "In StaticProfile \"Optimize Storage\"=true branch, after packing, "
4142  "k_ptrs(" << (numOffsets-1) << ") = " << k_ptrs(numOffsets-1) <<
4143  " != k_inds.dimension_0() = " << k_inds.dimension_0 () << ".");
4144  }
4145  }
4146  else { // We don't have to pack, so just set the pointers.
4147  k_ptrs_const = k_rowPtrs_;
4148  k_inds = k_lclInds1D_;
4149 
4150  TEUCHOS_TEST_FOR_EXCEPTION(
4151  k_ptrs_const.dimension_0 () == 0, std::logic_error, "Tpetra::CrsGraph::"
4152  "fillLocalGraph: In StaticProfile \"Optimize Storage\" = "
4153  "false branch, k_ptrs_const.dimension_0() = 0. This probably means that "
4154  "k_rowPtrs_ was never allocated.");
4155  if (k_ptrs_const.dimension_0 () != 0) {
4156  const size_t numOffsets = static_cast<size_t> (k_ptrs_const.dimension_0 ());
4157  TEUCHOS_TEST_FOR_EXCEPTION(
4158  static_cast<size_t> (k_ptrs_const(numOffsets - 1)) != k_inds.dimension_0 (),
4159  std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: "
4160  "In StaticProfile \"Optimize Storage\" = false branch, "
4161  "k_ptrs_const(" << (numOffsets-1) << ") = " << k_ptrs_const(numOffsets - 1)
4162  << " != k_inds.dimension_0() = " << k_inds.dimension_0 () << ".");
4163  }
4164  }
4165  }
4166 
4167  // Extra sanity checks.
4168  TEUCHOS_TEST_FOR_EXCEPTION(
4169  static_cast<size_t> (k_ptrs_const.dimension_0 ()) != lclNumRows + 1,
4170  std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: After packing, "
4171  "k_ptrs_const.dimension_0() = " << k_ptrs_const.dimension_0 ()
4172  << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
4173  if (k_ptrs_const.dimension_0 () != 0) {
4174  const size_t numOffsets = static_cast<size_t> (k_ptrs_const.dimension_0 ());
4175  TEUCHOS_TEST_FOR_EXCEPTION(
4176  static_cast<size_t> (k_ptrs_const(numOffsets - 1)) != k_inds.dimension_0 (),
4177  std::logic_error, "Tpetra::CrsGraph::fillLocalGraph: After packing, "
4178  "k_ptrs_const(" << (numOffsets-1) << ") = " << k_ptrs_const(numOffsets-1)
4179  << " != k_inds.dimension_0() = " << k_inds.dimension_0 () << ".");
4180  }
4181 
4182  if (requestOptimizedStorage) {
4183  // With optimized storage, we don't need to store the 2-D column
4184  // indices array-of-arrays, or the array of row entry counts.
4185 
4186  // Free graph data structures that are only needed for 2-D or
4187  // unpacked 1-D storage.
4188  lclInds2D_ = null;
4189  k_numRowEntries_ = row_entries_type ();
4190 
4191  // Keep the new 1-D packed allocations.
4192  k_rowPtrs_ = k_ptrs_const;
4193  k_lclInds1D_ = k_inds;
4194 
4195  // Storage is packed now, so the number of allocated entries is
4196  // the same as the actual number of entries.
4197  nodeNumAllocated_ = nodeNumEntries_;
4198  // The graph is definitely StaticProfile now, whether or not it
4199  // was before.
4201  }
4202 
4203  // FIXME (mfh 28 Aug 2014) "Local Graph" sublist no longer used.
4204 
4205  // Build the local graph.
4206  lclGraph_ = local_graph_type (k_inds, k_ptrs_const);
4207 
4208  // TODO (mfh 13 Mar 2014) getNodeNumDiags(), isUpperTriangular(),
4209  // and isLowerTriangular() depend on computeGlobalConstants(), in
4210  // particular the part where it looks at the local matrix. You
4211  // have to use global indices to determine which entries are
4212  // diagonal, or above or below the diagonal. However, lower or
4213  // upper triangularness is a local property.
4214  }
4215 
4216  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4217  void
4219  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
4220  {
4221  // NOTE: This safety check matches the code, but not the documentation of Crsgraph
4222  //
4223  // FIXME (mfh 18 Aug 2014) This will break if the calling process
4224  // has no entries, because in that case, currently it is neither
4225  // locally nor globally indexed. This will change once we get rid
4226  // of lazy allocation (so that the constructor allocates indices
4227  // and therefore commits to local vs. global).
4228  const char tfecfFuncName[] = "replaceColMap: ";
4229  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4230  isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
4231  "Requires matching maps and non-static graph.");
4232  colMap_ = newColMap;
4233  }
4234 
4235  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4236  void
4238  reindexColumns (const Teuchos::RCP<const map_type>& newColMap,
4239  const Teuchos::RCP<const import_type>& newImport,
4240  const bool sortIndicesInEachRow)
4241  {
4242  using Teuchos::REDUCE_MIN;
4243  using Teuchos::reduceAll;
4244  using Teuchos::RCP;
4245  typedef GlobalOrdinal GO;
4246  typedef LocalOrdinal LO;
4247  typedef typename local_graph_type::entries_type::non_const_type col_inds_type;
4248  const char tfecfFuncName[] = "reindexColumns: ";
4249 
4250  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4251  isFillComplete (), std::runtime_error, "The graph is fill complete "
4252  "(isFillComplete() returns true). You must call resumeFill() before "
4253  "you may call this method.");
4254 
4255  // mfh 19 Aug 2014: This method does NOT redistribute data; it
4256  // doesn't claim to do the work of an Import or Export. This
4257  // means that for all processes, the calling process MUST own all
4258  // column indices, in both the old column Map (if it exists) and
4259  // the new column Map. We check this via an all-reduce.
4260  //
4261  // Some processes may be globally indexed, others may be locally
4262  // indexed, and others (that have no graph entries) may be
4263  // neither. This method will NOT change the graph's current
4264  // state. If it's locally indexed, it will stay that way, and
4265  // vice versa. It would easy to add an option to convert indices
4266  // from global to local, so as to save a global-to-local
4267  // conversion pass. However, we don't do this here. The intended
4268  // typical use case is that the graph already has a column Map and
4269  // is locally indexed, and this is the case for which we optimize.
4270 
4271  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4272 
4273  // Attempt to convert indices to the new column Map's version of
4274  // local. This will fail if on the calling process, the graph has
4275  // indices that are not on that process in the new column Map.
4276  // After the local conversion attempt, we will do an all-reduce to
4277  // see if any processes failed.
4278 
4279  // If this is false, then either the graph contains a column index
4280  // which is invalid in the CURRENT column Map, or the graph is
4281  // locally indexed but currently has no column Map. In either
4282  // case, there is no way to convert the current local indices into
4283  // global indices, so that we can convert them into the new column
4284  // Map's local indices. It's possible for this to be true on some
4285  // processes but not others, due to replaceColMap.
4286  bool allCurColIndsValid = true;
4287  // On the calling process, are all valid current column indices
4288  // also in the new column Map on the calling process? In other
4289  // words, does local reindexing suffice, or should the user have
4290  // done an Import or Export instead?
4291  bool localSuffices = true;
4292 
4293  // Final arrays for the local indices. We will allocate exactly
4294  // one of these ONLY if the graph is locally indexed on the
4295  // calling process, and ONLY if the graph has one or more entries
4296  // (is not empty) on the calling process. In that case, we
4297  // allocate the first (1-D storage) if the graph has a static
4298  // profile, else we allocate the second (2-D storage).
4299  typename local_graph_type::entries_type::non_const_type newLclInds1D;
4300  Teuchos::ArrayRCP<Teuchos::Array<LO> > newLclInds2D;
4301 
4302  // If indices aren't allocated, that means the calling process
4303  // owns no entries in the graph. Thus, there is nothing to
4304  // convert, and it trivially succeeds locally.
4305  if (indicesAreAllocated ()) {
4306  if (isLocallyIndexed ()) {
4307  if (hasColMap ()) { // locally indexed, and currently has a column Map
4308  const map_type& oldColMap = * (getColMap ());
4309  if (pftype_ == StaticProfile) {
4310  // Allocate storage for the new local indices.
4311  RCP<node_type> node = getRowMap ()->getNode ();
4312  newLclInds1D =
4313  col_inds_type ("Tpetra::CrsGraph::ind", nodeNumAllocated_);
4314  // Attempt to convert the new indices locally.
4315  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4316  const RowInfo rowInfo = this->getRowInfo (lclRow);
4317  const size_t beg = rowInfo.offset1D;
4318  const size_t end = beg + rowInfo.numEntries;
4319  for (size_t k = beg; k < end; ++k) {
4320  // FIXME (mfh 21 Aug 2014) This assumes UVM. Should
4321  // use a DualView instead.
4322  const LO oldLclCol = k_lclInds1D_(k);
4323  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4324  allCurColIndsValid = false;
4325  break; // Stop at the first invalid index
4326  }
4327  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
4328 
4329  // The above conversion MUST succeed. Otherwise, the
4330  // current local index is invalid, which means that
4331  // the graph was constructed incorrectly.
4332  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
4333  allCurColIndsValid = false;
4334  break; // Stop at the first invalid index
4335  }
4336  else {
4337  const LO newLclCol = newColMap->getLocalElement (gblCol);
4338  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4339  localSuffices = false;
4340  break; // Stop at the first invalid index
4341  }
4342  // FIXME (mfh 21 Aug 2014) This assumes UVM. Should
4343  // use a DualView instead.
4344  newLclInds1D(k) = newLclCol;
4345  }
4346  } // for each entry in the current row
4347  } // for each locally owned row
4348  }
4349  else { // pftype_ == DynamicProfile
4350  // Allocate storage for the new local indices. We only
4351  // allocate the outer array here; we will allocate the
4352  // inner arrays below.
4353  newLclInds2D = Teuchos::arcp<Teuchos::Array<LO> > (lclNumRows);
4354 
4355  // Attempt to convert the new indices locally.
4356  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4357  const RowInfo rowInfo = this->getRowInfo (lclRow);
4358  newLclInds2D.resize (rowInfo.allocSize);
4359 
4360  Teuchos::ArrayView<const LO> oldLclRowView = getLocalView (rowInfo);
4361  Teuchos::ArrayView<LO> newLclRowView = (newLclInds2D[lclRow]) ();
4362 
4363  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
4364  const LO oldLclCol = oldLclRowView[k];
4365  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4366  allCurColIndsValid = false;
4367  break; // Stop at the first invalid index
4368  }
4369  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
4370 
4371  // The above conversion MUST succeed. Otherwise, the
4372  // local index is invalid and the graph is wrong.
4373  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
4374  allCurColIndsValid = false;
4375  break; // Stop at the first invalid index
4376  }
4377  else {
4378  const LO newLclCol = newColMap->getLocalElement (gblCol);
4379  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4380  localSuffices = false;
4381  break; // Stop at the first invalid index.
4382  }
4383  newLclRowView[k] = newLclCol;
4384  }
4385  } // for each entry in the current row
4386  } // for each locally owned row
4387  } // pftype_
4388  }
4389  else { // locally indexed, but no column Map
4390  // This case is only possible if replaceColMap() was called
4391  // with a null argument on the calling process. It's
4392  // possible, but it means that this method can't possibly
4393  // succeed, since we have no way of knowing how to convert
4394  // the current local indices to global indices.
4395  allCurColIndsValid = false;
4396  }
4397  }
4398  else { // globally indexed
4399  // If the graph is globally indexed, we don't need to save
4400  // local indices, but we _do_ need to know whether the current
4401  // global indices are valid in the new column Map. We may
4402  // need to do a getRemoteIndexList call to find this out.
4403  //
4404  // In this case, it doesn't matter whether the graph currently
4405  // has a column Map. We don't need the old column Map to
4406  // convert from global indices to the _new_ column Map's local
4407  // indices. Furthermore, we can use the same code, whether
4408  // the graph is static or dynamic profile.
4409 
4410  // Test whether the current global indices are in the new
4411  // column Map on the calling process.
4412  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4413  const RowInfo rowInfo = this->getRowInfo (lclRow);
4414  Teuchos::ArrayView<const GO> oldGblRowView = getGlobalView (rowInfo);
4415  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
4416  const GO gblCol = oldGblRowView[k];
4417  if (! newColMap->isNodeGlobalElement (gblCol)) {
4418  localSuffices = false;
4419  break; // Stop at the first invalid index
4420  }
4421  } // for each entry in the current row
4422  } // for each locally owned row
4423  } // locally or globally indexed
4424  } // whether indices are allocated
4425 
4426  // Do an all-reduce to check both possible error conditions.
4427  int lclSuccess[2];
4428  lclSuccess[0] = allCurColIndsValid ? 1 : 0;
4429  lclSuccess[1] = localSuffices ? 1 : 0;
4430  int gblSuccess[2];
4431  gblSuccess[0] = 0;
4432  gblSuccess[1] = 0;
4433  RCP<const Teuchos::Comm<int> > comm =
4434  getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
4435  if (! comm.is_null ()) {
4436  reduceAll<int, int> (*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess);
4437  }
4438 
4439  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4440  gblSuccess[0] == 0, std::runtime_error, "It is not possible to continue."
4441  " The most likely reason is that the graph is locally indexed, but the "
4442  "column Map is missing (null) on some processes, due to a previous call "
4443  "to replaceColMap().");
4444 
4445  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4446  gblSuccess[1] == 0, std::runtime_error, "On some process, the graph "
4447  "contains column indices that are in the old column Map, but not in the "
4448  "new column Map (on that process). This method does NOT redistribute "
4449  "data; it does not claim to do the work of an Import or Export operation."
4450  " This means that for all processess, the calling process MUST own all "
4451  "column indices, in both the old column Map and the new column Map. In "
4452  "this case, you will need to do an Import or Export operation to "
4453  "redistribute data.");
4454 
4455  // Commit the results.
4456  if (isLocallyIndexed ()) {
4457  if (pftype_ == StaticProfile) {
4458  k_lclInds1D_ = newLclInds1D;
4459  } else { // dynamic profile
4460  lclInds2D_ = newLclInds2D;
4461  }
4462  // We've reindexed, so we don't know if the indices are sorted.
4463  //
4464  // FIXME (mfh 17 Sep 2014) It could make sense to check this,
4465  // since we're already going through all the indices above. We
4466  // could also sort each row in place; that way, we would only
4467  // have to make one pass over the rows.
4468  indicesAreSorted_ = false;
4469  if (sortIndicesInEachRow) {
4470  // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
4471  // order to call this method.
4472  //
4473  // FIXME (mfh 17 Sep 2014) This violates the strong exception
4474  // guarantee. It would be better to sort the new index arrays
4475  // before committing them.
4476  sortAllIndices ();
4477  }
4478  }
4479  colMap_ = newColMap;
4480 
4481  if (newImport.is_null ()) {
4482  // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
4483  // check whether the input Import is null on any process.
4484  //
4485  // If the domain Map hasn't been set yet, we can't compute a new
4486  // Import object. Leave it what it is; it should be null, but
4487  // it doesn't matter. If the domain Map _has_ been set, then
4488  // compute a new Import object if necessary.
4489  if (! domainMap_.is_null ()) {
4490  if (! domainMap_->isSameAs (* newColMap)) {
4491  importer_ = Teuchos::rcp (new import_type (domainMap_, newColMap));
4492  } else {
4493  importer_ = Teuchos::null; // don't need an Import
4494  }
4495  }
4496  } else {
4497  // The caller gave us an Import object. Assume that it's valid.
4498  importer_ = newImport;
4499  }
4500  }
4501 
4502 
4503  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4504  void
4506  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4507  const Teuchos::RCP<const import_type>& newImporter)
4508  {
4509  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
4510  TEUCHOS_TEST_FOR_EXCEPTION(
4511  colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4512  "this method unless the graph already has a column Map.");
4513  TEUCHOS_TEST_FOR_EXCEPTION(
4514  newDomainMap.is_null (), std::invalid_argument,
4515  prefix << "The new domain Map must be nonnull.");
4516 
4517 #ifdef HAVE_TPETRA_DEBUG
4518  if (newImporter.is_null ()) {
4519  // It's not a good idea to put expensive operations in a macro
4520  // clause, even if they are side effect - free, because macros
4521  // don't promise that they won't evaluate their arguments more
4522  // than once. It's polite for them to do so, but not required.
4523  const bool colSameAsDom = colMap_->isSameAs (*newDomainMap);
4524  TEUCHOS_TEST_FOR_EXCEPTION(
4525  colSameAsDom, std::invalid_argument, "If the new Import is null, "
4526  "then the new domain Map must be the same as the current column Map.");
4527  }
4528  else {
4529  const bool colSameAsTgt =
4530  colMap_->isSameAs (* (newImporter->getTargetMap ()));
4531  const bool newDomSameAsSrc =
4532  newDomainMap->isSameAs (* (newImporter->getSourceMap ()));
4533  TEUCHOS_TEST_FOR_EXCEPTION(
4534  ! colSameAsTgt || ! newDomSameAsSrc, std::invalid_argument, "If the "
4535  "new Import is nonnull, then the current column Map must be the same "
4536  "as the new Import's target Map, and the new domain Map must be the "
4537  "same as the new Import's source Map.");
4538  }
4539 #endif // HAVE_TPETRA_DEBUG
4540 
4541  domainMap_ = newDomainMap;
4542  importer_ = Teuchos::rcp_const_cast<import_type> (newImporter);
4543  }
4544 
4545  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4549  {
4550  return lclGraph_;
4551  }
4552 
4553  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4557  {
4558  return lclGraph_;
4559  }
4560 
4561  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4562  void
4565  {
4566  using Teuchos::as;
4567  using Teuchos::outArg;
4568  using Teuchos::reduceAll;
4569  typedef LocalOrdinal LO;
4570  typedef GlobalOrdinal GO;
4571  typedef global_size_t GST;
4572 
4573 #ifdef HAVE_TPETRA_DEBUG
4574  TEUCHOS_TEST_FOR_EXCEPTION(! hasColMap(), std::logic_error, "Tpetra::"
4575  "CrsGraph::computeGlobalConstants: At this point, the graph should have "
4576  "a column Map, but it does not. Please report this bug to the Tpetra "
4577  "developers.");
4578 #endif // HAVE_TPETRA_DEBUG
4579 
4580  // If necessary, (re)compute the local constants: nodeNumDiags_,
4581  // lowerTriangular_, upperTriangular_, and nodeMaxNumRowEntries_.
4582  if (! haveLocalConstants_) {
4583  // We have actually already computed nodeNumEntries_.
4584  // nodeNumEntries_ gets updated by methods that insert or remove
4585  // indices (including setAllIndices and
4586  // expertStaticFillComplete). Before fillComplete, its count
4587  // may include duplicate column indices in the same row.
4588  // However, mergeRowIndices and mergeRowIndicesAndValues both
4589  // subtract off merged indices in each row from the total count.
4590  // Thus, nodeNumEntries_ _should_ be accurate at this point,
4591  // meaning that we don't have to re-count it here.
4592 
4593  // Reset local properties
4594  upperTriangular_ = true;
4595  lowerTriangular_ = true;
4596  nodeMaxNumRowEntries_ = 0;
4597  nodeNumDiags_ = 0;
4598 
4599  // At this point, we know that we have both a row Map and a column Map.
4600  const map_type& rowMap = *rowMap_;
4601  const map_type& colMap = *colMap_;
4602 
4603  // Go through all the entries of the graph. Count the number of
4604  // diagonal elements we encounter, and figure out whether the
4605  // graph is lower or upper triangular. Diagonal elements are
4606  // determined using global indices, with respect to the whole
4607  // graph. However, lower or upper triangularity is a local
4608  // property, and is determined using local indices.
4609  //
4610  // At this point, indices have already been sorted in each row.
4611  // That makes finding out whether the graph is lower / upper
4612  // triangular easier.
4613  if (indicesAreAllocated () && nodeNumAllocated_ > 0) {
4614  const LO numLocalRows = static_cast<LO> (this->getNodeNumRows ());
4615  for (LO localRow = 0; localRow < numLocalRows; ++localRow) {
4616  const GO globalRow = rowMap.getGlobalElement (localRow);
4617  // Find the local (column) index for the diagonal entry.
4618  // This process might not necessarily own _any_ entries in
4619  // the current row. If it doesn't, skip this row. It won't
4620  // affect any of the attributes (nodeNumDiagons_,
4621  // upperTriangular_, lowerTriangular_, or
4622  // nodeMaxNumRowEntries_) which this loop sets.
4623  const LO rlcid = colMap.getLocalElement (globalRow);
4624  // This process owns one or more entries in the current row.
4625  const RowInfo rowInfo = this->getRowInfo (localRow);
4626  ArrayView<const LO> rview = getLocalView (rowInfo);
4627  typename ArrayView<const LO>::iterator beg, end, cur;
4628  beg = rview.begin();
4629  end = beg + rowInfo.numEntries;
4630  if (beg != end) {
4631  for (cur = beg; cur != end; ++cur) {
4632  // is this the diagonal?
4633  if (rlcid == *cur) ++nodeNumDiags_;
4634  }
4635  // Local column indices are sorted in each row. That means
4636  // the smallest column index in this row (on this process)
4637  // is *beg, and the largest column index in this row (on
4638  // this process) is *(end - 1). We know that end - 1 is
4639  // valid because beg != end.
4640  const size_t smallestCol = static_cast<size_t> (*beg);
4641  const size_t largestCol = static_cast<size_t> (*(end - 1));
4642 
4643  if (smallestCol < static_cast<size_t> (localRow)) {
4644  upperTriangular_ = false;
4645  }
4646  if (static_cast<size_t> (localRow) < largestCol) {
4647  lowerTriangular_ = false;
4648  }
4649  }
4650  // Update the max number of entries over all rows.
4651  nodeMaxNumRowEntries_ = std::max (nodeMaxNumRowEntries_, rowInfo.numEntries);
4652  }
4653  }
4654  haveLocalConstants_ = true;
4655  } // if my process doesn't have local constants
4656 
4657  // Compute global constants from local constants. Processes that
4658  // already have local constants still participate in the
4659  // all-reduces, using their previously computed values.
4660  if (haveGlobalConstants_ == false) {
4661  // Promote all the nodeNum* and nodeMaxNum* quantities from
4662  // size_t to global_size_t, when doing the all-reduces for
4663  // globalNum* / globalMaxNum* results.
4664  //
4665  // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
4666  // this in two all-reduces (one for the sum and the other for
4667  // the max), or use a custom MPI_Op that combines the sum and
4668  // the max. The latter might even be slower than two
4669  // all-reduces on modern network hardware. It would also be a
4670  // good idea to use nonblocking all-reduces (MPI 3), so that we
4671  // don't have to wait around for the first one to finish before
4672  // starting the second one.
4673  GST lcl[2], gbl[2];
4674  lcl[0] = static_cast<GST> (nodeNumEntries_);
4675  lcl[1] = static_cast<GST> (nodeNumDiags_);
4676  reduceAll<int,GST> (*getComm (), Teuchos::REDUCE_SUM,
4677  2, lcl, gbl);
4678  globalNumEntries_ = gbl[0];
4679  globalNumDiags_ = gbl[1];
4680  reduceAll<int,GST> (*getComm (), Teuchos::REDUCE_MAX,
4681  static_cast<GST> (nodeMaxNumRowEntries_),
4682  outArg (globalMaxNumRowEntries_));
4683  haveGlobalConstants_ = true;
4684  }
4685  }
4686 
4687 
4688  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4689  void
4692  {
4693  using Teuchos::arcp;
4694  using Teuchos::Array;
4695  typedef LocalOrdinal LO;
4696  typedef GlobalOrdinal GO;
4697  typedef typename local_graph_type::entries_type::non_const_type
4698  lcl_col_inds_type;
4699  typedef Kokkos::View<GlobalOrdinal*,
4700  typename lcl_col_inds_type::array_layout,
4701  device_type> gbl_col_inds_type;
4702  const char tfecfFuncName[] = "makeIndicesLocal";
4703 
4704  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4705  ! this->hasColMap (), std::logic_error, ": The graph does not have a "
4706  "column Map yet. This method should never be called in that case. "
4707  "Please report this bug to the Tpetra developers.");
4708  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4709  this->getColMap ().is_null (), std::logic_error, ": The graph claims "
4710  "that it has a column Map, because hasColMap() returns true. However, "
4711  "the result of getColMap() is null. This should never happen. Please "
4712  "report this bug to the Tpetra developers.");
4713 
4714  const size_t lclNumRows = this->getNodeNumRows ();
4715  const map_type& colMap = * (this->getColMap ());
4716 
4717  if (isGloballyIndexed () && lclNumRows > 0) {
4718  typename t_numRowEntries_::t_host h_numRowEnt = k_numRowEntries_.h_view;
4719 
4720  // allocate data for local indices
4721  if (getProfileType () == StaticProfile) {
4722  // If GO and LO are the same size, we can reuse the existing
4723  // array of 1-D index storage to convert column indices from
4724  // GO to LO. Otherwise, we'll just allocate a new buffer.
4725  if (nodeNumAllocated_ && Kokkos::Impl::is_same<LO,GO>::value) {
4726  k_lclInds1D_ = Kokkos::Impl::if_c<Kokkos::Impl::is_same<LO,GO>::value,
4728  lcl_col_inds_type >::select (k_gblInds1D_, k_lclInds1D_);
4729  }
4730  else {
4731  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4732  k_rowPtrs_.dimension_0 () == 0, std::logic_error, ": This should "
4733  "never happen at this point. Please report this bug to the Tpetra "
4734  "developers.");
4735  const size_t numEnt = k_rowPtrs_[lclNumRows];
4736 
4737  k_lclInds1D_ = lcl_col_inds_type ("Tpetra::CrsGraph::lclind", numEnt);
4738  }
4739 
4740  for (size_t r = 0; r < lclNumRows; ++r) {
4741  const size_t offset = k_rowPtrs_(r);
4742  const size_t numentry = h_numRowEnt(r);
4743  for (size_t j = 0; j < numentry; ++j) {
4744  const GO gid = k_gblInds1D_(offset + j);
4745  const LO lid = colMap.getLocalElement (gid);
4746  k_lclInds1D_(offset + j) = lid;
4747 #ifdef HAVE_TPETRA_DEBUG
4748  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4749  k_lclInds1D_(offset + j) == Teuchos::OrdinalTraits<LO>::invalid(),
4750  std::logic_error,
4751  ": In local row r=" << r << ", global column " << gid << " is "
4752  "not in the column Map. This should never happen. Please "
4753  "report this bug to the Tpetra developers.");
4754 #endif // HAVE_TPETRA_DEBUG
4755  }
4756  }
4757  // We've converted column indices from global to local, so we
4758  // can deallocate the global column indices (which we know are
4759  // in 1-D storage, because the graph has static profile).
4760  k_gblInds1D_ = gbl_col_inds_type ();
4761  }
4762  else { // the graph has dynamic profile (2-D index storage)
4763  lclInds2D_ = arcp<Array<LO> > (lclNumRows);
4764  for (size_t r = 0; r < lclNumRows; ++r) {
4765  if (! gblInds2D_[r].empty ()) {
4766  const GO* const ginds = gblInds2D_[r].getRawPtr ();
4767  const size_t rna = gblInds2D_[r].size ();
4768  const size_t numentry = h_numRowEnt(r);
4769  lclInds2D_[r].resize (rna);
4770  LO* const linds = lclInds2D_[r].getRawPtr ();
4771  for (size_t j = 0; j < numentry; ++j) {
4772  const GO gid = ginds[j];
4773  const LO lid = colMap.getLocalElement (gid);
4774  linds[j] = lid;
4775 #ifdef HAVE_TPETRA_DEBUG
4776  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4777  linds[j] == Teuchos::OrdinalTraits<LO>::invalid(),
4778  std::logic_error,
4779  ": Global column ginds[j=" << j << "]=" << ginds[j]
4780  << " of local row r=" << r << " is not in the column Map. "
4781  "This should never happen. Please report this bug to the "
4782  "Tpetra developers.");
4783 #endif // HAVE_TPETRA_DEBUG
4784  }
4785  }
4786  }
4787  gblInds2D_ = Teuchos::null;
4788  }
4789  } // globallyIndexed() && lclNumRows > 0
4790 
4792  indicesAreLocal_ = true;
4793  indicesAreGlobal_ = false;
4794  checkInternalState ();
4795  }
4796 
4797 
4798  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4799  void
4802  {
4803  typedef LocalOrdinal LO;
4804 
4805  // this should be called only after makeIndicesLocal()
4806  TEUCHOS_TEST_FOR_EXCEPT( isGloballyIndexed () );
4807  if (isSorted () == false) {
4808  // FIXME (mfh 06 Mar 2014) This would be a good place for a
4809  // thread-parallel kernel.
4810  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4811  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4812  const RowInfo rowInfo = this->getRowInfo (lclRow);
4813  this->sortRowIndices (rowInfo);
4814  }
4815  }
4816  indicesAreSorted_ = true; // we just sorted every row
4817  }
4818 
4819 
4820  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4821  void
4824  {
4825  using Teuchos::Array;
4826  using Teuchos::ArrayView;
4827  using Teuchos::rcp;
4828  using Teuchos::REDUCE_MAX;
4829  using Teuchos::reduceAll;
4830  using std::endl;
4831  typedef LocalOrdinal LO;
4832  typedef GlobalOrdinal GO;
4833  const char tfecfFuncName[] = "makeColMap";
4834 
4835  if (hasColMap ()) { // The graph already has a column Map.
4836  // FIXME (mfh 26 Feb 2013): This currently prevents structure
4837  // changes that affect the column Map.
4838  return;
4839  }
4840 
4841  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4842  isLocallyIndexed (), std::runtime_error,
4843  ": The graph is locally indexed. Calling makeColMap() to make the "
4844  "column Map requires that the graph be globally indexed.");
4845 
4846  // After the calling process is done going through all of the rows
4847  // it owns, myColumns will contain the list of indices owned by
4848  // this process in the column Map.
4849  Array<GO> myColumns;
4850 
4851  // If we reach this point, we don't have a column Map yet, so the
4852  // graph can't be locally indexed. Thus, isGloballyIndexed() ==
4853  // false means that the graph is empty on this process, so
4854  // myColumns will be left empty.
4855  if (isGloballyIndexed ()) {
4856  // Go through all the rows, finding the populated column indices.
4857  //
4858  // Our final list of indices for the column Map constructor will
4859  // have the following properties (all of which are with respect
4860  // to the calling process):
4861  //
4862  // 1. Indices in the domain Map go first.
4863  // 2. Indices not in the domain Map follow, ordered first
4864  // contiguously by their owning process rank (in the domain
4865  // Map), then in increasing order within that.
4866  // 3. No duplicate indices.
4867  //
4868  // This imitates the ordering used by Aztec(OO) and Epetra.
4869  // Storing indices owned by the same process (in the domain Map)
4870  // contiguously permits the use of contiguous send and receive
4871  // buffers.
4872  //
4873  // We begin by partitioning the column indices into "local" GIDs
4874  // (owned by the domain Map) and "remote" GIDs (not owned by the
4875  // domain Map). We use the same order for local GIDs as the
4876  // domain Map, so we track them in place in their array. We use
4877  // an std::set (RemoteGIDSet) to keep track of remote GIDs, so
4878  // that we don't have to merge duplicates later.
4879  const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
4880  size_t numLocalColGIDs = 0, numRemoteColGIDs = 0;
4881 
4882  // GIDisLocal[lid] == 0 if and only if local index lid in the
4883  // domain Map is remote (not local).
4884  Array<char> GIDisLocal (domainMap_->getNodeNumElements (), 0);
4885  std::set<GO> RemoteGIDSet;
4886  // This preserves the not-sorted Epetra order of GIDs.
4887  std::vector<GO> RemoteGIDUnorderedVector;
4888  const LO myNumRows = this->getNodeNumRows ();
4889  for (LO r = 0; r < myNumRows; ++r) {
4890  const RowInfo rowinfo = this->getRowInfo (r);
4891  if (rowinfo.numEntries > 0) {
4892  // NOTE (mfh 02 Sep 2014) getGlobalView() returns a view of
4893  // all the space in the row, not just the occupied entries.
4894  // (This matters for the case of unpacked 1-D storage. We
4895  // might not have packed it yet.) That's why we need to
4896  // take a subview.
4897  ArrayView<const GO> rowGids = getGlobalView (rowinfo);
4898  rowGids = rowGids (0, rowinfo.numEntries);
4899 
4900  for (size_t k = 0; k < rowinfo.numEntries; ++k) {
4901  const GO gid = rowGids[k];
4902  const LO lid = domainMap_->getLocalElement (gid);
4903  if (lid != LINV) {
4904  const char alreadyFound = GIDisLocal[lid];
4905  if (alreadyFound == 0) {
4906  GIDisLocal[lid] = static_cast<char> (1);
4907  ++numLocalColGIDs;
4908  }
4909  }
4910  else {
4911  const bool notAlreadyFound = RemoteGIDSet.insert (gid).second;
4912  if (notAlreadyFound) { // gid did not exist in the set before
4914  // The user doesn't want to sort remote GIDs (for
4915  // each remote process); they want us to keep remote
4916  // GIDs in their original order. We do this by
4917  // stuffing each remote GID into an array as we
4918  // encounter it for the first time. The std::set
4919  // helpfully tracks first encounters.
4920  RemoteGIDUnorderedVector.push_back (gid);
4921  }
4922  ++numRemoteColGIDs;
4923  }
4924  }
4925  } // for each entry k in row r
4926  } // if row r contains > 0 entries
4927  } // for each locally owned row r
4928 
4929  // Possible short-circuit for serial scenario:
4930  //
4931  // If all domain GIDs are present as column indices, then set
4932  // ColMap=DomainMap. By construction, LocalGIDs is a subset of
4933  // DomainGIDs.
4934  //
4935  // If we have
4936  // * Number of remote GIDs is 0, so that ColGIDs == LocalGIDs,
4937  // and
4938  // * Number of local GIDs is number of domain GIDs
4939  // then
4940  // * LocalGIDs \subset DomainGIDs && size(LocalGIDs) ==
4941  // size(DomainGIDs) => DomainGIDs == LocalGIDs == ColGIDs
4942  // on the calling process.
4943  //
4944  // We will concern ourselves only with the special case of a
4945  // serial DomainMap, obviating the need for communication.
4946  //
4947  // If
4948  // * DomainMap has a serial communicator
4949  // then we can set the column Map as the domain Map
4950  // return. Benefit: this graph won't need an Import object
4951  // later.
4952  //
4953  // Note, for a serial domain map, there can be no RemoteGIDs,
4954  // because there are no remote processes. Likely explanations
4955  // for this are:
4956  // * user submitted erroneous column indices
4957  // * user submitted erroneous domain Map
4958  if (domainMap_->getComm ()->getSize () == 1) {
4959  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4960  numRemoteColGIDs != 0, std::runtime_error,
4961  ": " << numRemoteColGIDs << " column "
4962  << (numRemoteColGIDs != 1 ? "indices are" : "index is")
4963  << " not in the domain Map." << endl
4964  << "Either these indices are invalid or the domain Map is invalid."
4965  << endl << "Remember that nonsquare matrices, or matrices where the "
4966  "row and range Maps are different, require calling the version of "
4967  "fillComplete that takes the domain and range Maps as input.");
4968  if (numLocalColGIDs == domainMap_->getNodeNumElements()) {
4969  colMap_ = domainMap_;
4970  checkInternalState ();
4971  return;
4972  }
4973  }
4974 
4975  // Populate myColumns with a list of all column GIDs. Put
4976  // locally owned (in the domain Map) GIDs at the front: they
4977  // correspond to "same" and "permuted" entries between the
4978  // column Map and the domain Map. Put remote GIDs at the back.
4979  myColumns.resize (numLocalColGIDs + numRemoteColGIDs);
4980  // get pointers into myColumns for each part
4981  ArrayView<GO> LocalColGIDs = myColumns (0, numLocalColGIDs);
4982  ArrayView<GO> RemoteColGIDs = myColumns (numLocalColGIDs, numRemoteColGIDs);
4983 
4984  // Copy the remote GIDs into myColumns
4986  // The std::set puts GIDs in increasing order.
4987  std::copy (RemoteGIDSet.begin(), RemoteGIDSet.end(),
4988  RemoteColGIDs.begin());
4989  } else {
4990  // Respect the originally encountered order.
4991  std::copy (RemoteGIDUnorderedVector.begin(),
4992  RemoteGIDUnorderedVector.end(), RemoteColGIDs.begin());
4993  }
4994 
4995  // Make a list of process ranks corresponding to the remote GIDs.
4996  Array<int> RemoteImageIDs (numRemoteColGIDs);
4997  // Look up the remote process' ranks in the domain Map.
4998  {
4999  const LookupStatus stat =
5000  domainMap_->getRemoteIndexList (RemoteColGIDs, RemoteImageIDs ());
5001 #ifdef HAVE_TPETRA_DEBUG
5002  // If any process returns IDNotPresent, then at least one of
5003  // the remote indices was not present in the domain Map. This
5004  // means that the Import object cannot be constructed, because
5005  // of incongruity between the column Map and domain Map.
5006  // This has two likely causes:
5007  // - The user has made a mistake in the column indices
5008  // - The user has made a mistake with respect to the domain Map
5009  const int missingID_lcl = (stat == IDNotPresent ? 1 : 0);
5010  int missingID_gbl = 0;
5011  reduceAll<int, int> (*getComm (), REDUCE_MAX, missingID_lcl,
5012  outArg (missingID_gbl));
5013  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5014  missingID_gbl == 1, std::runtime_error,
5015  ": Some column indices are not in the domain Map." << endl
5016  << "Either these column indices are invalid or the domain Map is "
5017  "invalid." << endl << "Likely cause: For a nonsquare matrix, you "
5018  "must give the domain and range Maps as input to fillComplete.");
5019 #else
5020  (void) stat; // forestall compiler warning for unused variable
5021 #endif // HAVE_TPETRA_DEBUG
5022  }
5023  // Sort incoming remote column indices by their owning process
5024  // rank, so that all columns coming from a given remote process
5025  // are contiguous. This means the Import's Distributor doesn't
5026  // need to reorder data.
5027  //
5028  // NOTE (mfh 02 Sep 2014) This needs to be a stable sort, so
5029  // that it respects either of the possible orderings of GIDs
5030  // (sorted, or original order) specified above.
5031  sort2 (RemoteImageIDs.begin(), RemoteImageIDs.end(), RemoteColGIDs.begin());
5032 
5033  // Copy the local GIDs into myColumns. Two cases:
5034  // 1. If the number of Local column GIDs is the same as the
5035  // number of Local domain GIDs, we can simply read the domain
5036  // GIDs into the front part of ColIndices (see logic above
5037  // from the serial short circuit case)
5038  // 2. We step through the GIDs of the DomainMap, checking to see
5039  // if each domain GID is a column GID. We want to do this to
5040  // maintain a consistent ordering of GIDs between the columns
5041  // and the domain.
5042 
5043  const size_t numDomainElts = domainMap_->getNodeNumElements ();
5044  if (numLocalColGIDs == numDomainElts) {
5045  // If the number of locally owned GIDs are the same as the
5046  // number of local domain Map elements, then the local domain
5047  // Map elements are the same as the locally owned GIDs.
5048  if (domainMap_->isContiguous ()) {
5049  // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case
5050  // that the domain Map is contiguous, it's more efficient to
5051  // avoid calling getNodeElementList(), since that
5052  // permanently constructs and caches the GID list in the
5053  // contiguous Map.
5054  GO curColMapGid = domainMap_->getMinGlobalIndex ();
5055  for (size_t k = 0; k < numLocalColGIDs; ++k, ++curColMapGid) {
5056  LocalColGIDs[k] = curColMapGid;
5057  }
5058  }
5059  else {
5060  ArrayView<const GO> domainElts = domainMap_->getNodeElementList ();
5061  std::copy (domainElts.begin(), domainElts.end(), LocalColGIDs.begin());
5062  }
5063  }
5064  else {
5065  // Count the number of locally owned GIDs, both to keep track
5066  // of the current array index, and as a sanity check.
5067  size_t numLocalCount = 0;
5068  if (domainMap_->isContiguous ()) {
5069  // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case
5070  // that the domain Map is contiguous, it's more efficient to
5071  // avoid calling getNodeElementList(), since that
5072  // permanently constructs and caches the GID list in the
5073  // contiguous Map.
5074  GO curColMapGid = domainMap_->getMinGlobalIndex ();
5075  for (size_t i = 0; i < numDomainElts; ++i, ++curColMapGid) {
5076  if (GIDisLocal[i]) {
5077  LocalColGIDs[numLocalCount++] = curColMapGid;
5078  }
5079  }
5080  }
5081  else {
5082  ArrayView<const GO> domainElts = domainMap_->getNodeElementList ();
5083  for (size_t i = 0; i < numDomainElts; ++i) {
5084  if (GIDisLocal[i]) {
5085  LocalColGIDs[numLocalCount++] = domainElts[i];
5086  }
5087  }
5088  }
5089  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5090  numLocalCount != numLocalColGIDs, std::logic_error,
5091  ": numLocalCount = " << numLocalCount << " != numLocalColGIDs = "
5092  << numLocalColGIDs << ". This should never happen. Please report "
5093  "this bug to the Tpetra developers.");
5094  }
5095 
5096  // FIXME (mfh 03 Apr 2013) Now would be a good time to use the
5097  // information we collected above to construct the Import. In
5098  // particular, building an Import requires:
5099  //
5100  // 1. numSameIDs (length of initial contiguous sequence of GIDs
5101  // on this process that are the same in both Maps; this
5102  // equals the number of domain Map elements on this process)
5103  //
5104  // 2. permuteToLIDs and permuteFromLIDs (both empty in this
5105  // case, since there's no permutation going on; the column
5106  // Map starts with the domain Map's GIDs, and immediately
5107  // after them come the remote GIDs)
5108  //
5109  // 3. remoteGIDs (exactly those GIDs that we found out above
5110  // were not in the domain Map) and remoteLIDs (which we could
5111  // have gotten above by using the three-argument version of
5112  // getRemoteIndexList() that computes local indices as well
5113  // as process ranks, instead of the two-argument version that
5114  // was used above)
5115  //
5116  // 4. remotePIDs (which we have from the getRemoteIndexList()
5117  // call above)
5118  //
5119  // 5. Sorting remotePIDs, and applying that permutation to
5120  // remoteGIDs and remoteLIDs (by calling sort3 above instead
5121  // of sort2)
5122  //
5123  // 6. Everything after the sort3 call in Import::setupExport():
5124  // a. Create the Distributor via createFromRecvs(), which
5125  // computes exportGIDs and exportPIDs
5126  // b. Compute exportLIDs from exportGIDs (by asking the
5127  // source Map, in this case the domain Map, to convert
5128  // global to local)
5129  //
5130  // Steps 1-5 come for free, since we must do that work anyway in
5131  // order to compute the column Map. In particular, Step 3 is
5132  // even more expensive than Step 6a, since it involves both
5133  // creating and using a new Distributor object.
5134 
5135  } // if the graph is globally indexed
5136 
5137  const global_size_t gstInv =
5138  Teuchos::OrdinalTraits<global_size_t>::invalid ();
5139  // FIXME (mfh 05 Mar 2014) Doesn't the index base of a Map have to
5140  // be the same as the Map's min GID? If the first column is empty
5141  // (contains no entries), then the column Map's min GID won't
5142  // necessarily be the same as the domain Map's index base.
5143  const GO indexBase = domainMap_->getIndexBase ();
5144  colMap_ = rcp (new map_type (gstInv, myColumns, indexBase,
5145  domainMap_->getComm (),
5146  domainMap_->getNode ()));
5147 
5148  checkInternalState ();
5149  }
5150 
5151 
5152  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5153  void
5156  {
5157  TEUCHOS_TEST_FOR_EXCEPT( isGloballyIndexed() ); // call only after makeIndicesLocal()
5158  TEUCHOS_TEST_FOR_EXCEPT( ! isSorted() ); // call only after sortIndices()
5159  if (! isMerged ()) {
5160  const LocalOrdinal lclNumRows =
5161  static_cast<LocalOrdinal> (this->getNodeNumRows ());
5162  for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
5163  const RowInfo rowInfo = this->getRowInfo (row);
5164  mergeRowIndices(rowInfo);
5165  }
5166  // we just merged every row
5167  noRedundancies_ = true;
5168  }
5169  }
5170 
5171 
5172  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5173  void
5176  {
5177  using Teuchos::ParameterList;
5178  using Teuchos::RCP;
5179  using Teuchos::rcp;
5180 
5181  TEUCHOS_TEST_FOR_EXCEPTION(! hasColMap (), std::logic_error, "Tpetra::"
5182  "CrsGraph::makeImportExport: This method may not be called unless the "
5183  "graph has a column Map.");
5184  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
5185 
5186  // Don't do any checks to see if we need to create the Import, if
5187  // it exists already.
5188  //
5189  // FIXME (mfh 25 Mar 2013) This will become incorrect if we
5190  // change CrsGraph in the future to allow changing the column
5191  // Map after fillComplete. For now, the column Map is fixed
5192  // after the first fillComplete call.
5193  if (importer_.is_null ()) {
5194  // Create the Import instance if necessary.
5195  if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
5196  if (params.is_null () || ! params->isSublist ("Import")) {
5197  importer_ = rcp (new import_type (domainMap_, colMap_));
5198  } else {
5199  RCP<ParameterList> importSublist = sublist (params, "Import", true);
5200  importer_ = rcp (new import_type (domainMap_, colMap_, importSublist));
5201  }
5202  }
5203  }
5204 
5205  // Don't do any checks to see if we need to create the Export, if
5206  // it exists already.
5207  if (exporter_.is_null ()) {
5208  // Create the Export instance if necessary.
5209  if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
5210  if (params.is_null () || ! params->isSublist ("Export")) {
5211  exporter_ = rcp (new export_type (rowMap_, rangeMap_));
5212  }
5213  else {
5214  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
5215  exporter_ = rcp (new export_type (rowMap_, rangeMap_, exportSublist));
5216  }
5217  }
5218  }
5219  }
5220 
5221 
5222  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5223  std::string
5226  {
5227  std::ostringstream oss;
5229  if (isFillComplete ()) {
5230  oss << "{status = fill complete"
5231  << ", global rows = " << getGlobalNumRows()
5232  << ", global cols = " << getGlobalNumCols()
5233  << ", global num entries = " << getGlobalNumEntries()
5234  << "}";
5235  }
5236  else {
5237  oss << "{status = fill not complete"
5238  << ", global rows = " << getGlobalNumRows()
5239  << "}";
5240  }
5241  return oss.str();
5242  }
5243 
5244 
5245  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5246  void
5248  describe (Teuchos::FancyOStream &out,
5249  const Teuchos::EVerbosityLevel verbLevel) const
5250  {
5251  const char tfecfFuncName[] = "describe()";
5252  using std::endl;
5253  using std::setw;
5254  using Teuchos::VERB_DEFAULT;
5255  using Teuchos::VERB_NONE;
5256  using Teuchos::VERB_LOW;
5257  using Teuchos::VERB_MEDIUM;
5258  using Teuchos::VERB_HIGH;
5259  using Teuchos::VERB_EXTREME;
5260  Teuchos::EVerbosityLevel vl = verbLevel;
5261  if (vl == VERB_DEFAULT) vl = VERB_LOW;
5262  RCP<const Comm<int> > comm = this->getComm();
5263  const int myImageID = comm->getRank(),
5264  numImages = comm->getSize();
5265  size_t width = 1;
5266  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
5267  ++width;
5268  }
5269  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
5270  Teuchos::OSTab tab (out);
5271  // none: print nothing
5272  // low: print O(1) info from node 0
5273  // medium: print O(P) info, num entries per node
5274  // high: print O(N) info, num entries per row
5275  // extreme: print O(NNZ) info: print graph indices
5276  //
5277  // for medium and higher, print constituent objects at specified verbLevel
5278  if (vl != VERB_NONE) {
5279  if (myImageID == 0) out << this->description() << std::endl;
5280  // O(1) globals, minus what was already printed by description()
5281  if (isFillComplete() && myImageID == 0) {
5282  out << "Global number of diagonals = " << globalNumDiags_ << std::endl;
5283  out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
5284  }
5285  // constituent objects
5286  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
5287  if (myImageID == 0) out << "\nRow map: " << std::endl;
5288  rowMap_->describe(out,vl);
5289  if (colMap_ != null) {
5290  if (myImageID == 0) out << "\nColumn map: " << std::endl;
5291  colMap_->describe(out,vl);
5292  }
5293  if (domainMap_ != null) {
5294  if (myImageID == 0) out << "\nDomain map: " << std::endl;
5295  domainMap_->describe(out,vl);
5296  }
5297  if (rangeMap_ != null) {
5298  if (myImageID == 0) out << "\nRange map: " << std::endl;
5299  rangeMap_->describe(out,vl);
5300  }
5301  }
5302  // O(P) data
5303  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
5304  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
5305  if (myImageID == imageCtr) {
5306  out << "Node ID = " << imageCtr << std::endl
5307  << "Node number of entries = " << nodeNumEntries_ << std::endl
5308  << "Node number of diagonals = " << nodeNumDiags_ << std::endl
5309  << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
5310  if (indicesAreAllocated()) {
5311  out << "Node number of allocated entries = " << nodeNumAllocated_ << std::endl;
5312  }
5313  else {
5314  out << "Indices are not allocated." << std::endl;
5315  }
5316  }
5317  comm->barrier();
5318  comm->barrier();
5319  comm->barrier();
5320  }
5321  }
5322  // O(N) and O(NNZ) data
5323  if (vl == VERB_HIGH || vl == VERB_EXTREME) {
5324  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5325  ! hasRowInfo (), std::runtime_error, ": reduce verbosity level; "
5326  "graph row information was deleted at fillComplete().");
5327  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
5328  if (myImageID == imageCtr) {
5329  out << std::setw(width) << "Node ID"
5330  << std::setw(width) << "Global Row"
5331  << std::setw(width) << "Num Entries";
5332  if (vl == VERB_EXTREME) {
5333  out << " Entries";
5334  }
5335  out << std::endl;
5336  const LocalOrdinal lclNumRows =
5337  static_cast<LocalOrdinal> (this->getNodeNumRows ());
5338  for (LocalOrdinal r=0; r < lclNumRows; ++r) {
5339  const RowInfo rowinfo = this->getRowInfo (r);
5340  GlobalOrdinal gid = rowMap_->getGlobalElement(r);
5341  out << std::setw(width) << myImageID
5342  << std::setw(width) << gid
5343  << std::setw(width) << rowinfo.numEntries;
5344  if (vl == VERB_EXTREME) {
5345  out << " ";
5346  if (isGloballyIndexed()) {
5347  ArrayView<const GlobalOrdinal> rowview = getGlobalView(rowinfo);
5348  for (size_t j=0; j < rowinfo.numEntries; ++j) out << rowview[j] << " ";
5349  }
5350  else if (isLocallyIndexed()) {
5351  ArrayView<const LocalOrdinal> rowview = getLocalView(rowinfo);
5352  for (size_t j=0; j < rowinfo.numEntries; ++j) out << colMap_->getGlobalElement(rowview[j]) << " ";
5353  }
5354  }
5355  out << std::endl;
5356  }
5357  }
5358  comm->barrier();
5359  comm->barrier();
5360  comm->barrier();
5361  }
5362  }
5363  }
5364  }
5365 
5366 
5367  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5368  bool
5371  {
5372  (void) source; // forestall "unused variable" compiler warnings
5373 
5374  // It's not clear what kind of compatibility checks on sizes can
5375  // be performed here. Epetra_CrsGraph doesn't check any sizes for
5376  // compatibility.
5377  return true;
5378  }
5379 
5380 
5381  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5382  void
5385  size_t numSameIDs,
5386  const Teuchos::ArrayView<const LocalOrdinal> &permuteToLIDs,
5387  const Teuchos::ArrayView<const LocalOrdinal> &permuteFromLIDs)
5388  {
5389  using Teuchos::Array;
5390  using Teuchos::ArrayView;
5391  typedef LocalOrdinal LO;
5392  typedef GlobalOrdinal GO;
5393  const char tfecfFuncName[] = "copyAndPermute";
5395  typedef RowGraph<LO, GO, node_type> row_graph_type;
5396 
5397  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5398  permuteToLIDs.size() != permuteFromLIDs.size(), std::runtime_error,
5399  ": permuteToLIDs and permuteFromLIDs must have the same size.");
5400  // Make sure that the source object has the right type. We only
5401  // actually need it to be a RowGraph, with matching first three
5402  // template parameters. If it's a CrsGraph, we can use view mode
5403  // instead of copy mode to get each row's data.
5404  //
5405  // FIXME (mfh 07 Jul 2013) It should not be necessary for any of
5406  // the template parameters but GO to match. GO has to match
5407  // because the graph has to send indices as global ordinals, if
5408  // the source and target graphs do not have the same column Map.
5409  // If LO doesn't match, the graphs could communicate using global
5410  // indices. It could be possible that Node affects the graph's
5411  // storage format, but packAndPrepare should assume a common
5412  // communication format in any case.
5413  const row_graph_type* srcRowGraph = dynamic_cast<const row_graph_type*> (&source);
5414  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5415  srcRowGraph == NULL, std::invalid_argument,
5416  ": The source object must be a RowGraph with matching first three "
5417  "template parameters.");
5418 
5419  // If the source object is actually a CrsGraph, we can use view
5420  // mode instead of copy mode to access the entries in each row,
5421  // if the graph is not fill complete.
5422  const this_type* srcCrsGraph = dynamic_cast<const this_type*> (&source);
5423 
5424  const map_type& srcRowMap = * (srcRowGraph->getRowMap ());
5425  const map_type& tgtRowMap = * (this->getRowMap ());
5426  const bool src_filled = srcRowGraph->isFillComplete ();
5427  Array<GO> row_copy;
5428  LO myid = 0;
5429 
5430  //
5431  // "Copy" part of "copy and permute."
5432  //
5433  if (src_filled || srcCrsGraph == NULL) {
5434  // If the source graph is fill complete, we can't use view mode,
5435  // because the data might be stored in a different format not
5436  // compatible with the expectations of view mode. Also, if the
5437  // source graph is not a CrsGraph, we can't use view mode,
5438  // because RowGraph only provides copy mode access to the data.
5439  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5440  const GO gid = srcRowMap.getGlobalElement (myid);
5441  size_t row_length = srcRowGraph->getNumEntriesInGlobalRow (gid);
5442  row_copy.resize (row_length);
5443  size_t check_row_length = 0;
5444  srcRowGraph->getGlobalRowCopy (gid, row_copy (), check_row_length);
5445  this->insertGlobalIndices (gid, row_copy ());
5446  }
5447  } else {
5448  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5449  const GO gid = srcRowMap.getGlobalElement (myid);
5450  ArrayView<const GO> row;
5451  srcCrsGraph->getGlobalRowView (gid, row);
5452  this->insertGlobalIndices (gid, row);
5453  }
5454  }
5455 
5456  //
5457  // "Permute" part of "copy and permute."
5458  //
5459  if (src_filled || srcCrsGraph == NULL) {
5460  for (LO i = 0; i < permuteToLIDs.size (); ++i) {
5461  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs[i]);
5462  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs[i]);
5463  size_t row_length = srcRowGraph->getNumEntriesInGlobalRow (srcgid);
5464  row_copy.resize (row_length);
5465  size_t check_row_length = 0;
5466  srcRowGraph->getGlobalRowCopy (srcgid, row_copy (), check_row_length);
5467  this->insertGlobalIndices (mygid, row_copy ());
5468  }
5469  } else {
5470  for (LO i = 0; i < permuteToLIDs.size (); ++i) {
5471  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs[i]);
5472  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs[i]);
5473  ArrayView<const GO> row;
5474  srcCrsGraph->getGlobalRowView (srcgid, row);
5475  this->insertGlobalIndices (mygid, row);
5476  }
5477  }
5478  }
5479 
5480 
5481  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5482  void
5484  packAndPrepare (const SrcDistObject& source,
5485  const Teuchos::ArrayView<const LocalOrdinal> &exportLIDs,
5486  Teuchos::Array<GlobalOrdinal> &exports,
5487  const Teuchos::ArrayView<size_t> & numPacketsPerLID,
5488  size_t& constantNumPackets,
5489  Distributor &distor)
5490  {
5491  using Teuchos::Array;
5492  const char tfecfFuncName[] = "packAndPrepare";
5493  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5494  exportLIDs.size() != numPacketsPerLID.size(), std::runtime_error,
5495  ": exportLIDs and numPacketsPerLID must have the same size.");
5496  typedef RowGraph<LocalOrdinal, GlobalOrdinal, node_type> row_graph_type;
5497  const row_graph_type& srcGraph = dynamic_cast<const row_graph_type&> (source);
5498 
5499  // We don't check whether src_graph has had fillComplete called,
5500  // because it doesn't matter whether the *source* graph has been
5501  // fillComplete'd. The target graph can not be fillComplete'd yet.
5502  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5503  this->isFillComplete (), std::runtime_error,
5504  ": The target graph of an Import or Export must not be fill complete.");
5505 
5506  srcGraph.pack (exportLIDs, exports, numPacketsPerLID, constantNumPackets, distor);
5507  }
5508 
5509 
5510  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5511  void
5513  pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5514  Teuchos::Array<GlobalOrdinal>& exports,
5515  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5516  size_t& constantNumPackets,
5517  Distributor& distor) const
5518  {
5519  using Teuchos::Array;
5520  typedef LocalOrdinal LO;
5521  typedef GlobalOrdinal GO;
5522  const char tfecfFuncName[] = "pack";
5523  (void) distor; // forestall "unused argument" compiler warning
5524 
5525  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5526  exportLIDs.size() != numPacketsPerLID.size(), std::runtime_error,
5527  ": exportLIDs and numPacketsPerLID must have the same size.");
5528 
5529  const map_type& srcMap = * (this->getMap ());
5530  constantNumPackets = 0;
5531 
5532  // Set numPacketsPerLID[i] to the number of entries owned by the
5533  // calling process in (local) row exportLIDs[i] of the graph, that
5534  // the caller wants us to send out. Compute the total number of
5535  // packets (that is, entries) owned by this process in all the
5536  // rows that the caller wants us to send out.
5537  size_t totalNumPackets = 0;
5538  Array<GO> row;
5539  for (LO i = 0; i < exportLIDs.size (); ++i) {
5540  const GO GID = srcMap.getGlobalElement (exportLIDs[i]);
5541  size_t row_length = this->getNumEntriesInGlobalRow (GID);
5542  numPacketsPerLID[i] = row_length;
5543  totalNumPackets += row_length;
5544  }
5545 
5546  exports.resize (totalNumPackets);
5547 
5548  // Loop again over the rows to export, and pack rows of indices
5549  // into the output buffer.
5550  size_t exportsOffset = 0;
5551  for (LO i = 0; i < exportLIDs.size (); ++i) {
5552  const GO GID = srcMap.getGlobalElement (exportLIDs[i]);
5553  size_t row_length = this->getNumEntriesInGlobalRow (GID);
5554  row.resize (row_length);
5555  size_t check_row_length = 0;
5556  this->getGlobalRowCopy (GID, row (), check_row_length);
5557  typename Array<GO>::const_iterator row_iter = row.begin();
5558  typename Array<GO>::const_iterator row_end = row.end();
5559  size_t j = 0;
5560  for (; row_iter != row_end; ++row_iter, ++j) {
5561  exports[exportsOffset+j] = *row_iter;
5562  }
5563  exportsOffset += row.size();
5564  }
5565  }
5566 
5567 
5568  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5569  void
5571  unpackAndCombine (const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
5572  const Teuchos::ArrayView<const GlobalOrdinal> &imports,
5573  const Teuchos::ArrayView<size_t> &numPacketsPerLID,
5574  size_t constantNumPackets,
5575  Distributor& /* distor */,
5576  CombineMode /* CM */)
5577  {
5578  using Teuchos::ArrayView;
5579  typedef LocalOrdinal LO;
5580  typedef GlobalOrdinal GO;
5581 
5582  // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
5583  // reasonable meaning, whether or not the matrix is fill complete.
5584  // It's just more work to implement.
5585 
5586  // We are not checking the value of the CombineMode input
5587  // argument. For CrsGraph, we only support import/export
5588  // operations if fillComplete has not yet been called. Any
5589  // incoming column-indices are inserted into the target graph. In
5590  // this context, CombineMode values of ADD vs INSERT are
5591  // equivalent. What is the meaning of REPLACE for CrsGraph? If a
5592  // duplicate column-index is inserted, it will be compressed out
5593  // when fillComplete is called.
5594  //
5595  // Note: I think REPLACE means that an existing row is replaced by
5596  // the imported row, i.e., the existing indices are cleared. CGB,
5597  // 6/17/2010
5598 
5599  const char tfecfFuncName[] = "unpackAndCombine: ";
5600  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5601  importLIDs.size() != numPacketsPerLID.size(), std::runtime_error,
5602  "importLIDs and numPacketsPerLID must have the same size.");
5603  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5604  isFillComplete (), std::runtime_error,
5605  "Import or Export operations are not allowed on the destination "
5606  "CrsGraph if it is fill complete.");
5607  size_t importsOffset = 0;
5608 
5609  typedef typename ArrayView<const LO>::const_iterator iter_type;
5610  iter_type impLIDiter = importLIDs.begin();
5611  iter_type impLIDend = importLIDs.end();
5612 
5613  for (size_t i = 0; impLIDiter != impLIDend; ++impLIDiter, ++i) {
5614  LO row_length = numPacketsPerLID[i];
5615 
5616  const GO* const row_raw = (row_length == 0) ? NULL : &imports[importsOffset];
5617  ArrayView<const GlobalOrdinal> row (row_raw, row_length);
5618  insertGlobalIndicesFiltered (this->getMap ()->getGlobalElement (*impLIDiter), row);
5619  importsOffset += row_length;
5620  }
5621  }
5622 
5623 
5624  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5625  void
5627  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
5628  {
5629  using Teuchos::Comm;
5630  using Teuchos::null;
5631  using Teuchos::ParameterList;
5632  using Teuchos::RCP;
5633 
5634  // We'll set all the state "transactionally," so that this method
5635  // satisfies the strong exception guarantee. This object's state
5636  // won't be modified until the end of this method.
5637  RCP<const map_type> rowMap, domainMap, rangeMap, colMap;
5638  RCP<import_type> importer;
5639  RCP<export_type> exporter;
5640 
5641  rowMap = newMap;
5642  RCP<const Comm<int> > newComm =
5643  (newMap.is_null ()) ? null : newMap->getComm ();
5644 
5645  if (! domainMap_.is_null ()) {
5646  if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
5647  // Common case: original domain and row Maps are identical.
5648  // In that case, we need only replace the original domain Map
5649  // with the new Map. This ensures that the new domain and row
5650  // Maps _stay_ identical.
5651  domainMap = newMap;
5652  } else {
5653  domainMap = domainMap_->replaceCommWithSubset (newComm);
5654  }
5655  }
5656  if (! rangeMap_.is_null ()) {
5657  if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
5658  // Common case: original range and row Maps are identical. In
5659  // that case, we need only replace the original range Map with
5660  // the new Map. This ensures that the new range and row Maps
5661  // _stay_ identical.
5662  rangeMap = newMap;
5663  } else {
5664  rangeMap = rangeMap_->replaceCommWithSubset (newComm);
5665  }
5666  }
5667  if (! colMap.is_null ()) {
5668  colMap = colMap_->replaceCommWithSubset (newComm);
5669  }
5670 
5671  // (Re)create the Export and / or Import if necessary.
5672  if (! newComm.is_null ()) {
5673  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
5674  //
5675  // The operations below are collective on the new communicator.
5676  //
5677  // (Re)create the Export object if necessary. If I haven't
5678  // called fillComplete yet, I don't have a rangeMap, so I must
5679  // first check if the _original_ rangeMap is not null. Ditto
5680  // for the Import object and the domain Map.
5681  if (! rangeMap_.is_null () &&
5682  rangeMap != rowMap &&
5683  ! rangeMap->isSameAs (*rowMap)) {
5684  if (params.is_null () || ! params->isSublist ("Export")) {
5685  exporter = rcp (new export_type (rowMap, rangeMap));
5686  }
5687  else {
5688  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
5689  exporter = rcp (new export_type (rowMap, rangeMap, exportSublist));
5690  }
5691  }
5692  // (Re)create the Import object if necessary.
5693  if (! domainMap_.is_null () &&
5694  domainMap != colMap &&
5695  ! domainMap->isSameAs (*colMap)) {
5696  if (params.is_null () || ! params->isSublist ("Import")) {
5697  importer = rcp (new import_type (domainMap, colMap));
5698  } else {
5699  RCP<ParameterList> importSublist = sublist (params, "Import", true);
5700  importer = rcp (new import_type (domainMap, colMap, importSublist));
5701  }
5702  }
5703  } // if newComm is not null
5704 
5705  // Defer side effects until the end. If no destructors throw
5706  // exceptions (they shouldn't anyway), then this method satisfies
5707  // the strong exception guarantee.
5708  exporter_ = exporter;
5709  importer_ = importer;
5710  rowMap_ = rowMap;
5711  // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
5712  // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
5713  // the same object. We might want to get rid of this redundant
5714  // pointer sometime, but for now, we'll leave it alone and just
5715  // set map_ to the same object.
5716  this->map_ = rowMap;
5717  domainMap_ = domainMap;
5718  rangeMap_ = rangeMap;
5719  colMap_ = colMap;
5720  }
5721 
5722  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5723  bool
5725  hasRowInfo () const
5726  {
5727  if (indicesAreAllocated () &&
5728  getProfileType () == StaticProfile &&
5729  k_rowPtrs_.dimension_0 () == 0) {
5730  return false;
5731  } else {
5732  return true;
5733  }
5734  }
5735 
5736 } // namespace Tpetra
5737 
5738 //
5739 // Explicit instantiation macro
5740 //
5741 // Must be expanded from within the Tpetra namespace!
5742 //
5743 #define TPETRA_CRSGRAPH_GRAPH_INSTANT(LO,GO,NODE) template class CrsGraph< LO , GO , NODE >;
5744 #define TPETRA_CRSGRAPH_SORTROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE) template void CrsGraph< LO , GO , NODE >::sortRowIndicesAndValues< S >(const RowInfo, const Teuchos::ArrayView< S >& );
5745 #define TPETRA_CRSGRAPH_MERGEROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE) template void CrsGraph< LO , GO , NODE >::mergeRowIndicesAndValues< S >(RowInfo, const ArrayView< S >& );
5746 #define TPETRA_CRSGRAPH_ALLOCATEVALUES1D_INSTANT(S,LO,GO,NODE)
5747 #define TPETRA_CRSGRAPH_ALLOCATEVALUES2D_INSTANT(S,LO,GO,NODE) template ArrayRCP< Array< S > > CrsGraph< LO , GO , NODE >::allocateValues2D< S >() const;
5748 
5749 #define TPETRA_CRSGRAPH_INSTANT(S,LO,GO,NODE) \
5750  TPETRA_CRSGRAPH_SORTROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE) \
5751  TPETRA_CRSGRAPH_MERGEROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE) \
5752  TPETRA_CRSGRAPH_ALLOCATEVALUES1D_INSTANT(S,LO,GO,NODE) \
5753  TPETRA_CRSGRAPH_ALLOCATEVALUES2D_INSTANT(S,LO,GO,NODE) \
5754  template void CrsGraph< LO , GO , NODE >::insertIndicesAndValues< S > (const RowInfo&, const SLocalGlobalViews&, const Teuchos::ArrayView< S >&, const Teuchos::ArrayView<const S >&, const ELocalGlobal, const ELocalGlobal);
5755 
5756 #endif // TPETRA_CRSGRAPH_DEF_HPP
Teuchos::ArrayRCP< const LocalOrdinal > getNodePackedIndices() const
Get an Teuchos::ArrayRCP of the packed column-indices.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
bool isGloballyIndexed() const
If graph indices are in the global range, this function returns true. Otherwise, this function return...
Teuchos::RCP< const Comm< int > > getComm() const
Returns the communicator.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
bool haveGlobalConstants_
Whether all processes have computed global constants.
virtual void copyAndPermute(const SrcDistObject &source, size_t numSameIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteToLIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteFromLIDs)
Perform copies and permutations that are local to this process.
size_t findLocalIndex(const RowInfo &rowinfo, const LocalOrdinal ind, const size_t hint=0) const
Find the column offset corresponding to the given (local) column index.
virtual std::string description() const
One-line descriptiion of this object.
std::string description() const
Return a simple one-line description of this object.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a graph that already has data, via setAllIndices().
Teuchos::RCP< const map_type > getRowMap() const
Returns the Map that describes the row distribution in this graph.
void insertLocalIndicesFiltered(const LocalOrdinal localRow, const Teuchos::ArrayView< const LocalOrdinal > &indices)
Like insertLocalIndices(), but with column Map filtering.
bool isFillActive() const
Returns true if resumeFill() has been called and the graph is in edit mode.
Teuchos::ArrayView< LocalOrdinal > getLocalViewNonConst(const RowInfo rowinfo)
Get a nonconst, nonowned, locally indexed view of the locally owned row myRow, such that rowinfo = ge...
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes owning zero rows from the Maps and their communicator.
void insertGlobalIndices(GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &indices)
Insert global indices into the graph.
Teuchos::RCP< const map_type > getRangeMap() const
Returns the Map associated with the domain of this graph.
bool isNodeGlobalElement(GlobalOrdinal globalIndex) const
Whether the given global index is owned by this Map on the calling process.
bool indicesAreSorted_
Whether the graph&#39;s indices are sorted in each row, on this process.
size_t getNodeNumCols() const
Returns the number of columns connected to the locally owned rows of this graph.
bool hasRowInfo() const
Whether it is correct to call getRowInfo().
void removeLocalIndices(LocalOrdinal localRow)
Remove all graph indices from the specified local row.
bool noRedundancies_
Whether the graph&#39;s indices are non-redundant (merged) in each row, on this process.
local_graph_type getLocalGraph() const
Get the local graph.
bool sortGhostsAssociatedWithEachProcessor_
Whether to require makeColMap() (and therefore fillComplete()) to order column Map GIDs associated wi...
Node::device_type device_type
This class&#39; Kokkos device type.
bool hasColMap() const
Whether the graph has a column Map.
Teuchos::RCP< const map_type > rangeMap_
The Map describing the range of the (matrix corresponding to the) graph.
ProfileType pftype_
Whether the graph was allocated with static or dynamic profile.
void getGlobalRowCopy(GlobalOrdinal GlobalRow, const Teuchos::ArrayView< GlobalOrdinal > &Indices, size_t &NumIndices) const
Get a copy of the given row, using global indices.
void mergeRowIndices(RowInfo rowinfo)
Merge duplicate row indices in the given row.
size_t getGlobalMaxNumRowEntries() const
Maximum number of entries in all rows over all processes.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
global_size_t getGlobalNumRows() const
Returns the number of global rows in the graph.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
t_numRowEntries_ k_numRowEntries_
The number of local entries in each locally owned row.
void setAllIndices(const typename local_graph_type::row_map_type &rowPointers, const typename local_graph_type::entries_type::non_const_type &columnIndices)
Set the graph&#39;s data directly, using 1-D storage.
ProfileType getProfileType() const
Returns true if the graph was allocated with static data structures.
local_graph_type::entries_type::non_const_type k_lclInds1D_
Local column indices for all rows.
void mergeRowIndicesAndValues(RowInfo rowinfo, const Teuchos::ArrayView< Scalar > &rowValues)
Merge duplicate row indices in the given row, along with their corresponding values.
Teuchos::ArrayRCP< Teuchos::Array< GlobalOrdinal > > gblInds2D_
Global column indices for all rows.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Teuchos::RCP< const import_type > getImporter() const
Returns the importer associated with this graph.
void checkInternalState() const
Throw an exception if the internal state is not consistent.
bool isStorageOptimized() const
Returns true if storage has been optimized.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Signal that data entry is complete, specifying domain and range maps.
bool upperTriangular_
Whether the graph is locally upper triangular.
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &params)
Set the given list of parameters (must be nonnull).
void deep_copy(const LittleBlock< ST2, LO > &dst, const LittleBlock< ST1, LO > &src, typename std::enable_if< std::is_convertible< ST1, ST2 >::value &&!std::is_const< ST2 >::value, int >::type *=NULL)
Copy the LittleBlock src into the LittleBlock dst.
void insertGlobalIndicesFiltered(const GlobalOrdinal localRow, const Teuchos::ArrayView< const GlobalOrdinal > &indices)
Like insertGlobalIndices(), but with column Map filtering.
Teuchos::RCP< const map_type > domainMap_
The Map describing the domain of the (matrix corresponding to the) graph.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
size_t getNodeMaxNumRowEntries() const
Maximum number of entries in all rows owned by the calling process.
bool isNodeLocalElement(LocalOrdinal localIndex) const
Whether the given local index is valid for this Map on the calling process.
bool isFillComplete() const
Returns true if fillComplete() has been called and the graph is in compute mode.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the graph&#39;s current column Map with the given Map.
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
size_t findGlobalIndex(const RowInfo &rowinfo, const GlobalOrdinal ind, const size_t hint=0) const
Find the column offset corresponding to the given (global) column index.
size_t getNodeAllocationSize() const
Returns the total number of indices allocated for the graph, across all rows on this node...
Implementation details of Tpetra.
global_size_t getGlobalNumEntries() const
Returns the global number of entries in the graph.
size_t numAllocForAllRows_
The maximum number of entries to allow in each locally owned row.
size_t global_size_t
Global size_t object.
Kokkos::StaticCrsGraph< LocalOrdinal, Kokkos::LayoutLeft, execution_space > local_graph_type
The type of the part of the sparse graph on each MPI process.
bool isLocallyIndexed() const
If graph indices are in the local range, this function returns true. Otherwise, this function returns...
Teuchos::RCP< const map_type > getDomainMap() const
Returns the Map associated with the domain of this graph.
bool haveLocalConstants_
Whether this process has computed local constants.
bool isMerged() const
Whether duplicate column indices in each row have been merged.
t_GlobalOrdinal_1D k_gblInds1D_
Global column indices for all rows.
Teuchos::RCP< node_type > getNode() const
Returns the underlying node.
GlobalOrdinal getIndexBase() const
Returns the index base for global indices for this graph.
local_graph_type lclGraph_
Local graph; only initialized after first fillComplete() call.
void globalAssemble()
Communicate non-local contributions to other processes.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void sortRowIndicesAndValues(const RowInfo rowinfo, const Teuchos::ArrayView< Scalar > &values)
Sort the column indices and their values in the given row.
void insertIndicesAndValues(const RowInfo &rowInfo, const SLocalGlobalViews &newInds, const Teuchos::ArrayView< Scalar > &oldRowVals, const Teuchos::ArrayView< const Scalar > &newRowVals, const ELocalGlobal lg, const ELocalGlobal I)
Insert indices and their values into the given row.
TPETRA_DEPRECATED local_graph_type getLocalGraph_Kokkos() const
Get the local graph (DEPRECATED: call getLocalGraph() instead).
bool isUpperTriangular() const
Whether the graph is locally upper triangular.
Sets up and executes a communication plan for a Tpetra DistObject.
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
CombineMode
Rule for combining data in an Import or Export.
Teuchos::RCP< const map_type > map_
The Map over which this object is distributed.
void makeColMap()
Make the graph&#39;s column Map, if it does not already have one.
void getLocalRowCopy(LocalOrdinal LocalRow, const Teuchos::ArrayView< LocalOrdinal > &indices, size_t &NumIndices) const
Get a copy of the given row, using local indices.
Kokkos::DualView< const size_t *, Kokkos::LayoutLeft, execution_space > k_numAllocPerRow_
The maximum number of entries to allow in each locally owned row, per row.
global_size_t getGlobalNumCols() const
Returns the number of global columns in the graph.
Abstract base class for objects that can be the source of an Import or Export operation.
Teuchos::RCP< const import_type > importer_
The Import from the domain Map to the column Map.
Node node_type
This class&#39; Kokkos Node type.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
Teuchos::RCP< const map_type > getColMap() const
Returns the Map that describes the column distribution in this graph.
size_t insertIndices(const RowInfo &rowInfo, const SLocalGlobalViews &newInds, const ELocalGlobal lg, const ELocalGlobal I)
Insert indices into the given row.
Teuchos::ArrayView< GlobalOrdinal > getGlobalViewNonConst(const RowInfo rowinfo)
Get a nonconst, nonowned, globally indexed view of the locally owned row myRow, such that rowinfo = g...
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
size_t getNodeNumEntries() const
Returns the local number of entries in the graph.
virtual void pack(const Teuchos::ArrayView< const LocalOrdinal > &exportLIDs, Teuchos::Array< GlobalOrdinal > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets, Distributor &distor) const
Pack this object&#39;s data for Import or Export.
Teuchos::ArrayRCP< Teuchos::Array< LocalOrdinal > > lclInds2D_
Local column indices for all rows.
global_size_t getGlobalNumDiags() const
Returns the number of global diagonal entries, based on global row/column index comparisons.
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
size_t getNumAllocatedEntriesInGlobalRow(GlobalOrdinal globalRow) const
Returns the current number of allocated entries for this node in the specified global row ...
Describes a parallel distribution of objects over processes.
void setDomainRangeMaps(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap)
size_t getNumAllocatedEntriesInLocalRow(LocalOrdinal localRow) const
Returns the current number of allocated entries on this node in the specified local row...
size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const
Returns the current number of entries on this node in the specified local row.
Details::EStorageStatus storageStatus_
Status of the graph&#39;s storage, when not in a fill-complete state.
size_t getNodeNumDiags() const
Returns the number of local diagonal entries, based on global row/column index comparisons.
device_type::execution_space execution_space
This class&#39; Kokkos execution space.
local_graph_type::row_map_type::const_type k_rowPtrs_
Row offsets for "1-D" storage.
size_t getNodeNumRows() const
Returns the number of graph rows owned on the calling node.
void getGlobalRowView(GlobalOrdinal GlobalRow, Teuchos::ArrayView< const GlobalOrdinal > &Indices) const
Extract a const, non-persisting view of global indices in a specified row of the graph.
std::map< GlobalOrdinal, std::vector< GlobalOrdinal > > nonlocals_
Nonlocal data given to insertGlobalValues or sumIntoGlobalValues.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, const Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given parameters.
void mergeAllIndices()
Merge duplicate row indices in all of the rows.
Teuchos::ArrayView< const LocalOrdinal > getLocalView(const RowInfo rowinfo) const
Get a const, nonowned, locally indexed view of the locally owned row myRow, such that rowinfo = getRo...
void setLocallyModified()
Report that we made a local modification to its structure.
virtual ~CrsGraph()
Destructor.
Kokkos::View< GlobalOrdinal *, execution_space > t_GlobalOrdinal_1D
Type of the k_gblInds1D_ array of global column indices.
Teuchos::RCP< const ParameterList > getValidParameters() const
Default parameter list suitable for validation.
RowInfo getRowInfoFromGlobalRowIndex(const GlobalOrdinal gblRow) const
Get information about the locally owned row with global index gblRow.
void sortAllIndices()
Sort the column indices in all the rows.
bool isLowerTriangular() const
Whether the graph is locally lower triangular.
void sortRowIndices(const RowInfo rowinfo)
Sort the column indices in the given row.
void insertLocalIndices(const LocalOrdinal localRow, const Teuchos::ArrayView< const LocalOrdinal > &indices)
Insert local indices into the graph.
Teuchos::RCP< const export_type > exporter_
The Export from the row Map to the range Map.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object to the given output stream with given verbosity level.
Teuchos::ArrayView< const GlobalOrdinal > getGlobalView(const RowInfo rowinfo) const
Get a const, nonowned, globally indexed view of the locally owned row myRow, such that rowinfo = getR...
bool lowerTriangular_
Whether the graph is locally lower triangular.
Teuchos::RCP< const export_type > getExporter() const
Returns the exporter associated with this graph.
void getNumEntriesPerLocalRowUpperBound(Teuchos::ArrayRCP< const size_t > &boundPerLocalRow, size_t &boundForAllLocalRows, bool &boundSameForAllLocalRows) const
Get an upper bound on the number of entries that can be stored in each row.
virtual bool checkSizes(const SrcDistObject &source)
Compare the source and target (this) objects for compatibility.
Teuchos::ArrayRCP< const size_t > getNodeRowPtrs() const
Get a host view of the row offsets.
void getLocalRowView(LocalOrdinal LocalRow, Teuchos::ArrayView< const LocalOrdinal > &indices) const
Extract a const, non-persisting view of local indices in a specified row of the graph.
RowInfo getRowInfo(const LocalOrdinal myRow) const
Get information about the locally owned row with local index myRow.
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const
Returns the current number of entries on this node in the specified global row.