Tpetra parallel linear algebra  Version of the Day
Tpetra_DistObjectKA_def.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_DISTOBJECT_DEF_HPP
43 #define TPETRA_DISTOBJECT_DEF_HPP
44 
45 #if TPETRA_USE_KOKKOS_DISTOBJECT
46 
47 #include "Tpetra_ConfigDefs.hpp"
48 #include "Tpetra_Map.hpp"
49 #include "Tpetra_Import.hpp"
50 #include "Tpetra_Export.hpp"
51 #include "Tpetra_Distributor.hpp"
52 #include "Tpetra_DistObjectKA_decl.hpp"
53 
54 namespace Tpetra {
55  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
56  DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
57  DistObjectKA (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map)
58  : map_ (map)
59  {
60 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
61  using Teuchos::RCP;
62  using Teuchos::Time;
63  using Teuchos::TimeMonitor;
64 
65  RCP<Time> doXferTimer =
66  TimeMonitor::lookupCounter ("Tpetra::DistObject::doTransfer");
67  if (doXferTimer.is_null ()) {
68  doXferTimer =
69  TimeMonitor::getNewCounter ("Tpetra::DistObject::doTransfer");
70  }
71  doXferTimer_ = doXferTimer;
72 
73  RCP<Time> copyAndPermuteTimer =
74  TimeMonitor::lookupCounter ("Tpetra::DistObject::copyAndPermute");
75  if (copyAndPermuteTimer.is_null ()) {
76  copyAndPermuteTimer =
77  TimeMonitor::getNewCounter ("Tpetra::DistObject::copyAndPermute");
78  }
79  copyAndPermuteTimer_ = copyAndPermuteTimer;
80 
81  RCP<Time> packAndPrepareTimer =
82  TimeMonitor::lookupCounter ("Tpetra::DistObject::packAndPrepare");
83  if (packAndPrepareTimer.is_null ()) {
84  packAndPrepareTimer =
85  TimeMonitor::getNewCounter ("Tpetra::DistObject::packAndPrepare");
86  }
87  packAndPrepareTimer_ = packAndPrepareTimer;
88 
89  RCP<Time> doPostsAndWaitsTimer =
90  TimeMonitor::lookupCounter ("Tpetra::DistObject::doPostsAndWaits");
91  if (doPostsAndWaitsTimer.is_null ()) {
92  doPostsAndWaitsTimer =
93  TimeMonitor::getNewCounter ("Tpetra::DistObject::doPostsAndWaits");
94  }
95  doPostsAndWaitsTimer_ = doPostsAndWaitsTimer;
96 
97  RCP<Time> unpackAndCombineTimer =
98  TimeMonitor::lookupCounter ("Tpetra::DistObject::unpackAndCombine");
99  if (unpackAndCombineTimer.is_null ()) {
100  unpackAndCombineTimer =
101  TimeMonitor::getNewCounter ("Tpetra::DistObject::unpackAndCombine");
102  }
103  unpackAndCombineTimer_ = unpackAndCombineTimer;
104 #endif // HAVE_TPETRA_TRANSFER_TIMERS
105  }
106 
107  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
108  DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
109  DistObjectKA (const DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>& rhs)
110  : map_ (rhs.map_)
111  {}
112 
113  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
114  DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::~DistObjectKA()
115  {}
116 
117  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
118  std::string
119  DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::description () const
120  {
121  using Teuchos::TypeNameTraits;
122 
123  std::ostringstream os;
124  os << "Tpetra::DistObject<"
125  << TypeNameTraits<Packet>::name ()
126  << ", " << TypeNameTraits<LocalOrdinal>::name ()
127  << ", " << TypeNameTraits<GlobalOrdinal>::name ()
128  << ", " << TypeNameTraits<Node>::name ()
129  << ">";
130  return os.str ();
131  }
132 
133  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
134  void
135  DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
136  describe (Teuchos::FancyOStream &out,
137  const Teuchos::EVerbosityLevel verbLevel) const
138  {
139  using Teuchos::rcpFromRef;
140  using std::endl;
141 
142  const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ?
143  Teuchos::VERB_LOW : verbLevel;
144 
145  if (vl != Teuchos::VERB_NONE) {
146  out << this->description () << endl;
147  Teuchos::OSTab tab (rcpFromRef (out));
148  out << "Export buffer size (in packets): " << exports_.size() << endl
149  << "Import buffer size (in packets): " << imports_.size() << endl
150  << "Map over which this object is distributed:" << endl;
151  map_->describe (out, vl);
152  }
153  }
154 
155  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
156  void
158  removeEmptyProcessesInPlace (const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >& newMap)
159  {
160  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
161  "Tpetra::DistObject::removeEmptyProcessesInPlace: Not implemented");
162  }
163 
164  template<class DistObjectType>
165  void
166  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input,
167  const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
168  typename DistObjectType::global_ordinal_type,
169  typename DistObjectType::node_type> >& newMap)
170  {
171  input->removeEmptyProcessesInPlace (newMap);
172  if (newMap.is_null ()) { // my process is excluded
173  input = Teuchos::null;
174  }
175  }
176 
177  template<class DistObjectType>
178  void
179  removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
180  {
181  using Teuchos::RCP;
182  typedef typename DistObjectType::local_ordinal_type LO;
183  typedef typename DistObjectType::global_ordinal_type GO;
184  typedef typename DistObjectType::node_type NT;
185  typedef Map<LO, GO, NT> map_type;
186 
187  RCP<const map_type> newMap = input->getMap ()->removeEmptyProcesses ();
188  removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
189  }
190 
191  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
192  void
193  DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
194  doImport (const SrcDistObject& source,
195  const Import<LocalOrdinal,GlobalOrdinal,Node>& importer,
196  CombineMode CM)
197  {
198  TEUCHOS_TEST_FOR_EXCEPTION(*getMap() != *importer.getTargetMap(),
199  std::invalid_argument, "doImport: The target DistObject's Map is not "
200  "identical to the Import's target Map.");
201 #ifdef HAVE_TPETRA_DEBUG
202  {
203  typedef DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node> this_type;
204  const this_type* srcDistObj = dynamic_cast<const this_type*> (&source);
205  TEUCHOS_TEST_FOR_EXCEPTION(
206  srcDistObj != NULL && * (srcDistObj->getMap ()) != *importer.getSourceMap(),
207  std::invalid_argument, "doImport: The source is a DistObject, yet its "
208  "Map is not identical to the Import's source Map.");
209  }
210 #endif // HAVE_TPETRA_DEBUG
211  size_t numSameIDs = importer.getNumSameIDs ();
212 
213  typedef Teuchos::ArrayView<const LocalOrdinal> view_type;
214  const view_type exportLIDs = importer.getExportLIDs();
215  const view_type remoteLIDs = importer.getRemoteLIDs();
216  const view_type permuteToLIDs = importer.getPermuteToLIDs();
217  const view_type permuteFromLIDs = importer.getPermuteFromLIDs();
218  this->doTransfer (source, CM, numSameIDs, permuteToLIDs, permuteFromLIDs,
219  remoteLIDs, exportLIDs, importer.getDistributor (),
220  DoForward);
221  }
222 
223  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
224  void
225  DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
226  doExport (const SrcDistObject& source,
227  const Export<LocalOrdinal,GlobalOrdinal,Node>& exporter,
228  CombineMode CM)
229  {
230  TEUCHOS_TEST_FOR_EXCEPTION(
231  *getMap() != *exporter.getTargetMap(), std::invalid_argument,
232  "doExport: The target DistObject's Map is not identical to the Export's "
233  "target Map.");
234 #ifdef HAVE_TPETRA_DEBUG
235  {
236  typedef DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node> this_type;
237  const this_type* srcDistObj = dynamic_cast<const this_type*> (&source);
238  TEUCHOS_TEST_FOR_EXCEPTION(
239  srcDistObj != NULL && * (srcDistObj->getMap ()) != *exporter.getSourceMap(),
240  std::invalid_argument, "doExport: The source is a DistObject, yet its "
241  "Map is not identical to the Export's source Map.");
242  }
243 #endif // HAVE_TPETRA_DEBUG
244  size_t numSameIDs = exporter.getNumSameIDs();
245 
246  typedef ArrayView<const LocalOrdinal> view_type;
247  view_type exportLIDs = exporter.getExportLIDs();
248  view_type remoteLIDs = exporter.getRemoteLIDs();
249  view_type permuteToLIDs = exporter.getPermuteToLIDs();
250  view_type permuteFromLIDs = exporter.getPermuteFromLIDs();
251  doTransfer (source, CM, numSameIDs, permuteToLIDs, permuteFromLIDs, remoteLIDs,
252  exportLIDs, exporter.getDistributor (), DoForward);
253  }
254 
255  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
256  void
257  DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
258  doImport (const SrcDistObject& source,
259  const Export<LocalOrdinal,GlobalOrdinal,Node> & exporter,
260  CombineMode CM)
261  {
262  TEUCHOS_TEST_FOR_EXCEPTION(
263  *getMap() != *exporter.getSourceMap(), std::invalid_argument,
264  "doImport (reverse mode): The target DistObject's Map is not identical "
265  "to the Export's source Map.");
266 #ifdef HAVE_TPETRA_DEBUG
267  {
268  typedef DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node> this_type;
269  const this_type* srcDistObj = dynamic_cast<const this_type*> (&source);
270  TEUCHOS_TEST_FOR_EXCEPTION(
271  srcDistObj != NULL && * (srcDistObj->getMap ()) != *exporter.getTargetMap(),
272  std::invalid_argument,
273  "doImport (reverse mode): The source is a DistObject, yet its "
274  "Map is not identical to the Export's target Map.");
275  }
276 #endif // HAVE_TPETRA_DEBUG
277  size_t numSameIDs = exporter.getNumSameIDs();
278 
279  typedef ArrayView<const LocalOrdinal> view_type;
280  view_type exportLIDs = exporter.getRemoteLIDs();
281  view_type remoteLIDs = exporter.getExportLIDs();
282  view_type permuteToLIDs = exporter.getPermuteFromLIDs();
283  view_type permuteFromLIDs = exporter.getPermuteToLIDs();
284  doTransfer (source, CM, numSameIDs, permuteToLIDs, permuteFromLIDs, remoteLIDs,
285  exportLIDs, exporter.getDistributor (), DoReverse);
286  }
287 
288  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
289  void
290  DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
291  doExport (const SrcDistObject& source,
292  const Import<LocalOrdinal,GlobalOrdinal,Node> & importer,
293  CombineMode CM)
294  {
295  TEUCHOS_TEST_FOR_EXCEPTION(
296  *getMap() != *importer.getSourceMap(), std::invalid_argument,
297  "doExport (reverse mode): The target object's Map "
298  "is not identical to the Import's source Map.");
299 #ifdef HAVE_TPETRA_DEBUG
300  {
301  typedef DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node> this_type;
302  const this_type* srcDistObj = dynamic_cast<const this_type*> (&source);
303  TEUCHOS_TEST_FOR_EXCEPTION(
304  srcDistObj != NULL && * (srcDistObj->getMap ()) != *importer.getTargetMap(),
305  std::invalid_argument,
306  "doExport (reverse mode): The source is a DistObject, yet its "
307  "Map is not identical to the Import's target Map.");
308  }
309 #endif // HAVE_TPETRA_DEBUG
310  size_t numSameIDs = importer.getNumSameIDs();
311 
312  typedef ArrayView<const LocalOrdinal> view_type;
313  view_type exportLIDs = importer.getRemoteLIDs();
314  view_type remoteLIDs = importer.getExportLIDs();
315  view_type permuteToLIDs = importer.getPermuteFromLIDs();
316  view_type permuteFromLIDs = importer.getPermuteToLIDs();
317  doTransfer (source, CM, numSameIDs, permuteToLIDs, permuteFromLIDs, remoteLIDs,
318  exportLIDs, importer.getDistributor (), DoReverse);
319  }
320 
321  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
322  bool
323  DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::isDistributed() const {
324  return map_->isDistributed ();
325  }
326 
327  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
328  size_t
329  DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
330  constantNumberOfPackets () const {
331  return 0; // default implementation; subclasses may override
332  }
333 
334  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
335  void
336  DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
337  doTransfer (const SrcDistObject& src,
338  CombineMode CM,
339  size_t numSameIDs,
340  const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs_,
341  const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs_,
342  const Teuchos::ArrayView<const LocalOrdinal>& remoteLIDs_,
343  const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs_,
344  Distributor &distor,
345  ReverseOption revOp)
346  {
347  using Teuchos::as;
348  using Kokkos::Compat::getArrayView;
349  using Kokkos::Compat::getConstArrayView;
350  using Kokkos::Compat::getKokkosViewDeepCopy;
351  using Kokkos::Compat::create_const_view;
352 
353 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
354  Teuchos::TimeMonitor doXferMon (*doXferTimer_);
355 #endif // HAVE_TPETRA_TRANSFER_TIMERS
356 
357  // Convert arguments to Kokkos::View's (involves deep copy to device)
358  typedef Kokkos::View<const LocalOrdinal*, execution_space> lo_const_view_type;
359  lo_const_view_type permuteToLIDs =
360  getKokkosViewDeepCopy<execution_space> (permuteToLIDs_);
361  lo_const_view_type permuteFromLIDs =
362  getKokkosViewDeepCopy<execution_space> (permuteFromLIDs_);
363  lo_const_view_type remoteLIDs =
364  getKokkosViewDeepCopy<execution_space> (remoteLIDs_);
365  lo_const_view_type exportLIDs =
366  getKokkosViewDeepCopy<execution_space> (exportLIDs_);
367 
368  TEUCHOS_TEST_FOR_EXCEPTION(
369  ! checkSizes (src), std::invalid_argument,
370  "Tpetra::DistObject::doTransfer(): checkSizes() indicates that the "
371  "destination object is not a legal target for redistribution from the "
372  "source object. This probably means that they do not have the same "
373  "dimensions. For example, MultiVectors must have the same number of "
374  "rows and columns.");
375  KokkosClassic::ReadWriteOption rwo = KokkosClassic::ReadWrite;
376  if (CM == INSERT || CM == REPLACE) {
377  const size_t numIDsToWrite = numSameIDs +
378  as<size_t> (permuteToLIDs.size ()) +
379  as<size_t> (remoteLIDs.size ());
380  if (numIDsToWrite == this->getMap ()->getNodeNumElements ()) {
381  // We're overwriting all of our local data in the destination
382  // object, so a write-only view suffices.
383  //
384  // FIXME (mfh 10 Apr 2012) This doesn't make sense for a
385  // CrsMatrix with a dynamic graph. INSERT mode could mean
386  // that we're adding new entries to the object, but we don't
387  // want to get rid of the old ones.
388  rwo = KokkosClassic::WriteOnly;
389  }
390  }
391  // Tell the source to create a read-only view of its data. On a
392  // discrete accelerator such as a GPU, this brings EVERYTHING from
393  // device memory to host memory.
394  //
395  // FIXME (mfh 23 Mar 2012) By passing in the list of GIDs (or
396  // rather, local LIDs to send) and packet counts, createViews()
397  // could create a "sparse view" that only brings in the necessary
398  // data from device to host memory.
399  typedef DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node> this_type;
400  const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
401  if (srcDistObj != NULL) {
402  srcDistObj->createViews ();
403  }
404 
405  // Tell the target to create a view of its data. Depending on
406  // rwo, this could be a write-only view or a read-and-write view.
407  // On a discrete accelerator such as a GPU, a write-only view only
408  // requires a transfer from host to device memory. A
409  // read-and-write view requires a two-way transfer. This has the
410  // same problem as createViews(): it transfers EVERYTHING, not
411  // just the necessary data.
412  //
413  // FIXME (mfh 23 Mar 2012) By passing in the list of GIDs (or
414  // rather, local LIDs into which to receive) and packet counts,
415  // createViewsNonConst() could create a "sparse view" that only
416  // transfers the necessary data.
417  this->createViewsNonConst (rwo);
418 
419  if (numSameIDs + permuteToLIDs.size()) {
420 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
421  Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_);
422 #endif // HAVE_TPETRA_TRANSFER_TIMERS
423  // There is at least one GID to copy or permute.
424  copyAndPermute (src, numSameIDs, permuteToLIDs, permuteFromLIDs);
425  }
426 
427  // The method may return zero even if the implementation actually
428  // does have a constant number of packets per LID. However, if it
429  // returns nonzero, we may use this information to avoid
430  // (re)allocating num{Ex,Im}portPacketsPerLID_. packAndPrepare()
431  // will set this to its final value.
432  //
433  // We only need this if CM != ZERO, but it has to be lifted out of
434  // that scope because there are multiple tests for CM != ZERO.
435  size_t constantNumPackets = this->constantNumberOfPackets ();
436 
437  // We only need to pack communication buffers if the combine mode
438  // is not ZERO. A "ZERO combine mode" means that the results are
439  // the same as if we had received all zeros, and added them to the
440  // existing values. That means we don't need to communicate.
441  if (CM != ZERO) {
442  if (constantNumPackets == 0) {
443  Kokkos::Compat::realloc (numExportPacketsPerLID_, exportLIDs.size ());
444  Kokkos::Compat::realloc (numImportPacketsPerLID_, remoteLIDs.size ());
445  }
446 
447  {
448 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
449  Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
450 #endif // HAVE_TPETRA_TRANSFER_TIMERS
451  // Ask the source to pack data. Also ask it whether there are a
452  // constant number of packets per element (constantNumPackets is
453  // an output argument). If there are, constantNumPackets will
454  // come back nonzero. Otherwise, the source will fill the
455  // numExportPacketsPerLID_ array.
456  packAndPrepare (src, exportLIDs, exports_, numExportPacketsPerLID_,
457  constantNumPackets, distor);
458  }
459  }
460 
461  // We don't need the source's data anymore, so it can let go of
462  // its views. On an accelerator device with a separate memory
463  // space (like a GPU), this frees host memory, since device memory
464  // has the "master" version of the data.
465  if (srcDistObj != NULL) {
466  srcDistObj->releaseViews ();
467  }
468 
469  // We only need to send data if the combine mode is not ZERO.
470  if (CM != ZERO) {
471  if (constantNumPackets != 0) {
472  // There are a constant number of packets per element. We
473  // already know (from the number of "remote" (incoming)
474  // elements) how many incoming elements we expect, so we can
475  // resize the buffer accordingly.
476  const size_t rbufLen = remoteLIDs.size() * constantNumPackets;
477  if (as<size_t> (imports_.size()) != rbufLen) {
478  Kokkos::Compat::realloc (imports_, rbufLen);
479  }
480  }
481 
482  // Create mirror views of [import|export]PacketsPerLID
483  typename Kokkos::View<size_t*,execution_space>::HostMirror host_numExportPacketsPerLID = Kokkos::create_mirror_view (numExportPacketsPerLID_);
484  typename Kokkos::View<size_t*,execution_space>::HostMirror host_numImportPacketsPerLID = Kokkos::create_mirror_view (numImportPacketsPerLID_);
485 
486  // Copy numExportPacketsPerLID to host
487  Kokkos::deep_copy (host_numExportPacketsPerLID, numExportPacketsPerLID_);
488 
489  // Do we need to do communication (via doPostsAndWaits)?
490  bool needCommunication = true;
491  if (revOp == DoReverse && ! isDistributed ()) {
492  needCommunication = false;
493  }
494  // FIXME (mfh 30 Jun 2013): Checking whether the source object
495  // is distributed requires a cast to DistObject. If it's not a
496  // DistObject, then I'm not quite sure what to do. Perhaps it
497  // would be more appropriate for SrcDistObject to have an
498  // isDistributed() method. For now, I'll just assume that we
499  // need to do communication unless the cast succeeds and the
500  // source is not distributed.
501  else if (revOp == DoForward && srcDistObj != NULL &&
502  ! srcDistObj->isDistributed ()) {
503  needCommunication = false;
504  }
505 
506  if (needCommunication) {
507  if (revOp == DoReverse) {
508 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
509  Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
510 #endif // HAVE_TPETRA_TRANSFER_TIMERS
511  if (constantNumPackets == 0) { //variable num-packets-per-LID:
512  distor.doReversePostsAndWaits (create_const_view (host_numExportPacketsPerLID),
513  1,
514  host_numImportPacketsPerLID);
515  size_t totalImportPackets = 0;
516  for (view_size_type i = 0; i < numImportPacketsPerLID_.size(); ++i) {
517  totalImportPackets += host_numImportPacketsPerLID[i];
518  }
519  Kokkos::Compat::realloc (imports_, totalImportPackets);
520  distor.doReversePostsAndWaits (create_const_view (exports_),
521  getArrayView (host_numExportPacketsPerLID),
522  imports_,
523  getArrayView (host_numImportPacketsPerLID));
524  }
525  else {
526  distor.doReversePostsAndWaits (create_const_view (exports_),
527  constantNumPackets,
528  imports_);
529  }
530  }
531  else { // revOp == DoForward
532 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
533  Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
534 #endif // HAVE_TPETRA_TRANSFER_TIMERS
535  if (constantNumPackets == 0) { //variable num-packets-per-LID:
536  distor.doPostsAndWaits (create_const_view (host_numExportPacketsPerLID), 1,
537  host_numImportPacketsPerLID);
538  size_t totalImportPackets = 0;
539  for (view_size_type i = 0; i < numImportPacketsPerLID_.size(); ++i) {
540  totalImportPackets += host_numImportPacketsPerLID[i];
541  }
542  Kokkos::Compat::realloc (imports_, totalImportPackets);
543  distor.doPostsAndWaits (create_const_view (exports_),
544  getArrayView (host_numExportPacketsPerLID),
545  imports_,
546  getArrayView (host_numImportPacketsPerLID));
547  }
548  else {
549  distor.doPostsAndWaits (create_const_view (exports_),
550  constantNumPackets,
551  imports_);
552  }
553  }
554 
555  // Copy numImportPacketsPerLID to device
556  Kokkos::deep_copy (numImportPacketsPerLID_, host_numImportPacketsPerLID);
557 
558  {
559 #ifdef HAVE_TPETRA_TRANSFER_TIMERS
560  Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
561 #endif // HAVE_TPETRA_TRANSFER_TIMERS
562  unpackAndCombine (remoteLIDs, imports_, numImportPacketsPerLID_,
563  constantNumPackets, distor, CM);
564  }
565  }
566  } // if (CM != ZERO)
567 
568  this->releaseViews ();
569  }
570 
571  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
572  void
573  DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::print (std::ostream &os) const
574  {
575  using Teuchos::FancyOStream;
576  using Teuchos::getFancyOStream;
577  using Teuchos::RCP;
578  using Teuchos::rcpFromRef;
579  using std::endl;
580 
581  RCP<FancyOStream> out = getFancyOStream (rcpFromRef (os));
582  this->describe (*out, Teuchos::VERB_DEFAULT);
583  }
584 
585  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
586  void
587  DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::createViews () const
588  {}
589 
590  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
591  void
592  DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
593  createViewsNonConst (KokkosClassic::ReadWriteOption /*rwo*/)
594  {}
595 
596  template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
597  void
598  DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
599  releaseViews () const
600  {}
601 
// Explicit instantiation of DistObjectKA for the given Packet
// (SCALAR), local ordinal, global ordinal, and Node types.
#define TPETRA_DISTOBJECTKA_INSTANT(SCALAR, LO, GO, NODE) \
  template class DistObjectKA< SCALAR , LO , GO , NODE >;

// The "SLGN" stuff above doesn't work for Packet=char, so that case
// gets its own explicit instantiation macro.
#define TPETRA_DISTOBJECTKA_INSTANT_CHAR(LO, GO, NODE) \
  template class DistObjectKA< char , LO , GO , NODE >;
610 
611 
612 } // namespace Tpetra
613 
614 #else
615 
// TPETRA_USE_KOKKOS_DISTOBJECT is off: the instantiation macros
// still exist, but expand to nothing.
#define TPETRA_DISTOBJECTKA_INSTANT(SCALAR, LO, GO, NODE)
#define TPETRA_DISTOBJECTKA_INSTANT_CHAR(LO, GO, NODE)
618 
#endif /* TPETRA_USE_KOKKOS_DISTOBJECT */
620 
621 #endif /* TPETRA_DISTOBJECT_DEF_HPP */
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
void deep_copy(const LittleBlock< ST2, LO > &dst, const LittleBlock< ST1, LO > &src, typename std::enable_if< std::is_convertible< ST1, ST2 >::value &&!std::is_const< ST2 >::value, int >::type *=NULL)
Copy the LittleBlock src into the LittleBlock dst.
Insert new values that don't currently exist.
CombineMode
Rule for combining data in an Import or Export.
Replace existing values with new values.
Replace old values with zero.