42 #ifndef TPETRA_IMPORT_UTIL2_HPP 43 #define TPETRA_IMPORT_UTIL2_HPP 50 #include "Tpetra_ConfigDefs.hpp" 52 #include "Tpetra_Import.hpp" 53 #include "Tpetra_HashTable.hpp" 54 #include "Tpetra_Map.hpp" 56 #include "Tpetra_Distributor.hpp" 57 #include <Teuchos_Array.hpp> 69 template<
class T,
class D>
70 Kokkos::View<T*, D, Kokkos::MemoryUnmanaged>
71 getNonconstView (
const Teuchos::ArrayView<T>& x)
73 typedef Kokkos::View<T*, D, Kokkos::MemoryUnmanaged> view_type;
74 typedef typename view_type::size_type size_type;
75 const size_type numEnt =
static_cast<size_type
> (x.size ());
76 return view_type (x.getRawPtr (), numEnt);
79 template<
class T,
class D>
80 Kokkos::View<const T*, D, Kokkos::MemoryUnmanaged>
81 getConstView (
const Teuchos::ArrayView<const T>& x)
83 typedef Kokkos::View<const T*, D, Kokkos::MemoryUnmanaged> view_type;
84 typedef typename view_type::size_type size_type;
85 const size_type numEnt =
static_cast<size_type
> (x.size ());
86 return view_type (x.getRawPtr (), numEnt);
92 struct GetHostExecSpace {
93 typedef typename Space::execution_space execution_space;
94 typedef typename Kokkos::View<int*, execution_space>::HostMirror::execution_space host_execution_space;
100 namespace Import_Util {
116 template<
typename Scalar,
117 typename LocalOrdinal,
118 typename GlobalOrdinal,
122 const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
123 Teuchos::Array<char>& exports,
124 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
125 size_t& constantNumPackets,
127 const Teuchos::ArrayView<const int>& SourcePids);
144 template<
typename Scalar,
145 typename LocalOrdinal,
146 typename GlobalOrdinal,
150 const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
151 const Teuchos::ArrayView<const char> &imports,
152 const Teuchos::ArrayView<size_t> &numPacketsPerLID,
153 size_t constantNumPackets,
157 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
158 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs);
174 template<
typename Scalar,
175 typename LocalOrdinal,
176 typename GlobalOrdinal,
180 const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
181 const Teuchos::ArrayView<const char>& imports,
182 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
183 size_t constantNumPackets,
187 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
188 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs,
189 size_t TargetNumRows,
190 size_t TargetNumNonzeros,
192 const Teuchos::ArrayView<size_t>& rowPointers,
193 const Teuchos::ArrayView<GlobalOrdinal>& columnIndices,
194 const Teuchos::ArrayView<Scalar>& values,
195 const Teuchos::ArrayView<const int>& SourcePids,
196 Teuchos::Array<int>& TargetPids);
200 template<
typename Scalar,
typename Ordinal>
203 const Teuchos::ArrayView<Ordinal>& CRS_colind,
204 const Teuchos::ArrayView<Scalar>&CRS_vals);
210 template<
typename Scalar,
typename Ordinal>
213 const Teuchos::ArrayView<Ordinal>& CRS_colind,
214 const Teuchos::ArrayView<Scalar>& CRS_vals);
231 template <
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
234 const Teuchos::ArrayView<LocalOrdinal> &columnIndices_LID,
235 const Teuchos::ArrayView<GlobalOrdinal> &columnIndices_GID,
237 const Teuchos::ArrayView<const int> &owningPids,
238 Teuchos::Array<int> &remotePids,
268 template<
class LO,
class GO,
class D>
270 packRowCount (
const size_t numEnt,
271 const size_t numBytesPerValue)
284 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
285 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
286 const size_t pidsLen = numEnt * PackTraits<int, D>::packValueCount (lid);
287 const size_t valsLen = numEnt * numBytesPerValue;
288 return numEntLen + gidsLen + pidsLen + valsLen;
292 template<
class LO,
class D>
296 const size_t numBytes,
297 const size_t numBytesPerValue)
299 using Kokkos::subview;
301 typedef typename PackTraits<LO, D>::input_buffer_type input_buffer_type;
302 typedef typename input_buffer_type::size_type size_type;
306 return static_cast<size_t> (0);
310 const size_t theNumBytes = PackTraits<LO, D>::packValueCount (numEntLO);
311 #ifdef HAVE_TPETRA_DEBUG 312 TEUCHOS_TEST_FOR_EXCEPTION(
313 theNumBytes > numBytes, std::logic_error,
"unpackRowCount: " 314 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
316 #endif // HAVE_TPETRA_DEBUG 317 const std::pair<size_type, size_type> rng (offset, offset + theNumBytes);
318 input_buffer_type inBuf = subview (imports, rng);
319 const size_t actualNumBytes = PackTraits<LO, D>::unpackValue (numEntLO, inBuf);
320 (void)actualNumBytes;
321 #ifdef HAVE_TPETRA_DEBUG 322 TEUCHOS_TEST_FOR_EXCEPTION(
323 theNumBytes > numBytes, std::logic_error,
"unpackRowCount: " 324 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
326 #endif // HAVE_TPETRA_DEBUG 327 return static_cast<size_t> (numEntLO);
332 template<
class ST,
class LO,
class GO,
class D>
340 const size_t numBytesPerValue)
342 using Kokkos::subview;
349 typedef typename PackTraits<LO, D>::output_buffer_type output_buffer_type;
350 typedef typename output_buffer_type::size_type size_type;
351 typedef typename std::pair<size_type, size_type> pair_type;
359 const LO numEntLO =
static_cast<size_t> (numEnt);
362 const size_t numEntBeg = offset;
363 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
364 const size_t gidsBeg = numEntBeg + numEntLen;
365 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
366 const size_t pidsBeg = gidsBeg + gidsLen;
367 const size_t pidsLen = numEnt * PackTraits<int, D>::packValueCount (pid);
368 const size_t valsBeg = pidsBeg + pidsLen;
369 const size_t valsLen = numEnt * numBytesPerValue;
371 output_buffer_type numEntOut =
372 subview (exports, pair_type (numEntBeg, numEntBeg + numEntLen));
373 output_buffer_type gidsOut =
374 subview (exports, pair_type (gidsBeg, gidsBeg + gidsLen));
375 output_buffer_type pidsOut =
376 subview (exports, pair_type (pidsBeg, pidsBeg + pidsLen));
377 output_buffer_type valsOut =
378 subview (exports, pair_type (valsBeg, valsBeg + valsLen));
380 size_t numBytesOut = 0;
381 numBytesOut += PackTraits<LO, D>::packValue (numEntOut, numEntLO);
382 numBytesOut += PackTraits<GO, D>::packArray (gidsOut, gidsIn, numEnt);
383 numBytesOut += PackTraits<int, D>::packArray (pidsOut, pidsIn, numEnt);
384 numBytesOut += PackTraits<ST, D>::packArray (valsOut, valsIn, numEnt);
386 const size_t expectedNumBytes = numEntLen + gidsLen + pidsLen + valsLen;
387 TEUCHOS_TEST_FOR_EXCEPTION(
388 numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: " 389 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = " 390 << expectedNumBytes <<
".");
396 template<
class ST,
class LO,
class GO,
class D>
403 const size_t numBytes,
405 const size_t numBytesPerValue)
407 using Kokkos::subview;
414 typedef typename PackTraits<LO, D>::input_buffer_type input_buffer_type;
415 typedef typename input_buffer_type::size_type size_type;
416 typedef typename std::pair<size_type, size_type> pair_type;
422 TEUCHOS_TEST_FOR_EXCEPTION(
423 static_cast<size_t> (imports.dimension_0 ()) <= offset, std::logic_error,
424 "unpackRow: imports.dimension_0() = " << imports.dimension_0 () <<
425 " <= offset = " << offset <<
".");
426 TEUCHOS_TEST_FOR_EXCEPTION(
427 static_cast<size_t> (imports.dimension_0 ()) < offset + numBytes,
428 std::logic_error,
"unpackRow: imports.dimension_0() = " 429 << imports.dimension_0 () <<
" < offset + numBytes = " 430 << (offset + numBytes) <<
".");
436 const size_t numEntBeg = offset;
437 const size_t numEntLen = PackTraits<LO, D>::packValueCount (lid);
438 const size_t gidsBeg = numEntBeg + numEntLen;
439 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
440 const size_t pidsBeg = gidsBeg + gidsLen;
441 const size_t pidsLen = numEnt * PackTraits<int, D>::packValueCount (pid);
442 const size_t valsBeg = pidsBeg + pidsLen;
443 const size_t valsLen = numEnt * numBytesPerValue;
445 input_buffer_type numEntIn = subview (imports, pair_type (numEntBeg, numEntBeg + numEntLen));
446 input_buffer_type gidsIn = subview (imports, pair_type (gidsBeg, gidsBeg + gidsLen));
447 input_buffer_type pidsIn = subview (imports, pair_type (pidsBeg, pidsBeg + pidsLen));
448 input_buffer_type valsIn = subview (imports, pair_type (valsBeg, valsBeg + valsLen));
450 size_t numBytesOut = 0;
452 numBytesOut += PackTraits<LO, D>::unpackValue (numEntOut, numEntIn);
453 TEUCHOS_TEST_FOR_EXCEPTION(
454 static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
455 "unpackRow: Expected number of entries " << numEnt
456 <<
" != actual number of entries " << numEntOut <<
".");
458 numBytesOut += PackTraits<GO, D>::unpackArray (gidsOut, gidsIn, numEnt);
459 numBytesOut += PackTraits<int, D>::unpackArray (pidsOut, pidsIn, numEnt);
460 numBytesOut += PackTraits<ST, D>::unpackArray (valsOut, valsIn, numEnt);
462 TEUCHOS_TEST_FOR_EXCEPTION(
463 numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = " 464 << numBytesOut <<
" != numBytes = " << numBytes <<
".");
465 const size_t expectedNumBytes = numEntLen + gidsLen + pidsLen + valsLen;
466 TEUCHOS_TEST_FOR_EXCEPTION(
467 numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: " 468 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = " 469 << expectedNumBytes <<
".");
477 namespace Import_Util {
479 template<
typename Scalar,
480 typename LocalOrdinal,
481 typename GlobalOrdinal,
485 const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
486 Teuchos::Array<char>& exports,
487 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
488 size_t& constantNumPackets,
490 const Teuchos::ArrayView<const int>& SourcePids)
493 using Kokkos::MemoryUnmanaged;
494 using Kokkos::subview;
496 using Teuchos::Array;
497 using Teuchos::ArrayView;
500 typedef LocalOrdinal LO;
501 typedef GlobalOrdinal GO;
503 typedef typename matrix_type::impl_scalar_type ST;
504 typedef typename Node::execution_space execution_space;
505 typedef typename GetHostExecSpace<execution_space>::host_execution_space HES;
507 typedef typename ArrayView<const LO>::size_type size_type;
508 typedef std::pair<typename View<int*, HES>::size_type,
509 typename View<int*, HES>::size_type> pair_type;
510 const char prefix[] =
"Tpetra::Import_Util::packAndPrepareWithOwningPIDs: ";
517 TEUCHOS_TEST_FOR_EXCEPTION(
519 prefix <<
"SourceMatrix must be locally indexed.");
520 TEUCHOS_TEST_FOR_EXCEPTION(
521 SourceMatrix.
getColMap ().is_null (), std::logic_error,
522 prefix <<
"The source matrix claims to be locally indexed, but its column " 523 "Map is null. This should never happen. Please report this bug to the " 524 "Tpetra developers.");
525 const size_type numExportLIDs = exportLIDs.size ();
526 TEUCHOS_TEST_FOR_EXCEPTION(
527 numExportLIDs != numPacketsPerLID.size (), std::invalid_argument, prefix
528 <<
"exportLIDs.size() = " << numExportLIDs <<
"!= numPacketsPerLID.size() " 529 <<
" = " << numPacketsPerLID.size () <<
".");
530 TEUCHOS_TEST_FOR_EXCEPTION(
531 static_cast<size_t> (SourcePids.size ()) != SourceMatrix.
getColMap ()->getNodeNumElements (),
532 std::invalid_argument, prefix <<
"SourcePids.size() = " 533 << SourcePids.size ()
534 <<
"!= SourceMatrix.getColMap()->getNodeNumElements() = " 535 << SourceMatrix.
getColMap ()->getNodeNumElements () <<
".");
540 constantNumPackets = 0;
545 size_t totalNumBytes = 0;
546 size_t totalNumEntries = 0;
547 size_t maxRowLength = 0;
548 for (size_type i = 0; i < numExportLIDs; ++i) {
549 const LO lclRow = exportLIDs[i];
554 size_t numBytesPerValue = 0;
560 ArrayView<const Scalar> valsView;
561 ArrayView<const LO> lidsView;
563 const ST* valsViewRaw =
reinterpret_cast<const ST*
> (valsView.getRawPtr ());
564 View<const ST*, HES, MemoryUnmanaged> valsViewK (valsViewRaw, valsView.size ());
565 TEUCHOS_TEST_FOR_EXCEPTION(
566 static_cast<size_t> (valsViewK.dimension_0 ()) != numEnt,
567 std::logic_error, prefix <<
"Local row " << i <<
" claims to have " 568 << numEnt <<
"entry/ies, but the View returned by getLocalRowView() " 569 "has " << valsViewK.dimension_0 () <<
" entry/ies. This should never " 570 "happen. Please report this bug to the Tpetra developers.");
575 numBytesPerValue = PackTraits<ST, HES>::packValueCount (valsViewK(0));
578 const size_t numBytes =
579 packRowCount<LO, GO, HES> (numEnt, numBytesPerValue);
580 numPacketsPerLID[i] = numBytes;
581 totalNumBytes += numBytes;
582 totalNumEntries += numEnt;
583 maxRowLength = std::max (maxRowLength, numEnt);
589 if (totalNumEntries > 0) {
590 exports.resize (totalNumBytes);
591 View<char*, HES, MemoryUnmanaged> exportsK (exports.getRawPtr (),
600 const map_type& colMap = * (SourceMatrix.
getColMap ());
604 View<int*, HES> pids;
608 gids = PackTraits<GO, HES>::allocateArray (gid, maxRowLength,
"gids");
609 pids = PackTraits<int, HES>::allocateArray (pid, maxRowLength,
"pids");
612 for (size_type i = 0; i < numExportLIDs; i++) {
613 const LO lclRow = exportLIDs[i];
616 ArrayView<const Scalar> valsView;
617 ArrayView<const LO> lidsView;
619 const ST* valsViewRaw =
reinterpret_cast<const ST*
> (valsView.getRawPtr ());
620 View<const ST*, HES, MemoryUnmanaged> valsViewK (valsViewRaw, valsView.size ());
621 const size_t numEnt =
static_cast<size_t> (valsViewK.dimension_0 ());
626 const size_t numBytesPerValue = numEnt == 0 ?
627 static_cast<size_t> (0) :
628 PackTraits<ST, HES>::packValueCount (valsViewK(0));
631 View<GO*, HES> gidsView = subview (gids, pair_type (0, numEnt));
632 View<int*, HES> pidsView = subview (pids, pair_type (0, numEnt));
633 for (
size_t k = 0; k < numEnt; ++k) {
634 gidsView(k) = colMap.getGlobalElement (lidsView[k]);
635 pidsView(k) = SourcePids[lidsView[k]];
639 const size_t numBytes =
640 packRow<ST, LO, GO, HES> (exportsK, offset, numEnt,
641 gidsView, pidsView, valsViewK,
647 #ifdef HAVE_TPETRA_DEBUG 648 TEUCHOS_TEST_FOR_EXCEPTION(
649 offset != totalNumBytes, std::logic_error, prefix <<
"At end of method, " 650 "the final offset (in bytes) " << offset <<
" does not equal the total " 651 "number of bytes packed " << totalNumBytes <<
". Please report this bug " 652 "to the Tpetra developers.");
653 #endif // HAVE_TPETRA_DEBUG 657 template<
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
660 const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
661 const Teuchos::ArrayView<const char> &imports,
662 const Teuchos::ArrayView<size_t> &numPacketsPerLID,
663 size_t constantNumPackets,
667 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
668 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs)
670 using Kokkos::MemoryUnmanaged;
672 typedef LocalOrdinal LO;
673 typedef GlobalOrdinal GO;
675 typedef typename matrix_type::impl_scalar_type ST;
676 typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
677 typedef typename Node::execution_space execution_space;
678 typedef typename GetHostExecSpace<execution_space>::host_execution_space HES;
679 const char prefix[] =
"unpackAndCombineWithOwningPIDsCount: ";
681 TEUCHOS_TEST_FOR_EXCEPTION(
682 permuteToLIDs.size () != permuteFromLIDs.size (), std::invalid_argument,
683 prefix <<
"permuteToLIDs.size() = " << permuteToLIDs.size () <<
" != " 684 "permuteFromLIDs.size() = " << permuteFromLIDs.size() <<
".");
688 TEUCHOS_TEST_FOR_EXCEPTION(
689 ! locallyIndexed, std::invalid_argument, prefix <<
"The input CrsMatrix " 690 "'SourceMatrix' must be locally indexed.");
691 TEUCHOS_TEST_FOR_EXCEPTION(
692 importLIDs.size () != numPacketsPerLID.size (), std::invalid_argument,
693 prefix <<
"importLIDs.size() = " << importLIDs.size () <<
" != " 694 "numPacketsPerLID.size() = " << numPacketsPerLID.size () <<
".");
696 View<const char*, HES, MemoryUnmanaged> importsK (imports.getRawPtr (),
703 for (
size_t sourceLID = 0; sourceLID < numSameIDs; ++sourceLID) {
704 const LO srcLID =
static_cast<LO
> (sourceLID);
709 const size_type numPermuteToLIDs = permuteToLIDs.size ();
710 for (size_type p = 0; p < numPermuteToLIDs; ++p) {
716 const size_type numImportLIDs = importLIDs.size ();
717 for (size_type i = 0; i < numImportLIDs; ++i) {
718 const size_t numBytes = numPacketsPerLID[i];
722 const size_t numEnt = unpackRowCount<LO, HES> (importsK, offset,
723 numBytes,
sizeof (ST));
730 template<
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
733 const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
734 const Teuchos::ArrayView<const char>& imports,
735 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
736 const size_t constantNumPackets,
739 const size_t numSameIDs,
740 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
741 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs,
742 size_t TargetNumRows,
743 size_t TargetNumNonzeros,
744 const int MyTargetPID,
745 const Teuchos::ArrayView<size_t>& CSR_rowptr,
746 const Teuchos::ArrayView<GlobalOrdinal>& CSR_colind,
747 const Teuchos::ArrayView<Scalar>& CSR_vals,
748 const Teuchos::ArrayView<const int>& SourcePids,
749 Teuchos::Array<int>& TargetPids)
752 using Kokkos::MemoryUnmanaged;
753 using Kokkos::subview;
755 using Teuchos::ArrayView;
757 using Teuchos::av_reinterpret_cast;
758 using Teuchos::outArg;
759 using Teuchos::REDUCE_MAX;
760 using Teuchos::reduceAll;
761 typedef LocalOrdinal LO;
762 typedef GlobalOrdinal GO;
763 typedef typename Node::execution_space execution_space;
764 typedef typename GetHostExecSpace<execution_space>::host_execution_space HES;
766 typedef typename matrix_type::impl_scalar_type ST;
768 typedef typename ArrayView<const LO>::size_type size_type;
771 const char prefix[] =
"Tpetra::Import_Util::unpackAndCombineIntoCrsArrays: ";
773 const size_t N = TargetNumRows;
774 const size_t mynnz = TargetNumNonzeros;
777 const int MyPID = MyTargetPID;
779 TEUCHOS_TEST_FOR_EXCEPTION(
780 TargetNumRows + 1 != static_cast<size_t> (CSR_rowptr.size ()),
781 std::invalid_argument, prefix <<
"CSR_rowptr.size() = " <<
782 CSR_rowptr.size () <<
"!= TargetNumRows+1 = " << TargetNumRows+1 <<
".");
783 TEUCHOS_TEST_FOR_EXCEPTION(
784 permuteToLIDs.size () != permuteFromLIDs.size (), std::invalid_argument,
785 prefix <<
"permuteToLIDs.size() = " << permuteToLIDs.size ()
786 <<
"!= permuteFromLIDs.size() = " << permuteFromLIDs.size () <<
".");
787 const size_type numImportLIDs = importLIDs.size ();
788 TEUCHOS_TEST_FOR_EXCEPTION(
789 numImportLIDs != numPacketsPerLID.size (), std::invalid_argument,
790 prefix <<
"importLIDs.size() = " << numImportLIDs <<
" != " 791 "numPacketsPerLID.size() = " << numPacketsPerLID.size() <<
".");
794 View<const char*, HES, MemoryUnmanaged> importsK (imports.getRawPtr (),
797 for (
size_t i = 0; i< N+1; ++i) {
802 for (
size_t i = 0; i < numSameIDs; ++i) {
807 size_t numPermuteIDs = permuteToLIDs.size ();
808 for (
size_t i = 0; i < numPermuteIDs; ++i) {
809 CSR_rowptr[permuteToLIDs[i]] =
816 for (size_type k = 0; k < numImportLIDs; ++k) {
817 const size_t numBytes = numPacketsPerLID[k];
821 const size_t numEnt = unpackRowCount<LO, HES> (importsK, offset,
822 numBytes,
sizeof (ST));
823 CSR_rowptr[importLIDs[k]] += numEnt;
830 Teuchos::Array<size_t> NewStartRow (N + 1);
833 size_t last_len = CSR_rowptr[0];
835 for (
size_t i = 1; i < N+1; ++i) {
836 size_t new_len = CSR_rowptr[i];
837 CSR_rowptr[i] = last_len + CSR_rowptr[i-1];
838 NewStartRow[i] = CSR_rowptr[i];
842 TEUCHOS_TEST_FOR_EXCEPTION(
843 CSR_rowptr[N] != mynnz, std::invalid_argument, prefix <<
"CSR_rowptr[last]" 844 " = " << CSR_rowptr[N] <<
"!= mynnz = " << mynnz <<
".");
847 if (static_cast<size_t> (TargetPids.size ()) != mynnz) {
848 TargetPids.resize (mynnz);
850 TargetPids.assign (mynnz, -1);
853 ArrayRCP<const size_t> Source_rowptr_RCP;
854 ArrayRCP<const LO> Source_colind_RCP;
855 ArrayRCP<const Scalar> Source_vals_RCP;
856 SourceMatrix.getAllValues (Source_rowptr_RCP, Source_colind_RCP, Source_vals_RCP);
857 ArrayView<const size_t> Source_rowptr = Source_rowptr_RCP ();
858 ArrayView<const LO> Source_colind = Source_colind_RCP ();
859 ArrayView<const Scalar> Source_vals = Source_vals_RCP ();
861 const map_type& sourceColMap = * (SourceMatrix.
getColMap());
862 ArrayView<const GO> globalColElts = sourceColMap.getNodeElementList();
865 for (
size_t i = 0; i < numSameIDs; ++i) {
866 size_t FromRow = Source_rowptr[i];
867 size_t ToRow = CSR_rowptr[i];
868 NewStartRow[i] += Source_rowptr[i+1] - Source_rowptr[i];
870 for (
size_t j = Source_rowptr[i]; j < Source_rowptr[i+1]; ++j) {
871 CSR_vals[ToRow + j - FromRow] = Source_vals[j];
872 CSR_colind[ToRow + j - FromRow] = globalColElts[Source_colind[j]];
873 TargetPids[ToRow + j - FromRow] =
874 (SourcePids[Source_colind[j]] != MyPID) ?
875 SourcePids[Source_colind[j]] : -1;
879 size_t numBytesPerValue = 0;
880 if (PackTraits<ST, HES>::compileTimeSize) {
882 numBytesPerValue = PackTraits<ST, HES>::packValueCount (val);
897 if (Source_rowptr.size () == 0 || Source_rowptr[Source_rowptr.size () - 1] == 0) {
898 numBytesPerValue = PackTraits<ST, HES>::packValueCount (CSR_vals[0]);
901 numBytesPerValue = PackTraits<ST, HES>::packValueCount (Source_vals[0]);
903 size_t lclNumBytesPerValue = numBytesPerValue;
904 Teuchos::RCP<const Teuchos::Comm<int> > comm = SourceMatrix.
getComm ();
905 reduceAll<int, size_t> (*comm, REDUCE_MAX, lclNumBytesPerValue,
906 outArg (numBytesPerValue));
910 for (
size_t i = 0; i < numPermuteIDs; ++i) {
911 LO FromLID = permuteFromLIDs[i];
912 size_t FromRow = Source_rowptr[FromLID];
913 size_t ToRow = CSR_rowptr[permuteToLIDs[i]];
915 NewStartRow[permuteToLIDs[i]] += Source_rowptr[FromLID+1]-Source_rowptr[FromLID];
917 for (
size_t j = Source_rowptr[FromLID]; j < Source_rowptr[FromLID+1]; ++j) {
918 CSR_vals[ToRow + j - FromRow] = Source_vals[j];
919 CSR_colind[ToRow + j - FromRow] = globalColElts[Source_colind[j]];
920 TargetPids[ToRow + j - FromRow] =
921 (SourcePids[Source_colind[j]] != MyPID) ?
922 SourcePids[Source_colind[j]] : -1;
927 if (imports.size () > 0) {
929 #ifdef HAVE_TPETRA_DEBUG 933 for (
size_t i = 0; i < static_cast<size_t> (numImportLIDs); ++i) {
934 const size_t numBytes = numPacketsPerLID[i];
942 const size_t numEnt = unpackRowCount<LO, HES> (importsK, offset, numBytes,
944 const LO lclRow = importLIDs[i];
945 const size_t StartRow = NewStartRow[lclRow];
946 NewStartRow[lclRow] += numEnt;
948 View<GO*, HES, MemoryUnmanaged> gidsOut =
949 getNonconstView<GO, HES> (CSR_colind (StartRow, numEnt));
950 View<int*, HES, MemoryUnmanaged> pidsOut =
951 getNonconstView<int, HES> (TargetPids (StartRow, numEnt));
952 ArrayView<Scalar> valsOutS = CSR_vals (StartRow, numEnt);
953 View<ST*, HES, MemoryUnmanaged> valsOut =
954 getNonconstView<ST, HES> (av_reinterpret_cast<ST> (valsOutS));
956 const size_t numBytesOut =
957 unpackRow<ST, LO, GO, HES> (gidsOut, pidsOut, valsOut, importsK,
958 offset, numBytes, numEnt, numBytesPerValue);
959 if (numBytesOut != numBytes) {
960 #ifdef HAVE_TPETRA_DEBUG 967 for (
size_t j = 0; j < numEnt; ++j) {
968 const int pid = pidsOut[j];
969 pidsOut[j] = (pid != MyPID) ? pid : -1;
973 #ifdef HAVE_TPETRA_DEBUG 974 TEUCHOS_TEST_FOR_EXCEPTION(
975 offset != static_cast<size_t> (imports.size ()), std::logic_error, prefix
976 <<
"After unpacking and counting all the import packets, the final offset" 977 " in bytes " << offset <<
" != imports.size() = " << imports.size () <<
978 ". Please report this bug to the Tpetra developers.");
979 TEUCHOS_TEST_FOR_EXCEPTION(
980 lclErr != 0, std::logic_error, prefix <<
"numBytes != numBytesOut " 981 "somewhere in unpack loop. This should never happen. " 982 "Please report this bug to the Tpetra developers.");
983 #endif // HAVE_TPETRA_DEBUG 988 template<
typename Scalar,
typename Ordinal>
991 const Teuchos::ArrayView<Ordinal> & CRS_colind,
992 const Teuchos::ArrayView<Scalar> &CRS_vals)
997 size_t NumRows = CRS_rowptr.size()-1;
998 size_t nnz = CRS_colind.size();
1000 for(
size_t i = 0; i < NumRows; i++){
1001 size_t start=CRS_rowptr[i];
1002 if(start >= nnz)
continue;
1004 Scalar* locValues = &CRS_vals[start];
1005 size_t NumEntries = CRS_rowptr[i+1] - start;
1006 Ordinal* locIndices = &CRS_colind[start];
1008 Ordinal n = NumEntries;
1012 Ordinal max = n - m;
1013 for(Ordinal j = 0; j < max; j++) {
1014 for(Ordinal k = j; k >= 0; k-=m) {
1015 if(locIndices[k+m] >= locIndices[k])
1017 Scalar dtemp = locValues[k+m];
1018 locValues[k+m] = locValues[k];
1019 locValues[k] = dtemp;
1020 Ordinal itemp = locIndices[k+m];
1021 locIndices[k+m] = locIndices[k];
1022 locIndices[k] = itemp;
1031 template<
typename Scalar,
typename Ordinal>
1034 const Teuchos::ArrayView<Ordinal> & CRS_colind,
1035 const Teuchos::ArrayView<Scalar> &CRS_vals)
1042 size_t NumRows = CRS_rowptr.size()-1;
1043 size_t nnz = CRS_colind.size();
1044 size_t new_curr=CRS_rowptr[0], old_curr=CRS_rowptr[0];
1046 for(
size_t i = 0; i < NumRows; i++){
1047 size_t start=CRS_rowptr[i];
1048 if(start >= nnz)
continue;
1050 Scalar* locValues = &CRS_vals[start];
1051 size_t NumEntries = CRS_rowptr[i+1] - start;
1052 Ordinal* locIndices = &CRS_colind[start];
1055 Ordinal n = NumEntries;
1059 Ordinal max = n - m;
1060 for(Ordinal j = 0; j < max; j++) {
1061 for(Ordinal k = j; k >= 0; k-=m) {
1062 if(locIndices[k+m] >= locIndices[k])
1064 Scalar dtemp = locValues[k+m];
1065 locValues[k+m] = locValues[k];
1066 locValues[k] = dtemp;
1067 Ordinal itemp = locIndices[k+m];
1068 locIndices[k+m] = locIndices[k];
1069 locIndices[k] = itemp;
1076 for(
size_t j=CRS_rowptr[i]; j < CRS_rowptr[i+1]; j++) {
1077 if(j > CRS_rowptr[i] && CRS_colind[j]==CRS_colind[new_curr-1]) {
1078 CRS_vals[new_curr-1] += CRS_vals[j];
1080 else if(new_curr==j) {
1084 CRS_colind[new_curr] = CRS_colind[j];
1085 CRS_vals[new_curr] = CRS_vals[j];
1090 CRS_rowptr[i] = old_curr;
1094 CRS_rowptr[NumRows] = new_curr;
1097 template <
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
1100 const ArrayView<LocalOrdinal> &colind_LID,
1101 const ArrayView<GlobalOrdinal> &colind_GID,
1103 const Teuchos::ArrayView<const int> &owningPIDs,
1104 Teuchos::Array<int> &remotePIDs,
1108 typedef LocalOrdinal LO;
1109 typedef GlobalOrdinal GO;
1112 const char prefix[] =
"lowCommunicationMakeColMapAndReindex: ";
1116 const map_type& domainMap = *domainMapRCP;
1122 Teuchos::Array<bool> LocalGIDs;
1123 if (numDomainElements > 0) {
1124 LocalGIDs.resize (numDomainElements,
false);
1135 const size_t numMyRows = rowptr.size () - 1;
1136 const int hashsize = std::max (static_cast<int> (numMyRows), 100);
1139 Teuchos::Array<GO> RemoteGIDList;
1140 RemoteGIDList.reserve (hashsize);
1141 Teuchos::Array<int> PIDList;
1142 PIDList.reserve (hashsize);
1153 size_t NumLocalColGIDs = 0;
1154 LO NumRemoteColGIDs = 0;
1155 for (
size_t i = 0; i < numMyRows; ++i) {
1156 for(
size_t j = rowptr[i]; j < rowptr[i+1]; ++j) {
1157 const GO GID = colind_GID[j];
1159 const LO LID = domainMap.getLocalElement (GID);
1161 const bool alreadyFound = LocalGIDs[LID];
1162 if (! alreadyFound) {
1163 LocalGIDs[LID] =
true;
1166 colind_LID[j] = LID;
1169 const LO hash_value = RemoteGIDs.
get (GID);
1170 if (hash_value == -1) {
1171 const int PID = owningPIDs[j];
1172 TEUCHOS_TEST_FOR_EXCEPTION(
1173 PID == -1, std::invalid_argument, prefix <<
"Cannot figure out if " 1175 colind_LID[j] =
static_cast<LO
> (numDomainElements + NumRemoteColGIDs);
1176 RemoteGIDs.
add (GID, NumRemoteColGIDs);
1177 RemoteGIDList.push_back (GID);
1178 PIDList.push_back (PID);
1182 colind_LID[j] =
static_cast<LO
> (numDomainElements + hash_value);
1190 if (domainMap.getComm ()->getSize () == 1) {
1193 TEUCHOS_TEST_FOR_EXCEPTION(
1194 NumRemoteColGIDs != 0, std::runtime_error, prefix <<
"There is only one " 1195 "process in the domain Map's communicator, which means that there are no " 1196 "\"remote\" indices. Nevertheless, some column indices are not in the " 1198 if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
1202 colMap = domainMapRCP;
1209 const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
1210 Teuchos::Array<GO> ColIndices;
1211 GO* RemoteColIndices = NULL;
1212 if (numMyCols > 0) {
1213 ColIndices.resize (numMyCols);
1214 if (NumLocalColGIDs != static_cast<size_t> (numMyCols)) {
1215 RemoteColIndices = &ColIndices[NumLocalColGIDs];
1219 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
1220 RemoteColIndices[i] = RemoteGIDList[i];
1224 Teuchos::Array<LO> RemotePermuteIDs (NumRemoteColGIDs);
1225 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
1226 RemotePermuteIDs[i]=i;
1233 ColIndices.begin () + NumLocalColGIDs,
1234 RemotePermuteIDs.begin ());
1240 remotePIDs = PIDList;
1249 LO StartCurrent = 0, StartNext = 1;
1250 while (StartNext < NumRemoteColGIDs) {
1251 if (PIDList[StartNext]==PIDList[StartNext-1]) {
1255 Tpetra::sort2 (ColIndices.begin () + NumLocalColGIDs + StartCurrent,
1256 ColIndices.begin () + NumLocalColGIDs + StartNext,
1257 RemotePermuteIDs.begin () + StartCurrent);
1258 StartCurrent = StartNext;
1262 Tpetra::sort2 (ColIndices.begin () + NumLocalColGIDs + StartCurrent,
1263 ColIndices.begin () + NumLocalColGIDs + StartNext,
1264 RemotePermuteIDs.begin () + StartCurrent);
1267 Teuchos::Array<LO> ReverseRemotePermuteIDs (NumRemoteColGIDs);
1268 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
1269 ReverseRemotePermuteIDs[RemotePermuteIDs[i]] = i;
1273 bool use_local_permute =
false;
1274 Teuchos::Array<LO> LocalPermuteIDs (numDomainElements);
1286 Teuchos::ArrayView<const GO> domainGlobalElements = domainMap.getNodeElementList();
1287 if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
1288 if (NumLocalColGIDs > 0) {
1290 std::copy (domainGlobalElements.begin (), domainGlobalElements.end (),
1291 ColIndices.begin ());
1295 LO NumLocalAgain = 0;
1296 use_local_permute =
true;
1297 for (
size_t i = 0; i < numDomainElements; ++i) {
1299 LocalPermuteIDs[i] = NumLocalAgain;
1300 ColIndices[NumLocalAgain++] = domainGlobalElements[i];
1303 TEUCHOS_TEST_FOR_EXCEPTION(
1304 static_cast<size_t> (NumLocalAgain) != NumLocalColGIDs,
1305 std::runtime_error, prefix <<
"Local ID count test failed.");
1309 const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid ();
1310 colMap = rcp (
new map_type (minus_one, ColIndices, domainMap.getIndexBase (),
1311 domainMap.getComm (), domainMap.getNode ()));
1314 for (
size_t i = 0; i < numMyRows; ++i) {
1315 for (
size_t j = rowptr[i]; j < rowptr[i+1]; ++j) {
1316 const LO ID = colind_LID[j];
1317 if (static_cast<size_t> (ID) < numDomainElements) {
1318 if (use_local_permute) {
1319 colind_LID[j] = LocalPermuteIDs[colind_LID[j]];
1326 colind_LID[j] = NumLocalColGIDs + ReverseRemotePermuteIDs[colind_LID[j]-numDomainElements];
1338 #include "Tpetra_CrsMatrix.hpp" 1340 #endif // TPETRA_IMPORT_UTIL_HPP void sortAndMergeCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< Ordinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals)
Sort and merge the entries of the (raw CSR) matrix by column index within each row.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
void sort3(const IT1 &first1, const IT1 &last1, const IT2 &first2, const IT3 &first3)
Sort the first array, and apply the same permutation to the second and third arrays.
Traits class for packing / unpacking data of type T, using Kokkos data structures that live in the gi...
Teuchos::RCP< const map_type > getColMap() const
The Map that describes the column distribution in this matrix.
Declaration of the Tpetra::CrsMatrix class.
bool isLocallyIndexed() const
Whether the matrix is locally indexed on the calling process.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Kokkos::View< char *, D, Kokkos::MemoryUnmanaged > output_buffer_type
The type of an output buffer of bytes.
void getLocalRowView(LocalOrdinal LocalRow, Teuchos::ArrayView< const LocalOrdinal > &indices, Teuchos::ArrayView< const Scalar > &values) const
Get a constant, nonpersisting view of a row of this matrix, using local row and column indices...
void sortCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< Ordinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals)
Sort the entries of the (raw CSR) matrix by column index within each row.
size_t global_size_t
Global size_t object.
size_t unpackAndCombineWithOwningPIDsCount(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &SourceMatrix, const Teuchos::ArrayView< const LocalOrdinal > &importLIDs, const Teuchos::ArrayView< const char > &imports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteToLIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteFromLIDs)
Special version of Tpetra::CrsMatrix::unpackAndCombine that also unpacks owning process ranks...
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix is distributed.
void lowCommunicationMakeColMapAndReindex(const Teuchos::ArrayView< const size_t > &rowPointers, const Teuchos::ArrayView< LocalOrdinal > &columnIndices_LID, const Teuchos::ArrayView< GlobalOrdinal > &columnIndices_GID, const Tpetra::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap, const Teuchos::ArrayView< const int > &owningPids, Teuchos::Array< int > &remotePids, Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > &colMap)
lowCommunicationMakeColMapAndReindex
Sets up and executes a communication plan for a Tpetra DistObject.
CombineMode
Rule for combining data in an Import or Export.
Kokkos::View< const value_type *, D, Kokkos::MemoryUnmanaged > input_array_type
The type of an input array of value_type.
Kokkos::View< const char *, D, Kokkos::MemoryUnmanaged > input_buffer_type
The type of an input buffer of bytes.
void unpackAndCombineIntoCrsArrays(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &SourceMatrix, const Teuchos::ArrayView< const LocalOrdinal > &importLIDs, const Teuchos::ArrayView< const char > &imports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteToLIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, int MyTargetPID, const Teuchos::ArrayView< size_t > &rowPointers, const Teuchos::ArrayView< GlobalOrdinal > &columnIndices, const Teuchos::ArrayView< Scalar > &values, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
ValueType get(const KeyType key)
Get the value corresponding to the given key.
Describes a parallel distribution of objects over processes.
void add(const KeyType key, const ValueType value)
Add a key and its value to the hash table.
Kokkos::View< value_type *, D, Kokkos::MemoryUnmanaged > output_array_type
The type of an output array of value_type.
Stand-alone utility functions and macros.
size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const
Returns the current number of entries on this node in the specified local row.
void packAndPrepareWithOwningPIDs(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &SourceMatrix, const Teuchos::ArrayView< const LocalOrdinal > &exportLIDs, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets, Distributor &distor, const Teuchos::ArrayView< const int > &SourcePids)
Special version of Tpetra::CrsMatrix::packAndPrepare that also packs owning process ranks...