42 #ifndef TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP 43 #define TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP 50 #include "Teuchos_TimeMonitor.hpp" 53 namespace Experimental {
55 template<
class Scalar,
class LO,
class GO,
class Node>
57 BlockCrsMatrix<Scalar, LO, GO, Node>::
58 markLocalErrorAndGetStream ()
60 * (this->localError_) =
true;
61 if ((*errs_).is_null ()) {
62 *errs_ = Teuchos::rcp (
new std::ostringstream ());
67 template<
class Scalar,
class LO,
class GO,
class Node>
72 blockSize_ (static_cast<LO> (0)),
77 localError_ (new bool (false)),
78 errs_ (new
Teuchos::RCP<std::ostringstream> ())
82 template<
class Scalar,
class LO,
class GO,
class Node>
89 blockSize_ (blockSize),
94 offsetPerBlock_ (blockSize * blockSize),
95 localError_ (new bool (false)),
96 errs_ (new
Teuchos::RCP<std::ostringstream> ())
98 TEUCHOS_TEST_FOR_EXCEPTION(
99 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::Experimental::" 100 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted " 101 "rows (isSorted() is false). This class assumes sorted rows.");
103 graphRCP_ = Teuchos::rcpFromRef(graph_);
109 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
110 TEUCHOS_TEST_FOR_EXCEPTION(
111 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::Experimental::" 112 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
113 " <= 0. The block size must be positive.");
119 typedef typename crs_graph_type::local_graph_type::row_map_type row_map_type;
120 typedef typename row_map_type::HostMirror::non_const_type nc_host_row_map_type;
125 nc_host_row_map_type ptr_h_nc = Kokkos::create_mirror_view (ptr_d);
131 Kokkos::resize (valView_,
134 val_ = valView_.ptr_on_device ();
137 template<
class Scalar,
class LO,
class GO,
class Node>
142 const LO blockSize) :
146 domainPointMap_ (domainPointMap),
147 rangePointMap_ (rangePointMap),
148 blockSize_ (blockSize),
152 offsetPerBlock_ (blockSize * blockSize),
153 localError_ (new bool (false)),
154 errs_ (new
Teuchos::RCP<std::ostringstream> ())
156 TEUCHOS_TEST_FOR_EXCEPTION(
157 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::Experimental::" 158 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted " 159 "rows (isSorted() is false). This class assumes sorted rows.");
161 graphRCP_ = Teuchos::rcpFromRef(graph_);
167 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
168 TEUCHOS_TEST_FOR_EXCEPTION(
169 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::Experimental::" 170 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
171 " <= 0. The block size must be positive.");
174 typedef typename crs_graph_type::local_graph_type::row_map_type row_map_type;
175 typedef typename row_map_type::HostMirror::non_const_type nc_host_row_map_type;
180 nc_host_row_map_type ptr_h_nc = Kokkos::create_mirror_view (ptr_d);
186 Kokkos::resize (valView_,
189 val_ = valView_.ptr_on_device ();
192 template<
class Scalar,
class LO,
class GO,
class Node>
193 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
198 return Teuchos::rcp (
new map_type (domainPointMap_));
201 template<
class Scalar,
class LO,
class GO,
class Node>
202 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
207 return Teuchos::rcp (
new map_type (rangePointMap_));
210 template<
class Scalar,
class LO,
class GO,
class Node>
211 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
218 template<
class Scalar,
class LO,
class GO,
class Node>
219 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
226 template<
class Scalar,
class LO,
class GO,
class Node>
234 template<
class Scalar,
class LO,
class GO,
class Node>
242 template<
class Scalar,
class LO,
class GO,
class Node>
250 template<
class Scalar,
class LO,
class GO,
class Node>
255 Teuchos::ETransp mode,
260 TEUCHOS_TEST_FOR_EXCEPTION(
261 mode != Teuchos::NO_TRANS && mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS,
262 std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::apply: " 263 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, " 264 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
273 catch (std::invalid_argument& e) {
274 TEUCHOS_TEST_FOR_EXCEPTION(
275 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 276 "apply: Either the input MultiVector X or the output MultiVector Y " 277 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's " 278 "graph. BlockMultiVector's constructor threw the following exception: " 287 const_cast<this_type*
> (
this)->
applyBlock (X_view, Y_view, mode, alpha, beta);
288 }
catch (std::invalid_argument& e) {
289 TEUCHOS_TEST_FOR_EXCEPTION(
290 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 291 "apply: The implementation method applyBlock complained about having " 292 "an invalid argument. It reported the following: " << e.what ());
293 }
catch (std::logic_error& e) {
294 TEUCHOS_TEST_FOR_EXCEPTION(
295 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 296 "apply: The implementation method applyBlock complained about a " 297 "possible bug in its implementation. It reported the following: " 299 }
catch (std::exception& e) {
300 TEUCHOS_TEST_FOR_EXCEPTION(
301 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 302 "apply: The implementation method applyBlock threw an exception which " 303 "is neither std::invalid_argument nor std::logic_error, but is a " 304 "subclass of std::exception. It reported the following: " 307 TEUCHOS_TEST_FOR_EXCEPTION(
308 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 309 "apply: The implementation method applyBlock threw an exception which " 310 "is not an instance of a subclass of std::exception. This probably " 311 "indicates a bug. Please report this to the Tpetra developers.");
315 template<
class Scalar,
class LO,
class GO,
class Node>
320 Teuchos::ETransp mode,
324 TEUCHOS_TEST_FOR_EXCEPTION(
326 "Tpetra::Experimental::BlockCrsMatrix::applyBlock: " 327 "X and Y have different block sizes. X.getBlockSize() = " 331 if (mode == Teuchos::NO_TRANS) {
332 applyBlockNoTrans (X, Y, alpha, beta);
333 }
else if (mode == Teuchos::TRANS || mode == Teuchos::CONJ_TRANS) {
334 applyBlockTrans (X, Y, mode, alpha, beta);
336 TEUCHOS_TEST_FOR_EXCEPTION(
337 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 338 "applyBlock: Invalid 'mode' argument. Valid values are " 339 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
343 template<
class Scalar,
class LO,
class GO,
class Node>
349 for (LO lclRow = 0; lclRow < numLocalMeshRows; ++lclRow) {
350 const size_t meshBeg = ptr_[lclRow];
351 const size_t meshEnd = ptr_[lclRow+1];
352 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
354 deep_copy (A_cur, static_cast<impl_scalar_type> (alpha));
359 template<
class Scalar,
class LO,
class GO,
class Node>
365 const LO numColInds)
const 372 return static_cast<LO
> (0);
376 const size_t absRowBlockOffset = this->ptr_[localRowInd];
377 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
378 const LO perBlockSize = this->offsetPerBlock ();
383 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
384 const LO relBlockOffset =
385 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
386 if (relBlockOffset != LINV) {
395 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
403 for (LO i = 0; i < perBlockSize; ++i) {
406 hint = relBlockOffset + 1;
413 template <
class Scalar,
class LO,
class GO,
class Node>
418 const char tfecfFuncName[] =
419 "Tpetra::BlockCrsMatrix::getLocalDiagOffsets: ";
420 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
421 (! graph_.
hasColMap (), std::runtime_error,
422 "The matrix's graph does not yet have a column Map.");
429 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
431 "The matrix's graph is globally indexed.");
437 if (static_cast<LO> (offsets.size ()) != lclNumRows) {
438 offsets.resize (lclNumRows);
441 #ifdef HAVE_TPETRA_DEBUG 442 bool allRowMapDiagEntriesInColMap =
true;
443 bool allDiagEntriesFound =
true;
444 bool allOffsetsCorrect =
true;
445 bool noOtherWeirdness =
true;
446 std::vector<std::pair<LO, size_t> > wrongOffsets;
448 #endif // HAVE_TPETRA_DEBUG 450 for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
451 const GO gblRowInd = rowMap.getGlobalElement (lclRowInd);
452 const GO gblColInd = gblRowInd;
455 if (lclColInd == Teuchos::OrdinalTraits<LO>::invalid ()) {
456 #ifdef HAVE_TPETRA_DEBUG 457 allRowMapDiagEntriesInColMap =
false;
458 #endif // HAVE_TPETRA_DEBUG 459 offsets[lclRowInd] = Teuchos::OrdinalTraits<size_t>::invalid ();
463 if (static_cast<LO> (rowInfo.localRow) == lclRowInd &&
464 rowInfo.numEntries > 0) {
466 offsets[lclRowInd] = offset;
468 #ifdef HAVE_TPETRA_DEBUG 473 const LO* lclColInds = NULL;
474 Scalar* lclVals = NULL;
476 LO err = this->
getLocalRowView (lclRowInd, lclColInds, lclVals, numEnt);
478 noOtherWeirdness =
false;
481 if (offset >= static_cast<size_t> (numEnt)) {
484 allOffsetsCorrect =
false;
485 wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
487 const LO actualLclColInd = lclColInds[offset];
489 if (actualGblColInd != gblColInd) {
490 allOffsetsCorrect =
false;
491 wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
495 #endif // HAVE_TPETRA_DEBUG 498 offsets[lclRowInd] = Teuchos::OrdinalTraits<size_t>::invalid ();
499 #ifdef HAVE_TPETRA_DEBUG 500 allDiagEntriesFound =
false;
501 #endif // HAVE_TPETRA_DEBUG 506 #ifdef HAVE_TPETRA_DEBUG 507 if (wrongOffsets.size () != 0) {
508 std::ostringstream os;
509 os <<
"Proc " << this->
getComm ()->getRank () <<
": Wrong offsets: [";
510 for (
size_t k = 0; k < wrongOffsets.size (); ++k) {
511 os <<
"(" << wrongOffsets[k].first <<
"," 512 << wrongOffsets[k].second <<
")";
513 if (k + 1 < wrongOffsets.size ()) {
517 os <<
"]" << std::endl;
518 std::cerr << os.str ();
520 #endif // HAVE_TPETRA_DEBUG 522 #ifdef HAVE_TPETRA_DEBUG 523 using Teuchos::reduceAll;
525 const bool localSuccess =
526 allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect;
527 const int numResults = 5;
529 lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
530 lclResults[1] = allDiagEntriesFound ? 1 : 0;
531 lclResults[2] = allOffsetsCorrect ? 1 : 0;
532 lclResults[3] = noOtherWeirdness ? 1 : 0;
544 reduceAll<int, int> (* (this->
getComm ()), Teuchos::REDUCE_MIN,
545 numResults, lclResults, gblResults);
547 if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1
548 || gblResults[3] != 1) {
549 std::ostringstream os;
550 os <<
"Issue(s) that we noticed (on Process " << gblResults[4] <<
", " 551 "possibly among others): " << endl;
552 if (gblResults[0] == 0) {
553 os <<
" - The column Map does not contain at least one diagonal entry " 554 "of the matrix." << endl;
556 if (gblResults[1] == 0) {
557 os <<
" - On one or more processes, some row does not contain a " 558 "diagonal entry." << endl;
560 if (gblResults[2] == 0) {
561 os <<
" - On one or more processes, some offsets are incorrect." 564 if (gblResults[3] == 0) {
565 os <<
" - One or more processes had some other error." 568 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error, os.str());
570 #endif // HAVE_TPETRA_DEBUG 573 template <
class Scalar,
class LO,
class GO,
class Node>
579 Kokkos::MemoryUnmanaged>& factoredDiagonal,
581 Kokkos::MemoryUnmanaged>& factorizationPivots,
587 const LO numLocalMeshRows =
596 Teuchos::Array<impl_scalar_type> localMem (blockSize);
597 Teuchos::Array<impl_scalar_type> localMat (blockSize*blockSize);
601 LO rowBegin = 0, rowEnd = 0, rowStride = 0;
602 if (direction == Forward) {
604 rowEnd = numLocalMeshRows+1;
607 else if (direction == Backward) {
608 rowBegin = numLocalMeshRows;
612 else if (direction == Symmetric) {
613 this->
localGaussSeidel (B, X, factoredDiagonal, factorizationPivots, omega, Forward);
614 this->
localGaussSeidel (B, X, factoredDiagonal, factorizationPivots, omega, Backward);
618 const Scalar one_minus_omega = Teuchos::ScalarTraits<Scalar>::one()-omega;
619 const Scalar minus_omega = -omega;
622 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) {
623 const LO actlRow = lclRow - 1;
629 const size_t meshBeg = ptr_[actlRow];
630 const size_t meshEnd = ptr_[actlRow+1];
631 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
632 const LO meshCol = ind_[absBlkOff];
634 getConstLocalBlockFromAbsOffset (absBlkOff);
639 const Scalar alpha = meshCol == actlRow ? one_minus_omega : minus_omega;
641 GEMV (alpha, A_cur, X_cur, X_lcl);
648 auto D_lcl = Kokkos::subview (factoredDiagonal, actlRow, ALL (), ALL ());
649 auto ipiv = Kokkos::subview (factorizationPivots, actlRow, ALL ());
651 GETRS (
"N", D_lcl, ipiv, X_lcl, info);
654 COPY (X_lcl, X_update);
658 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) {
659 for (LO j = 0; j < numVecs; ++j) {
660 LO actlRow = lclRow-1;
666 const size_t meshBeg = ptr_[actlRow];
667 const size_t meshEnd = ptr_[actlRow+1];
668 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
669 const LO meshCol = ind_[absBlkOff];
671 getConstLocalBlockFromAbsOffset (absBlkOff);
676 const Scalar alpha = meshCol == actlRow ? one_minus_omega : minus_omega;
678 GEMV (alpha, A_cur, X_cur, X_lcl);
685 auto D_lcl = Kokkos::subview (factoredDiagonal, actlRow, ALL (), ALL ());
686 auto ipiv = Kokkos::subview (factorizationPivots, actlRow, ALL ());
688 GETRS (
"N", D_lcl, ipiv, X_lcl, info);
691 COPY (X_lcl, X_update);
697 template <
class Scalar,
class LO,
class GO,
class Node>
703 const Scalar& dampingFactor,
706 const bool zeroInitialGuess)
const 710 TEUCHOS_TEST_FOR_EXCEPTION(
711 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 712 "gaussSeidelCopy: Not implemented.");
715 template <
class Scalar,
class LO,
class GO,
class Node>
721 const Teuchos::ArrayView<LO>& rowIndices,
722 const Scalar& dampingFactor,
725 const bool zeroInitialGuess)
const 729 TEUCHOS_TEST_FOR_EXCEPTION(
730 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 731 "reorderedGaussSeidelCopy: Not implemented.");
734 template <
class Scalar,
class LO,
class GO,
class Node>
738 const Teuchos::ArrayView<const size_t>& offsets)
const 740 using Teuchos::ArrayRCP;
741 using Teuchos::ArrayView;
742 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
745 const LO* columnIndices;
748 Teuchos::Array<LO> cols(1);
751 Teuchos::Array<Scalar> zeroMat (blockSize_*blockSize_, ZERO);
752 for (
size_t i = 0; i < myNumRows; ++i) {
754 if (offsets[i] == Teuchos::OrdinalTraits<size_t>::invalid ()) {
759 diag.
replaceLocalValues (i, cols.getRawPtr(), &vals[offsets[i]*blockSize_*blockSize_], 1);
765 template <
class Scalar,
class LO,
class GO,
class Node>
769 Kokkos::MemoryUnmanaged>& diag,
770 const Teuchos::ArrayView<const size_t>& offsets)
const 773 using Kokkos::parallel_for;
775 Kokkos::MemoryUnmanaged>::HostMirror::execution_space host_exec_space;
779 TEUCHOS_TEST_FOR_EXCEPTION
780 (static_cast<LO> (diag.dimension_0 ()) < lclNumMeshRows ||
781 static_cast<LO> (diag.dimension_1 ()) < blockSize ||
782 static_cast<LO> (diag.dimension_2 ()) < blockSize,
783 std::invalid_argument,
"Tpetra::BlockCrsMatrix::getLocalDiagCopy: " 784 "The input Kokkos::View is not big enough to hold all the data.");
785 TEUCHOS_TEST_FOR_EXCEPTION
786 (static_cast<LO> (offsets.size ()) < lclNumMeshRows,
787 std::invalid_argument,
"Tpetra::BlockCrsMatrix::getLocalDiagCopy: " 788 "offsets.size() = " << offsets.size () <<
" < local number of diagonal " 789 "blocks " << lclNumMeshRows <<
".");
793 typedef Kokkos::RangePolicy<host_exec_space, LO> policy_type;
794 parallel_for (policy_type (0, lclNumMeshRows), [=] (
const LO& lclMeshRow) {
795 auto D_in = this->getConstLocalBlockFromRelOffset (lclMeshRow, offsets[lclMeshRow]);
796 auto D_out = Kokkos::subview (diag, lclMeshRow, ALL (), ALL ());
802 template<
class Scalar,
class LO,
class GO,
class Node>
808 const LO numColInds)
const 815 return static_cast<LO
> (0);
819 const size_t absRowBlockOffset = ptr_[localRowInd];
820 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
821 const LO perBlockSize = this->offsetPerBlock ();
826 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
827 const LO relBlockOffset =
828 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
829 if (relBlockOffset != LINV) {
830 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
832 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
834 getConstLocalBlockFromInput (vIn, pointOffset);
836 Impl::absMax (A_old, A_new);
837 hint = relBlockOffset + 1;
845 template<
class Scalar,
class LO,
class GO,
class Node>
851 const LO numColInds)
const 858 return static_cast<LO
> (0);
863 const size_t absRowBlockOffset = this->ptr_[localRowInd];
864 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
865 const LO perBlockSize = this->offsetPerBlock ();
870 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
871 const LO relBlockOffset =
872 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
873 if (relBlockOffset != LINV) {
882 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
890 for (LO i = 0; i < perBlockSize; ++i) {
891 A_old[i] += A_new[i];
893 hint = relBlockOffset + 1;
900 template<
class Scalar,
class LO,
class GO,
class Node>
912 return Teuchos::OrdinalTraits<LO>::invalid ();
915 const size_t absBlockOffsetStart = ptr_[localRowInd];
916 colInds = ind_ + absBlockOffsetStart;
918 impl_scalar_type*
const vOut = val_ + absBlockOffsetStart * offsetPerBlock ();
919 vals =
reinterpret_cast<Scalar*
> (vOut);
921 numInds = ptr_[localRowInd + 1] - absBlockOffsetStart;
926 template<
class Scalar,
class LO,
class GO,
class Node>
930 const Teuchos::ArrayView<LO>& Indices,
931 const Teuchos::ArrayView<Scalar>& Values,
932 size_t &NumEntries)
const 938 if (numInds > Indices.size() || numInds*blockSize_*blockSize_ > Values.size()) {
939 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
940 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold " 941 << numInds <<
" row entries");
943 for (LO i=0; i<numInds; ++i) {
944 Indices[i] = colInds[i];
946 for (LO i=0; i<numInds*blockSize_*blockSize_; ++i) {
949 NumEntries = numInds;
952 template<
class Scalar,
class LO,
class GO,
class Node>
958 const LO numColInds)
const 965 return static_cast<LO
> (0);
968 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
972 for (LO k = 0; k < numColInds; ++k) {
973 const LO relBlockOffset =
974 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
975 if (relBlockOffset != LINV) {
976 offsets[k] =
static_cast<ptrdiff_t
> (relBlockOffset);
977 hint = relBlockOffset + 1;
985 template<
class Scalar,
class LO,
class GO,
class Node>
989 const ptrdiff_t offsets[],
991 const LO numOffsets)
const 998 return static_cast<LO
> (0);
1002 const size_t absRowBlockOffset = ptr_[localRowInd];
1003 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1004 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1005 size_t pointOffset = 0;
1008 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1009 const size_t relBlockOffset = offsets[k];
1010 if (relBlockOffset != STINV) {
1011 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1013 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1015 getConstLocalBlockFromInput (vIn, pointOffset);
1016 COPY (A_new, A_old);
1024 template<
class Scalar,
class LO,
class GO,
class Node>
1028 const ptrdiff_t offsets[],
1029 const Scalar vals[],
1030 const LO numOffsets)
const 1037 return static_cast<LO
> (0);
1041 const size_t absRowBlockOffset = ptr_[localRowInd];
1042 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1043 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1044 size_t pointOffset = 0;
1047 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1048 const size_t relBlockOffset = offsets[k];
1049 if (relBlockOffset != STINV) {
1050 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1052 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1054 getConstLocalBlockFromInput (vIn, pointOffset);
1055 Impl::absMax (A_old, A_new);
1063 template<
class Scalar,
class LO,
class GO,
class Node>
1067 const ptrdiff_t offsets[],
1068 const Scalar vals[],
1069 const LO numOffsets)
const 1076 return static_cast<LO
> (0);
1081 const size_t absRowBlockOffset = ptr_[localRowInd];
1082 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1083 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1084 size_t pointOffset = 0;
1087 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1088 const size_t relBlockOffset = offsets[k];
1089 if (relBlockOffset != STINV) {
1090 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1092 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1094 getConstLocalBlockFromInput (vIn, pointOffset);
1096 AXPY (ONE, A_new, A_old);
1104 template<
class Scalar,
class LO,
class GO,
class Node>
1110 if (numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1111 return static_cast<LO
> (0);
1113 return static_cast<LO
> (numEntInGraph);
1117 template<
class Scalar,
class LO,
class GO,
class Node>
1122 const Teuchos::ETransp mode,
1132 TEUCHOS_TEST_FOR_EXCEPTION(
1133 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::apply: " 1134 "transpose and conjugate transpose modes are not yet implemented.");
1137 template<
class Scalar,
class LO,
class GO,
class Node>
1149 const Scalar zero = STS::zero ();
1150 const Scalar one = STS::one ();
1151 RCP<const import_type>
import = graph_.
getImporter ();
1153 RCP<const export_type> theExport = graph_.
getExporter ();
1157 TEUCHOS_TEST_FOR_EXCEPTION(
1159 "Tpetra::Experimental::BlockCrsMatrix::applyBlockNoTrans: " 1160 "The input BlockMultiVector X has deep copy semantics, " 1161 "not view semantics (as it should).");
1162 TEUCHOS_TEST_FOR_EXCEPTION(
1164 "Tpetra::Experimental::BlockCrsMatrix::applyBlockNoTrans: " 1165 "The output BlockMultiVector Y has deep copy semantics, " 1166 "not view semantics (as it should).");
1168 if (alpha == zero) {
1171 }
else if (beta != one) {
1175 const BMV* X_colMap = NULL;
1176 if (
import.is_null ()) {
1179 }
catch (std::exception& e) {
1180 TEUCHOS_TEST_FOR_EXCEPTION(
1181 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 1182 "applyBlockNoTrans:" << std::endl <<
"Tpetra::MultiVector::" 1183 "operator= threw an exception: " << std::endl << e.what ());
1192 if ((*X_colMap_).is_null () ||
1200 X_colMap = &(**X_colMap_);
1201 }
catch (std::exception& e) {
1202 TEUCHOS_TEST_FOR_EXCEPTION(
1203 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 1204 "applyBlockNoTrans:" << std::endl <<
"Tpetra::MultiVector::" 1205 "operator= threw an exception: " << std::endl << e.what ());
1209 BMV* Y_rowMap = NULL;
1210 if (theExport.is_null ()) {
1212 }
else if ((*Y_rowMap_).is_null () ||
1218 Y_rowMap = &(**Y_rowMap_);
1219 }
catch (std::exception& e) {
1220 TEUCHOS_TEST_FOR_EXCEPTION(
1221 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 1222 "applyBlockNoTrans:" << std::endl <<
"Tpetra::MultiVector::" 1223 "operator= threw an exception: " << std::endl << e.what ());
1227 localApplyBlockNoTrans (*X_colMap, *Y_rowMap, alpha, beta);
1229 if (! theExport.is_null ()) {
1235 template<
class Scalar,
class LO,
class GO,
class Node>
1245 const Scalar zero = STS::zero ();
1246 const Scalar one = STS::one ();
1247 const LO numLocalMeshRows =
1256 Teuchos::Array<impl_scalar_type> localMem (blockSize);
1260 for (LO lclRow = 0; lclRow < numLocalMeshRows; ++lclRow) {
1265 }
else if (beta == one) {
1266 COPY (Y_cur, Y_lcl);
1268 COPY (Y_cur, Y_lcl);
1272 const size_t meshBeg = ptr_[lclRow];
1273 const size_t meshEnd = ptr_[lclRow+1];
1274 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
1275 const LO meshCol = ind_[absBlkOff];
1277 getConstLocalBlockFromAbsOffset (absBlkOff);
1281 GEMV (alpha, A_cur, X_cur, Y_lcl);
1284 COPY (Y_lcl, Y_cur);
1288 for (LO lclRow = 0; lclRow < numLocalMeshRows; ++lclRow) {
1289 for (LO j = 0; j < numVecs; ++j) {
1294 }
else if (beta == one) {
1295 COPY (Y_cur, Y_lcl);
1297 COPY (Y_cur, Y_lcl);
1301 const size_t meshBeg = ptr_[lclRow];
1302 const size_t meshEnd = ptr_[lclRow+1];
1303 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
1304 const LO meshCol = ind_[absBlkOff];
1306 getConstLocalBlockFromAbsOffset (absBlkOff);
1310 GEMV (alpha, A_cur, X_cur, Y_lcl);
1313 COPY (Y_lcl, Y_cur);
1319 template<
class Scalar,
class LO,
class GO,
class Node>
1323 const LO colIndexToFind,
1324 const LO hint)
const 1326 const size_t absStartOffset = ptr_[localRowIndex];
1327 const size_t absEndOffset = ptr_[localRowIndex+1];
1328 const LO numEntriesInRow =
static_cast<LO
> (absEndOffset - absStartOffset);
1330 const LO*
const curInd = ind_ + absStartOffset;
1333 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1340 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid ();
1345 const LO maxNumEntriesForLinearSearch = 32;
1346 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1351 const LO* beg = curInd;
1352 const LO* end = curInd + numEntriesInRow;
1353 std::pair<const LO*, const LO*> p =
1354 std::equal_range (beg, end, colIndexToFind);
1355 if (p.first != p.second) {
1357 relOffset =
static_cast<LO
> (p.first - beg);
1361 for (LO k = 0; k < numEntriesInRow; ++k) {
1362 if (colIndexToFind == curInd[k]) {
1372 template<
class Scalar,
class LO,
class GO,
class Node>
1377 return offsetPerBlock_;
1380 template<
class Scalar,
class LO,
class GO,
class Node>
1384 const size_t pointOffset)
const 1387 const LO rowStride = blockSize_;
1391 template<
class Scalar,
class LO,
class GO,
class Node>
1395 const size_t pointOffset)
const 1398 const LO rowStride = blockSize_;
1402 template<
class Scalar,
class LO,
class GO,
class Node>
1413 const size_t absPointOffset = absBlockOffset * offsetPerBlock ();
1414 return getConstLocalBlockFromInput (val_, absPointOffset);
1418 template<
class Scalar,
class LO,
class GO,
class Node>
1422 const size_t relMeshOffset)
const 1426 const LO* lclColInds = NULL;
1427 Scalar* lclVals = NULL;
1430 LO err = this->
getLocalRowView (lclMeshRow, lclColInds, lclVals, numEnt);
1438 const size_t relPointOffset = relMeshOffset * this->offsetPerBlock ();
1439 IST* lclValsImpl =
reinterpret_cast<IST*
> (lclVals);
1440 return this->getConstLocalBlockFromInput (const_cast<const IST*> (lclValsImpl),
1445 template<
class Scalar,
class LO,
class GO,
class Node>
1456 const size_t absPointOffset = absBlockOffset * offsetPerBlock ();
1457 return getNonConstLocalBlockFromInput (const_cast<impl_scalar_type*> (val_),
1462 template<
class Scalar,
class LO,
class GO,
class Node>
1465 getLocalBlock (
const LO localRowInd,
const LO localColInd)
const 1467 const size_t absRowBlockOffset = ptr_[localRowInd];
1468 const LO relBlockOffset =
1469 this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1471 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1472 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1473 return getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1490 template<
class Scalar,
class LO,
class GO,
class Node>
1497 const this_type* src =
dynamic_cast<const this_type*
> (&source);
1500 std::ostream& err = markLocalErrorAndGetStream ();
1501 err <<
"checkSizes: The source object of the Import or Export " 1502 "must be a BlockCrsMatrix with the same template parameters as the " 1503 "target object." << endl;
1509 std::ostream& err = markLocalErrorAndGetStream ();
1510 err <<
"checkSizes: The source and target objects of the Import or " 1511 <<
"Export must have the same block sizes. The source's block " 1512 <<
"size = " << src->getBlockSize () <<
" != the target's block " 1515 if (! src->graph_.isFillComplete ()) {
1516 std::ostream& err = markLocalErrorAndGetStream ();
1517 err <<
"checkSizes: The source object of the Import or Export is " 1518 "not fill complete. Both source and target objects must be fill " 1519 "complete." << endl;
1522 std::ostream& err = markLocalErrorAndGetStream ();
1523 err <<
"checkSizes: The target object of the Import or Export is " 1524 "not fill complete. Both source and target objects must be fill " 1525 "complete." << endl;
1527 if (src->graph_.getColMap ().is_null ()) {
1528 std::ostream& err = markLocalErrorAndGetStream ();
1529 err <<
"checkSizes: The source object of the Import or Export does " 1530 "not have a column Map. Both source and target objects must have " 1531 "column Maps." << endl;
1533 if (this->graph_.
getColMap ().is_null ()) {
1534 std::ostream& err = markLocalErrorAndGetStream ();
1535 err <<
"checkSizes: The target object of the Import or Export does " 1536 "not have a column Map. Both source and target objects must have " 1537 "column Maps." << endl;
1541 return ! (* (this->localError_));
1544 template<
class Scalar,
class LO,
class GO,
class Node>
1549 const Teuchos::ArrayView<const LO>& permuteToLIDs,
1550 const Teuchos::ArrayView<const LO>& permuteFromLIDs)
1554 const bool debug =
false;
1557 std::ostringstream os;
1558 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
1559 os <<
"Proc " << myRank <<
": copyAndPermute: " 1560 <<
"numSameIDs: " << numSameIDs
1561 <<
", permuteToLIDs.size(): " << permuteToLIDs.size ()
1562 <<
", permuteFromLIDs.size(): " << permuteFromLIDs.size ()
1564 std::cerr << os.str ();
1570 if (* (this->localError_)) {
1571 std::ostream& err = this->markLocalErrorAndGetStream ();
1572 err <<
"copyAndPermute: The target object of the Import or Export is " 1573 "already in an error state." << endl;
1576 if (permuteToLIDs.size () != permuteFromLIDs.size ()) {
1577 std::ostream& err = this->markLocalErrorAndGetStream ();
1578 err <<
"copyAndPermute: permuteToLIDs.size() = " << permuteToLIDs.size ()
1579 <<
" != permuteFromLIDs.size() = " << permuteFromLIDs.size () <<
"." 1584 const this_type* src =
dynamic_cast<const this_type*
> (&source);
1586 std::ostream& err = this->markLocalErrorAndGetStream ();
1587 err <<
"copyAndPermute: The source object of the Import or Export is " 1588 "either not a BlockCrsMatrix, or does not have the right template " 1589 "parameters. checkSizes() should have caught this. " 1590 "Please report this bug to the Tpetra developers." << endl;
1593 if (* (src->localError_)) {
1594 std::ostream& err = this->markLocalErrorAndGetStream ();
1595 err <<
"copyAndPermute: The source object of the Import or Export is " 1596 "already in an error state." << endl;
1600 bool lclErr =
false;
1601 #ifdef HAVE_TPETRA_DEBUG 1602 std::set<LO> invalidSrcCopyRows;
1603 std::set<LO> invalidDstCopyRows;
1604 std::set<LO> invalidDstCopyCols;
1605 std::set<LO> invalidDstPermuteCols;
1606 std::set<LO> invalidPermuteFromRows;
1607 #endif // HAVE_TPETRA_DEBUG 1616 #ifdef HAVE_TPETRA_DEBUG 1617 const map_type& srcRowMap = * (src->graph_.getRowMap ());
1618 #endif // HAVE_TPETRA_DEBUG 1620 const map_type& srcColMap = * (src->graph_.getColMap ());
1622 const bool canUseLocalColumnIndices = srcColMap.
locallySameAs (dstColMap);
1623 const size_t numPermute =
static_cast<size_t> (permuteFromLIDs.size ());
1626 std::ostringstream os;
1627 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
1628 os <<
"Proc " << myRank <<
": copyAndPermute: " 1629 <<
"canUseLocalColumnIndices: " 1630 << (canUseLocalColumnIndices ?
"true" :
"false")
1631 <<
", numPermute: " << numPermute
1633 std::cerr << os.str ();
1636 if (canUseLocalColumnIndices) {
1638 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
1639 #ifdef HAVE_TPETRA_DEBUG 1642 invalidSrcCopyRows.insert (localRow);
1645 #endif // HAVE_TPETRA_DEBUG 1647 const LO* lclSrcCols;
1652 LO err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries);
1655 #ifdef HAVE_TPETRA_DEBUG 1656 (void) invalidSrcCopyRows.insert (localRow);
1657 #endif // HAVE_TPETRA_DEBUG 1661 if (err != numEntries) {
1664 #ifdef HAVE_TPETRA_DEBUG 1665 invalidDstCopyRows.insert (localRow);
1666 #endif // HAVE_TPETRA_DEBUG 1674 for (LO k = 0; k < numEntries; ++k) {
1677 #ifdef HAVE_TPETRA_DEBUG 1678 (void) invalidDstCopyCols.insert (lclSrcCols[k]);
1679 #endif // HAVE_TPETRA_DEBUG 1688 for (
size_t k = 0; k < numPermute; ++k) {
1689 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDs[k]);
1690 const LO dstLclRow =
static_cast<LO
> (permuteToLIDs[k]);
1692 const LO* lclSrcCols;
1695 LO err = src->getLocalRowView (srcLclRow, lclSrcCols, vals, numEntries);
1698 #ifdef HAVE_TPETRA_DEBUG 1699 invalidPermuteFromRows.insert (srcLclRow);
1700 #endif // HAVE_TPETRA_DEBUG 1704 if (err != numEntries) {
1706 #ifdef HAVE_TPETRA_DEBUG 1707 for (LO c = 0; c < numEntries; ++c) {
1709 invalidDstPermuteCols.insert (lclSrcCols[c]);
1712 #endif // HAVE_TPETRA_DEBUG 1719 const size_t maxNumEnt = src->graph_.getNodeMaxNumRowEntries ();
1720 Teuchos::Array<LO> lclDstCols (maxNumEnt);
1723 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
1724 const LO* lclSrcCols;
1731 err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries);
1732 }
catch (std::exception& e) {
1734 std::ostringstream os;
1735 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
1736 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow " 1737 << localRow <<
", src->getLocalRowView() threw an exception: " 1739 std::cerr << os.str ();
1746 #ifdef HAVE_TPETRA_DEBUG 1747 invalidSrcCopyRows.insert (localRow);
1748 #endif // HAVE_TPETRA_DEBUG 1751 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
1754 std::ostringstream os;
1755 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
1756 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow " 1757 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = " 1758 << maxNumEnt << endl;
1759 std::cerr << os.str ();
1765 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
1766 for (LO j = 0; j < numEntries; ++j) {
1768 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
1770 #ifdef HAVE_TPETRA_DEBUG 1771 invalidDstCopyCols.insert (lclDstColsView[j]);
1772 #endif // HAVE_TPETRA_DEBUG 1776 err = this->
replaceLocalValues (localRow, lclDstColsView.getRawPtr (), vals, numEntries);
1777 }
catch (std::exception& e) {
1779 std::ostringstream os;
1780 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
1781 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow " 1782 << localRow <<
", this->replaceLocalValues() threw an exception: " 1784 std::cerr << os.str ();
1788 if (err != numEntries) {
1791 std::ostringstream os;
1792 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
1793 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" " 1794 "localRow " << localRow <<
", this->replaceLocalValues " 1795 "returned " << err <<
" instead of numEntries = " 1796 << numEntries << endl;
1797 std::cerr << os.str ();
1805 for (
size_t k = 0; k < numPermute; ++k) {
1806 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDs[k]);
1807 const LO dstLclRow =
static_cast<LO
> (permuteToLIDs[k]);
1809 const LO* lclSrcCols;
1814 err = src->getLocalRowView (srcLclRow, lclSrcCols, vals, numEntries);
1815 }
catch (std::exception& e) {
1817 std::ostringstream os;
1818 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
1819 os <<
"Proc " << myRank <<
": copyAndPermute: At \"permute\" " 1820 "srcLclRow " << srcLclRow <<
" and dstLclRow " << dstLclRow
1821 <<
", src->getLocalRowView() threw an exception: " << e.what ();
1822 std::cerr << os.str ();
1829 #ifdef HAVE_TPETRA_DEBUG 1830 invalidPermuteFromRows.insert (srcLclRow);
1831 #endif // HAVE_TPETRA_DEBUG 1834 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
1840 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
1841 for (LO j = 0; j < numEntries; ++j) {
1843 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
1845 #ifdef HAVE_TPETRA_DEBUG 1846 invalidDstPermuteCols.insert (lclDstColsView[j]);
1847 #endif // HAVE_TPETRA_DEBUG 1850 err = this->
replaceLocalValues (dstLclRow, lclDstColsView.getRawPtr (), vals, numEntries);
1851 if (err != numEntries) {
1860 std::ostream& err = this->markLocalErrorAndGetStream ();
1861 #ifdef HAVE_TPETRA_DEBUG 1862 err <<
"copyAndPermute: The graph structure of the source of the " 1863 "Import or Export must be a subset of the graph structure of the " 1865 err <<
"invalidSrcCopyRows = [";
1866 for (
typename std::set<LO>::const_iterator it = invalidSrcCopyRows.begin ();
1867 it != invalidSrcCopyRows.end (); ++it) {
1869 typename std::set<LO>::const_iterator itp1 = it;
1871 if (itp1 != invalidSrcCopyRows.end ()) {
1875 err <<
"], invalidDstCopyRows = [";
1876 for (
typename std::set<LO>::const_iterator it = invalidDstCopyRows.begin ();
1877 it != invalidDstCopyRows.end (); ++it) {
1879 typename std::set<LO>::const_iterator itp1 = it;
1881 if (itp1 != invalidDstCopyRows.end ()) {
1885 err <<
"], invalidDstCopyCols = [";
1886 for (
typename std::set<LO>::const_iterator it = invalidDstCopyCols.begin ();
1887 it != invalidDstCopyCols.end (); ++it) {
1889 typename std::set<LO>::const_iterator itp1 = it;
1891 if (itp1 != invalidDstCopyCols.end ()) {
1895 err <<
"], invalidDstPermuteCols = [";
1896 for (
typename std::set<LO>::const_iterator it = invalidDstPermuteCols.begin ();
1897 it != invalidDstPermuteCols.end (); ++it) {
1899 typename std::set<LO>::const_iterator itp1 = it;
1901 if (itp1 != invalidDstPermuteCols.end ()) {
1905 err <<
"], invalidPermuteFromRows = [";
1906 for (
typename std::set<LO>::const_iterator it = invalidPermuteFromRows.begin ();
1907 it != invalidPermuteFromRows.end (); ++it) {
1909 typename std::set<LO>::const_iterator itp1 = it;
1911 if (itp1 != invalidPermuteFromRows.end ()) {
1915 err <<
"]" << std::endl;
1917 err <<
"copyAndPermute: The graph structure of the source of the " 1918 "Import or Export must be a subset of the graph structure of the " 1919 "target." << std::endl;
1920 #endif // HAVE_TPETRA_DEBUG 1924 std::ostringstream os;
1925 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
1926 const bool lclSuccess = ! (* (this->localError_));
1927 os <<
"*** Proc " << myRank <<
": copyAndPermute " 1928 << (lclSuccess ?
"succeeded" :
"FAILED");
1934 std::cerr << os.str ();
1958 template<
class LO,
class GO,
class D>
1960 packRowCount (
const size_t numEnt,
1961 const size_t numBytesPerValue,
1962 const size_t blkSize)
1974 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
1975 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
1976 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
1977 return numEntLen + gidsLen + valsLen;
1991 template<
class ST,
class LO,
class GO,
class D>
1994 const size_t offset,
1995 const size_t numBytes,
1996 const size_t numBytesPerValue)
1998 using Kokkos::subview;
2000 typedef typename PackTraits<LO, D>::input_buffer_type input_buffer_type;
2001 typedef typename input_buffer_type::size_type size_type;
2003 if (numBytes == 0) {
2005 return static_cast<size_t> (0);
2009 const size_t theNumBytes = PackTraits<LO, D>::packValueCount (numEntLO);
2010 #ifdef HAVE_TPETRA_DEBUG 2011 TEUCHOS_TEST_FOR_EXCEPTION(
2012 theNumBytes > numBytes, std::logic_error,
"unpackRowCount: " 2013 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
2015 #endif // HAVE_TPETRA_DEBUG 2016 const std::pair<size_type, size_type> rng (offset, offset + theNumBytes);
2017 input_buffer_type inBuf = subview (imports, rng);
2018 const size_t actualNumBytes = PackTraits<LO, D>::unpackValue (numEntLO, inBuf);
2019 #ifdef HAVE_TPETRA_DEBUG 2020 TEUCHOS_TEST_FOR_EXCEPTION(
2021 theNumBytes > numBytes, std::logic_error,
"unpackRowCount: " 2022 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
2025 (void) actualNumBytes;
2026 #endif // HAVE_TPETRA_DEBUG 2027 return static_cast<size_t> (numEntLO);
2038 template<
class ST,
class LO,
class GO,
class D>
2041 const size_t offset,
2042 const size_t numEnt,
2045 const size_t numBytesPerValue,
2046 const size_t blockSize)
2048 using Kokkos::subview;
2055 typedef typename PackTraits<LO, D>::output_buffer_type output_buffer_type;
2056 typedef typename output_buffer_type::size_type size_type;
2057 typedef typename std::pair<size_type, size_type> pair_type;
2063 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2066 const LO numEntLO =
static_cast<size_t> (numEnt);
2068 const size_t numEntBeg = offset;
2069 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
2070 const size_t gidsBeg = numEntBeg + numEntLen;
2071 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2072 const size_t valsBeg = gidsBeg + gidsLen;
2073 const size_t valsLen = numScalarEnt * numBytesPerValue;
2075 output_buffer_type numEntOut =
2076 subview (exports, pair_type (numEntBeg, numEntBeg + numEntLen));
2077 output_buffer_type gidsOut =
2078 subview (exports, pair_type (gidsBeg, gidsBeg + gidsLen));
2079 output_buffer_type valsOut =
2080 subview (exports, pair_type (valsBeg, valsBeg + valsLen));
2082 size_t numBytesOut = 0;
2083 numBytesOut += PackTraits<LO, D>::packValue (numEntOut, numEntLO);
2084 numBytesOut += PackTraits<GO, D>::packArray (gidsOut, gidsIn, numEnt);
2085 numBytesOut += PackTraits<ST, D>::packArray (valsOut, valsIn, numScalarEnt);
2087 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2088 TEUCHOS_TEST_FOR_EXCEPTION(
2089 numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: " 2090 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = " 2091 << expectedNumBytes <<
".");
2096 template<
class ST,
class LO,
class GO,
class D>
2101 const size_t offset,
2102 const size_t numBytes,
2103 const size_t numEnt,
2104 const size_t numBytesPerValue,
2105 const size_t blockSize)
2107 using Kokkos::subview;
2114 typedef typename PackTraits<LO, D>::input_buffer_type input_buffer_type;
2115 typedef typename input_buffer_type::size_type size_type;
2116 typedef typename std::pair<size_type, size_type> pair_type;
2118 if (numBytes == 0) {
2122 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2123 TEUCHOS_TEST_FOR_EXCEPTION(
2124 static_cast<size_t> (imports.dimension_0 ()) <= offset,
2125 std::logic_error,
"unpackRow: imports.dimension_0() = " 2126 << imports.dimension_0 () <<
" <= offset = " << offset <<
".");
2127 TEUCHOS_TEST_FOR_EXCEPTION(
2128 static_cast<size_t> (imports.dimension_0 ()) < offset + numBytes,
2129 std::logic_error,
"unpackRow: imports.dimension_0() = " 2130 << imports.dimension_0 () <<
" < offset + numBytes = " 2131 << (offset + numBytes) <<
".");
2136 const size_t numEntBeg = offset;
2137 const size_t numEntLen = PackTraits<LO, D>::packValueCount (lid);
2138 const size_t gidsBeg = numEntBeg + numEntLen;
2139 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2140 const size_t valsBeg = gidsBeg + gidsLen;
2141 const size_t valsLen = numScalarEnt * numBytesPerValue;
2143 input_buffer_type numEntIn =
2144 subview (imports, pair_type (numEntBeg, numEntBeg + numEntLen));
2145 input_buffer_type gidsIn =
2146 subview (imports, pair_type (gidsBeg, gidsBeg + gidsLen));
2147 input_buffer_type valsIn =
2148 subview (imports, pair_type (valsBeg, valsBeg + valsLen));
2150 size_t numBytesOut = 0;
2152 numBytesOut += PackTraits<LO, D>::unpackValue (numEntOut, numEntIn);
2153 TEUCHOS_TEST_FOR_EXCEPTION(
2154 static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
2155 "unpackRow: Expected number of entries " << numEnt
2156 <<
" != actual number of entries " << numEntOut <<
".");
2158 numBytesOut += PackTraits<GO, D>::unpackArray (gidsOut, gidsIn, numEnt);
2159 numBytesOut += PackTraits<ST, D>::unpackArray (valsOut, valsIn, numScalarEnt);
2161 TEUCHOS_TEST_FOR_EXCEPTION(
2162 numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = " 2163 << numBytesOut <<
" != numBytes = " << numBytes <<
".");
2164 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2165 TEUCHOS_TEST_FOR_EXCEPTION(
2166 numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: " 2167 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = " 2168 << expectedNumBytes <<
".");
2173 template<
class Scalar,
class LO,
class GO,
class Node>
2177 const Teuchos::ArrayView<const LO>& exportLIDs,
2178 Teuchos::Array<packet_type>& exports,
2179 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
2180 size_t& constantNumPackets,
2185 using Kokkos::MemoryUnmanaged;
2186 using Kokkos::subview;
2189 typedef typename View<int*, device_type>::HostMirror::execution_space HES;
2191 typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
2192 const bool debug =
false;
2195 std::ostringstream os;
2196 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2197 os <<
"Proc " << myRank <<
": packAndPrepare: exportLIDs.size() = " 2198 << exportLIDs.size () <<
", numPacketsPerLID.size() = " 2199 << numPacketsPerLID.size () << endl;
2200 std::cerr << os.str ();
2203 if (* (this->localError_)) {
2204 std::ostream& err = this->markLocalErrorAndGetStream ();
2205 err <<
"packAndPrepare: The target object of the Import or Export is " 2206 "already in an error state." << endl;
2210 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2213 std::ostream& err = this->markLocalErrorAndGetStream ();
2214 err <<
"packAndPrepare: The source (input) object of the Import or " 2215 "Export is either not a BlockCrsMatrix, or does not have the right " 2216 "template parameters. checkSizes() should have caught this. " 2217 "Please report this bug to the Tpetra developers." << endl;
2222 const size_t blockSize =
static_cast<size_t> (src->getBlockSize ());
2223 const size_type numExportLIDs = exportLIDs.size ();
2225 if (numExportLIDs != numPacketsPerLID.size ()) {
2226 std::ostream& err = this->markLocalErrorAndGetStream ();
2227 err <<
"packAndPrepare: exportLIDs.size() = " << numExportLIDs
2228 <<
" != numPacketsPerLID.size() = " << numPacketsPerLID.size ()
2241 constantNumPackets = 0;
2246 size_t totalNumBytes = 0;
2247 size_t totalNumEntries = 0;
2248 size_t maxRowLength = 0;
2249 for (size_type i = 0; i < numExportLIDs; ++i) {
2250 const LO lclRow = exportLIDs[i];
2258 if (numEnt == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2261 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2265 size_t numBytesPerValue = 0;
2271 Scalar* valsRaw = NULL;
2272 const LO* lidsRaw = NULL;
2273 LO actualNumEnt = 0;
2275 src->getLocalRowView (lclRow, lidsRaw, valsRaw, actualNumEnt);
2277 if (numEnt != static_cast<size_t> (actualNumEnt)) {
2278 std::ostream& err = this->markLocalErrorAndGetStream ();
2279 err <<
"packAndPrepare: Local row " << i <<
" claims to have " <<
2280 numEnt <<
"entry/ies, but the View returned by getLocalRowView() " 2281 "has " << actualNumEnt <<
" entry/ies. This should never happen. " 2282 "Please report this bug to the Tpetra developers." << endl;
2285 if (errCode == Teuchos::OrdinalTraits<LO>::invalid ()) {
2286 std::ostream& err = this->markLocalErrorAndGetStream ();
2287 err <<
"packAndPrepare: Local row " << i <<
" is not in the row Map " 2288 "of the source object on the calling process." << endl;
2292 const ST* valsRawST =
const_cast<const ST*
> (
reinterpret_cast<ST*
> (valsRaw));
2293 View<const ST*, HES, MemoryUnmanaged> vals (valsRawST, numScalarEnt);
2298 numBytesPerValue = PackTraits<ST, HES>::packValueCount (vals(0));
2301 const size_t numBytes =
2302 packRowCount<LO, GO, HES> (numEnt, numBytesPerValue, blockSize);
2303 numPacketsPerLID[i] = numBytes;
2304 totalNumBytes += numBytes;
2305 totalNumEntries += numEnt;
2306 maxRowLength = std::max (maxRowLength, numEnt);
2310 const int myRank = graph_.
getComm ()->getRank ();
2311 std::ostringstream os;
2312 os <<
"Proc " << myRank <<
": packAndPrepare: totalNumBytes = " 2313 << totalNumBytes << endl;
2314 std::cerr << os.str ();
2320 exports.resize (totalNumBytes);
2321 if (totalNumEntries > 0) {
2322 View<char*, HES, MemoryUnmanaged> exportsK (exports.getRawPtr (),
2337 View<GO*, HES> gblColInds;
2340 gblColInds = PackTraits<GO, HES>::allocateArray (gid, maxRowLength,
"gids");
2344 for (size_type i = 0; i < numExportLIDs; ++i) {
2345 const LO lclRowInd = exportLIDs[i];
2346 const LO* lclColIndsRaw;
2352 (void) src->getLocalRowView (lclRowInd, lclColIndsRaw, valsRaw, numEntLO);
2353 const size_t numEnt =
static_cast<size_t> (numEntLO);
2354 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2355 View<const LO*, HES, MemoryUnmanaged> lclColInds (lclColIndsRaw, numEnt);
2356 const ST* valsRawST =
const_cast<const ST*
> (
reinterpret_cast<ST*
> (valsRaw));
2357 View<const ST*, HES, MemoryUnmanaged> vals (valsRawST, numScalarEnt);
2362 const size_t numBytesPerValue = numEnt == 0 ?
2363 static_cast<size_t> (0) :
2364 PackTraits<ST, HES>::packValueCount (vals(0));
2367 for (
size_t j = 0; j < numEnt; ++j) {
2372 const size_t numBytes =
2373 packRowForBlockCrs<ST, LO, GO, HES> (exportsK, offset, numEnt, gblColInds,
2374 vals, numBytesPerValue, blockSize);
2379 if (offset != totalNumBytes) {
2380 std::ostream& err = this->markLocalErrorAndGetStream ();
2381 err <<
"packAndPreapre: At end of method, the final offset (in bytes) " 2382 << offset <<
" does not equal the total number of bytes packed " 2383 << totalNumBytes <<
". " 2384 <<
"Please report this bug to the Tpetra developers." << endl;
2390 std::ostringstream os;
2391 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2392 const bool lclSuccess = ! (* (this->localError_));
2393 os <<
"*** Proc " << myRank <<
": packAndPrepare " 2394 << (lclSuccess ?
"succeeded" :
"FAILED")
2395 <<
" (totalNumEntries = " << totalNumEntries <<
") ***" << endl;
2396 std::cerr << os.str ();
2401 template<
class Scalar,
class LO,
class GO,
class Node>
2405 const Teuchos::ArrayView<const packet_type>& imports,
2406 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
2413 using Kokkos::MemoryUnmanaged;
2414 using Kokkos::subview;
2417 typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
2418 typedef typename View<int*, device_type>::HostMirror::execution_space HES;
2419 typedef std::pair<typename View<int*, HES>::size_type,
2420 typename View<int*, HES>::size_type> pair_type;
2421 typedef View<GO*, HES, MemoryUnmanaged> gids_out_type;
2422 typedef View<LO*, HES, MemoryUnmanaged> lids_out_type;
2423 typedef View<ST*, HES, MemoryUnmanaged> vals_out_type;
2424 typedef typename PackTraits<GO, HES>::input_buffer_type input_buffer_type;
2425 const char prefix[] =
"Tpetra::Experimental::BlockCrsMatrix::unpackAndCombine: ";
2426 const bool debug =
false;
2429 std::ostringstream os;
2430 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2431 os <<
"Proc " << myRank <<
": unpackAndCombine" << endl;
2432 std::cerr << os.str ();
2438 if (* (this->localError_)) {
2439 std::ostream& err = this->markLocalErrorAndGetStream ();
2440 err << prefix <<
"The target object of the Import or Export is " 2441 "already in an error state." << endl;
2444 if (importLIDs.size () != numPacketsPerLID.size ()) {
2445 std::ostream& err = this->markLocalErrorAndGetStream ();
2446 err << prefix <<
"importLIDs.size() = " << importLIDs.size () <<
" != " 2447 "numPacketsPerLID.size() = " << numPacketsPerLID.size () <<
"." << endl;
2451 std::ostream& err = this->markLocalErrorAndGetStream ();
2452 err << prefix <<
"Invalid CombineMode value " << CM <<
". Valid " 2453 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO." 2463 const size_type numImportLIDs = importLIDs.size ();
2464 if (CM ==
ZERO || numImportLIDs == 0) {
2466 std::ostringstream os;
2467 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2468 os <<
"Proc " << myRank <<
": unpackAndCombine: Nothing to do" << endl;
2469 std::cerr << os.str ();
2475 std::ostringstream os;
2476 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2477 os <<
"Proc " << myRank <<
": unpackAndCombine: Getting ready" << endl;
2478 std::cerr << os.str ();
2481 input_buffer_type importsK (imports.getRawPtr (), imports.size ());
2484 const size_t maxRowNumScalarEnt = maxRowNumEnt * blockSize * blockSize;
2486 size_t numBytesPerValue;
2498 numBytesPerValue = PackTraits<ST, HES>::packValueCount (val);
2505 View<GO*, HES> gblColInds;
2506 View<LO*, HES> lclColInds;
2507 View<ST*, HES> vals;
2521 gblColInds = PackTraits<GO, HES>::allocateArray (gid, maxRowNumEnt,
"gids");
2522 lclColInds = PackTraits<LO, HES>::allocateArray (lid, maxRowNumEnt,
"lids");
2523 vals = PackTraits<ST, HES>::allocateArray (val, maxRowNumScalarEnt,
"vals");
2527 for (size_type i = 0; i < numImportLIDs; ++i) {
2528 const size_t numBytes = numPacketsPerLID[i];
2529 if (numBytes == 0) {
2533 const size_t numEnt =
2534 unpackRowCount<ST, LO, GO, HES> (importsK, offset, numBytes, numBytesPerValue);
2535 if (numEnt > maxRowNumEnt) {
2536 std::ostream& err = this->markLocalErrorAndGetStream ();
2537 err << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
2538 <<
" > maxRowNumEnt = " << maxRowNumEnt << endl;
2542 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2543 const LO lclRow = importLIDs[i];
2545 gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
2546 vals_out_type valsOut = subview (vals, pair_type (0, numScalarEnt));
2548 const size_t numBytesOut =
2549 unpackRowForBlockCrs<ST, LO, GO, HES> (gidsOut, valsOut, importsK, offset, numBytes,
2550 numEnt, numBytesPerValue, blockSize);
2551 if (numBytes != numBytesOut) {
2552 std::ostream& err = this->markLocalErrorAndGetStream ();
2553 err << prefix <<
"At i = " << i <<
", numBytes = " << numBytes
2554 <<
" != numBytesOut = " << numBytesOut <<
".";
2559 lids_out_type lidsOut = subview (lclColInds, pair_type (0, numEnt));
2560 for (
size_t k = 0; k < numEnt; ++k) {
2562 #ifdef HAVE_TPETRA_DEBUG 2563 if (lidsOut(k) == Teuchos::OrdinalTraits<LO>::invalid ()) {
2564 std::ostream& err = this->markLocalErrorAndGetStream ();
2565 err << prefix <<
"At i = " << i <<
", GID " << gidsOut(k)
2566 <<
" is not owned by the calling process.";
2569 #endif // HAVE_TPETRA_DEBUG 2574 const LO*
const lidsRaw =
const_cast<const LO*
> (lidsOut.ptr_on_device ());
2575 const Scalar*
const valsRaw =
2576 reinterpret_cast<const Scalar*
> (
const_cast<const ST*
> (valsOut.ptr_on_device ()));
2581 }
else if (CM ==
ABSMAX) {
2584 #ifdef HAVE_TPETRA_DEBUG 2585 if (static_cast<LO> (numEnt) != numCombd) {
2586 std::ostream& err = this->markLocalErrorAndGetStream ();
2587 err << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
2588 <<
" != numCombd = " << numCombd <<
".";
2593 #endif // HAVE_TPETRA_DEBUG 2600 std::ostringstream os;
2601 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2602 const bool lclSuccess = ! (* (this->localError_));
2603 os <<
"*** Proc " << myRank <<
": unpackAndCombine " 2604 << (lclSuccess ?
"succeeded" :
"FAILED")
2606 std::cerr << os.str ();
2611 template<
class Scalar,
class LO,
class GO,
class Node>
2615 using Teuchos::TypeNameTraits;
2616 std::ostringstream os;
2617 os <<
"\"Tpetra::BlockCrsMatrix\": { " 2618 <<
"Template parameters: { " 2619 <<
"Scalar: " << TypeNameTraits<Scalar>::name ()
2620 <<
"LO: " << TypeNameTraits<LO>::name ()
2621 <<
"GO: " << TypeNameTraits<GO>::name ()
2622 <<
"Node: " << TypeNameTraits<Node>::name ()
2624 <<
", Label: \"" << this->getObjectLabel () <<
"\"" 2625 <<
", Global dimensions: [" 2626 << graph_.
getDomainMap ()->getGlobalNumElements () <<
", " 2627 << graph_.
getRangeMap ()->getGlobalNumElements () <<
"]" 2634 template<
class Scalar,
class LO,
class GO,
class Node>
2638 const Teuchos::EVerbosityLevel verbLevel)
const 2640 using Teuchos::ArrayRCP;
2641 using Teuchos::CommRequest;
2642 using Teuchos::FancyOStream;
2643 using Teuchos::getFancyOStream;
2644 using Teuchos::ireceive;
2645 using Teuchos::isend;
2646 using Teuchos::outArg;
2647 using Teuchos::TypeNameTraits;
2648 using Teuchos::VERB_DEFAULT;
2649 using Teuchos::VERB_NONE;
2650 using Teuchos::VERB_LOW;
2651 using Teuchos::VERB_MEDIUM;
2653 using Teuchos::VERB_EXTREME;
2655 using Teuchos::wait;
2659 const Teuchos::EVerbosityLevel vl =
2660 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2662 if (vl == VERB_NONE) {
2667 Teuchos::OSTab tab0 (out);
2669 out <<
"\"Tpetra::BlockCrsMatrix\":" << endl;
2670 Teuchos::OSTab tab1 (out);
2672 out <<
"Template parameters:" << endl;
2674 Teuchos::OSTab tab2 (out);
2675 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
2676 <<
"LO: " << TypeNameTraits<LO>::name () << endl
2677 <<
"GO: " << TypeNameTraits<GO>::name () << endl
2678 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
2680 out <<
"Label: \"" << this->getObjectLabel () <<
"\"" << endl
2681 <<
"Global dimensions: [" 2682 << graph_.
getDomainMap ()->getGlobalNumElements () <<
", " 2683 << graph_.
getRangeMap ()->getGlobalNumElements () <<
"]" << endl;
2686 out <<
"Block size: " << blockSize << endl;
2689 if (vl >= VERB_MEDIUM) {
2690 const Teuchos::Comm<int>& comm = * (graph_.
getMap ()->getComm ());
2691 const int myRank = comm.getRank ();
2693 out <<
"Row Map:" << endl;
2700 out <<
"Column Map: same as row Map" << endl;
2705 out <<
"Column Map:" << endl;
2713 out <<
"Domain Map: same as row Map" << endl;
2718 out <<
"Domain Map: same as column Map" << endl;
2723 out <<
"Domain Map:" << endl;
2731 out <<
"Range Map: same as domain Map" << endl;
2736 out <<
"Range Map: same as row Map" << endl;
2741 out <<
"Range Map: " << endl;
2748 if (vl >= VERB_EXTREME) {
2749 const Teuchos::Comm<int>& comm = * (graph_.
getMap ()->getComm ());
2750 const int myRank = comm.getRank ();
2751 const int numProcs = comm.getSize ();
2754 RCP<std::ostringstream> lclOutStrPtr (
new std::ostringstream ());
2755 RCP<FancyOStream> osPtr = getFancyOStream (lclOutStrPtr);
2756 FancyOStream& os = *osPtr;
2757 os <<
"Process " << myRank <<
":" << endl;
2758 Teuchos::OSTab tab2 (os);
2766 os <<
"Row " << meshGblRow <<
": {";
2768 const LO* lclColInds = NULL;
2769 Scalar* vals = NULL;
2773 for (LO k = 0; k < numInds; ++k) {
2776 os <<
"Col " << gblCol <<
": [";
2777 for (LO i = 0; i < blockSize; ++i) {
2778 for (LO j = 0; j < blockSize; ++j) {
2779 os << vals[blockSize*blockSize*k + i*blockSize + j];
2780 if (j + 1 < blockSize) {
2784 if (i + 1 < blockSize) {
2789 if (k + 1 < numInds) {
2799 out << lclOutStrPtr->str ();
2800 lclOutStrPtr = Teuchos::null;
2803 const int sizeTag = 1337;
2804 const int dataTag = 1338;
2806 ArrayRCP<char> recvDataBuf;
2810 for (
int p = 1; p < numProcs; ++p) {
2813 ArrayRCP<size_t> recvSize (1);
2815 RCP<CommRequest<int> > recvSizeReq =
2816 ireceive<int, size_t> (recvSize, p, sizeTag, comm);
2817 wait<int> (comm, outArg (recvSizeReq));
2818 const size_t numCharsToRecv = recvSize[0];
2825 if (static_cast<size_t>(recvDataBuf.size()) < numCharsToRecv + 1) {
2826 recvDataBuf.resize (numCharsToRecv + 1);
2828 ArrayRCP<char> recvData = recvDataBuf.persistingView (0, numCharsToRecv);
2830 RCP<CommRequest<int> > recvDataReq =
2831 ireceive<int, char> (recvData, p, dataTag, comm);
2832 wait<int> (comm, outArg (recvDataReq));
2837 recvDataBuf[numCharsToRecv] =
'\0';
2838 out << recvDataBuf.getRawPtr ();
2840 else if (myRank == p) {
2844 const std::string stringToSend = lclOutStrPtr->str ();
2845 lclOutStrPtr = Teuchos::null;
2848 const size_t numCharsToSend = stringToSend.size ();
2849 ArrayRCP<size_t> sendSize (1);
2850 sendSize[0] = numCharsToSend;
2851 RCP<CommRequest<int> > sendSizeReq =
2852 isend<int, size_t> (sendSize, 0, sizeTag, comm);
2853 wait<int> (comm, outArg (sendSizeReq));
2861 ArrayRCP<const char> sendData (&stringToSend[0], 0, numCharsToSend,
false);
2862 RCP<CommRequest<int> > sendDataReq =
2863 isend<int, char> (sendData, 0, dataTag, comm);
2864 wait<int> (comm, outArg (sendDataReq));
2870 template<
class Scalar,
class LO,
class GO,
class Node>
2871 Teuchos::RCP<const Teuchos::Comm<int> >
2878 template<
class Scalar,
class LO,
class GO,
class Node>
2887 template<
class Scalar,
class LO,
class GO,
class Node>
2895 template<
class Scalar,
class LO,
class GO,
class Node>
2903 template<
class Scalar,
class LO,
class GO,
class Node>
2911 template<
class Scalar,
class LO,
class GO,
class Node>
2919 template<
class Scalar,
class LO,
class GO,
class Node>
2927 template<
class Scalar,
class LO,
class GO,
class Node>
2935 template<
class Scalar,
class LO,
class GO,
class Node>
2943 template<
class Scalar,
class LO,
class GO,
class Node>
2951 template<
class Scalar,
class LO,
class GO,
class Node>
2959 template<
class Scalar,
class LO,
class GO,
class Node>
2967 template<
class Scalar,
class LO,
class GO,
class Node>
2975 template<
class Scalar,
class LO,
class GO,
class Node>
2983 template<
class Scalar,
class LO,
class GO,
class Node>
2991 template<
class Scalar,
class LO,
class GO,
class Node>
2999 template<
class Scalar,
class LO,
class GO,
class Node>
3007 template<
class Scalar,
class LO,
class GO,
class Node>
3016 template<
class Scalar,
class LO,
class GO,
class Node>
3020 const Teuchos::ArrayView<GO> &Indices,
3021 const Teuchos::ArrayView<Scalar> &Values,
3022 size_t &NumEntries)
const 3024 TEUCHOS_TEST_FOR_EXCEPTION(
3025 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getGlobalRowCopy: " 3026 "This class doesn't support global matrix indexing.");
3030 template<
class Scalar,
class LO,
class GO,
class Node>
3034 Teuchos::ArrayView<const GO> &indices,
3035 Teuchos::ArrayView<const Scalar> &values)
const 3037 TEUCHOS_TEST_FOR_EXCEPTION(
3038 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getGlobalRowView: " 3039 "This class doesn't support global matrix indexing.");
3043 template<
class Scalar,
class LO,
class GO,
class Node>
3047 Teuchos::ArrayView<const LO> &indices,
3048 Teuchos::ArrayView<const Scalar> &values)
const 3050 TEUCHOS_TEST_FOR_EXCEPTION(
3051 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getGlobalRowView: " 3052 "This class doesn't support global matrix indexing.");
3057 template<
class Scalar,
class LO,
class GO,
class Node>
3066 Teuchos::ArrayRCP<size_t> colOffsets;
3071 offset = rowOffset + colOffsets[r]*bs*bs;
3072 for(
int b=0; b<bs; b++)
3074 std::cout <<
"offset: " << offset+b*(bs+1) << std::endl;
3082 Teuchos::RCP<Teuchos::FancyOStream> wrappedStream = Teuchos::getFancyOStream (Teuchos::rcpFromRef (std::cout));
3083 diag.
describe (*wrappedStream, Teuchos::VERB_EXTREME);
3085 std::cout <<
"Raw data:\n";
3087 for(
int i=0; i<nnz; i++)
3088 std::cout <<
"val[" << i <<
"] = " << val_[i] << std::endl;
3092 template<
class Scalar,
class LO,
class GO,
class Node>
3097 TEUCHOS_TEST_FOR_EXCEPTION(
3098 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::leftScale: " 3099 "not implemented.");
3103 template<
class Scalar,
class LO,
class GO,
class Node>
3108 TEUCHOS_TEST_FOR_EXCEPTION(
3109 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::rightScale: " 3110 "not implemented.");
3114 template<
class Scalar,
class LO,
class GO,
class Node>
3115 Teuchos::RCP<const Tpetra::RowGraph<LO, GO, Node> >
3122 template<
class Scalar,
class LO,
class GO,
class Node>
3127 TEUCHOS_TEST_FOR_EXCEPTION(
3128 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getFrobeniusNorm: " 3129 "not implemented.");
3140 #define TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_INSTANT(S,LO,GO,NODE) \ 3141 template class Experimental::BlockCrsMatrix< S, LO, GO, NODE >; 3143 #endif // TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP Teuchos::ArrayRCP< const LocalOrdinal > getNodePackedIndices() const
Get an Teuchos::ArrayRCP of the packed column-indices.
LO absMaxLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValuesByOffsets, but for the ABSMAX combine mode.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
bool isGloballyIndexed() const
If graph indices are in the global range, this function returns true. Otherwise, this function return...
Teuchos::RCP< const Comm< int > > getComm() const
Returns the communicator.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
virtual bool supportsRowViews() const
Whether this object implements getLocalRowView() and getGlobalRowView().
void replaceLocalValue(const LocalOrdinal myRow, const Scalar &value) const
Replace current value at the specified location with specified values.
size_t findLocalIndex(const RowInfo &rowinfo, const LocalOrdinal ind, const size_t hint=0) const
Find the column offset corresponding to the given (local) column index.
virtual void copyAndPermute(const Tpetra::SrcDistObject &source, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Perform copies and permutations that are local to this process.
virtual bool isLowerTriangular() const
Whether this matrix is lower triangular.
Teuchos::RCP< const map_type > getRowMap() const
Returns the Map that describes the row distribution in this graph.
Teuchos::RCP< const map_type > getColMap() const
get the (mesh) map for the columns of this block matrix.
LO getBlockSize() const
The number of degrees of freedom per mesh point.
Teuchos::RCP< const map_type > getRowMap() const
get the (mesh) map for the rows of this block matrix.
Teuchos::RCP< const map_type > getRangeMap() const
Returns the Map associated with the domain of this graph.
mv_type mv_
The Tpetra::MultiVector used to represent the data.
LO replaceLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Replace values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
size_t getNodeNumCols() const
Returns the number of columns connected to the locally owned rows of this graph.
local_graph_type getLocalGraph() const
Get the local graph.
LO replaceLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like replaceLocalValues, but avoids computing row offsets.
void COPY(const ViewType1 &x, const ViewType2 &y)
y := x, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dimension(s)...
bool hasColMap() const
Whether the graph has a column Map.
void putScalar(const Scalar &val)
Fill all entries with the given value val.
size_t getGlobalMaxNumRowEntries() const
Maximum number of entries in all rows over all processes.
global_size_t getGlobalNumRows() const
Returns the number of global rows in the graph.
size_t getNodeNumRows() const
get the local number of block rows
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
void getLocalDiagOffsets(Teuchos::ArrayRCP< size_t > &offsets) const
Get offsets of the diagonal entries in the matrix.
void applyBlock(const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar beta=Teuchos::ScalarTraits< Scalar >::zero())
Version of apply() that takes BlockMultiVector input and output.
void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
Traits class for packing / unpacking data of type T, using Kokkos data structures that live in the gi...
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to a FancyOStream.
One or more distributed dense vectors.
Node::device_type device_type
The Kokkos::Device specialization that this class uses.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const
Print a description of this object to the given output stream.
MultiVector for multiple degrees of freedom per mesh point.
LO getLocalRowOffsets(const LO localRowInd, ptrdiff_t offsets[], const LO colInds[], const LO numColInds) const
Get relative offsets corresponding to the given rows, given by local row index.
global_size_t getGlobalNumRows() const
get the global number of block rows
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
Teuchos::RCP< const import_type > getImporter() const
Returns the importer associated with this graph.
Nonowning view of a square dense block in a block matrix.
LocalOrdinal getMinLocalIndex() const
The minimum local index.
void deep_copy(const LittleBlock< ST2, LO > &dst, const LittleBlock< ST1, LO > &src, typename std::enable_if< std::is_convertible< ST1, ST2 >::value &&!std::is_const< ST2 >::value, int >::type *=NULL)
Copy the LittleBlock src into the LittleBlock dst.
LO absMaxLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Like sumIntoLocalValues, but for the ABSMAX combine mode.
Teuchos::RCP< const map_type > getRangeMap() const
Get the (point) range Map of this matrix.
virtual bool isUpperTriangular() const
Whether this matrix is upper triangular.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
bool locallySameAs(const Map< LocalOrdinal, GlobalOrdinal, node_type > &map) const
Is the given Map locally the same as the input Map?
size_t getNodeMaxNumRowEntries() const
Maximum number of entries in all rows owned by the calling process.
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
virtual bool checkSizes(const Tpetra::SrcDistObject &source)
Compare the source and target (this) objects for compatibility.
little_vec_type getLocalBlock(const LO localRowIndex, const LO colIndex) const
Get a view of the degrees of freedom at the given mesh point.
Kokkos::View< char *, D, Kokkos::MemoryUnmanaged > output_buffer_type
The type of an output buffer of bytes.
bool isNodeLocalElement(LocalOrdinal localIndex) const
Whether the given local index is valid for this Map on the calling process.
virtual bool hasColMap() const
Whether this matrix has a well-defined column map.
bool isFillComplete() const
Returns true if fillComplete() has been called and the graph is in compute mode.
Teuchos::RCP< const map_type > getDomainMap() const
Get the (point) domain Map of this matrix.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
virtual size_t getNodeNumDiags() const
The number of local diagonal entries, based on global row/column index comparisons.
global_size_t getGlobalNumEntries() const
Returns the global number of entries in the graph.
LO getBlockSize() const
Get the number of degrees of freedom per mesh point.
virtual GO getIndexBase() const
The index base for global indices in this matrix.
virtual bool isGloballyIndexed() const
Whether matrix indices are globally indexed.
size_t global_size_t
Global size_t object.
Insert new values that don't currently exist.
bool isLocallyIndexed() const
If graph indices are in the local range, this function returns true. Otherwise, this function returns...
std::string description() const
One-line description of this object.
BlockCrsMatrix()
Default constructor: Makes an empty block matrix.
Teuchos::RCP< const map_type > getDomainMap() const
Returns the Map associated with the domain of this graph.
Teuchos::RCP< node_type > getNode() const
Returns the underlying node.
void GEMV(const CoefficientType &alpha, const LittleBlockType &A, const LittleVectorType1 &x, const LittleVectorType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
GlobalOrdinal getIndexBase() const
Returns the index base for global indices for this graph.
virtual global_size_t getGlobalNumCols() const
The global number of columns of this matrix.
ESweepDirection
Sweep direction for Gauss-Seidel or Successive Over-Relaxation (SOR).
virtual bool isFillComplete() const
Whether fillComplete() has been called.
void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
Nonowning view of a set of degrees of freedom corresponding to a mesh point in a block vector or mult...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
LO getNumVectors() const
Get the number of columns (vectors) in the BlockMultiVector.
bool isUpperTriangular() const
Whether the graph is locally upper triangular.
Sets up and executes a communication plan for a Tpetra DistObject.
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
void reorderedGaussSeidelCopy(MultiVector< Scalar, LO, GO, Node > &X, const MultiVector< Scalar, LO, GO, Node > &B, const MultiVector< Scalar, LO, GO, Node > &D, const ArrayView< LO > &rowIndices, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of reorderedGaussSeidel(), with fewer requirements on X.
Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node >::mag_type mag_type
Type of a norm result.
Kokkos::View< const value_type *, D, Kokkos::MemoryUnmanaged > input_array_type
The type of an input array of value_type.
virtual size_t getNodeNumCols() const
The number of columns needed to apply the forward operator on this node.
virtual size_t getNodeNumEntries() const
The local number of stored (structurally nonzero) entries.
virtual void leftScale(const Vector< Scalar, LO, GO, Node > &x)
Scale the RowMatrix on the left with the given Vector x.
void getLocalDiagCopy(BlockCrsMatrix< Scalar, LO, GO, Node > &diag, const Teuchos::ArrayView< const size_t > &offsets) const
Variant of getLocalDiagCopy() that uses precomputed offsets.
virtual global_size_t getGlobalNumEntries() const
The global number of stored (structurally nonzero) entries.
virtual void getGlobalRowCopy(GO GlobalRow, const Teuchos::ArrayView< GO > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const
Get a copy of the given global row's entries.
void getLocalRowCopy(LO LocalRow, const Teuchos::ArrayView< LO > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const
Not implemented.
Kokkos::View< const char *, D, Kokkos::MemoryUnmanaged > input_buffer_type
The type of an input buffer of bytes.
Replace old value with maximum of magnitudes of old and new values.
global_size_t getGlobalNumCols() const
Returns the number of global columns in the graph.
Abstract base class for objects that can be the source of an Import or Export operation.
LO getLocalRowView(const LO localRowInd, const LO *&colInds, Scalar *&vals, LO &numInds) const
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
void gaussSeidelCopy(MultiVector< Scalar, LO, GO, Node > &X, const MultiVector< Scalar, LO, GO, Node > &B, const MultiVector< Scalar, LO, GO, Node > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of gaussSeidel(), with fewer requirements on X.
Replace existing values with new values.
Teuchos::RCP< const map_type > getColMap() const
Returns the Map that describes the column distribution in this graph.
Replace old values with zero.
std::string errorMessages() const
The current stream of error messages.
Teuchos::DataAccess getCopyOrView() const
Get whether this has copy (copyOrView = Teuchos::Copy) or view (copyOrView = Teuchos::View) semantics...
LO sumIntoLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Sum into values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
virtual size_t getGlobalMaxNumRowEntries() const
The maximum number of entries across all rows/columns on all nodes.
size_t getNodeNumEntries() const
Returns the local number of entries in the graph.
Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
global_size_t getGlobalNumDiags() const
Returns the number of global diagonal entries, based on global row/column index comparisons.
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
virtual void rightScale(const Vector< Scalar, LO, GO, Node > &x)
Scale the RowMatrix on the right with the given Vector x.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
virtual Tpetra::RowMatrix< Scalar, LO, GO, Node >::mag_type getFrobeniusNorm() const
The Frobenius norm of the matrix.
size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const
Returns the current number of entries on this node in the specified local row.
Kokkos::View< value_type *, D, Kokkos::MemoryUnmanaged > output_array_type
The type of an output array of value_type.
LO sumIntoLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValues, but avoids computing row offsets.
A distributed dense vector.
Constant block CRS matrix class.
size_t getNodeNumDiags() const
Returns the number of local diagonal entries, based on global row/column index comparisons.
void apply(const mv_type &X, mv_type &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
For this matrix A, compute Y := beta * Y + alpha * Op(A) * X.
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which this matrix is distributed.
size_t getNodeNumRows() const
Returns the number of graph rows owned on the calling node.
void doExport(const SrcDistObject &source, const Export< LO, GO, Node > &exporter, CombineMode CM)
Export data into this object using an Export object ("forward mode").
virtual global_size_t getGlobalNumDiags() const
The number of global diagonal entries, based on global row/column index comparisons.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
BlockMultiVector< Scalar, LO, GO, Node >::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
virtual Teuchos::RCP< Node > getNode() const
The Kokkos Node instance.
void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
Declaration of Tpetra::Experimental::BlockCrsMatrix.
size_t getNodeMaxNumRowEntries() const
The maximum number of entries across all rows/columns on this node.
bool isLowerTriangular() const
Whether the graph is locally lower triangular.
LocalOrdinal getMaxLocalIndex() const
The maximum local index on the calling process.
virtual size_t getNumEntriesInGlobalRow(GO globalRow) const
The current number of entries on the calling process in the specified global row. ...
virtual Teuchos::RCP< const Tpetra::RowGraph< LO, GO, Node > > getGraph() const
Get the (mesh) graph.
size_t getNumEntriesInLocalRow(const LO localRowInd) const
Return the number of entries in the given row on the calling process.
Teuchos::RCP< const export_type > getExporter() const
Returns the exporter associated with this graph.
void localGaussSeidel(const BlockMultiVector< Scalar, LO, GO, Node > &Residual, BlockMultiVector< Scalar, LO, GO, Node > &Solution, const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &factoredDiagonal, const Kokkos::View< int **, device_type, Kokkos::MemoryUnmanaged > &factorizationPivots, const Scalar &omega, const ESweepDirection direction) const
Local Gauss-Seidel solve, given a factorized diagonal.
virtual void getGlobalRowView(GO GlobalRow, Teuchos::ArrayView< const GO > &indices, Teuchos::ArrayView< const Scalar > &values) const
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
virtual bool isLocallyIndexed() const
Whether matrix indices are locally indexed.
RowInfo getRowInfo(const LocalOrdinal myRow) const
Get information about the locally owned row with local index myRow.
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const
Returns the current number of entries on this node in the specified global row.