42 #ifndef TPETRA_MATRIXMATRIX_DEF_HPP 43 #define TPETRA_MATRIXMATRIX_DEF_HPP 46 #include "Teuchos_VerboseObject.hpp" 47 #include "Teuchos_Array.hpp" 49 #include "Tpetra_ConfigDefs.hpp" 50 #include "Tpetra_CrsMatrix.hpp" 52 #include "Tpetra_RowMatrixTransposer.hpp" 53 #include "Tpetra_ConfigDefs.hpp" 54 #include "Tpetra_Map.hpp" 55 #include "Tpetra_Export.hpp" 59 #include "Teuchos_FancyOStream.hpp" 72 namespace MatrixMatrix{
75 template <
class Scalar,
85 bool call_FillComplete_on_result,
86 const std::string & label)
89 #ifdef HAVE_TPETRA_MMM_TIMINGS 90 std::string prefix = std::string(
"TpetraExt ")+ label + std::string(
": ");
91 using Teuchos::TimeMonitor;
92 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"MMM All Setup"))));
107 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error,
"MatrixMatrix::Multiply(): Matrix A is not fill complete.");
108 TEUCHOS_TEST_FOR_EXCEPTION(!B.
isFillComplete(), std::runtime_error,
"MatrixMatrix::Multiply(): Matrix B is not fill complete.");
109 TEUCHOS_TEST_FOR_EXCEPTION(C.
isLocallyIndexed() , std::runtime_error,
"MatrixMatrix::Multiply(): Result matrix C must not be locally indexed.");
116 Node> CrsMatrixStruct_t;
119 RCP<const Matrix_t > Aprime = null;
120 RCP<const Matrix_t > Bprime = null;
123 bool NewFlag=!C.
getGraph()->isLocallyIndexed() && !C.
getGraph()->isGloballyIndexed();
125 bool use_optimized_ATB=
false;
126 if(transposeA && !transposeB && call_FillComplete_on_result && NewFlag) {
127 use_optimized_ATB=
true;
129 #ifdef USE_OLD_TRANSPOSE // NOTE: For Grey Ballard's use. Remove this later. 130 use_optimized_ATB=
false;
134 if(!use_optimized_ATB && transposeA) {
139 Aprime = rcpFromRef(A);
147 Bprime = rcpFromRef(B);
158 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error,
159 "MatrixMatrix::Multiply: ERROR, inner dimensions of op(A) and op(B) " 160 "must match for matrix-matrix product. op(A) is " 161 <<Aouter<<
"x"<<Ainner <<
", op(B) is "<<Binner<<
"x"<<Bouter<<std::endl);
167 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.
getGlobalNumRows(), std::runtime_error,
168 "MatrixMatrix::Multiply: ERROR, dimensions of result C must " 170 <<
" rows, should have at least "<<Aouter << std::endl);
182 int numProcs = A.
getComm()->getSize();
186 CrsMatrixStruct_t Aview;
187 CrsMatrixStruct_t Bview;
189 RCP<const Map_t > targetMap_A = Aprime->getRowMap();
190 RCP<const Map_t > targetMap_B = Bprime->getRowMap();
192 #ifdef HAVE_TPETRA_MMM_TIMINGS 193 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"MMM All I&X"))));
198 if(!use_optimized_ATB) {
199 RCP<const Import<LocalOrdinal,GlobalOrdinal, Node> > dummyImporter;
200 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview,dummyImporter,
true,label);
207 targetMap_B = Aprime->getColMap();
211 if(!use_optimized_ATB)
212 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(),
false,label);
214 #ifdef HAVE_TPETRA_MMM_TIMINGS 215 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"MMM All Multiply"))));
220 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
222 if(use_optimized_ATB) {
223 MMdetails::mult_AT_B_newmatrix(A, B, C,label);
225 else if(call_FillComplete_on_result && NewFlag ) {
226 MMdetails::mult_A_B_newmatrix(Aview, Bview, C,label);
229 MMdetails::mult_A_B(Aview, Bview, crsmat,label);
230 #ifdef HAVE_TPETRA_MMM_TIMINGS 231 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"MMM All FillComplete"))));
233 if (call_FillComplete_on_result) {
241 C.
fillComplete(Bprime->getDomainMap(), Aprime->getRangeMap());
249 template <
class Scalar,
258 bool call_FillComplete_on_result,
259 const std::string & label)
261 #ifdef HAVE_TPETRA_MMM_TIMINGS 262 std::string prefix = std::string(
"TpetraExt ")+ label + std::string(
": ");
263 using Teuchos::TimeMonitor;
264 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string(
"Jacobi All Setup"))));
271 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error,
"MatrixMatrix::Multiply(): Matrix A is not fill complete.");
272 TEUCHOS_TEST_FOR_EXCEPTION(!B.
isFillComplete(), std::runtime_error,
"MatrixMatrix::Multiply(): Matrix B is not fill complete.");
273 TEUCHOS_TEST_FOR_EXCEPTION(C.
isLocallyIndexed() , std::runtime_error,
"MatrixMatrix::Multiply(): Result matrix C must not be locally indexed.");
280 Node> CrsMatrixStruct_t;
283 RCP<const Matrix_t > Aprime = rcpFromRef(A);
284 RCP<const Matrix_t > Bprime = rcpFromRef(B);
293 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error,
294 "MatrixMatrix::Jacobi: ERROR, inner dimensions of op(A) and op(B) " 295 "must match for matrix-matrix product. op(A) is " 296 <<Aouter<<
"x"<<Ainner <<
", op(B) is "<<Binner<<
"x"<<Bouter<<std::endl);
302 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.
getGlobalNumRows(), std::runtime_error,
303 "MatrixMatrix::Multiply: ERROR, dimensions of result C must " 305 <<
" rows, should have at least "<<Aouter << std::endl);
317 int numProcs = A.
getComm()->getSize();
321 CrsMatrixStruct_t Aview;
322 CrsMatrixStruct_t Bview;
324 RCP<const Map_t > targetMap_A = Aprime->getRowMap();
325 RCP<const Map_t > targetMap_B = Bprime->getRowMap();
327 #ifdef HAVE_TPETRA_MMM_TIMINGS 328 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string(
"Jacobi All I&X"))));
332 RCP<const Import<LocalOrdinal,GlobalOrdinal, Node> > dummyImporter;
333 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview,dummyImporter,
false,label);
338 targetMap_B = Aprime->getColMap();
342 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(),
false,label);
344 #ifdef HAVE_TPETRA_MMM_TIMINGS 345 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string(
"Jacobi All Multiply"))));
350 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
353 bool NewFlag=!C.
getGraph()->isLocallyIndexed() && !C.
getGraph()->isGloballyIndexed();
355 if(call_FillComplete_on_result && NewFlag ) {
356 MMdetails::jacobi_A_B_newmatrix(omega,Dinv,Aview, Bview, C,label);
359 TEUCHOS_TEST_FOR_EXCEPTION(
360 true, std::runtime_error,
361 "jacobi_A_B_general not implemented");
385 template <
class Scalar,
396 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error,
397 "MatrixMatrix::Add ERROR, input matrix A.isFillComplete() is false; it is required to be true. (Result matrix B is not required to be isFillComplete()).");
398 TEUCHOS_TEST_FOR_EXCEPTION(B.
isFillComplete() , std::runtime_error,
399 "MatrixMatrix::Add ERROR, input matrix B must not be fill complete!");
400 TEUCHOS_TEST_FOR_EXCEPTION(B.
isStaticGraph() , std::runtime_error,
401 "MatrixMatrix::Add ERROR, input matrix B must not have static graph!");
403 "MatrixMatrix::Add ERROR, input matrix B must not be locally indexed!");
405 "MatrixMatrix::Add ERROR, input matrix B must have a dynamic profile!");
412 RCP<const CrsMatrix_t> Aprime = null;
418 Aprime = rcpFromRef(A);
425 if(scalarB != ScalarTraits<Scalar>::one()){
431 if(scalarA != ScalarTraits<Scalar>::zero()){
432 for(LocalOrdinal i = 0; (size_t)i < numMyRows; ++i){
433 row = B.
getRowMap()->getGlobalElement(i);
434 Aprime->getGlobalRowCopy(row, a_inds(), a_vals(), a_numEntries);
435 if(scalarA != ScalarTraits<Scalar>::one()){
436 for(
size_t j =0; j<a_numEntries; ++j){
437 a_vals[j] *= scalarA;
452 template <
class Scalar,
456 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
458 const bool transposeA,
461 const bool transposeB,
465 const Teuchos::RCP<Teuchos::ParameterList>& params)
469 using Teuchos::rcpFromRef;
470 using Teuchos::rcp_dynamic_cast;
471 using Teuchos::rcp_implicit_cast;
476 TEUCHOS_TEST_FOR_EXCEPTION(
478 "Tpetra::MatrixMatrix::add: A and B must both be fill complete.");
480 #ifdef HAVE_TPETRA_DEBUG 483 const bool domainMapsSame =
487 TEUCHOS_TEST_FOR_EXCEPTION(
488 domainMapsSame, std::invalid_argument,
489 "Tpetra::MatrixMatrix::add: The domain Maps of Op(A) and Op(B) are not the same.");
491 const bool rangeMapsSame =
495 TEUCHOS_TEST_FOR_EXCEPTION(
496 rangeMapsSame, std::invalid_argument,
497 "Tpetra::MatrixMatrix::add: The range Maps of Op(A) and Op(B) are not the same.");
499 #endif // HAVE_TPETRA_DEBUG 502 RCP<const crs_matrix_type> Aprime;
504 transposer_type theTransposer (rcpFromRef (A));
505 Aprime = theTransposer.createTranspose ();
507 Aprime = rcpFromRef (A);
510 #ifdef HAVE_TPETRA_DEBUG 511 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
512 "Tpetra::MatrixMatrix::Add: Failed to compute Op(A). " 513 "Please report this bug to the Tpetra developers.");
514 #endif // HAVE_TPETRA_DEBUG 517 RCP<const crs_matrix_type> Bprime;
519 transposer_type theTransposer (rcpFromRef (B));
520 Bprime = theTransposer.createTranspose ();
522 Bprime = rcpFromRef (B);
525 #ifdef HAVE_TPETRA_DEBUG 526 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
527 "Tpetra::MatrixMatrix::Add: Failed to compute Op(B). " 528 "Please report this bug to the Tpetra developers.");
530 TEUCHOS_TEST_FOR_EXCEPTION(
531 ! Aprime->isFillComplete () || ! Bprime->isFillComplete (), std::invalid_argument,
532 "Tpetra::MatrixMatrix::add: Aprime and Bprime must both be fill complete. " 533 "Please report this bug to the Tpetra developers.");
534 #endif // HAVE_TPETRA_DEBUG 537 RCP<row_matrix_type> C =
538 Bprime->add (alpha, *rcp_implicit_cast<const row_matrix_type> (Aprime),
539 beta, domainMap, rangeMap, params);
540 return rcp_dynamic_cast<crs_matrix_type> (C);
546 template <
class Scalar,
560 using Teuchos::Array;
561 using Teuchos::ArrayRCP;
562 using Teuchos::ArrayView;
565 using Teuchos::rcp_dynamic_cast;
566 using Teuchos::rcpFromRef;
567 using Teuchos::tuple;
570 typedef Teuchos::ScalarTraits<Scalar> STS;
578 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
579 "Tpetra::MatrixMatrix::Add: The case C == null does not actually work. " 580 "Fixing this will require an interface change.");
582 TEUCHOS_TEST_FOR_EXCEPTION(
584 "Tpetra::MatrixMatrix::Add: Both input matrices must be fill complete " 585 "before calling this function.");
587 #ifdef HAVE_TPETRA_DEBUG 589 const bool domainMapsSame =
593 TEUCHOS_TEST_FOR_EXCEPTION(
594 domainMapsSame, std::invalid_argument,
595 "Tpetra::MatrixMatrix::Add: The domain Maps of Op(A) and Op(B) are not the same.");
597 const bool rangeMapsSame =
601 TEUCHOS_TEST_FOR_EXCEPTION(
602 rangeMapsSame, std::invalid_argument,
603 "Tpetra::MatrixMatrix::Add: The range Maps of Op(A) and Op(B) are not the same.");
605 #endif // HAVE_TPETRA_DEBUG 608 RCP<const crs_matrix_type> Aprime;
610 transposer_type theTransposer (rcpFromRef (A));
611 Aprime = theTransposer.createTranspose ();
613 Aprime = rcpFromRef (A);
616 #ifdef HAVE_TPETRA_DEBUG 617 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
618 "Tpetra::MatrixMatrix::Add: Failed to compute Op(A). " 619 "Please report this bug to the Tpetra developers.");
620 #endif // HAVE_TPETRA_DEBUG 623 RCP<const crs_matrix_type> Bprime;
625 transposer_type theTransposer (rcpFromRef (B));
626 Bprime = theTransposer.createTranspose ();
628 Bprime = rcpFromRef (B);
631 #ifdef HAVE_TPETRA_DEBUG 632 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
633 "Tpetra::MatrixMatrix::Add: Failed to compute Op(B). " 634 "Please report this bug to the Tpetra developers.");
635 #endif // HAVE_TPETRA_DEBUG 638 if (! C.is_null ()) {
639 C->setAllToScalar (STS::zero ());
647 if (Aprime->getRowMap ()->isSameAs (* (Bprime->getRowMap ())) {
648 RCP<const map_type> rowMap = Aprime->getRowMap ();
650 RCP<const crs_graph_type> A_graph =
651 rcp_dynamic_cast<
const crs_graph_type> (Aprime->getGraph (),
true);
652 #ifdef HAVE_TPETRA_DEBUG 653 TEUCHOS_TEST_FOR_EXCEPTION(A_graph.is_null (), std::logic_error,
654 "Tpetra::MatrixMatrix::Add: Graph of Op(A) is null. " 655 "Please report this bug to the Tpetra developers.");
656 #endif // HAVE_TPETRA_DEBUG 657 RCP<const crs_graph_type> B_graph =
658 rcp_dynamic_cast<
const crs_graph_type> (Bprime->getGraph (),
true);
659 #ifdef HAVE_TPETRA_DEBUG 660 TEUCHOS_TEST_FOR_EXCEPTION(B_graph.is_null (), std::logic_error,
661 "Tpetra::MatrixMatrix::Add: Graph of Op(B) is null. " 662 "Please report this bug to the Tpetra developers.");
663 #endif // HAVE_TPETRA_DEBUG 664 RCP<const map_type> A_colMap = A_graph->getColMap ();
665 #ifdef HAVE_TPETRA_DEBUG 666 TEUCHOS_TEST_FOR_EXCEPTION(A_colMap.is_null (), std::logic_error,
667 "Tpetra::MatrixMatrix::Add: Column Map of Op(A) is null. " 668 "Please report this bug to the Tpetra developers.");
669 #endif // HAVE_TPETRA_DEBUG 670 RCP<const map_type> B_colMap = B_graph->getColMap ();
671 #ifdef HAVE_TPETRA_DEBUG 672 TEUCHOS_TEST_FOR_EXCEPTION(B_colMap.is_null (), std::logic_error,
673 "Tpetra::MatrixMatrix::Add: Column Map of Op(B) is null. " 674 "Please report this bug to the Tpetra developers.");
675 TEUCHOS_TEST_FOR_EXCEPTION(A_graph->getImporter ().is_null (),
677 "Tpetra::MatrixMatrix::Add: Op(A)'s Import is null. " 678 "Please report this bug to the Tpetra developers.");
679 TEUCHOS_TEST_FOR_EXCEPTION(B_graph->getImporter ().is_null (),
681 "Tpetra::MatrixMatrix::Add: Op(B)'s Import is null. " 682 "Please report this bug to the Tpetra developers.");
683 #endif // HAVE_TPETRA_DEBUG 686 RCP<const import_type> sumImport =
687 A_graph->getImporter ()->setUnion (* (B_graph->getImporter ()));
688 RCP<const map_type> C_colMap = sumImport->getTargetMap ();
696 ArrayView<const LocalOrdinal> A_local_ind;
697 Array<GlobalOrdinal> A_global_ind;
698 ArrayView<const LocalOrdinal> B_local_ind;
699 Array<GlobalOrdinal> B_global_ind;
701 const size_t localNumRows = rowMap->getNodeNumElements ();
702 ArrayRCP<size_t> numEntriesPerRow (localNumRows);
705 size_t maxNumEntriesPerRow = 0;
706 for (
size_t localRow = 0; localRow < localNumRows; ++localRow) {
708 A_graph->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind);
709 const size_type A_numEnt = A_local_ind.size ();
710 if (A_numEnt > A_global_ind.size ()) {
711 A_global_ind.resize (A_numEnt);
714 for (size_type k = 0; k < A_numEnt; ++k) {
715 A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]);
719 B_graph->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind);
720 const size_type B_numEnt = B_local_ind.size ();
721 if (B_numEnt > B_global_ind.size ()) {
722 B_global_ind.resize (B_numEnt);
725 for (size_type k = 0; k < B_numEnt; ++k) {
726 B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]);
730 const size_t curNumEntriesPerRow =
731 keyMergeCount (A_global_ind.begin (), A_global_ind.end (),
732 B_global_ind.begin (), B_global_ind.end ());
733 numEntriesPerRow[localRow] = curNumEntriesPerRow;
734 maxNumEntriesPerRow = std::max (maxNumEntriesPerRow, curNumEntriesPerRow);
741 C = rcp (
new crs_matrix_type (rowMap, C_colMap, numEntriesPerRow,
StaticProfile));
746 ArrayView<const Scalar> A_val;
747 ArrayView<const Scalar> B_val;
749 Array<LocalOrdinal> AplusB_local_ind (maxNumEntriesPerRow);
750 Array<GlobalOrdinal> AplusB_global_ind (maxNumEntriesPerRow);
751 Array<Scalar> AplusB_val (maxNumEntriesPerRow);
753 for (
size_t localRow = 0; localRow < localNumRows; ++localRow) {
755 Aprime->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind, A_val);
757 for (size_type k = 0; k < A_local_ind.size (); ++k) {
758 A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]);
762 Bprime->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind, B_val);
764 for (size_type k = 0; k < B_local_ind.size (); ++k) {
765 B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]);
768 const size_t curNumEntries = numEntriesPerRow[localRow];
769 ArrayView<LocalOrdinal> C_local_ind = AplusB_local_ind (0, curNumEntries);
770 ArrayView<GlobalOrdinal> C_global_ind = AplusB_global_ind (0, curNumEntries);
771 ArrayView<Scalar> C_val = AplusB_val (0, curNumEntries);
775 A_val.begin (), A_val.end (),
776 B_global_ind.begin (), B_global_ind.end (),
777 B_val.begin (), B_val.end (),
778 C_global_ind.begin (), C_val.begin (),
779 std::plus<Scalar> ());
781 for (size_type k = 0; k < as<size_type> (numEntriesPerRow[localRow]); ++k) {
782 C_local_ind[k] = C_colMap->getLocalElement (C_global_ind[k]);
785 C->replaceLocalValues (localRow, C_local_ind, C_val);
790 RCP<const map_type> domainMap = A_graph->getDomainMap ();
791 RCP<const map_type> rangeMap = A_graph->getRangeMap ();
792 C->expertStaticFillComplete (domainMap, rangeMap, sumImport, A_graph->getExporter ());
800 C = rcp (
new crs_matrix_type (Aprime->getRowMap (), null));
807 C = rcp (
new crs_matrix_type (Aprime->getRowMap (), 0));
811 #ifdef HAVE_TPETRA_DEBUG 812 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
813 "Tpetra::MatrixMatrix::Add: At this point, Aprime is null. " 814 "Please report this bug to the Tpetra developers.");
815 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
816 "Tpetra::MatrixMatrix::Add: At this point, Bprime is null. " 817 "Please report this bug to the Tpetra developers.");
818 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
819 "Tpetra::MatrixMatrix::Add: At this point, C is null. " 820 "Please report this bug to the Tpetra developers.");
821 #endif // HAVE_TPETRA_DEBUG 823 Array<RCP<const crs_matrix_type> > Mat =
824 tuple<RCP<const crs_matrix_type> > (Aprime, Bprime);
825 Array<Scalar> scalar = tuple<Scalar> (scalarA, scalarB);
828 for (
int k = 0; k < 2; ++k) {
829 Array<GlobalOrdinal> Indices;
830 Array<Scalar> Values;
838 #ifdef HAVE_TPETRA_DEBUG 839 TEUCHOS_TEST_FOR_EXCEPTION(Mat[k].is_null (), std::logic_error,
840 "Tpetra::MatrixMatrix::Add: At this point, curRowMap is null. " 841 "Please report this bug to the Tpetra developers.");
842 #endif // HAVE_TPETRA_DEBUG 843 RCP<const map_type> curRowMap = Mat[k]->getRowMap ();
844 #ifdef HAVE_TPETRA_DEBUG 845 TEUCHOS_TEST_FOR_EXCEPTION(curRowMap.is_null (), std::logic_error,
846 "Tpetra::MatrixMatrix::Add: At this point, curRowMap is null. " 847 "Please report this bug to the Tpetra developers.");
848 #endif // HAVE_TPETRA_DEBUG 850 const size_t localNumRows = Mat[k]->getNodeNumRows ();
851 for (
size_t i = 0; i < localNumRows; ++i) {
852 const GlobalOrdinal globalRow = curRowMap->getGlobalElement (i);
853 size_t numEntries = Mat[k]->getNumEntriesInGlobalRow (globalRow);
854 if (numEntries > 0) {
855 Indices.resize (numEntries);
856 Values.resize (numEntries);
857 Mat[k]->getGlobalRowCopy (globalRow, Indices (), Values (), numEntries);
859 if (scalar[k] != STS::one ()) {
860 for (
size_t j = 0; j < numEntries; ++j) {
861 Values[j] *= scalar[k];
865 if (C->isFillComplete ()) {
866 C->sumIntoGlobalValues (globalRow, Indices, Values);
868 C->insertGlobalValues (globalRow, Indices, Values);
882 template <
class TransferType>
883 void printMultiplicationStatistics(Teuchos::RCP<TransferType > Transfer,
const std::string &label){
884 if(Transfer.is_null())
return;
886 const Distributor & Distor = Transfer->getDistributor();
887 Teuchos::RCP<const Teuchos::Comm<int> > Comm = Transfer->getSourceMap()->getComm();
889 size_t rows_send = Transfer->getNumExportIDs();
890 size_t rows_recv = Transfer->getNumRemoteIDs();
892 size_t round1_send = Transfer->getNumExportIDs() *
sizeof(size_t);
893 size_t round1_recv = Transfer->getNumRemoteIDs() *
sizeof(size_t);
896 size_t round2_send, round2_recv;
899 int myPID = Comm->getRank();
900 int NumProcs = Comm->getSize();
907 size_t lstats[8] = {num_send_neighbors,num_recv_neighbors,rows_send,rows_recv,round1_send,round1_recv,round2_send,round2_recv};
908 size_t gstats_min[8], gstats_max[8];
910 double lstats_avg[8], gstats_avg[8];
911 for(
int i=0; i<8; i++)
912 lstats_avg[i] = ((
double)lstats[i])/NumProcs;
914 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MIN,8,lstats,gstats_min);
915 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MAX,8,lstats,gstats_max);
916 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_SUM,8,lstats_avg,gstats_avg);
919 printf(
"%s Send Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n",label.c_str(),
920 (int)gstats_min[0],gstats_avg[0],(
int)gstats_max[0], (int)gstats_min[2],gstats_avg[2],(
int)gstats_max[2],
921 (int)gstats_min[4],gstats_avg[4],(
int)gstats_max[4], (int)gstats_min[6],gstats_avg[6],(
int)gstats_max[6]);
922 printf(
"%s Recv Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n",label.c_str(),
923 (int)gstats_min[1],gstats_avg[1],(
int)gstats_max[1], (int)gstats_min[3],gstats_avg[3],(
int)gstats_max[3],
924 (int)gstats_min[5],gstats_avg[5],(
int)gstats_max[5], (int)gstats_min[7],gstats_avg[7],(
int)gstats_max[7]);
930 template<
class Scalar,
934 void mult_AT_B_newmatrix(
938 const std::string & label) {
947 Node> CrsMatrixStruct_t;
949 #ifdef HAVE_TPETRA_MMM_TIMINGS 950 std::string prefix = std::string(
"TpetraExt ")+ label + std::string(
": ");
951 using Teuchos::TimeMonitor;
952 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string(
"MMM-T Transpose"))));
964 #ifdef HAVE_TPETRA_MMM_TIMINGS 965 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string(
"MMM-T I&X"))));
969 CrsMatrixStruct_t Aview;
970 CrsMatrixStruct_t Bview;
971 RCP<const Import<LocalOrdinal,GlobalOrdinal, Node> > dummyImporter;
972 MMdetails::import_and_extract_views(*Atrans, Atrans->getRowMap(), Aview, dummyImporter,
true,label);
973 MMdetails::import_and_extract_views(B, B.
getRowMap(), Bview, dummyImporter,
true,label);
975 #ifdef HAVE_TPETRA_MMM_TIMINGS 976 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string(
"MMM-T AB-core"))));
979 RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >Ctemp;
982 bool needs_final_export = !Atrans->getGraph()->getExporter().is_null();
983 if(needs_final_export)
986 Ctemp = rcp(&C,
false);
989 mult_A_B_newmatrix(Aview,Bview,*Ctemp,label);
994 #ifdef HAVE_TPETRA_MMM_TIMINGS 995 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string(
"MMM-T exportAndFillComplete"))));
998 Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Crcp(&C,
false);
999 if(needs_final_export) {
1000 Teuchos::ParameterList labelList;
1001 labelList.set(
"Timer Label",label);
1002 Ctemp->exportAndFillComplete(Crcp,*Ctemp->getGraph()->getExporter(),
1005 #ifdef COMPUTE_MMM_STATISTICS 1006 printMultiplicationStatistics(Ctemp->getGraph()->getExporter(),label+std::string(
" AT_B MMM"));
1012 template<
class Scalar,
1014 class GlobalOrdinal,
1019 CrsWrapper<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1020 const std::string & label)
1022 typedef Teuchos::ScalarTraits<Scalar> STS;
1024 LocalOrdinal C_firstCol = Bview.
colMap->getMinLocalIndex();
1025 LocalOrdinal C_lastCol = Bview.
colMap->getMaxLocalIndex();
1027 LocalOrdinal C_firstCol_import = OrdinalTraits<LocalOrdinal>::zero();
1028 LocalOrdinal C_lastCol_import = OrdinalTraits<LocalOrdinal>::invalid();
1030 ArrayView<const GlobalOrdinal> bcols = Bview.
colMap->getNodeElementList();
1031 ArrayView<const GlobalOrdinal> bcols_import = null;
1033 C_firstCol_import = Bview.
importColMap->getMinLocalIndex();
1034 C_lastCol_import = Bview.
importColMap->getMaxLocalIndex();
1036 bcols_import = Bview.
importColMap->getNodeElementList();
1039 size_t C_numCols = C_lastCol - C_firstCol +
1040 OrdinalTraits<LocalOrdinal>::one();
1041 size_t C_numCols_import = C_lastCol_import - C_firstCol_import +
1042 OrdinalTraits<LocalOrdinal>::one();
1044 if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
1046 Array<Scalar> dwork = Array<Scalar>(C_numCols);
1047 Array<GlobalOrdinal> iwork = Array<GlobalOrdinal>(C_numCols);
1048 Array<size_t> iwork2 = Array<size_t>(C_numCols);
1050 Array<Scalar> C_row_i = dwork;
1051 Array<GlobalOrdinal> C_cols = iwork;
1052 Array<size_t> c_index = iwork2;
1053 Array<GlobalOrdinal> combined_index = Array<GlobalOrdinal>(2*C_numCols);
1054 Array<Scalar> combined_values = Array<Scalar>(2*C_numCols);
1056 size_t C_row_i_length, j, k, last_index;
1059 LocalOrdinal LO_INVALID = OrdinalTraits<LocalOrdinal>::invalid();
1060 Array<LocalOrdinal> Acol2Brow(Aview.
colMap->getNodeNumElements(),LO_INVALID);
1061 Array<LocalOrdinal> Acol2Irow(Aview.
colMap->getNodeNumElements(),LO_INVALID);
1064 for(LocalOrdinal i=Aview.
colMap->getMinLocalIndex(); i <=
1065 Aview.
colMap->getMaxLocalIndex(); i++)
1070 for(LocalOrdinal i=Aview.
colMap->getMinLocalIndex(); i <=
1071 Aview.
colMap->getMaxLocalIndex(); i++) {
1072 GlobalOrdinal GID = Aview.
colMap->getGlobalElement(i);
1073 LocalOrdinal BLID = Bview.
origMatrix->getRowMap()->getLocalElement(GID);
1074 if(BLID != LO_INVALID) Acol2Brow[i] = BLID;
1075 else Acol2Irow[i] = Bview.
importMatrix->getRowMap()->getLocalElement(GID);
1085 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP;
1086 ArrayRCP<const LocalOrdinal> Acolind_RCP, Bcolind_RCP, Icolind_RCP;
1087 ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP;
1088 ArrayView<const size_t> Arowptr, Browptr, Irowptr;
1089 ArrayView<const LocalOrdinal> Acolind, Bcolind, Icolind;
1090 ArrayView<const Scalar> Avals, Bvals, Ivals;
1091 Aview.
origMatrix->getAllValues(Arowptr_RCP,Acolind_RCP,Avals_RCP);
1092 Bview.
origMatrix->getAllValues(Browptr_RCP,Bcolind_RCP,Bvals_RCP);
1093 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
1094 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
1096 Bview.
importMatrix->getAllValues(Irowptr_RCP,Icolind_RCP,Ivals_RCP);
1097 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
1100 bool C_filled = C.isFillComplete();
1102 for (
size_t i = 0; i < C_numCols; i++)
1103 c_index[i] = OrdinalTraits<size_t>::invalid();
1106 size_t Arows = Aview.
rowMap->getNodeNumElements();
1107 for(
size_t i=0; i<Arows; ++i) {
1111 GlobalOrdinal global_row = Aview.
rowMap->getGlobalElement(i);
1120 C_row_i_length = OrdinalTraits<size_t>::zero();
1122 for(k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1123 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1124 Scalar Aval = Avals[k];
1125 if (Aval == STS::zero())
1128 if (Ak==LO_INVALID)
continue;
1130 for(j=Browptr[Ak]; j< Browptr[Ak+1]; ++j) {
1131 LocalOrdinal col = Bcolind[j];
1133 if (c_index[col] == OrdinalTraits<size_t>::invalid()){
1136 C_row_i[C_row_i_length] = Aval*Bvals[j];
1137 C_cols[C_row_i_length] = col;
1138 c_index[col] = C_row_i_length;
1142 C_row_i[c_index[col]] += Aval*Bvals[j];
1147 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1148 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1149 C_cols[ii] = bcols[C_cols[ii]];
1150 combined_index[ii] = C_cols[ii];
1151 combined_values[ii] = C_row_i[ii];
1153 last_index = C_row_i_length;
1159 C_row_i_length = OrdinalTraits<size_t>::zero();
1161 for(k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1162 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1163 Scalar Aval = Avals[k];
1164 if (Aval == STS::zero())
1167 if (Ak!=LO_INVALID)
continue;
1169 Ak = Acol2Irow[Acolind[k]];
1170 for(j=Irowptr[Ak]; j< Irowptr[Ak+1]; ++j) {
1171 LocalOrdinal col = Icolind[j];
1173 if (c_index[col] == OrdinalTraits<size_t>::invalid()){
1176 C_row_i[C_row_i_length] = Aval*Ivals[j];
1177 C_cols[C_row_i_length] = col;
1178 c_index[col] = C_row_i_length;
1183 C_row_i[c_index[col]] += Aval*Ivals[j];
1188 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1189 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1190 C_cols[ii] = bcols_import[C_cols[ii]];
1191 combined_index[last_index] = C_cols[ii];
1192 combined_values[last_index] = C_row_i[ii];
1201 C.sumIntoGlobalValues(
1203 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1204 combined_values.view(OrdinalTraits<size_t>::zero(), last_index))
1206 C.insertGlobalValues(
1208 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1209 combined_values.view(OrdinalTraits<size_t>::zero(), last_index));
1215 template<
class Scalar,
1217 class GlobalOrdinal,
1219 void setMaxNumEntriesPerRow(
1222 typedef typename Array<ArrayView<const LocalOrdinal> >::size_type local_length_size;
1223 Mview.maxNumRowEntries = OrdinalTraits<local_length_size>::zero();
1224 if(Mview.indices.size() > OrdinalTraits<local_length_size>::zero() ){
1225 Mview.maxNumRowEntries = Mview.indices[0].size();
1226 for(local_length_size i = 1; i<Mview.indices.size(); ++i){
1227 if(Mview.indices[i].size() > Mview.maxNumRowEntries){
1228 Mview.maxNumRowEntries = Mview.indices[i].size();
1235 template<
class CrsMatrixType>
1236 size_t C_estimate_nnz(CrsMatrixType & A, CrsMatrixType &B){
1238 size_t Aest = 100, Best=100;
1239 if(A.getNodeNumEntries() > 0)
1240 Aest = (A.getNodeNumRows()>0)? A.getNodeNumEntries()/A.getNodeNumEntries():100;
1241 if(B.getNodeNumEntries() > 0)
1242 Best=(B.getNodeNumRows()>0)? B.getNodeNumEntries()/B.getNodeNumEntries():100;
1244 size_t nnzperrow=(size_t)(sqrt((
double)Aest) + sqrt((
double)Best) - 1);
1245 nnzperrow*=nnzperrow;
1247 return (
size_t)(A.getNodeNumRows()*nnzperrow*0.75 + 100);
1253 template<
class Scalar,
1255 class GlobalOrdinal,
1257 void mult_A_B_newmatrix(
1261 const std::string & label)
1265 using Teuchos::ArrayView;
1269 #ifdef HAVE_TPETRA_MMM_TIMINGS 1270 std::string prefix = std::string(
"TpetraExt ")+ label + std::string(
": ");
1271 using Teuchos::TimeMonitor;
1272 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix+std::string(
"MMM M5 Cmap")))));
1274 size_t ST_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
1275 LocalOrdinal LO_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
1279 RCP<const import_type> Cimport;
1280 RCP<const map_type> Ccolmap;
1281 RCP<const import_type> Bimport = Bview.
origMatrix->getGraph()->getImporter();
1282 RCP<const import_type> Iimport = Bview.
importMatrix.is_null() ? Teuchos::null : Bview.
importMatrix->getGraph()->getImporter();
1283 Array<LocalOrdinal> Bcol2Ccol(Bview.
colMap->getNodeNumElements()), Icol2Ccol;
1289 for(
size_t i=0; i<Bview.
colMap->getNodeNumElements(); i++) {
1290 Bcol2Ccol[i] = Teuchos::as<LocalOrdinal>(i);
1295 if(!Bimport.is_null() && !Iimport.is_null()){
1296 Cimport = Bimport->setUnion(*Iimport);
1297 Ccolmap = Cimport->getTargetMap();
1299 else if(!Bimport.is_null() && Iimport.is_null()) {
1300 Cimport = Bimport->setUnion();
1302 else if(Bimport.is_null() && !Iimport.is_null()) {
1303 Cimport = Iimport->setUnion();
1306 throw std::runtime_error(
"TpetraExt::MMM status of matrix importers is nonsensical");
1308 Ccolmap = Cimport->getTargetMap();
1310 if(!Cimport->getSourceMap()->isSameAs(*Bview.
origMatrix->getDomainMap()))
1311 throw std::runtime_error(
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
1314 Icol2Ccol.resize(Bview.
importMatrix->getColMap()->getNodeNumElements());
1315 ArrayView<const GlobalOrdinal> Bgid = Bview.
origMatrix->getColMap()->getNodeElementList();
1316 ArrayView<const GlobalOrdinal> Igid = Bview.
importMatrix->getColMap()->getNodeElementList();
1318 for(
size_t i=0; i<Bview.
origMatrix->getColMap()->getNodeNumElements(); i++)
1319 Bcol2Ccol[i] = Ccolmap->getLocalElement(Bgid[i]);
1320 for(
size_t i=0; i<Bview.
importMatrix->getColMap()->getNodeNumElements(); i++)
1321 Icol2Ccol[i] = Ccolmap->getLocalElement(Igid[i]);
1324 #ifdef HAVE_TPETRA_MMM_TIMINGS 1325 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string(
"MMM Newmatrix SerialCore"))));
1330 size_t n=Ccolmap->getNodeNumElements();
1333 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP;
1334 ArrayRCP<size_t> Crowptr_RCP;
1335 ArrayRCP<const LocalOrdinal> Acolind_RCP, Bcolind_RCP, Icolind_RCP;
1336 ArrayRCP<LocalOrdinal> Ccolind_RCP;
1337 ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP;
1338 ArrayRCP<Scalar> Cvals_RCP;
1340 Aview.
origMatrix->getAllValues(Arowptr_RCP,Acolind_RCP,Avals_RCP);
1341 Bview.
origMatrix->getAllValues(Browptr_RCP,Bcolind_RCP,Bvals_RCP);
1346 ArrayView<const size_t> Arowptr, Browptr, Irowptr;
1347 ArrayView<const LocalOrdinal> Acolind, Bcolind, Icolind;
1348 ArrayView<const Scalar> Avals, Bvals, Ivals;
1349 ArrayView<size_t> Crowptr;
1350 ArrayView<LocalOrdinal> Ccolind;
1351 ArrayView<Scalar> Cvals;
1352 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
1353 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
1355 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
1363 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
1364 Array<size_t> c_status(n, ST_INVALID);
1368 size_t CSR_ip=0,OLD_ip=0;
1369 Crowptr_RCP.resize(m+1); Crowptr = Crowptr_RCP();
1370 Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
1371 Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP();
1374 Array<LocalOrdinal> targetMapToOrigRow(Aview.
colMap->getNodeNumElements(),LO_INVALID);
1375 Array<LocalOrdinal> targetMapToImportRow(Aview.
colMap->getNodeNumElements(),LO_INVALID);
1379 for(LocalOrdinal i=Aview.
colMap->getMinLocalIndex(); i <= Aview.
colMap->getMaxLocalIndex(); i++) {
1380 LocalOrdinal B_LID = Bview.
origMatrix->getRowMap()->getLocalElement(Aview.
colMap->getGlobalElement(i));
1381 if(B_LID != LO_INVALID) targetMapToOrigRow[i] = B_LID;
1383 LocalOrdinal I_LID = Bview.
importMatrix->getRowMap()->getLocalElement(Aview.
colMap->getGlobalElement(i));
1384 targetMapToImportRow[i] = I_LID;
1390 for(LocalOrdinal i=Aview.
colMap->getMinLocalIndex(); i <= Aview.
colMap->getMaxLocalIndex(); i++) {
1391 LocalOrdinal B_LID = Bview.
origMatrix->getRowMap()->getLocalElement(Aview.
colMap->getGlobalElement(i));
1392 if(B_LID != LO_INVALID) targetMapToOrigRow[i] = B_LID;
1394 LocalOrdinal I_LID = Bview.
importMatrix->getRowMap()->getLocalElement(Aview.
colMap->getGlobalElement(i));
1395 targetMapToImportRow[i] = I_LID;
1400 const Scalar SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1403 for(
size_t i=0; i<m; i++){
1406 for(
size_t k=Arowptr[i]; k<Arowptr[i+1]; k++){
1407 LocalOrdinal Ak = Acolind[k];
1408 Scalar Aval = Avals[k];
1409 if(Aval==SC_ZERO)
continue;
1411 if(targetMapToOrigRow[Ak] != LO_INVALID){
1413 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Ak]);
1415 for(
size_t j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
1416 LocalOrdinal Cj=Bcol2Ccol[Bcolind[j]];
1418 if(c_status[Cj]==INVALID || c_status[Cj]<OLD_ip){
1420 c_status[Cj] = CSR_ip;
1421 Ccolind[CSR_ip]= Cj;
1422 Cvals[CSR_ip] = Aval*Bvals[j];
1426 Cvals[c_status[Cj]]+=Aval*Bvals[j];
1431 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Ak]);
1432 for(
size_t j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
1433 LocalOrdinal Cj=Icol2Ccol[Icolind[j]];
1435 if(c_status[Cj]==INVALID || c_status[Cj]<OLD_ip){
1437 c_status[Cj]=CSR_ip;
1439 Cvals[CSR_ip]=Aval*Ivals[j];
1443 Cvals[c_status[Cj]]+=Aval*Ivals[j];
1449 if(CSR_ip + n > CSR_alloc){
1451 Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
1452 Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP();
1460 Cvals_RCP.resize(CSR_ip);
1461 Ccolind_RCP.resize(CSR_ip);
1464 #ifdef HAVE_TPETRA_MMM_TIMINGS 1465 MM = rcp (
new TimeMonitor (* (TimeMonitor::getNewTimer(prefix+std::string(
"MMM Newmatrix Final Sort")))));
1472 Import_Util::sortCrsEntries(Crowptr_RCP(),Ccolind_RCP(),Cvals_RCP());
1475 #ifdef HAVE_TPETRA_MMM_TIMINGS 1476 MM = rcp (
new TimeMonitor (* (TimeMonitor::getNewTimer(prefix+std::string(
"MMM Newmatrix ESFC")))));
1485 template<
class Scalar,
1487 class GlobalOrdinal,
1489 void jacobi_A_B_newmatrix(
1495 const std::string & label)
1499 using Teuchos::ArrayView;
1502 #ifdef HAVE_TPETRA_MMM_TIMINGS 1503 std::string prefix = std::string(
"TpetraExt ")+ label + std::string(
": ");
1504 using Teuchos::TimeMonitor;
1505 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix+std::string(
"Jacobi M5 Cmap")))));
1507 size_t ST_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
1508 LocalOrdinal LO_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
1512 RCP<const import_type> Cimport;
1513 RCP<const map_type> Ccolmap;
1514 RCP<const import_type> Bimport = Bview.
origMatrix->getGraph()->getImporter();
1515 RCP<const import_type> Iimport = Bview.
importMatrix.is_null() ? Teuchos::null : Bview.
importMatrix->getGraph()->getImporter();
1516 Array<LocalOrdinal> Bcol2Ccol(Bview.
colMap->getNodeNumElements()), Icol2Ccol;
1522 for(
size_t i=0; i<Bview.
colMap->getNodeNumElements(); i++) {
1523 Bcol2Ccol[i] = Teuchos::as<LocalOrdinal>(i);
1528 if(!Bimport.is_null() && !Iimport.is_null()){
1529 Cimport = Bimport->setUnion(*Iimport);
1530 Ccolmap = Cimport->getTargetMap();
1532 else if(!Bimport.is_null() && Iimport.is_null()) {
1533 Cimport = Bimport->setUnion();
1535 else if(Bimport.is_null() && !Iimport.is_null()) {
1536 Cimport = Iimport->setUnion();
1539 throw std::runtime_error(
"TpetraExt::Jacobi status of matrix importers is nonsensical");
1541 Ccolmap = Cimport->getTargetMap();
1543 if(!Cimport->getSourceMap()->isSameAs(*Bview.
origMatrix->getDomainMap()))
1544 throw std::runtime_error(
"Tpetra:Jacobi Import setUnion messed with the DomainMap in an unfortunate way");
1547 Icol2Ccol.resize(Bview.
importMatrix->getColMap()->getNodeNumElements());
1548 ArrayView<const GlobalOrdinal> Bgid = Bview.
origMatrix->getColMap()->getNodeElementList();
1549 ArrayView<const GlobalOrdinal> Igid = Bview.
importMatrix->getColMap()->getNodeElementList();
1551 for(
size_t i=0; i<Bview.
origMatrix->getColMap()->getNodeNumElements(); i++)
1552 Bcol2Ccol[i] = Ccolmap->getLocalElement(Bgid[i]);
1553 for(
size_t i=0; i<Bview.
importMatrix->getColMap()->getNodeNumElements(); i++)
1554 Icol2Ccol[i] = Ccolmap->getLocalElement(Igid[i]);
1557 #ifdef HAVE_TPETRA_MMM_TIMINGS 1558 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string(
"Jacobi Newmatrix SerialCore"))));
1563 size_t n=Ccolmap->getNodeNumElements();
1566 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP;
1567 ArrayRCP<size_t> Crowptr_RCP;
1568 ArrayRCP<const LocalOrdinal> Acolind_RCP, Bcolind_RCP, Icolind_RCP;
1569 ArrayRCP<LocalOrdinal> Ccolind_RCP;
1570 ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP;
1571 ArrayRCP<Scalar> Cvals_RCP;
1572 ArrayRCP<const Scalar> Dvals_RCP;
1574 Aview.
origMatrix->getAllValues(Arowptr_RCP,Acolind_RCP,Avals_RCP);
1575 Bview.
origMatrix->getAllValues(Browptr_RCP,Bcolind_RCP,Bvals_RCP);
1580 ArrayView<const size_t> Arowptr, Browptr, Irowptr;
1581 ArrayView<const LocalOrdinal> Acolind, Bcolind, Icolind;
1582 ArrayView<const Scalar> Avals, Bvals, Ivals;
1583 ArrayView<size_t> Crowptr;
1584 ArrayView<LocalOrdinal> Ccolind;
1585 ArrayView<Scalar> Cvals;
1586 ArrayView<const Scalar> Dvals;
1587 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
1588 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
1590 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
1592 Dvals = Dvals_RCP();
1599 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
1600 Array<size_t> c_status(n, ST_INVALID);
1604 size_t CSR_ip=0,OLD_ip=0;
1605 Crowptr_RCP.resize(m+1); Crowptr = Crowptr_RCP();
1606 Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
1607 Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP();
1610 Array<LocalOrdinal> targetMapToOrigRow(Aview.
colMap->getNodeNumElements(),LO_INVALID);
1611 Array<LocalOrdinal> targetMapToImportRow(Aview.
colMap->getNodeNumElements(),LO_INVALID);
1615 for(LocalOrdinal i=Aview.
colMap->getMinLocalIndex(); i <= Aview.
colMap->getMaxLocalIndex(); i++) {
1616 LocalOrdinal B_LID = Bview.
origMatrix->getRowMap()->getLocalElement(Aview.
colMap->getGlobalElement(i));
1617 if(B_LID != LO_INVALID) targetMapToOrigRow[i] = B_LID;
1619 LocalOrdinal I_LID = Bview.
importMatrix->getRowMap()->getLocalElement(Aview.
colMap->getGlobalElement(i));
1620 targetMapToImportRow[i] = I_LID;
1626 for(LocalOrdinal i=Aview.
colMap->getMinLocalIndex(); i <= Aview.
colMap->getMaxLocalIndex(); i++) {
1627 LocalOrdinal B_LID = Bview.
origMatrix->getRowMap()->getLocalElement(Aview.
colMap->getGlobalElement(i));
1628 if(B_LID != LO_INVALID) targetMapToOrigRow[i] = B_LID;
1630 LocalOrdinal I_LID = Bview.
importMatrix->getRowMap()->getLocalElement(Aview.
colMap->getGlobalElement(i));
1631 targetMapToImportRow[i] = I_LID;
1636 const Scalar SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1639 for(
size_t i=0; i<m; i++){
1641 Scalar Dval = Dvals[i];
1644 for(
size_t k=Browptr[i]; k<Browptr[i+1]; k++){
1645 Scalar Bval = Bvals[k];
1646 if(Bval==SC_ZERO)
continue;
1647 LocalOrdinal Ck=Bcol2Ccol[Bcolind[k]];
1650 c_status[Ck] = CSR_ip;
1651 Ccolind[CSR_ip] = Ck;
1652 Cvals[CSR_ip] = Bvals[k];
1658 for(
size_t k=Arowptr[i]; k<Arowptr[i+1]; k++){
1659 LocalOrdinal Ak = Acolind[k];
1660 Scalar Aval = Avals[k];
1661 if(Aval==SC_ZERO)
continue;
1663 if(targetMapToOrigRow[Ak] != LO_INVALID){
1665 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Ak]);
1667 for(
size_t j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
1668 LocalOrdinal Cj=Bcol2Ccol[Bcolind[j]];
1670 if(c_status[Cj]==INVALID || c_status[Cj]<OLD_ip){
1672 c_status[Cj] = CSR_ip;
1673 Ccolind[CSR_ip] = Cj;
1674 Cvals[CSR_ip] = - omega * Dval* Aval * Bvals[j];
1678 Cvals[c_status[Cj]] -= omega * Dval* Aval * Bvals[j];
1683 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Ak]);
1684 for(
size_t j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
1685 LocalOrdinal Cj=Icol2Ccol[Icolind[j]];
1687 if(c_status[Cj]==INVALID || c_status[Cj]<OLD_ip){
1689 c_status[Cj] = CSR_ip;
1690 Ccolind[CSR_ip] = Cj;
1691 Cvals[CSR_ip] = - omega * Dval* Aval * Ivals[j];
1695 Cvals[c_status[Cj]] -= omega * Dval* Aval * Ivals[j];
1701 if(CSR_ip + n > CSR_alloc){
1703 Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
1704 Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP();
1712 Cvals_RCP.resize(CSR_ip);
1713 Ccolind_RCP.resize(CSR_ip);
1716 #ifdef HAVE_TPETRA_MMM_TIMINGS 1717 MM = rcp (
new TimeMonitor (* (TimeMonitor::getNewTimer(prefix+std::string(
"Jacobi Newmatrix Final Sort")))));
1724 Import_Util::sortCrsEntries(Crowptr_RCP(),Ccolind_RCP(),Cvals_RCP());
1727 #ifdef HAVE_TPETRA_MMM_TIMINGS 1728 MM = rcp (
new TimeMonitor (* (TimeMonitor::getNewTimer(prefix+std::string(
"Jacobi Newmatrix ESFC")))));
1736 template<
class Scalar,
1738 class GlobalOrdinal,
1740 void import_and_extract_views(
1745 bool userAssertsThereAreNoRemotes,
1746 const std::string & label)
1748 #ifdef HAVE_TPETRA_MMM_TIMINGS 1749 std::string prefix = std::string(
"TpetraExt ")+ label + std::string(
": ");
1750 using Teuchos::TimeMonitor;
1751 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string(
"MMM I&X Alloc"))));
1763 Mview.deleteContents();
1765 RCP<const Map_t> Mrowmap = M.
getRowMap();
1766 RCP<const Map_t> MremoteRowMap;
1767 const int numProcs = Mrowmap->getComm()->getSize();
1769 ArrayView<const GlobalOrdinal> Mrows = targetMap->getNodeElementList();
1771 size_t numRemote = 0;
1772 size_t numRows = targetMap->getNodeNumElements();
1775 Mview.
rowMap = targetMap;
1781 if(userAssertsThereAreNoRemotes)
return;
1783 #ifdef HAVE_TPETRA_MMM_TIMINGS 1784 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string(
"MMM I&X RemoteMap"))));
1789 if(!prototypeImporter.is_null() && prototypeImporter->getSourceMap()->isSameAs(*Mrowmap) && prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
1791 numRemote = prototypeImporter->getNumRemoteIDs();
1792 Array<GlobalOrdinal> MremoteRows(numRemote);
1793 ArrayView<const LocalOrdinal> RemoteLIDs = prototypeImporter->getRemoteLIDs();
1794 for(
size_t i=0; i<numRemote; i++) {
1795 MremoteRows[i] = targetMap->getGlobalElement(RemoteLIDs[i]);
1798 MremoteRowMap=rcp(
new Map_t(OrdinalTraits<global_size_t>::invalid(), MremoteRows(), Mrowmap->getIndexBase(), Mrowmap->getComm(), Mrowmap->getNode()));
1801 else if(prototypeImporter.is_null()) {
1803 Array<GlobalOrdinal> MremoteRows(numRows);
1804 for(
size_t i=0; i < numRows; ++i) {
1805 const LocalOrdinal mlid = Mrowmap->getLocalElement(Mrows[i]);
1807 if (mlid == OrdinalTraits<LocalOrdinal>::invalid()) {
1808 MremoteRows[numRemote]=Mrows[i];
1812 MremoteRows.resize(numRemote);
1813 MremoteRowMap=rcp(
new Map_t(OrdinalTraits<global_size_t>::invalid(), MremoteRows(), Mrowmap->getIndexBase(), Mrowmap->getComm(), Mrowmap->getNode()));
1822 TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
1823 "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows." <<std::endl);
1832 #ifdef HAVE_TPETRA_MMM_TIMINGS 1833 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string(
"MMM I&X Collective-0"))));
1837 Teuchos::reduceAll(*(Mrowmap->getComm()) , Teuchos::REDUCE_MAX, (
global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote) );
1840 if (globalMaxNumRemote > 0) {
1841 #ifdef HAVE_TPETRA_MMM_TIMINGS 1842 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string(
"MMM I&X Import-2"))));
1845 RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> > importer;
1848 importer = prototypeImporter->createRemoteOnlyImport(MremoteRowMap);
1852 throw std::runtime_error(
"prototypeImporter->SourceMap() does not match M.getRowMap()!");
1854 #ifdef HAVE_TPETRA_MMM_TIMINGS 1855 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string(
"MMM I&X Import-3"))));
1859 Teuchos::ParameterList labelList;
1860 labelList.set(
"Timer Label",label);
1861 Mview.
importMatrix = Tpetra::importAndFillCompleteCrsMatrix<CrsMatrix_t>(Teuchos::rcp(&M,
false),*importer,M.
getDomainMap(),MremoteRowMap,Teuchos::rcp(&labelList,
false));
1863 #ifdef COMPUTE_MMM_STATISTICS 1864 printMultiplicationStatistics(importer,label+std::string(
" I&X MMM"));
1868 #ifdef HAVE_TPETRA_MMM_TIMINGS 1869 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string(
"MMM I&X Import-4"))));
1888 #define TPETRA_MATRIXMATRIX_INSTANT(SCALAR,LO,GO,NODE) \ 1891 void MatrixMatrix::Multiply( \ 1892 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \ 1894 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \ 1896 CrsMatrix< SCALAR , LO , GO , NODE >& C, \ 1897 bool call_FillComplete_on_result, \ 1898 const std::string & label); \ 1901 void MatrixMatrix::Jacobi( \ 1903 const Vector< SCALAR, LO, GO, NODE > & Dinv, \ 1904 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \ 1905 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \ 1906 CrsMatrix< SCALAR , LO , GO , NODE >& C, \ 1907 bool call_FillComplete_on_result, \ 1908 const std::string & label); \ 1911 void MatrixMatrix::Add( \ 1912 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \ 1915 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \ 1918 RCP<CrsMatrix< SCALAR , LO , GO , NODE > > C); \ 1921 void MatrixMatrix::Add( \ 1922 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \ 1925 CrsMatrix<SCALAR, LO, GO, NODE>& B, \ 1929 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > \ 1930 MatrixMatrix::add (const SCALAR & alpha, \ 1931 const bool transposeA, \ 1932 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \ 1933 const SCALAR & beta, \ 1934 const bool transposeB, \ 1935 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \ 1936 const Teuchos::RCP<const Map< LO , GO , NODE > >& domainMap, \ 1937 const Teuchos::RCP<const Map< LO , GO , NODE > >& rangeMap, \ 1938 const Teuchos::RCP<Teuchos::ParameterList>& params); \ 1942 #endif // TPETRA_MATRIXMATRIX_DEF_HPP Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Teuchos::RCP< const map_type > importColMap
Colmap garnered as a result of the import.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Teuchos::ArrayRCP< const Scalar > getData() const
Const view of the local values of this vector.
size_t getNumReceives() const
The number of processes from which we will receive data.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const
This matrix's graph, as a RowGraph.
Teuchos::RCP< const map_type > domainMap
Domain map for original matrix.
Teuchos::RCP< const map_type > getRowMap() const
The Map that describes the row distribution in this matrix.
Teuchos::RCP< const map_type > getColMap() const
The Map that describes the column distribution in this matrix.
bool isLocallyIndexed() const
Whether the matrix is locally indexed on the calling process.
Teuchos::RCP< const map_type > colMap
Col map for the original version of the matrix.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
void setAllValues(const typename local_matrix_type::row_map_type &rowPointers, const typename local_graph_type::entries_type::non_const_type &columnIndices, const typename local_matrix_type::values_type &values)
Sets the 1D pointer arrays of the graph.
size_t getNodeNumRows() const
The number of matrix rows owned by the calling process.
Teuchos::RCP< const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > origMatrix
The original matrix.
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global indices.
size_t global_size_t
Global size_t object.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
void Add(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, Scalar scalarA, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarB)
Construct and (optionally) redistribute the explicitly stored transpose of a CrsMatrix.
void keyValueMerge(KeyInputIterType keyBeg1, KeyInputIterType keyEnd1, ValueInputIterType valBeg1, ValueInputIterType valEnd1, KeyInputIterType keyBeg2, KeyInputIterType keyEnd2, ValueInputIterType valBeg2, ValueInputIterType valEnd2, KeyOutputIterType keyOut, ValueOutputIterType valOut, BinaryFunction f)
Merge two sorted (by keys) sequences of unique (key,value) pairs by combining pairs with equal keys...
Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > importMatrix
The imported matrix.
Teuchos::RCP< const map_type > getDomainMap() const
The domain Map of this matrix.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix is distributed.
bool isFillComplete() const
Whether the matrix is fill complete.
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
Sets up and executes a communication plan for a Tpetra DistObject.
void Jacobi(Scalar omega, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Dinv, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string())
Teuchos::RCP< crs_matrix_type > createTranspose()
Compute and return the transpose of the matrix given to the constructor.
Utility functions for packing and unpacking sparse matrix entries.
ProfileType getProfileType() const
Returns true if the matrix was allocated with static data structures.
global_size_t getGlobalNumRows() const
Number of global elements in the row map of this matrix.
bool isFillActive() const
Whether the matrix is not fill complete.
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
void Multiply(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, bool transposeB, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string())
Sparse matrix-matrix multiply.
Describes a parallel distribution of objects over processes.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
A read-only, row-oriented interface to a sparse matrix.
A distributed dense vector.
Stand-alone utility functions and macros.
Teuchos::RCP< const map_type > rowMap
Desired row map for "imported" version of the matrix.
void expertStaticFillComplete(const RCP< const map_type > &domainMap, const RCP< const map_type > &rangeMap, const RCP< const import_type > &importer=Teuchos::null, const RCP< const export_type > &exporter=Teuchos::null, const RCP< ParameterList > ¶ms=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
Struct that holds views of the contents of a CrsMatrix.
void getLastDoStatistics(size_t &bytes_sent, size_t &bytes_recvd) const
Information on the last call to do/doReverse.
size_t getNumSends() const
The number of processes to which we will send data.
Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const bool transposeB, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap=Teuchos::null, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Compute the sparse matrix sum C = scalarA * Op(A) + scalarB * Op(B), where Op(X) is either X or its t...
Teuchos::RCP< const map_type > origRowMap
Original row map of matrix.
Teuchos::RCP< crs_matrix_type > createTransposeLocal()
Compute and return the transpose of the matrix given to the constructor.
void fillComplete(const RCP< const map_type > &domainMap, const RCP< const map_type > &rangeMap, const RCP< ParameterList > ¶ms=null)
Signal that data entry is complete, specifying domain and range maps.
Teuchos::RCP< const map_type > getRangeMap() const
The range Map of this matrix.
size_t getNodeMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on this node.