Tpetra parallel linear algebra  Version of the Day
TpetraExt_MatrixMatrix_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_MATRIXMATRIX_DEF_HPP
43 #define TPETRA_MATRIXMATRIX_DEF_HPP
44 
46 #include "Teuchos_VerboseObject.hpp"
47 #include "Teuchos_Array.hpp"
48 #include "Tpetra_Util.hpp"
49 #include "Tpetra_ConfigDefs.hpp"
50 #include "Tpetra_CrsMatrix.hpp"
52 #include "Tpetra_RowMatrixTransposer.hpp"
53 #include "Tpetra_ConfigDefs.hpp"
54 #include "Tpetra_Map.hpp"
55 #include "Tpetra_Export.hpp"
56 #include "Tpetra_Import_Util.hpp"
57 #include "Tpetra_Import_Util2.hpp"
58 #include <algorithm>
59 #include "Teuchos_FancyOStream.hpp"
60 
61 //#define COMPUTE_MMM_STATISTICS
62 
63 
69 namespace Tpetra {
70 
71 
72 namespace MatrixMatrix{
73 
74 
75 template <class Scalar,
76  class LocalOrdinal,
77  class GlobalOrdinal,
78  class Node>
79 void Multiply(
81  bool transposeA,
83  bool transposeB,
85  bool call_FillComplete_on_result,
86  const std::string & label)
87 {
88 
89 #ifdef HAVE_TPETRA_MMM_TIMINGS
90  std::string prefix = std::string("TpetraExt ")+ label + std::string(": ");
91  using Teuchos::TimeMonitor;
92  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("MMM All Setup"))));
93 #endif
94 
95  //TEUCHOS_FUNC_TIME_MONITOR_DIFF("My Matrix Mult", mmm_multiply);
97  //
98  //This method forms the matrix-matrix product C = op(A) * op(B), where
99  //op(A) == A if transposeA is false,
100  //op(A) == A^T if transposeA is true,
101  //and similarly for op(B).
102  //
103 
104  //A and B should already be Filled.
105  //(Should we go ahead and call FillComplete() on them if necessary?
106  // or error out? For now, we choose to error out.)
107  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::runtime_error, "MatrixMatrix::Multiply(): Matrix A is not fill complete.");
108  TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), std::runtime_error, "MatrixMatrix::Multiply(): Matrix B is not fill complete.");
109  TEUCHOS_TEST_FOR_EXCEPTION(C.isLocallyIndexed() , std::runtime_error, "MatrixMatrix::Multiply(): Result matrix C must not be locally indexed.");
110 
111  //Convience typedefs
112  typedef CrsMatrixStruct<
113  Scalar,
114  LocalOrdinal,
115  GlobalOrdinal,
116  Node> CrsMatrixStruct_t;
118 
119  RCP<const Matrix_t > Aprime = null;
120  RCP<const Matrix_t > Bprime = null;
121 
122  // Is this a "clean" matrix
123  bool NewFlag=!C.getGraph()->isLocallyIndexed() && !C.getGraph()->isGloballyIndexed();
124 
125  bool use_optimized_ATB=false;
126  if(transposeA && !transposeB && call_FillComplete_on_result && NewFlag) {
127  use_optimized_ATB=true;
128  }
129 #ifdef USE_OLD_TRANSPOSE // NOTE: For Grey Ballard's use. Remove this later.
130  use_optimized_ATB=false;
131 #endif
132 
133 
134  if(!use_optimized_ATB && transposeA) {
136  Aprime = at.createTranspose();
137  }
138  else{
139  Aprime = rcpFromRef(A);
140  }
141 
142  if(transposeB){
144  Bprime=bt.createTranspose();
145  }
146  else{
147  Bprime = rcpFromRef(B);
148  }
149 
150 
151  //now check size compatibility
152  global_size_t numACols = A.getDomainMap()->getGlobalNumElements();
153  global_size_t numBCols = B.getDomainMap()->getGlobalNumElements();
154  global_size_t Aouter = transposeA ? numACols : A.getGlobalNumRows();
155  global_size_t Bouter = transposeB ? B.getGlobalNumRows() : numBCols;
156  global_size_t Ainner = transposeA ? A.getGlobalNumRows() : numACols;
157  global_size_t Binner = transposeB ? numBCols : B.getGlobalNumRows();
158  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::runtime_error,
159  "MatrixMatrix::Multiply: ERROR, inner dimensions of op(A) and op(B) "
160  "must match for matrix-matrix product. op(A) is "
161  <<Aouter<<"x"<<Ainner << ", op(B) is "<<Binner<<"x"<<Bouter<<std::endl);
162 
163  //The result matrix C must at least have a row-map that reflects the
164  //correct row-size. Don't check the number of columns because rectangular
165  //matrices which were constructed with only one map can still end up
166  //having the correct capacity and dimensions when filled.
167  TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.getGlobalNumRows(), std::runtime_error,
168  "MatrixMatrix::Multiply: ERROR, dimensions of result C must "
169  "match dimensions of op(A) * op(B). C has "<<C.getGlobalNumRows()
170  << " rows, should have at least "<<Aouter << std::endl);
171 
172  //It doesn't matter whether C is already Filled or not. If it is already
173  //Filled, it must have space allocated for the positions that will be
174  //referenced in forming C = op(A)*op(B). If it doesn't have enough space,
175  //we'll error out later when trying to store result values.
176 
177  // CGB: However, matrix must be in active-fill
178  TEUCHOS_TEST_FOR_EXCEPT( C.isFillActive() == false );
179 
180  //We're going to need to import remotely-owned sections of A and/or B
181  //if more than 1 processor is performing this run, depending on the scenario.
182  int numProcs = A.getComm()->getSize();
183 
184  //Declare a couple of structs that will be used to hold views of the data
185  //of A and B, to be used for fast access during the matrix-multiplication.
186  CrsMatrixStruct_t Aview;
187  CrsMatrixStruct_t Bview;
188 
189  RCP<const Map_t > targetMap_A = Aprime->getRowMap();
190  RCP<const Map_t > targetMap_B = Bprime->getRowMap();
191 
192 #ifdef HAVE_TPETRA_MMM_TIMINGS
193  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("MMM All I&X"))));
194 #endif
195 
196  //Now import any needed remote rows and populate the Aview struct.
197  // NOTE: We assert that an import isn't needed --- since we do the transpose above to handle that.
198  if(!use_optimized_ATB) {
199  RCP<const Import<LocalOrdinal,GlobalOrdinal, Node> > dummyImporter;
200  MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview,dummyImporter,true,label);
201  }
202 
203 
204  //We will also need local access to all rows of B that correspond to the
205  //column-map of op(A).
206  if (numProcs > 1) {
207  targetMap_B = Aprime->getColMap(); //colmap_op_A;
208  }
209 
210  //Now import any needed remote rows and populate the Bview struct.
211  if(!use_optimized_ATB)
212  MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(),false,label);
213 
214 #ifdef HAVE_TPETRA_MMM_TIMINGS
215  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("MMM All Multiply"))));
216 #endif
217 
218 
219  //Now call the appropriate method to perform the actual multiplication.
220  CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
221 
222  if(use_optimized_ATB) {
223  MMdetails::mult_AT_B_newmatrix(A, B, C,label);
224  }
225  else if(call_FillComplete_on_result && NewFlag ) {
226  MMdetails::mult_A_B_newmatrix(Aview, Bview, C,label);
227  }
228  else {
229  MMdetails::mult_A_B(Aview, Bview, crsmat,label);
230 #ifdef HAVE_TPETRA_MMM_TIMINGS
231  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("MMM All FillComplete"))));
232 #endif
233  if (call_FillComplete_on_result) {
234  //We'll call FillComplete on the C matrix before we exit, and give
235  //it a domain-map and a range-map.
236  //The domain-map will be the domain-map of B, unless
237  //op(B)==transpose(B), in which case the range-map of B will be used.
238  //The range-map will be the range-map of A, unless
239  //op(A)==transpose(A), in which case the domain-map of A will be used.
240  if (!C.isFillComplete()) {
241  C.fillComplete(Bprime->getDomainMap(), Aprime->getRangeMap());
242  }
243  }
244  }
245 
246 }
247 
248 
249 template <class Scalar,
250  class LocalOrdinal,
251  class GlobalOrdinal,
252  class Node>
253 void Jacobi(Scalar omega,
258  bool call_FillComplete_on_result,
259  const std::string & label)
260 {
261 #ifdef HAVE_TPETRA_MMM_TIMINGS
262  std::string prefix = std::string("TpetraExt ")+ label + std::string(": ");
263  using Teuchos::TimeMonitor;
264  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string("Jacobi All Setup"))));
265 #endif
267 
268  //A and B should already be Filled.
269  //(Should we go ahead and call FillComplete() on them if necessary?
270  // or error out? For now, we choose to error out.)
271  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::runtime_error, "MatrixMatrix::Multiply(): Matrix A is not fill complete.");
272  TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), std::runtime_error, "MatrixMatrix::Multiply(): Matrix B is not fill complete.");
273  TEUCHOS_TEST_FOR_EXCEPTION(C.isLocallyIndexed() , std::runtime_error, "MatrixMatrix::Multiply(): Result matrix C must not be locally indexed.");
274 
275  //Convience typedefs
276  typedef CrsMatrixStruct<
277  Scalar,
278  LocalOrdinal,
279  GlobalOrdinal,
280  Node> CrsMatrixStruct_t;
282 
283  RCP<const Matrix_t > Aprime = rcpFromRef(A);
284  RCP<const Matrix_t > Bprime = rcpFromRef(B);
285 
286  //now check size compatibility
287  global_size_t numACols = A.getDomainMap()->getGlobalNumElements();
288  global_size_t numBCols = B.getDomainMap()->getGlobalNumElements();
289  global_size_t Aouter = A.getGlobalNumRows();
290  global_size_t Bouter = numBCols;
291  global_size_t Ainner = numACols;
292  global_size_t Binner = B.getGlobalNumRows();
293  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::runtime_error,
294  "MatrixMatrix::Jacobi: ERROR, inner dimensions of op(A) and op(B) "
295  "must match for matrix-matrix product. op(A) is "
296  <<Aouter<<"x"<<Ainner << ", op(B) is "<<Binner<<"x"<<Bouter<<std::endl);
297 
298  //The result matrix C must at least have a row-map that reflects the
299  //correct row-size. Don't check the number of columns because rectangular
300  //matrices which were constructed with only one map can still end up
301  //having the correct capacity and dimensions when filled.
302  TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.getGlobalNumRows(), std::runtime_error,
303  "MatrixMatrix::Multiply: ERROR, dimensions of result C must "
304  "match dimensions of op(A) * op(B). C has "<<C.getGlobalNumRows()
305  << " rows, should have at least "<<Aouter << std::endl);
306 
307  //It doesn't matter whether C is already Filled or not. If it is already
308  //Filled, it must have space allocated for the positions that will be
309  //referenced in forming C = op(A)*op(B). If it doesn't have enough space,
310  //we'll error out later when trying to store result values.
311 
312  // CGB: However, matrix must be in active-fill
313  TEUCHOS_TEST_FOR_EXCEPT( C.isFillActive() == false );
314 
315  //We're going to need to import remotely-owned sections of A and/or B
316  //if more than 1 processor is performing this run, depending on the scenario.
317  int numProcs = A.getComm()->getSize();
318 
319  //Declare a couple of structs that will be used to hold views of the data
320  //of A and B, to be used for fast access during the matrix-multiplication.
321  CrsMatrixStruct_t Aview;
322  CrsMatrixStruct_t Bview;
323 
324  RCP<const Map_t > targetMap_A = Aprime->getRowMap();
325  RCP<const Map_t > targetMap_B = Bprime->getRowMap();
326 
327 #ifdef HAVE_TPETRA_MMM_TIMINGS
328  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string("Jacobi All I&X"))));
329 #endif
330 
331  //Now import any needed remote rows and populate the Aview struct.
332  RCP<const Import<LocalOrdinal,GlobalOrdinal, Node> > dummyImporter;
333  MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview,dummyImporter,false,label);
334 
335  //We will also need local access to all rows of B that correspond to the
336  //column-map of op(A).
337  if (numProcs > 1) {
338  targetMap_B = Aprime->getColMap(); //colmap_op_A;
339  }
340 
341  //Now import any needed remote rows and populate the Bview struct.
342  MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(),false,label);
343 
344 #ifdef HAVE_TPETRA_MMM_TIMINGS
345  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string("Jacobi All Multiply"))));
346 #endif
347 
348 
349  //Now call the appropriate method to perform the actual multiplication.
350  CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
351 
352  // Is this a "clean" matrix
353  bool NewFlag=!C.getGraph()->isLocallyIndexed() && !C.getGraph()->isGloballyIndexed();
354 
355  if(call_FillComplete_on_result && NewFlag ) {
356  MMdetails::jacobi_A_B_newmatrix(omega,Dinv,Aview, Bview, C,label);
357  }
358  else {
359  TEUCHOS_TEST_FOR_EXCEPTION(
360  true, std::runtime_error,
361  "jacobi_A_B_general not implemented");
362  // FIXME (mfh 03 Apr 2014) This statement is unreachable, so I'm
363  // commenting it out.
364 // #ifdef HAVE_TPETRA_MMM_TIMINGS
365 // MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: Jacobi FillComplete")));
366 // #endif
367  // FIXME (mfh 03 Apr 2014) This statement is unreachable, so I'm
368  // commenting it out.
369  // if (call_FillComplete_on_result) {
370  // //We'll call FillComplete on the C matrix before we exit, and give
371  // //it a domain-map and a range-map.
372  // //The domain-map will be the domain-map of B, unless
373  // //op(B)==transpose(B), in which case the range-map of B will be used.
374  // //The range-map will be the range-map of A, unless
375  // //op(A)==transpose(A), in which case the domain-map of A will be used.
376  // if (!C.isFillComplete()) {
377  // C.fillComplete(Bprime->getDomainMap(), Aprime->getRangeMap());
378  // }
379  // }
380  }
381 }
382 
383 
384 
385 template <class Scalar,
386  class LocalOrdinal,
387  class GlobalOrdinal,
388  class Node>
389 void Add(
391  bool transposeA,
392  Scalar scalarA,
394  Scalar scalarB )
395 {
396  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::runtime_error,
397  "MatrixMatrix::Add ERROR, input matrix A.isFillComplete() is false; it is required to be true. (Result matrix B is not required to be isFillComplete()).");
398  TEUCHOS_TEST_FOR_EXCEPTION(B.isFillComplete() , std::runtime_error,
399  "MatrixMatrix::Add ERROR, input matrix B must not be fill complete!");
400  TEUCHOS_TEST_FOR_EXCEPTION(B.isStaticGraph() , std::runtime_error,
401  "MatrixMatrix::Add ERROR, input matrix B must not have static graph!");
402  TEUCHOS_TEST_FOR_EXCEPTION(B.isLocallyIndexed() , std::runtime_error,
403  "MatrixMatrix::Add ERROR, input matrix B must not be locally indexed!");
404  TEUCHOS_TEST_FOR_EXCEPTION(B.getProfileType()!=DynamicProfile, std::runtime_error,
405  "MatrixMatrix::Add ERROR, input matrix B must have a dynamic profile!");
406  //Convience typedef
407  typedef CrsMatrix<
408  Scalar,
409  LocalOrdinal,
410  GlobalOrdinal,
411  Node> CrsMatrix_t;
412  RCP<const CrsMatrix_t> Aprime = null;
413  if( transposeA ){
414  RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node> theTransposer(Teuchos::rcpFromRef (A));
415  Aprime = theTransposer.createTranspose();
416  }
417  else{
418  Aprime = rcpFromRef(A);
419  }
420  size_t a_numEntries;
421  Array<GlobalOrdinal> a_inds(A.getNodeMaxNumRowEntries());
422  Array<Scalar> a_vals(A.getNodeMaxNumRowEntries());
423  GlobalOrdinal row;
424 
425  if(scalarB != ScalarTraits<Scalar>::one()){
426  B.scale(scalarB);
427  }
428 
429  bool bFilled = B.isFillComplete();
430  size_t numMyRows = B.getNodeNumRows();
431  if(scalarA != ScalarTraits<Scalar>::zero()){
432  for(LocalOrdinal i = 0; (size_t)i < numMyRows; ++i){
433  row = B.getRowMap()->getGlobalElement(i);
434  Aprime->getGlobalRowCopy(row, a_inds(), a_vals(), a_numEntries);
435  if(scalarA != ScalarTraits<Scalar>::one()){
436  for(size_t j =0; j<a_numEntries; ++j){
437  a_vals[j] *= scalarA;
438  }
439  }
440  if(bFilled){
441  B.sumIntoGlobalValues(row, a_inds(0,a_numEntries), a_vals(0,a_numEntries));
442  }
443  else{
444  B.insertGlobalValues(row, a_inds(0,a_numEntries), a_vals(0,a_numEntries));
445  }
446 
447  }
448  }
449 }
450 
451 
452 template <class Scalar,
453  class LocalOrdinal,
454  class GlobalOrdinal,
455  class Node>
456 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
457 add (const Scalar& alpha,
458  const bool transposeA,
460  const Scalar& beta,
461  const bool transposeB,
463  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >& domainMap,
464  const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >& rangeMap,
465  const Teuchos::RCP<Teuchos::ParameterList>& params)
466 {
467  using Teuchos::RCP;
468  using Teuchos::rcp;
469  using Teuchos::rcpFromRef;
470  using Teuchos::rcp_dynamic_cast;
471  using Teuchos::rcp_implicit_cast;
475 
476  TEUCHOS_TEST_FOR_EXCEPTION(
477  ! A.isFillComplete () || ! B.isFillComplete (), std::invalid_argument,
478  "Tpetra::MatrixMatrix::add: A and B must both be fill complete.");
479 
480 #ifdef HAVE_TPETRA_DEBUG
481  // The matrices don't have domain or range Maps unless they are fill complete.
482  if (A.isFillComplete () && B.isFillComplete ()) {
483  const bool domainMapsSame =
484  (! transposeA && ! transposeB && ! A.getDomainMap ()->isSameAs (* (B.getDomainMap ()))) ||
485  (! transposeA && transposeB && ! A.getDomainMap ()->isSameAs (* (B.getRangeMap ()))) ||
486  (transposeA && ! transposeB && ! A.getRangeMap ()->isSameAs (* (B.getDomainMap ())));
487  TEUCHOS_TEST_FOR_EXCEPTION(
488  domainMapsSame, std::invalid_argument,
489  "Tpetra::MatrixMatrix::add: The domain Maps of Op(A) and Op(B) are not the same.");
490 
491  const bool rangeMapsSame =
492  (! transposeA && ! transposeB && ! A.getRangeMap ()->isSameAs (* (B.getRangeMap ()))) ||
493  (! transposeA && transposeB && ! A.getRangeMap ()->isSameAs (* (B.getDomainMap ()))) ||
494  (transposeA && ! transposeB && ! A.getDomainMap ()->isSameAs (* (B.getRangeMap ())));
495  TEUCHOS_TEST_FOR_EXCEPTION(
496  rangeMapsSame, std::invalid_argument,
497  "Tpetra::MatrixMatrix::add: The range Maps of Op(A) and Op(B) are not the same.");
498  }
499 #endif // HAVE_TPETRA_DEBUG
500 
501  // Form the explicit transpose of A if necessary.
502  RCP<const crs_matrix_type> Aprime;
503  if (transposeA) {
504  transposer_type theTransposer (rcpFromRef (A));
505  Aprime = theTransposer.createTranspose ();
506  } else {
507  Aprime = rcpFromRef (A);
508  }
509 
510 #ifdef HAVE_TPETRA_DEBUG
511  TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
512  "Tpetra::MatrixMatrix::Add: Failed to compute Op(A). "
513  "Please report this bug to the Tpetra developers.");
514 #endif // HAVE_TPETRA_DEBUG
515 
516  // Form the explicit transpose of B if necessary.
517  RCP<const crs_matrix_type> Bprime;
518  if (transposeB) {
519  transposer_type theTransposer (rcpFromRef (B));
520  Bprime = theTransposer.createTranspose ();
521  } else {
522  Bprime = rcpFromRef (B);
523  }
524 
525 #ifdef HAVE_TPETRA_DEBUG
526  TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
527  "Tpetra::MatrixMatrix::Add: Failed to compute Op(B). "
528  "Please report this bug to the Tpetra developers.");
529 
530  TEUCHOS_TEST_FOR_EXCEPTION(
531  ! Aprime->isFillComplete () || ! Bprime->isFillComplete (), std::invalid_argument,
532  "Tpetra::MatrixMatrix::add: Aprime and Bprime must both be fill complete. "
533  "Please report this bug to the Tpetra developers.");
534 #endif // HAVE_TPETRA_DEBUG
535 
536 
537  RCP<row_matrix_type> C =
538  Bprime->add (alpha, *rcp_implicit_cast<const row_matrix_type> (Aprime),
539  beta, domainMap, rangeMap, params);
540  return rcp_dynamic_cast<crs_matrix_type> (C);
541 }
542 
543 
544 
545 
546 template <class Scalar,
547  class LocalOrdinal,
548  class GlobalOrdinal,
549  class Node>
550 void Add(
552  bool transposeA,
553  Scalar scalarA,
555  bool transposeB,
556  Scalar scalarB,
558 {
559  using Teuchos::as;
560  using Teuchos::Array;
561  using Teuchos::ArrayRCP;
562  using Teuchos::ArrayView;
563  using Teuchos::RCP;
564  using Teuchos::rcp;
565  using Teuchos::rcp_dynamic_cast;
566  using Teuchos::rcpFromRef;
567  using Teuchos::tuple;
568  using std::endl;
569  // typedef typename ArrayView<const Scalar>::size_type size_type;
570  typedef Teuchos::ScalarTraits<Scalar> STS;
572  // typedef Import<LocalOrdinal, GlobalOrdinal, Node> import_type;
573  // typedef RowGraph<LocalOrdinal, GlobalOrdinal, Node> row_graph_type;
574  // typedef CrsGraph<LocalOrdinal, GlobalOrdinal, Node> crs_graph_type;
577 
578  TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
579  "Tpetra::MatrixMatrix::Add: The case C == null does not actually work. "
580  "Fixing this will require an interface change.");
581 
582  TEUCHOS_TEST_FOR_EXCEPTION(
583  ! A.isFillComplete () || ! B.isFillComplete (), std::invalid_argument,
584  "Tpetra::MatrixMatrix::Add: Both input matrices must be fill complete "
585  "before calling this function.");
586 
587 #ifdef HAVE_TPETRA_DEBUG
588  {
589  const bool domainMapsSame =
590  (! transposeA && ! transposeB && ! A.getDomainMap ()->isSameAs (* (B.getDomainMap ()))) ||
591  (! transposeA && transposeB && ! A.getDomainMap ()->isSameAs (* (B.getRangeMap ()))) ||
592  (transposeA && ! transposeB && ! A.getRangeMap ()->isSameAs (* (B.getDomainMap ())));
593  TEUCHOS_TEST_FOR_EXCEPTION(
594  domainMapsSame, std::invalid_argument,
595  "Tpetra::MatrixMatrix::Add: The domain Maps of Op(A) and Op(B) are not the same.");
596 
597  const bool rangeMapsSame =
598  (! transposeA && ! transposeB && ! A.getRangeMap ()->isSameAs (* (B.getRangeMap ()))) ||
599  (! transposeA && transposeB && ! A.getRangeMap ()->isSameAs (* (B.getDomainMap ()))) ||
600  (transposeA && ! transposeB && ! A.getDomainMap ()->isSameAs (* (B.getRangeMap ())));
601  TEUCHOS_TEST_FOR_EXCEPTION(
602  rangeMapsSame, std::invalid_argument,
603  "Tpetra::MatrixMatrix::Add: The range Maps of Op(A) and Op(B) are not the same.");
604  }
605 #endif // HAVE_TPETRA_DEBUG
606 
607  // Form the explicit transpose of A if necessary.
608  RCP<const crs_matrix_type> Aprime;
609  if (transposeA) {
610  transposer_type theTransposer (rcpFromRef (A));
611  Aprime = theTransposer.createTranspose ();
612  } else {
613  Aprime = rcpFromRef (A);
614  }
615 
616 #ifdef HAVE_TPETRA_DEBUG
617  TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
618  "Tpetra::MatrixMatrix::Add: Failed to compute Op(A). "
619  "Please report this bug to the Tpetra developers.");
620 #endif // HAVE_TPETRA_DEBUG
621 
622  // Form the explicit transpose of B if necessary.
623  RCP<const crs_matrix_type> Bprime;
624  if (transposeB) {
625  transposer_type theTransposer (rcpFromRef (B));
626  Bprime = theTransposer.createTranspose ();
627  } else {
628  Bprime = rcpFromRef (B);
629  }
630 
631 #ifdef HAVE_TPETRA_DEBUG
632  TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
633  "Tpetra::MatrixMatrix::Add: Failed to compute Op(B). "
634  "Please report this bug to the Tpetra developers.");
635 #endif // HAVE_TPETRA_DEBUG
636 
637  // Allocate or zero the entries of the result matrix.
638  if (! C.is_null ()) {
639  C->setAllToScalar (STS::zero ());
640  } else {
641 #if 0
642  // If Aprime and Bprime have the same row Map, and if C is null,
643  // we can optimize construction and fillComplete of C. For now,
644  // we just check pointer equality, to avoid the all-reduce in
645  // isSameAs. It may be worth that all-reduce to check, however.
646  //if (Aprime->getRowMap ().getRawPtr () == Bprime->getRowMap ().getRawPtr ()) {
647  if (Aprime->getRowMap ()->isSameAs (* (Bprime->getRowMap ())) {
648  RCP<const map_type> rowMap = Aprime->getRowMap ();
649 
650  RCP<const crs_graph_type> A_graph =
651  rcp_dynamic_cast<const crs_graph_type> (Aprime->getGraph (), true);
652 #ifdef HAVE_TPETRA_DEBUG
653  TEUCHOS_TEST_FOR_EXCEPTION(A_graph.is_null (), std::logic_error,
654  "Tpetra::MatrixMatrix::Add: Graph of Op(A) is null. "
655  "Please report this bug to the Tpetra developers.");
656 #endif // HAVE_TPETRA_DEBUG
657  RCP<const crs_graph_type> B_graph =
658  rcp_dynamic_cast<const crs_graph_type> (Bprime->getGraph (), true);
659 #ifdef HAVE_TPETRA_DEBUG
660  TEUCHOS_TEST_FOR_EXCEPTION(B_graph.is_null (), std::logic_error,
661  "Tpetra::MatrixMatrix::Add: Graph of Op(B) is null. "
662  "Please report this bug to the Tpetra developers.");
663 #endif // HAVE_TPETRA_DEBUG
664  RCP<const map_type> A_colMap = A_graph->getColMap ();
665 #ifdef HAVE_TPETRA_DEBUG
666  TEUCHOS_TEST_FOR_EXCEPTION(A_colMap.is_null (), std::logic_error,
667  "Tpetra::MatrixMatrix::Add: Column Map of Op(A) is null. "
668  "Please report this bug to the Tpetra developers.");
669 #endif // HAVE_TPETRA_DEBUG
670  RCP<const map_type> B_colMap = B_graph->getColMap ();
671 #ifdef HAVE_TPETRA_DEBUG
672  TEUCHOS_TEST_FOR_EXCEPTION(B_colMap.is_null (), std::logic_error,
673  "Tpetra::MatrixMatrix::Add: Column Map of Op(B) is null. "
674  "Please report this bug to the Tpetra developers.");
675  TEUCHOS_TEST_FOR_EXCEPTION(A_graph->getImporter ().is_null (),
676  std::logic_error,
677  "Tpetra::MatrixMatrix::Add: Op(A)'s Import is null. "
678  "Please report this bug to the Tpetra developers.");
679  TEUCHOS_TEST_FOR_EXCEPTION(B_graph->getImporter ().is_null (),
680  std::logic_error,
681  "Tpetra::MatrixMatrix::Add: Op(B)'s Import is null. "
682  "Please report this bug to the Tpetra developers.");
683 #endif // HAVE_TPETRA_DEBUG
684 
685  // Compute the (column Map and) Import of the matrix sum.
686  RCP<const import_type> sumImport =
687  A_graph->getImporter ()->setUnion (* (B_graph->getImporter ()));
688  RCP<const map_type> C_colMap = sumImport->getTargetMap ();
689 
690  // First, count the number of entries in each row. Then, go
691  // back over the rows again, and compute the actual sum.
692  // Remember that C may have a different column Map than Aprime
693  // or Bprime, so its local indices may be different. That's why
694  // we have to convert from local to global indices.
695 
696  ArrayView<const LocalOrdinal> A_local_ind;
697  Array<GlobalOrdinal> A_global_ind;
698  ArrayView<const LocalOrdinal> B_local_ind;
699  Array<GlobalOrdinal> B_global_ind;
700 
701  const size_t localNumRows = rowMap->getNodeNumElements ();
702  ArrayRCP<size_t> numEntriesPerRow (localNumRows);
703  // Compute the max number of entries in any row of A+B on this
704  // process, so that we won't have to resize temporary arrays.
705  size_t maxNumEntriesPerRow = 0;
706  for (size_t localRow = 0; localRow < localNumRows; ++localRow) {
707  // Get view of current row of A_graph, in its local indices.
708  A_graph->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind);
709  const size_type A_numEnt = A_local_ind.size ();
710  if (A_numEnt > A_global_ind.size ()) {
711  A_global_ind.resize (A_numEnt);
712  }
713  // Convert A's local indices to global indices.
714  for (size_type k = 0; k < A_numEnt; ++k) {
715  A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]);
716  }
717 
718  // Get view of current row of B_graph, in its local indices.
719  B_graph->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind);
720  const size_type B_numEnt = B_local_ind.size ();
721  if (B_numEnt > B_global_ind.size ()) {
722  B_global_ind.resize (B_numEnt);
723  }
724  // Convert B's local indices to global indices.
725  for (size_type k = 0; k < B_numEnt; ++k) {
726  B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]);
727  }
728 
729  // Count the number of entries in the merged row of A + B.
730  const size_t curNumEntriesPerRow =
731  keyMergeCount (A_global_ind.begin (), A_global_ind.end (),
732  B_global_ind.begin (), B_global_ind.end ());
733  numEntriesPerRow[localRow] = curNumEntriesPerRow;
734  maxNumEntriesPerRow = std::max (maxNumEntriesPerRow, curNumEntriesPerRow);
735  }
736 
737  // Create C, using the sum column Map and number of entries per
738  // row that we computed above. Having the exact number of
739  // entries per row lets us use static profile, making it valid
740  // to call expertStaticFillComplete.
741  C = rcp (new crs_matrix_type (rowMap, C_colMap, numEntriesPerRow, StaticProfile));
742 
743  // Go back through the rows and actually compute the sum. We
744  // don't ever have to resize A_global_ind or B_global_ind below,
745  // since we've already done it above.
746  ArrayView<const Scalar> A_val;
747  ArrayView<const Scalar> B_val;
748 
749  Array<LocalOrdinal> AplusB_local_ind (maxNumEntriesPerRow);
750  Array<GlobalOrdinal> AplusB_global_ind (maxNumEntriesPerRow);
751  Array<Scalar> AplusB_val (maxNumEntriesPerRow);
752 
753  for (size_t localRow = 0; localRow < localNumRows; ++localRow) {
754  // Get view of current row of A, in A's local indices.
755  Aprime->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind, A_val);
756  // Convert A's local indices to global indices.
757  for (size_type k = 0; k < A_local_ind.size (); ++k) {
758  A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]);
759  }
760 
761  // Get view of current row of B, in B's local indices.
762  Bprime->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind, B_val);
763  // Convert B's local indices to global indices.
764  for (size_type k = 0; k < B_local_ind.size (); ++k) {
765  B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]);
766  }
767 
768  const size_t curNumEntries = numEntriesPerRow[localRow];
769  ArrayView<LocalOrdinal> C_local_ind = AplusB_local_ind (0, curNumEntries);
770  ArrayView<GlobalOrdinal> C_global_ind = AplusB_global_ind (0, curNumEntries);
771  ArrayView<Scalar> C_val = AplusB_val (0, curNumEntries);
772 
773  // Sum the entries in the current row of A plus B.
774  keyValueMerge (A_global_ind.begin (), A_global_ind.end (),
775  A_val.begin (), A_val.end (),
776  B_global_ind.begin (), B_global_ind.end (),
777  B_val.begin (), B_val.end (),
778  C_global_ind.begin (), C_val.begin (),
779  std::plus<Scalar> ());
780  // Convert the sum's global indices into C's local indices.
781  for (size_type k = 0; k < as<size_type> (numEntriesPerRow[localRow]); ++k) {
782  C_local_ind[k] = C_colMap->getLocalElement (C_global_ind[k]);
783  }
784  // Give the current row sum to C.
785  C->replaceLocalValues (localRow, C_local_ind, C_val);
786  }
787 
788  // Use "expert static fill complete" to bypass construction of
789  // the Import and Export (if applicable) object(s).
790  RCP<const map_type> domainMap = A_graph->getDomainMap ();
791  RCP<const map_type> rangeMap = A_graph->getRangeMap ();
792  C->expertStaticFillComplete (domainMap, rangeMap, sumImport, A_graph->getExporter ());
793 
794  return; // Now we're done!
795  }
796  else {
797  // FIXME (mfh 08 May 2013) When I first looked at this method, I
798  // noticed that C was being given the row Map of Aprime (the
799  // possibly transposed version of A). Is this what we want?
800  C = rcp (new crs_matrix_type (Aprime->getRowMap (), null));
801  }
802 
803 #else
804  // FIXME (mfh 08 May 2013) When I first looked at this method, I
805  // noticed that C was being given the row Map of Aprime (the
806  // possibly transposed version of A). Is this what we want?
807  C = rcp (new crs_matrix_type (Aprime->getRowMap (), 0));
808 #endif // 0
809  }
810 
811 #ifdef HAVE_TPETRA_DEBUG
812  TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
813  "Tpetra::MatrixMatrix::Add: At this point, Aprime is null. "
814  "Please report this bug to the Tpetra developers.");
815  TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
816  "Tpetra::MatrixMatrix::Add: At this point, Bprime is null. "
817  "Please report this bug to the Tpetra developers.");
818  TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
819  "Tpetra::MatrixMatrix::Add: At this point, C is null. "
820  "Please report this bug to the Tpetra developers.");
821 #endif // HAVE_TPETRA_DEBUG
822 
823  Array<RCP<const crs_matrix_type> > Mat =
824  tuple<RCP<const crs_matrix_type> > (Aprime, Bprime);
825  Array<Scalar> scalar = tuple<Scalar> (scalarA, scalarB);
826 
827  // do a loop over each matrix to add: A reordering might be more efficient
828  for (int k = 0; k < 2; ++k) {
829  Array<GlobalOrdinal> Indices;
830  Array<Scalar> Values;
831 
832  // Loop over each locally owned row of the current matrix (either
833  // Aprime or Bprime), and sum its entries into the corresponding
834  // row of C. This works regardless of whether Aprime or Bprime
835  // has the same row Map as C, because both sumIntoGlobalValues and
836  // insertGlobalValues allow summing resp. inserting into nonowned
837  // rows of C.
838 #ifdef HAVE_TPETRA_DEBUG
839  TEUCHOS_TEST_FOR_EXCEPTION(Mat[k].is_null (), std::logic_error,
840  "Tpetra::MatrixMatrix::Add: At this point, curRowMap is null. "
841  "Please report this bug to the Tpetra developers.");
842 #endif // HAVE_TPETRA_DEBUG
843  RCP<const map_type> curRowMap = Mat[k]->getRowMap ();
844 #ifdef HAVE_TPETRA_DEBUG
845  TEUCHOS_TEST_FOR_EXCEPTION(curRowMap.is_null (), std::logic_error,
846  "Tpetra::MatrixMatrix::Add: At this point, curRowMap is null. "
847  "Please report this bug to the Tpetra developers.");
848 #endif // HAVE_TPETRA_DEBUG
849 
850  const size_t localNumRows = Mat[k]->getNodeNumRows ();
851  for (size_t i = 0; i < localNumRows; ++i) {
852  const GlobalOrdinal globalRow = curRowMap->getGlobalElement (i);
853  size_t numEntries = Mat[k]->getNumEntriesInGlobalRow (globalRow);
854  if (numEntries > 0) {
855  Indices.resize (numEntries);
856  Values.resize (numEntries);
857  Mat[k]->getGlobalRowCopy (globalRow, Indices (), Values (), numEntries);
858 
859  if (scalar[k] != STS::one ()) {
860  for (size_t j = 0; j < numEntries; ++j) {
861  Values[j] *= scalar[k];
862  }
863  }
864 
865  if (C->isFillComplete ()) {
866  C->sumIntoGlobalValues (globalRow, Indices, Values);
867  } else {
868  C->insertGlobalValues (globalRow, Indices, Values);
869  }
870  }
871  }
872  }
873 }
874 
875 
876 
877 } //End namespace MatrixMatrix
878 
879 namespace MMdetails{
880 
881 // Prints MMM-style statistics on communication done with an Import or Export object
882 template <class TransferType>
883 void printMultiplicationStatistics(Teuchos::RCP<TransferType > Transfer, const std::string &label){
884  if(Transfer.is_null()) return;
885 
886  const Distributor & Distor = Transfer->getDistributor();
887  Teuchos::RCP<const Teuchos::Comm<int> > Comm = Transfer->getSourceMap()->getComm();
888 
889  size_t rows_send = Transfer->getNumExportIDs();
890  size_t rows_recv = Transfer->getNumRemoteIDs();
891 
892  size_t round1_send = Transfer->getNumExportIDs() * sizeof(size_t);
893  size_t round1_recv = Transfer->getNumRemoteIDs() * sizeof(size_t);
894  size_t num_send_neighbors = Distor.getNumSends();
895  size_t num_recv_neighbors = Distor.getNumReceives();
896  size_t round2_send, round2_recv;
897  Distor.getLastDoStatistics(round2_send,round2_recv);
898 
899  int myPID = Comm->getRank();
900  int NumProcs = Comm->getSize();
901 
902  // Processor by processor statistics
903  // printf("[%d] %s Statistics: neigh[s/r]=%d/%d rows[s/r]=%d/%d r1bytes[s/r]=%d/%d r2bytes[s/r]=%d/%d\n",
904  // myPID,label.c_str(),num_send_neighbors,num_recv_neighbors,rows_send,rows_recv,round1_send,round1_recv,round2_send,round2_recv);
905 
906  // Global statistics
907  size_t lstats[8] = {num_send_neighbors,num_recv_neighbors,rows_send,rows_recv,round1_send,round1_recv,round2_send,round2_recv};
908  size_t gstats_min[8], gstats_max[8];
909 
910  double lstats_avg[8], gstats_avg[8];
911  for(int i=0; i<8; i++)
912  lstats_avg[i] = ((double)lstats[i])/NumProcs;
913 
914  Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MIN,8,lstats,gstats_min);
915  Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MAX,8,lstats,gstats_max);
916  Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_SUM,8,lstats_avg,gstats_avg);
917 
918  if(!myPID) {
919  printf("%s Send Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n",label.c_str(),
920  (int)gstats_min[0],gstats_avg[0],(int)gstats_max[0], (int)gstats_min[2],gstats_avg[2],(int)gstats_max[2],
921  (int)gstats_min[4],gstats_avg[4],(int)gstats_max[4], (int)gstats_min[6],gstats_avg[6],(int)gstats_max[6]);
922  printf("%s Recv Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n",label.c_str(),
923  (int)gstats_min[1],gstats_avg[1],(int)gstats_max[1], (int)gstats_min[3],gstats_avg[3],(int)gstats_max[3],
924  (int)gstats_min[5],gstats_avg[5],(int)gstats_max[5], (int)gstats_min[7],gstats_avg[7],(int)gstats_max[7]);
925  }
926 }
927 
928 
929 //kernel method for computing the local portion of C = A*B
930 template<class Scalar,
931  class LocalOrdinal,
932  class GlobalOrdinal,
933  class Node>
934 void mult_AT_B_newmatrix(
938  const std::string & label) {
939 
940  // Using & Typedefs
941  using Teuchos::RCP;
942  using Teuchos::rcp;
943  typedef CrsMatrixStruct<
944  Scalar,
945  LocalOrdinal,
946  GlobalOrdinal,
947  Node> CrsMatrixStruct_t;
948 
949 #ifdef HAVE_TPETRA_MMM_TIMINGS
950  std::string prefix = std::string("TpetraExt ")+ label + std::string(": ");
951  using Teuchos::TimeMonitor;
952  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string("MMM-T Transpose"))));
953 #endif
954 
955  /*************************************************************/
956  /* 1) Local Transpose of A */
957  /*************************************************************/
959  RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Atrans = at.createTransposeLocal();
960 
961  /*************************************************************/
962  /* 2/3) Call mult_A_B_newmatrix w/ fillComplete */
963  /*************************************************************/
964 #ifdef HAVE_TPETRA_MMM_TIMINGS
965  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string("MMM-T I&X"))));
966 #endif
967 
968  // Get views, asserting that no import is required to speed up computation
969  CrsMatrixStruct_t Aview;
970  CrsMatrixStruct_t Bview;
971  RCP<const Import<LocalOrdinal,GlobalOrdinal, Node> > dummyImporter;
972  MMdetails::import_and_extract_views(*Atrans, Atrans->getRowMap(), Aview, dummyImporter,true,label);
973  MMdetails::import_and_extract_views(B, B.getRowMap(), Bview, dummyImporter,true,label);
974 
975 #ifdef HAVE_TPETRA_MMM_TIMINGS
976  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string("MMM-T AB-core"))));
977 #endif
978 
979  RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >Ctemp;
980 
981  // If Atrans has no Exporter, we can use C instead of having to create a temp matrix
982  bool needs_final_export = !Atrans->getGraph()->getExporter().is_null();
983  if(needs_final_export)
984  Ctemp = rcp(new Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>(Atrans->getRowMap(),0));
985  else
986  Ctemp = rcp(&C,false);// don't allow deallocation
987 
988  // Multiply
989  mult_A_B_newmatrix(Aview,Bview,*Ctemp,label);
990 
991  /*************************************************************/
992  /* 4) exportAndFillComplete matrix */
993  /*************************************************************/
994 #ifdef HAVE_TPETRA_MMM_TIMINGS
995  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string("MMM-T exportAndFillComplete"))));
996 #endif
997 
998  Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Crcp(&C,false);
999  if(needs_final_export) {
1000  Teuchos::ParameterList labelList;
1001  labelList.set("Timer Label",label);
1002  Ctemp->exportAndFillComplete(Crcp,*Ctemp->getGraph()->getExporter(),
1003  B.getDomainMap(),A.getDomainMap(),rcp(&labelList,false));
1004  }
1005 #ifdef COMPUTE_MMM_STATISTICS
1006  printMultiplicationStatistics(Ctemp->getGraph()->getExporter(),label+std::string(" AT_B MMM"));
1007 #endif
1008 }
1009 
1010 
1011 //kernel method for computing the local portion of C = A*B
1012 template<class Scalar,
1013  class LocalOrdinal,
1014  class GlobalOrdinal,
1015  class Node>
1016 void mult_A_B(
1019  CrsWrapper<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1020  const std::string & label)
1021 {
1022  typedef Teuchos::ScalarTraits<Scalar> STS;
1023  //TEUCHOS_FUNC_TIME_MONITOR_DIFF("mult_A_B", mult_A_B);
1024  LocalOrdinal C_firstCol = Bview.colMap->getMinLocalIndex();
1025  LocalOrdinal C_lastCol = Bview.colMap->getMaxLocalIndex();
1026 
1027  LocalOrdinal C_firstCol_import = OrdinalTraits<LocalOrdinal>::zero();
1028  LocalOrdinal C_lastCol_import = OrdinalTraits<LocalOrdinal>::invalid();
1029 
1030  ArrayView<const GlobalOrdinal> bcols = Bview.colMap->getNodeElementList();
1031  ArrayView<const GlobalOrdinal> bcols_import = null;
1032  if (Bview.importColMap != null) {
1033  C_firstCol_import = Bview.importColMap->getMinLocalIndex();
1034  C_lastCol_import = Bview.importColMap->getMaxLocalIndex();
1035 
1036  bcols_import = Bview.importColMap->getNodeElementList();
1037  }
1038 
1039  size_t C_numCols = C_lastCol - C_firstCol +
1040  OrdinalTraits<LocalOrdinal>::one();
1041  size_t C_numCols_import = C_lastCol_import - C_firstCol_import +
1042  OrdinalTraits<LocalOrdinal>::one();
1043 
1044  if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
1045 
1046  Array<Scalar> dwork = Array<Scalar>(C_numCols);
1047  Array<GlobalOrdinal> iwork = Array<GlobalOrdinal>(C_numCols);
1048  Array<size_t> iwork2 = Array<size_t>(C_numCols);
1049 
1050  Array<Scalar> C_row_i = dwork;
1051  Array<GlobalOrdinal> C_cols = iwork;
1052  Array<size_t> c_index = iwork2;
1053  Array<GlobalOrdinal> combined_index = Array<GlobalOrdinal>(2*C_numCols);
1054  Array<Scalar> combined_values = Array<Scalar>(2*C_numCols);
1055 
1056  size_t C_row_i_length, j, k, last_index;
1057 
1058  // Run through all the hash table lookups once and for all
1059  LocalOrdinal LO_INVALID = OrdinalTraits<LocalOrdinal>::invalid();
1060  Array<LocalOrdinal> Acol2Brow(Aview.colMap->getNodeNumElements(),LO_INVALID);
1061  Array<LocalOrdinal> Acol2Irow(Aview.colMap->getNodeNumElements(),LO_INVALID);
1062  if(Aview.colMap->isSameAs(*Bview.origMatrix->getRowMap())){
1063  // Maps are the same: Use local IDs as the hash
1064  for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <=
1065  Aview.colMap->getMaxLocalIndex(); i++)
1066  Acol2Brow[i]=i;
1067  }
1068  else {
1069  // Maps are not the same: Use the map's hash
1070  for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <=
1071  Aview.colMap->getMaxLocalIndex(); i++) {
1072  GlobalOrdinal GID = Aview.colMap->getGlobalElement(i);
1073  LocalOrdinal BLID = Bview.origMatrix->getRowMap()->getLocalElement(GID);
1074  if(BLID != LO_INVALID) Acol2Brow[i] = BLID;
1075  else Acol2Irow[i] = Bview.importMatrix->getRowMap()->getLocalElement(GID);
1076  }
1077  }
1078 
1079  //To form C = A*B we're going to execute this expression:
1080  //
1081  // C(i,j) = sum_k( A(i,k)*B(k,j) )
1082  //
1083  //Our goal, of course, is to navigate the data in A and B once, without
1084  //performing searches for column-indices, etc.
1085  ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP;
1086  ArrayRCP<const LocalOrdinal> Acolind_RCP, Bcolind_RCP, Icolind_RCP;
1087  ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP;
1088  ArrayView<const size_t> Arowptr, Browptr, Irowptr;
1089  ArrayView<const LocalOrdinal> Acolind, Bcolind, Icolind;
1090  ArrayView<const Scalar> Avals, Bvals, Ivals;
1091  Aview.origMatrix->getAllValues(Arowptr_RCP,Acolind_RCP,Avals_RCP);
1092  Bview.origMatrix->getAllValues(Browptr_RCP,Bcolind_RCP,Bvals_RCP);
1093  Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
1094  Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
1095  if(!Bview.importMatrix.is_null()) {
1096  Bview.importMatrix->getAllValues(Irowptr_RCP,Icolind_RCP,Ivals_RCP);
1097  Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
1098  }
1099 
1100  bool C_filled = C.isFillComplete();
1101 
1102  for (size_t i = 0; i < C_numCols; i++)
1103  c_index[i] = OrdinalTraits<size_t>::invalid();
1104 
1105  //loop over the rows of A.
1106  size_t Arows = Aview.rowMap->getNodeNumElements();
1107  for(size_t i=0; i<Arows; ++i) {
1108 
1109  //only navigate the local portion of Aview... which is, thankfully, all of A
1110  //since this routine doesn't do transpose modes
1111  GlobalOrdinal global_row = Aview.rowMap->getGlobalElement(i);
1112 
1113  //loop across the i-th row of A and for each corresponding row
1114  //in B, loop across colums and accumulate product
1115  //A(i,k)*B(k,j) into our partial sum quantities C_row_i. In other words,
1116  //as we stride across B(k,:) we're calculating updates for row i of the
1117  //result matrix C.
1118 
1119 
1120  C_row_i_length = OrdinalTraits<size_t>::zero();
1121 
1122  for(k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1123  LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1124  Scalar Aval = Avals[k];
1125  if (Aval == STS::zero())
1126  continue;
1127 
1128  if (Ak==LO_INVALID) continue;
1129 
1130  for(j=Browptr[Ak]; j< Browptr[Ak+1]; ++j) {
1131  LocalOrdinal col = Bcolind[j];
1132  //assert(col >= 0 && col < C_numCols);
1133  if (c_index[col] == OrdinalTraits<size_t>::invalid()){
1134  //assert(C_row_i_length >= 0 && C_row_i_length < C_numCols);
1135  // This has to be a += so insertGlobalValue goes out
1136  C_row_i[C_row_i_length] = Aval*Bvals[j];
1137  C_cols[C_row_i_length] = col;
1138  c_index[col] = C_row_i_length;
1139  C_row_i_length++;
1140  }
1141  else {
1142  C_row_i[c_index[col]] += Aval*Bvals[j];
1143  }
1144  }
1145  }
1146 
1147  for (size_t ii = 0; ii < C_row_i_length; ii++) {
1148  c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1149  C_cols[ii] = bcols[C_cols[ii]];
1150  combined_index[ii] = C_cols[ii];
1151  combined_values[ii] = C_row_i[ii];
1152  }
1153  last_index = C_row_i_length;
1154 
1155  //
1156  //Now put the C_row_i values into C.
1157  //
1158  // We might have to revamp this later.
1159  C_row_i_length = OrdinalTraits<size_t>::zero();
1160 
1161  for(k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1162  LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1163  Scalar Aval = Avals[k];
1164  if (Aval == STS::zero())
1165  continue;
1166 
1167  if (Ak!=LO_INVALID) continue;
1168 
1169  Ak = Acol2Irow[Acolind[k]];
1170  for(j=Irowptr[Ak]; j< Irowptr[Ak+1]; ++j) {
1171  LocalOrdinal col = Icolind[j];
1172  //assert(col >= 0 && col < C_numCols);
1173  if (c_index[col] == OrdinalTraits<size_t>::invalid()){
1174  //assert(C_row_i_length >= 0 && C_row_i_length < C_numCols);
1175  // This has to be a += so insertGlobalValue goes out
1176  C_row_i[C_row_i_length] = Aval*Ivals[j];
1177  C_cols[C_row_i_length] = col;
1178  c_index[col] = C_row_i_length;
1179  C_row_i_length++;
1180  }
1181  else {
1182  // This has to be a += so insertGlobalValue goes out
1183  C_row_i[c_index[col]] += Aval*Ivals[j];
1184  }
1185  }
1186  }
1187 
1188  for (size_t ii = 0; ii < C_row_i_length; ii++) {
1189  c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1190  C_cols[ii] = bcols_import[C_cols[ii]];
1191  combined_index[last_index] = C_cols[ii];
1192  combined_values[last_index] = C_row_i[ii];
1193  last_index++;
1194  }
1195 
1196  //
1197  //Now put the C_row_i values into C.
1198  //
1199  // We might have to revamp this later.
1200  C_filled ?
1201  C.sumIntoGlobalValues(
1202  global_row,
1203  combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1204  combined_values.view(OrdinalTraits<size_t>::zero(), last_index))
1205  :
1206  C.insertGlobalValues(
1207  global_row,
1208  combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1209  combined_values.view(OrdinalTraits<size_t>::zero(), last_index));
1210 
1211  }
1212 
1213 }
1214 
1215 template<class Scalar,
1216  class LocalOrdinal,
1217  class GlobalOrdinal,
1218  class Node>
1219 void setMaxNumEntriesPerRow(
1221 {
1222  typedef typename Array<ArrayView<const LocalOrdinal> >::size_type local_length_size;
1223  Mview.maxNumRowEntries = OrdinalTraits<local_length_size>::zero();
1224  if(Mview.indices.size() > OrdinalTraits<local_length_size>::zero() ){
1225  Mview.maxNumRowEntries = Mview.indices[0].size();
1226  for(local_length_size i = 1; i<Mview.indices.size(); ++i){
1227  if(Mview.indices[i].size() > Mview.maxNumRowEntries){
1228  Mview.maxNumRowEntries = Mview.indices[i].size();
1229  }
1230  }
1231  }
1232 }
1233 
1234 
1235 template<class CrsMatrixType>
1236 size_t C_estimate_nnz(CrsMatrixType & A, CrsMatrixType &B){
1237  // Follows the NZ estimate in ML's ml_matmatmult.c
1238  size_t Aest = 100, Best=100;
1239  if(A.getNodeNumEntries() > 0)
1240  Aest = (A.getNodeNumRows()>0)? A.getNodeNumEntries()/A.getNodeNumEntries():100;
1241  if(B.getNodeNumEntries() > 0)
1242  Best=(B.getNodeNumRows()>0)? B.getNodeNumEntries()/B.getNodeNumEntries():100;
1243 
1244  size_t nnzperrow=(size_t)(sqrt((double)Aest) + sqrt((double)Best) - 1);
1245  nnzperrow*=nnzperrow;
1246 
1247  return (size_t)(A.getNodeNumRows()*nnzperrow*0.75 + 100);
1248 }
1249 
1250 
1251 
1252 //kernel method for computing the local portion of C = A*B
1253 template<class Scalar,
1254  class LocalOrdinal,
1255  class GlobalOrdinal,
1256  class Node>
1257 void mult_A_B_newmatrix(
1261  const std::string & label)
1262 {
1263  using Teuchos::RCP;
1264  using Teuchos::rcp;
1265  using Teuchos::ArrayView;
1266  typedef Import<LocalOrdinal, GlobalOrdinal, Node> import_type;
1267  typedef Map<LocalOrdinal, GlobalOrdinal, Node> map_type;
1268 
1269 #ifdef HAVE_TPETRA_MMM_TIMINGS
1270  std::string prefix = std::string("TpetraExt ")+ label + std::string(": ");
1271  using Teuchos::TimeMonitor;
1272  RCP<TimeMonitor> MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(prefix+std::string("MMM M5 Cmap")))));
1273 #endif
1274  size_t ST_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
1275  LocalOrdinal LO_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
1276 
1277 
1278  // Build the final importer / column map, hash table lookups for C
1279  RCP<const import_type> Cimport;
1280  RCP<const map_type> Ccolmap;
1281  RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
1282  RCP<const import_type> Iimport = Bview.importMatrix.is_null() ? Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
1283  Array<LocalOrdinal> Bcol2Ccol(Bview.colMap->getNodeNumElements()), Icol2Ccol;
1284 
1285  if(Bview.importMatrix.is_null()) {
1286  Cimport = Bimport;
1287  Ccolmap = Bview.colMap;
1288  // Bcol2Ccol is trivial
1289  for(size_t i=0; i<Bview.colMap->getNodeNumElements(); i++) {
1290  Bcol2Ccol[i] = Teuchos::as<LocalOrdinal>(i);
1291  }
1292  }
1293  else {
1294  // Choose the right variant of setUnion
1295  if(!Bimport.is_null() && !Iimport.is_null()){
1296  Cimport = Bimport->setUnion(*Iimport);
1297  Ccolmap = Cimport->getTargetMap();
1298  }
1299  else if(!Bimport.is_null() && Iimport.is_null()) {
1300  Cimport = Bimport->setUnion();
1301  }
1302  else if(Bimport.is_null() && !Iimport.is_null()) {
1303  Cimport = Iimport->setUnion();
1304  }
1305  else
1306  throw std::runtime_error("TpetraExt::MMM status of matrix importers is nonsensical");
1307 
1308  Ccolmap = Cimport->getTargetMap();
1309 
1310  if(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()))
1311  throw std::runtime_error("Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
1312 
1313  // NOTE: This is not efficient and should be folded into setUnion
1314  Icol2Ccol.resize(Bview.importMatrix->getColMap()->getNodeNumElements());
1315  ArrayView<const GlobalOrdinal> Bgid = Bview.origMatrix->getColMap()->getNodeElementList();
1316  ArrayView<const GlobalOrdinal> Igid = Bview.importMatrix->getColMap()->getNodeElementList();
1317 
1318  for(size_t i=0; i<Bview.origMatrix->getColMap()->getNodeNumElements(); i++)
1319  Bcol2Ccol[i] = Ccolmap->getLocalElement(Bgid[i]);
1320  for(size_t i=0; i<Bview.importMatrix->getColMap()->getNodeNumElements(); i++)
1321  Icol2Ccol[i] = Ccolmap->getLocalElement(Igid[i]);
1322  }
1323 
1324 #ifdef HAVE_TPETRA_MMM_TIMINGS
1325  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string("MMM Newmatrix SerialCore"))));
1326 #endif
1327 
1328  // Sizes
1329  size_t m=Aview.origMatrix->getNodeNumRows();
1330  size_t n=Ccolmap->getNodeNumElements();
1331 
1332  // Get Data Pointers
1333  ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP;
1334  ArrayRCP<size_t> Crowptr_RCP;
1335  ArrayRCP<const LocalOrdinal> Acolind_RCP, Bcolind_RCP, Icolind_RCP;
1336  ArrayRCP<LocalOrdinal> Ccolind_RCP;
1337  ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP;
1338  ArrayRCP<Scalar> Cvals_RCP;
1339 
1340  Aview.origMatrix->getAllValues(Arowptr_RCP,Acolind_RCP,Avals_RCP);
1341  Bview.origMatrix->getAllValues(Browptr_RCP,Bcolind_RCP,Bvals_RCP);
1342  if(!Bview.importMatrix.is_null()) Bview.importMatrix->getAllValues(Irowptr_RCP,Icolind_RCP,Ivals_RCP);
1343 
1344 
1345  // For efficiency
1346  ArrayView<const size_t> Arowptr, Browptr, Irowptr;
1347  ArrayView<const LocalOrdinal> Acolind, Bcolind, Icolind;
1348  ArrayView<const Scalar> Avals, Bvals, Ivals;
1349  ArrayView<size_t> Crowptr;
1350  ArrayView<LocalOrdinal> Ccolind;
1351  ArrayView<Scalar> Cvals;
1352  Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
1353  Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
1354  if(!Bview.importMatrix.is_null()) {
1355  Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
1356  }
1357 
1358  // The status array will contain the index into colind where this entry was last deposited.
1359  // c_status[i] < CSR_ip - not in the row yet.
1360  // c_status[i] >= CSR_ip, this is the entry where you can find the data
1361  // We start with this filled with INVALID's indicating that there are no entries yet.
1362  // Sadly, this complicates the code due to the fact that size_t's are unsigned.
1363  size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
1364  Array<size_t> c_status(n, ST_INVALID);
1365 
1366  // Classic csr assembly (low memory edition)
1367  size_t CSR_alloc=std::max(C_estimate_nnz(*Aview.origMatrix,*Bview.origMatrix),n);
1368  size_t CSR_ip=0,OLD_ip=0;
1369  Crowptr_RCP.resize(m+1); Crowptr = Crowptr_RCP();
1370  Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
1371  Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP();
1372 
1373  // Run through all the hash table lookups once and for all
1374  Array<LocalOrdinal> targetMapToOrigRow(Aview.colMap->getNodeNumElements(),LO_INVALID);
1375  Array<LocalOrdinal> targetMapToImportRow(Aview.colMap->getNodeNumElements(),LO_INVALID);
1376 
1377  if(Aview.colMap->isSameAs(*Bview.rowMap)){
1378  // Maps are the same: Use local IDs as the hash
1379  for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <= Aview.colMap->getMaxLocalIndex(); i++) {
1380  LocalOrdinal B_LID = Bview.origMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
1381  if(B_LID != LO_INVALID) targetMapToOrigRow[i] = B_LID;
1382  else {
1383  LocalOrdinal I_LID = Bview.importMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
1384  targetMapToImportRow[i] = I_LID;
1385  }
1386  }
1387  }
1388  else {
1389  // Maps are not the same: Use the map's hash
1390  for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <= Aview.colMap->getMaxLocalIndex(); i++) {
1391  LocalOrdinal B_LID = Bview.origMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
1392  if(B_LID != LO_INVALID) targetMapToOrigRow[i] = B_LID;
1393  else {
1394  LocalOrdinal I_LID = Bview.importMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
1395  targetMapToImportRow[i] = I_LID;
1396  }
1397  }
1398  }
1399 
1400  const Scalar SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1401 
1402  // For each row of A/C
1403  for(size_t i=0; i<m; i++){
1404  Crowptr[i]=CSR_ip;
1405 
1406  for(size_t k=Arowptr[i]; k<Arowptr[i+1]; k++){
1407  LocalOrdinal Ak = Acolind[k];
1408  Scalar Aval = Avals[k];
1409  if(Aval==SC_ZERO) continue;
1410 
1411  if(targetMapToOrigRow[Ak] != LO_INVALID){
1412  // Local matrix
1413  size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Ak]);
1414 
1415  for(size_t j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
1416  LocalOrdinal Cj=Bcol2Ccol[Bcolind[j]];
1417 
1418  if(c_status[Cj]==INVALID || c_status[Cj]<OLD_ip){
1419  // New entry
1420  c_status[Cj] = CSR_ip;
1421  Ccolind[CSR_ip]= Cj;
1422  Cvals[CSR_ip] = Aval*Bvals[j];
1423  CSR_ip++;
1424  }
1425  else
1426  Cvals[c_status[Cj]]+=Aval*Bvals[j];
1427  }
1428  }
1429  else{
1430  // Remote matrix
1431  size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Ak]);
1432  for(size_t j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
1433  LocalOrdinal Cj=Icol2Ccol[Icolind[j]];
1434 
1435  if(c_status[Cj]==INVALID || c_status[Cj]<OLD_ip){
1436  // New entry
1437  c_status[Cj]=CSR_ip;
1438  Ccolind[CSR_ip]=Cj;
1439  Cvals[CSR_ip]=Aval*Ivals[j];
1440  CSR_ip++;
1441  }
1442  else
1443  Cvals[c_status[Cj]]+=Aval*Ivals[j];
1444  }
1445  }
1446  }
1447 
1448  // Resize for next pass if needed
1449  if(CSR_ip + n > CSR_alloc){
1450  CSR_alloc*=2;
1451  Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
1452  Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP();
1453  }
1454  OLD_ip=CSR_ip;
1455  }
1456 
1457  Crowptr[m]=CSR_ip;
1458 
1459  // Downward resize
1460  Cvals_RCP.resize(CSR_ip);
1461  Ccolind_RCP.resize(CSR_ip);
1462 
1463 
1464 #ifdef HAVE_TPETRA_MMM_TIMINGS
1465  MM = rcp (new TimeMonitor (* (TimeMonitor::getNewTimer(prefix+std::string("MMM Newmatrix Final Sort")))));
1466 #endif
1467 
1468  // Replace the column map
1469  C.replaceColMap(Ccolmap);
1470 
1471  // Final sort & set of CRS arrays
1472  Import_Util::sortCrsEntries(Crowptr_RCP(),Ccolind_RCP(),Cvals_RCP());
1473  C.setAllValues(Crowptr_RCP,Ccolind_RCP,Cvals_RCP);
1474 
1475 #ifdef HAVE_TPETRA_MMM_TIMINGS
1476  MM = rcp (new TimeMonitor (* (TimeMonitor::getNewTimer(prefix+std::string("MMM Newmatrix ESFC")))));
1477 #endif
1478 
1479  // Final FillComplete
1480  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(),Aview.origMatrix->getRangeMap(),Cimport);
1481 }
1482 
1483 
1484 //kernel method for computing the local portion of C = (I-omega D^{-1} A)*B
1485 template<class Scalar,
1486  class LocalOrdinal,
1487  class GlobalOrdinal,
1488  class Node>
1489 void jacobi_A_B_newmatrix(
1490  Scalar omega,
1495  const std::string & label)
1496 {
1497  using Teuchos::RCP;
1498  using Teuchos::rcp;
1499  using Teuchos::ArrayView;
1500  typedef Import<LocalOrdinal, GlobalOrdinal, Node> import_type;
1501  typedef Map<LocalOrdinal, GlobalOrdinal, Node> map_type;
1502 #ifdef HAVE_TPETRA_MMM_TIMINGS
1503  std::string prefix = std::string("TpetraExt ")+ label + std::string(": ");
1504  using Teuchos::TimeMonitor;
1505  RCP<TimeMonitor> MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(prefix+std::string("Jacobi M5 Cmap")))));
1506 #endif
1507  size_t ST_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
1508  LocalOrdinal LO_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
1509 
1510 
1511  // Build the final importer / column map, hash table lookups for C
1512  RCP<const import_type> Cimport;
1513  RCP<const map_type> Ccolmap;
1514  RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
1515  RCP<const import_type> Iimport = Bview.importMatrix.is_null() ? Teuchos::null : Bview.importMatrix->getGraph()->getImporter();
1516  Array<LocalOrdinal> Bcol2Ccol(Bview.colMap->getNodeNumElements()), Icol2Ccol;
1517 
1518  if(Bview.importMatrix.is_null()) {
1519  Cimport = Bimport;
1520  Ccolmap = Bview.colMap;
1521  // Bcol2Ccol is trivial
1522  for(size_t i=0; i<Bview.colMap->getNodeNumElements(); i++) {
1523  Bcol2Ccol[i] = Teuchos::as<LocalOrdinal>(i);
1524  }
1525  }
1526  else {
1527  // Choose the right variant of setUnion
1528  if(!Bimport.is_null() && !Iimport.is_null()){
1529  Cimport = Bimport->setUnion(*Iimport);
1530  Ccolmap = Cimport->getTargetMap();
1531  }
1532  else if(!Bimport.is_null() && Iimport.is_null()) {
1533  Cimport = Bimport->setUnion();
1534  }
1535  else if(Bimport.is_null() && !Iimport.is_null()) {
1536  Cimport = Iimport->setUnion();
1537  }
1538  else
1539  throw std::runtime_error("TpetraExt::Jacobi status of matrix importers is nonsensical");
1540 
1541  Ccolmap = Cimport->getTargetMap();
1542 
1543  if(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()))
1544  throw std::runtime_error("Tpetra:Jacobi Import setUnion messed with the DomainMap in an unfortunate way");
1545 
1546  // NOTE: This is not efficient and should be folded into setUnion
1547  Icol2Ccol.resize(Bview.importMatrix->getColMap()->getNodeNumElements());
1548  ArrayView<const GlobalOrdinal> Bgid = Bview.origMatrix->getColMap()->getNodeElementList();
1549  ArrayView<const GlobalOrdinal> Igid = Bview.importMatrix->getColMap()->getNodeElementList();
1550 
1551  for(size_t i=0; i<Bview.origMatrix->getColMap()->getNodeNumElements(); i++)
1552  Bcol2Ccol[i] = Ccolmap->getLocalElement(Bgid[i]);
1553  for(size_t i=0; i<Bview.importMatrix->getColMap()->getNodeNumElements(); i++)
1554  Icol2Ccol[i] = Ccolmap->getLocalElement(Igid[i]);
1555  }
1556 
1557 #ifdef HAVE_TPETRA_MMM_TIMINGS
1558  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string("Jacobi Newmatrix SerialCore"))));
1559 #endif
1560 
1561  // Sizes
1562  size_t m=Aview.origMatrix->getNodeNumRows();
1563  size_t n=Ccolmap->getNodeNumElements();
1564 
1565  // Get Data Pointers
1566  ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP;
1567  ArrayRCP<size_t> Crowptr_RCP;
1568  ArrayRCP<const LocalOrdinal> Acolind_RCP, Bcolind_RCP, Icolind_RCP;
1569  ArrayRCP<LocalOrdinal> Ccolind_RCP;
1570  ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP;
1571  ArrayRCP<Scalar> Cvals_RCP;
1572  ArrayRCP<const Scalar> Dvals_RCP;
1573 
1574  Aview.origMatrix->getAllValues(Arowptr_RCP,Acolind_RCP,Avals_RCP);
1575  Bview.origMatrix->getAllValues(Browptr_RCP,Bcolind_RCP,Bvals_RCP);
1576  if(!Bview.importMatrix.is_null()) Bview.importMatrix->getAllValues(Irowptr_RCP,Icolind_RCP,Ivals_RCP);
1577  Dvals_RCP = Dinv.getData();
1578 
1579  // For efficiency
1580  ArrayView<const size_t> Arowptr, Browptr, Irowptr;
1581  ArrayView<const LocalOrdinal> Acolind, Bcolind, Icolind;
1582  ArrayView<const Scalar> Avals, Bvals, Ivals;
1583  ArrayView<size_t> Crowptr;
1584  ArrayView<LocalOrdinal> Ccolind;
1585  ArrayView<Scalar> Cvals;
1586  ArrayView<const Scalar> Dvals;
1587  Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
1588  Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
1589  if(!Bview.importMatrix.is_null()) {
1590  Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
1591  }
1592  Dvals = Dvals_RCP();
1593 
1594  // The status array will contain the index into colind where this entry was last deposited.
1595  // c_status[i] < CSR_ip - not in the row yet.
1596  // c_status[i] >= CSR_ip, this is the entry where you can find the data
1597  // We start with this filled with INVALID's indicating that there are no entries yet.
1598  // Sadly, this complicates the code due to the fact that size_t's are unsigned.
1599  size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
1600  Array<size_t> c_status(n, ST_INVALID);
1601 
1602  // Classic csr assembly (low memory edition)
1603  size_t CSR_alloc=std::max(C_estimate_nnz(*Aview.origMatrix,*Bview.origMatrix),n);
1604  size_t CSR_ip=0,OLD_ip=0;
1605  Crowptr_RCP.resize(m+1); Crowptr = Crowptr_RCP();
1606  Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
1607  Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP();
1608 
1609  // Run through all the hash table lookups once and for all
1610  Array<LocalOrdinal> targetMapToOrigRow(Aview.colMap->getNodeNumElements(),LO_INVALID);
1611  Array<LocalOrdinal> targetMapToImportRow(Aview.colMap->getNodeNumElements(),LO_INVALID);
1612 
1613  if(Aview.colMap->isSameAs(*Bview.rowMap)){
1614  // Maps are the same: Use local IDs as the hash
1615  for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <= Aview.colMap->getMaxLocalIndex(); i++) {
1616  LocalOrdinal B_LID = Bview.origMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
1617  if(B_LID != LO_INVALID) targetMapToOrigRow[i] = B_LID;
1618  else {
1619  LocalOrdinal I_LID = Bview.importMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
1620  targetMapToImportRow[i] = I_LID;
1621  }
1622  }
1623  }
1624  else {
1625  // Maps are not the same: Use the map's hash
1626  for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <= Aview.colMap->getMaxLocalIndex(); i++) {
1627  LocalOrdinal B_LID = Bview.origMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
1628  if(B_LID != LO_INVALID) targetMapToOrigRow[i] = B_LID;
1629  else {
1630  LocalOrdinal I_LID = Bview.importMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
1631  targetMapToImportRow[i] = I_LID;
1632  }
1633  }
1634  }
1635 
1636  const Scalar SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1637 
1638  // For each row of A/C
1639  for(size_t i=0; i<m; i++){
1640  Crowptr[i]=CSR_ip;
1641  Scalar Dval = Dvals[i];
1642 
1643  // Entries of B
1644  for(size_t k=Browptr[i]; k<Browptr[i+1]; k++){
1645  Scalar Bval = Bvals[k];
1646  if(Bval==SC_ZERO) continue;
1647  LocalOrdinal Ck=Bcol2Ccol[Bcolind[k]];
1648 
1649  // Assume no repeated entries in B
1650  c_status[Ck] = CSR_ip;
1651  Ccolind[CSR_ip] = Ck;
1652  Cvals[CSR_ip] = Bvals[k];
1653  CSR_ip++;
1654  }
1655 
1656 
1657  // Entries of -omega * Dinv * A * B
1658  for(size_t k=Arowptr[i]; k<Arowptr[i+1]; k++){
1659  LocalOrdinal Ak = Acolind[k];
1660  Scalar Aval = Avals[k];
1661  if(Aval==SC_ZERO) continue;
1662 
1663  if(targetMapToOrigRow[Ak] != LO_INVALID){
1664  // Local matrix
1665  size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Ak]);
1666 
1667  for(size_t j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
1668  LocalOrdinal Cj=Bcol2Ccol[Bcolind[j]];
1669 
1670  if(c_status[Cj]==INVALID || c_status[Cj]<OLD_ip){
1671  // New entry
1672  c_status[Cj] = CSR_ip;
1673  Ccolind[CSR_ip] = Cj;
1674  Cvals[CSR_ip] = - omega * Dval* Aval * Bvals[j];
1675  CSR_ip++;
1676  }
1677  else
1678  Cvals[c_status[Cj]] -= omega * Dval* Aval * Bvals[j];
1679  }
1680  }
1681  else{
1682  // Remote matrix
1683  size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Ak]);
1684  for(size_t j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
1685  LocalOrdinal Cj=Icol2Ccol[Icolind[j]];
1686 
1687  if(c_status[Cj]==INVALID || c_status[Cj]<OLD_ip){
1688  // New entry
1689  c_status[Cj] = CSR_ip;
1690  Ccolind[CSR_ip] = Cj;
1691  Cvals[CSR_ip] = - omega * Dval* Aval * Ivals[j];
1692  CSR_ip++;
1693  }
1694  else
1695  Cvals[c_status[Cj]] -= omega * Dval* Aval * Ivals[j];
1696  }
1697  }
1698  }
1699 
1700  // Resize for next pass if needed
1701  if(CSR_ip + n > CSR_alloc){
1702  CSR_alloc*=2;
1703  Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
1704  Cvals_RCP.resize(CSR_alloc); Cvals = Cvals_RCP();
1705  }
1706  OLD_ip=CSR_ip;
1707  }
1708 
1709  Crowptr[m]=CSR_ip;
1710 
1711  // Downward resize
1712  Cvals_RCP.resize(CSR_ip);
1713  Ccolind_RCP.resize(CSR_ip);
1714 
1715 
1716 #ifdef HAVE_TPETRA_MMM_TIMINGS
1717  MM = rcp (new TimeMonitor (* (TimeMonitor::getNewTimer(prefix+std::string("Jacobi Newmatrix Final Sort")))));
1718 #endif
1719 
1720  // Replace the column map
1721  C.replaceColMap(Ccolmap);
1722 
1723  // Final sort & set of CRS arrays
1724  Import_Util::sortCrsEntries(Crowptr_RCP(),Ccolind_RCP(),Cvals_RCP());
1725  C.setAllValues(Crowptr_RCP,Ccolind_RCP,Cvals_RCP);
1726 
1727 #ifdef HAVE_TPETRA_MMM_TIMINGS
1728  MM = rcp (new TimeMonitor (* (TimeMonitor::getNewTimer(prefix+std::string("Jacobi Newmatrix ESFC")))));
1729 #endif
1730 
1731  // Final FillComplete
1732  C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(),Aview.origMatrix->getRangeMap(),Cimport);
1733 }
1734 
1735 
1736 template<class Scalar,
1737  class LocalOrdinal,
1738  class GlobalOrdinal,
1739  class Node>
1740 void import_and_extract_views(
1742  RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > targetMap,
1744  RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> > prototypeImporter,
1745  bool userAssertsThereAreNoRemotes,
1746  const std::string & label)
1747 {
1748 #ifdef HAVE_TPETRA_MMM_TIMINGS
1749  std::string prefix = std::string("TpetraExt ")+ label + std::string(": ");
1750  using Teuchos::TimeMonitor;
1751  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string("MMM I&X Alloc"))));
1752 #endif
1753 
1754  //Convience typedef
1757  // The goal of this method is to populate the 'Mview' struct with views of the
1758  // rows of M, including all rows that correspond to elements in 'targetMap'.
1759  //
1760  // If targetMap includes local elements that correspond to remotely-owned rows
1761  // of M, then those remotely-owned rows will be imported into
1762  // 'Mview.importMatrix', and views of them will be included in 'Mview'.
1763  Mview.deleteContents();
1764 
1765  RCP<const Map_t> Mrowmap = M.getRowMap();
1766  RCP<const Map_t> MremoteRowMap;
1767  const int numProcs = Mrowmap->getComm()->getSize();
1768 
1769  ArrayView<const GlobalOrdinal> Mrows = targetMap->getNodeElementList();
1770 
1771  size_t numRemote = 0;
1772  size_t numRows = targetMap->getNodeNumElements();
1773  Mview.origMatrix = Teuchos::rcp(&M,false);
1774  Mview.origRowMap = M.getRowMap();
1775  Mview.rowMap = targetMap;
1776  Mview.colMap = M.getColMap();
1777  Mview.domainMap = M.getDomainMap();
1778  Mview.importColMap = null;
1779 
1780  // Short circuit if the user swears there are no remotes
1781  if(userAssertsThereAreNoRemotes) return;
1782 
1783 #ifdef HAVE_TPETRA_MMM_TIMINGS
1784  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string("MMM I&X RemoteMap"))));
1785 #endif
1786 
1787  // mark each row in targetMap as local or remote, and go ahead and get a view for the local rows
1788  int mode = 0;
1789  if(!prototypeImporter.is_null() && prototypeImporter->getSourceMap()->isSameAs(*Mrowmap) && prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
1790  // We have a valid prototype importer --- ask it for the remotes
1791  numRemote = prototypeImporter->getNumRemoteIDs();
1792  Array<GlobalOrdinal> MremoteRows(numRemote);
1793  ArrayView<const LocalOrdinal> RemoteLIDs = prototypeImporter->getRemoteLIDs();
1794  for(size_t i=0; i<numRemote; i++) {
1795  MremoteRows[i] = targetMap->getGlobalElement(RemoteLIDs[i]);
1796  }
1797 
1798  MremoteRowMap=rcp(new Map_t(OrdinalTraits<global_size_t>::invalid(), MremoteRows(), Mrowmap->getIndexBase(), Mrowmap->getComm(), Mrowmap->getNode()));
1799  mode=1;
1800  }
1801  else if(prototypeImporter.is_null()) {
1802  // No prototype importer --- count the remotes the hard way
1803  Array<GlobalOrdinal> MremoteRows(numRows);
1804  for(size_t i=0; i < numRows; ++i) {
1805  const LocalOrdinal mlid = Mrowmap->getLocalElement(Mrows[i]);
1806 
1807  if (mlid == OrdinalTraits<LocalOrdinal>::invalid()) {
1808  MremoteRows[numRemote]=Mrows[i];
1809  ++numRemote;
1810  }
1811  }
1812  MremoteRows.resize(numRemote);
1813  MremoteRowMap=rcp(new Map_t(OrdinalTraits<global_size_t>::invalid(), MremoteRows(), Mrowmap->getIndexBase(), Mrowmap->getComm(), Mrowmap->getNode()));
1814  mode=2;
1815  }
1816  else {
1817  // prototypeImporter is bad. But if we're in serial that's OK.
1818  mode=3;
1819  }
1820 
1821  if (numProcs < 2) {
1822  TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
1823  "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows." <<std::endl);
1824  //If only one processor we don't need to import any remote rows, so return.
1825  return;
1826  }
1827 
1828  //
1829  // Now we will import the needed remote rows of M, if the global maximum
1830  // value of numRemote is greater than 0.
1831  //
1832 #ifdef HAVE_TPETRA_MMM_TIMINGS
1833  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string("MMM I&X Collective-0"))));
1834 #endif
1835 
1836  global_size_t globalMaxNumRemote = 0;
1837  Teuchos::reduceAll(*(Mrowmap->getComm()) , Teuchos::REDUCE_MAX, (global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote) );
1838 
1839 
1840  if (globalMaxNumRemote > 0) {
1841 #ifdef HAVE_TPETRA_MMM_TIMINGS
1842  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string("MMM I&X Import-2"))));
1843 #endif
1844  // Create an importer with target-map MremoteRowMap and source-map Mrowmap.
1845  RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> > importer;
1846 
1847  if(mode==1)
1848  importer = prototypeImporter->createRemoteOnlyImport(MremoteRowMap);
1849  else if(mode==2)
1850  importer=rcp(new Import<LocalOrdinal, GlobalOrdinal, Node>(Mrowmap, MremoteRowMap));
1851  else
1852  throw std::runtime_error("prototypeImporter->SourceMap() does not match M.getRowMap()!");
1853 
1854 #ifdef HAVE_TPETRA_MMM_TIMINGS
1855  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string("MMM I&X Import-3"))));
1856 #endif
1857 
1858  // Now create a new matrix into which we can import the remote rows of M that we need.
1859  Teuchos::ParameterList labelList;
1860  labelList.set("Timer Label",label);
1861  Mview.importMatrix = Tpetra::importAndFillCompleteCrsMatrix<CrsMatrix_t>(Teuchos::rcp(&M,false),*importer,M.getDomainMap(),MremoteRowMap,Teuchos::rcp(&labelList,false));
1862 
1863 #ifdef COMPUTE_MMM_STATISTICS
1864  printMultiplicationStatistics(importer,label+std::string(" I&X MMM"));
1865 #endif
1866 
1867 
1868 #ifdef HAVE_TPETRA_MMM_TIMINGS
1869  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix+std::string("MMM I&X Import-4"))));
1870 #endif
1871 
1872  // Save the column map of the imported matrix, so that we can convert indices back to global for arithmetic later
1873  Mview.importColMap = Mview.importMatrix->getColMap();
1874  }
1875 }
1876 
1877 
1878 
1879 } //End namepsace MMdetails
1880 
1881 } //End namespace Tpetra
1882 //
1883 // Explicit instantiation macro
1884 //
1885 // Must be expanded from within the Tpetra namespace!
1886 //
1887 
1888 #define TPETRA_MATRIXMATRIX_INSTANT(SCALAR,LO,GO,NODE) \
1889  \
1890  template \
1891  void MatrixMatrix::Multiply( \
1892  const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
1893  bool transposeA, \
1894  const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
1895  bool transposeB, \
1896  CrsMatrix< SCALAR , LO , GO , NODE >& C, \
1897  bool call_FillComplete_on_result, \
1898  const std::string & label); \
1899 \
1900 template \
1901  void MatrixMatrix::Jacobi( \
1902  SCALAR omega, \
1903  const Vector< SCALAR, LO, GO, NODE > & Dinv, \
1904  const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
1905  const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
1906  CrsMatrix< SCALAR , LO , GO , NODE >& C, \
1907  bool call_FillComplete_on_result, \
1908  const std::string & label); \
1909 \
1910  template \
1911  void MatrixMatrix::Add( \
1912  const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
1913  bool transposeA, \
1914  SCALAR scalarA, \
1915  const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
1916  bool transposeB, \
1917  SCALAR scalarB, \
1918  RCP<CrsMatrix< SCALAR , LO , GO , NODE > > C); \
1919  \
1920  template \
1921  void MatrixMatrix::Add( \
1922  const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
1923  bool transposeA, \
1924  SCALAR scalarA, \
1925  CrsMatrix<SCALAR, LO, GO, NODE>& B, \
1926  SCALAR scalarB ); \
1927  \
1928  template \
1929  Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > \
1930  MatrixMatrix::add (const SCALAR & alpha, \
1931  const bool transposeA, \
1932  const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
1933  const SCALAR & beta, \
1934  const bool transposeB, \
1935  const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
1936  const Teuchos::RCP<const Map< LO , GO , NODE > >& domainMap, \
1937  const Teuchos::RCP<const Map< LO , GO , NODE > >& rangeMap, \
1938  const Teuchos::RCP<Teuchos::ParameterList>& params); \
1939 \
1940 
1941 
1942 #endif // TPETRA_MATRIXMATRIX_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Teuchos::RCP< const map_type > importColMap
Colmap garnered as a result of the import.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Teuchos::ArrayRCP< const Scalar > getData() const
Const view of the local values of this vector.
size_t getNumReceives() const
The number of processes from which we will receive data.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const
This matrix&#39;s graph, as a RowGraph.
Teuchos::RCP< const map_type > domainMap
Domain map for original matrix.
Teuchos::RCP< const map_type > getRowMap() const
The Map that describes the row distribution in this matrix.
Teuchos::RCP< const map_type > getColMap() const
The Map that describes the column distribution in this matrix.
bool isLocallyIndexed() const
Whether the matrix is locally indexed on the calling process.
Teuchos::RCP< const map_type > colMap
Col map for the original version of the matrix.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix&#39;s column Map with the given Map.
void setAllValues(const typename local_matrix_type::row_map_type &rowPointers, const typename local_graph_type::entries_type::non_const_type &columnIndices, const typename local_matrix_type::values_type &values)
Sets the 1D pointer arrays of the graph.
size_t getNodeNumRows() const
The number of matrix rows owned by the calling process.
Teuchos::RCP< const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > origMatrix
The original matrix.
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global indices.
size_t global_size_t
Global size_t object.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
void Add(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, Scalar scalarA, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarB)
Construct and (optionally) redistribute the explicitly stored transpose of a CrsMatrix.
void keyValueMerge(KeyInputIterType keyBeg1, KeyInputIterType keyEnd1, ValueInputIterType valBeg1, ValueInputIterType valEnd1, KeyInputIterType keyBeg2, KeyInputIterType keyEnd2, ValueInputIterType valBeg2, ValueInputIterType valEnd2, KeyOutputIterType keyOut, ValueOutputIterType valOut, BinaryFunction f)
Merge two sorted (by keys) sequences of unique (key,value) pairs by combining pairs with equal keys...
Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > importMatrix
The imported matrix.
Teuchos::RCP< const map_type > getDomainMap() const
The domain Map of this matrix.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix is distributed.
bool isFillComplete() const
Whether the matrix is fill complete.
void scale(const Scalar &alpha)
Scale the matrix&#39;s values: this := alpha*this.
Sets up and executes a communication plan for a Tpetra DistObject.
void Jacobi(Scalar omega, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Dinv, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string())
Teuchos::RCP< crs_matrix_type > createTranspose()
Compute and return the transpose of the matrix given to the constructor.
Utility functions for packing and unpacking sparse matrix entries.
ProfileType getProfileType() const
Returns true if the matrix was allocated with static data structures.
global_size_t getGlobalNumRows() const
Number of global elements in the row map of this matrix.
bool isFillActive() const
Whether the matrix is not fill complete.
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
void Multiply(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, bool transposeB, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string())
Sparse matrix-matrix multiply.
Describes a parallel distribution of objects over processes.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
A read-only, row-oriented interface to a sparse matrix.
A distributed dense vector.
Stand-alone utility functions and macros.
Teuchos::RCP< const map_type > rowMap
Desired row map for "imported" version of the matrix.
void expertStaticFillComplete(const RCP< const map_type > &domainMap, const RCP< const map_type > &rangeMap, const RCP< const import_type > &importer=Teuchos::null, const RCP< const export_type > &exporter=Teuchos::null, const RCP< ParameterList > &params=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
Struct that holds views of the contents of a CrsMatrix.
void getLastDoStatistics(size_t &bytes_sent, size_t &bytes_recvd) const
Information on the last call to do/doReverse.
size_t getNumSends() const
The number of processes to which we will send data.
Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const bool transposeB, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap=Teuchos::null, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Compute the sparse matrix sum C = scalarA * Op(A) + scalarB * Op(B), where Op(X) is either X or its t...
Teuchos::RCP< const map_type > origRowMap
Original row map of matrix.
Teuchos::RCP< crs_matrix_type > createTransposeLocal()
Compute and return the transpose of the matrix given to the constructor.
void fillComplete(const RCP< const map_type > &domainMap, const RCP< const map_type > &rangeMap, const RCP< ParameterList > &params=null)
Signal that data entry is complete, specifying domain and range maps.
Teuchos::RCP< const map_type > getRangeMap() const
The range Map of this matrix.
size_t getNodeMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on this node.