EpetraExt  Development
EpetraExt_MatrixMatrix.cpp
Go to the documentation of this file.
1 //@HEADER
2 // ***********************************************************************
3 //
4 // EpetraExt: Epetra Extended - Linear Algebra Services Package
5 // Copyright (2011) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 
42 #include <EpetraExt_ConfigDefs.h>
43 #include <EpetraExt_MatrixMatrix.h>
44 
45 #include <EpetraExt_MMHelpers.h>
46 
48 
49 #include <Epetra_Export.h>
50 #include <Epetra_Import.h>
51 #include <Epetra_Util.h>
52 #include <Epetra_Map.h>
53 #include <Epetra_Comm.h>
54 #include <Epetra_CrsMatrix.h>
55 #include <Epetra_Vector.h>
56 #include <Epetra_Directory.h>
57 #include <Epetra_HashTable.h>
58 #include <Epetra_Distributor.h>
59 
60 #include <Teuchos_TimeMonitor.hpp>
61 
62 #ifdef HAVE_VECTOR
63 #include <vector>
64 #endif
65 
66 
67 
68 namespace EpetraExt {
69 //=========================================================================
70 //
71 //Method for internal use... sparsedot forms a dot-product between two
72 //sparsely-populated 'vectors'.
73 //Important assumption: assumes the indices in u_ind and v_ind are sorted.
74 //
75 template<typename int_type>
76 double sparsedot(double* u, int_type* u_ind, int u_len,
77  double* v, int_type* v_ind, int v_len)
78 {
79  double result = 0.0;
80 
81  int v_idx = 0;
82  int u_idx = 0;
83 
84  while(v_idx < v_len && u_idx < u_len) {
85  int_type ui = u_ind[u_idx];
86  int_type vi = v_ind[v_idx];
87 
88  if (ui < vi) {
89  ++u_idx;
90  }
91  else if (ui > vi) {
92  ++v_idx;
93  }
94  else {
95  result += u[u_idx++]*v[v_idx++];
96  }
97  }
98 
99  return(result);
100 }
101 
102 //=========================================================================
103 //kernel method for computing the local portion of C = A*B^T
104 template<typename int_type>
106  CrsMatrixStruct& Bview,
107  CrsWrapper& C)
108 {
109  int i, j, k;
110  int returnValue = 0;
111 
112  int maxlen = 0;
113  for(i=0; i<Aview.numRows; ++i) {
114  if (Aview.numEntriesPerRow[i] > maxlen) maxlen = Aview.numEntriesPerRow[i];
115  }
116  for(i=0; i<Bview.numRows; ++i) {
117  if (Bview.numEntriesPerRow[i] > maxlen) maxlen = Bview.numEntriesPerRow[i];
118  }
119 
120  //std::cout << "Aview: " << std::endl;
121  //dumpCrsMatrixStruct(Aview);
122 
123  //std::cout << "Bview: " << std::endl;
124  //dumpCrsMatrixStruct(Bview);
125 
126  int numBcols = Bview.colMap->NumMyElements();
127  int numBrows = Bview.numRows;
128 
129  int iworklen = maxlen*2 + numBcols;
130  int_type* iwork = new int_type[iworklen];
131 
132  int_type * bcols = iwork+maxlen*2;
133  int_type* bgids = 0;
134  Bview.colMap->MyGlobalElementsPtr(bgids);
135  double* bvals = new double[maxlen*2];
136  double* avals = bvals+maxlen;
137 
138  int_type max_all_b = (int_type) Bview.colMap->MaxAllGID64();
139  int_type min_all_b = (int_type) Bview.colMap->MinAllGID64();
140 
141  //bcols will hold the GIDs from B's column-map for fast access
142  //during the computations below
143  for(i=0; i<numBcols; ++i) {
144  int blid = Bview.colMap->LID(bgids[i]);
145  bcols[blid] = bgids[i];
146  }
147 
148  //next create arrays indicating the first and last column-index in
149  //each row of B, so that we can know when to skip certain rows below.
150  //This will provide a large performance gain for banded matrices, and
151  //a somewhat smaller gain for *most* other matrices.
152  int_type* b_firstcol = new int_type[2*numBrows];
153  int_type* b_lastcol = b_firstcol+numBrows;
154  int_type temp;
155  for(i=0; i<numBrows; ++i) {
156  b_firstcol[i] = max_all_b;
157  b_lastcol[i] = min_all_b;
158 
159  int Blen_i = Bview.numEntriesPerRow[i];
160  if (Blen_i < 1) continue;
161  int* Bindices_i = Bview.indices[i];
162 
163  if (Bview.remote[i]) {
164  for(k=0; k<Blen_i; ++k) {
165  temp = (int_type) Bview.importColMap->GID64(Bindices_i[k]);
166  if (temp < b_firstcol[i]) b_firstcol[i] = temp;
167  if (temp > b_lastcol[i]) b_lastcol[i] = temp;
168  }
169  }
170  else {
171  for(k=0; k<Blen_i; ++k) {
172  temp = bcols[Bindices_i[k]];
173  if (temp < b_firstcol[i]) b_firstcol[i] = temp;
174  if (temp > b_lastcol[i]) b_lastcol[i] = temp;
175  }
176  }
177  }
178 
179  Epetra_Util util;
180 
181  int_type* Aind = iwork;
182  int_type* Bind = iwork+maxlen;
183 
184  bool C_filled = C.Filled();
185 
186  //To form C = A*B^T, we're going to execute this expression:
187  //
188  // C(i,j) = sum_k( A(i,k)*B(j,k) )
189  //
190  //This is the easiest case of all to code (easier than A*B, A^T*B, A^T*B^T).
191  //But it requires the use of a 'sparsedot' function (we're simply forming
192  //dot-products with row A_i and row B_j for all i and j).
193 
194  //loop over the rows of A.
195  for(i=0; i<Aview.numRows; ++i) {
196  if (Aview.remote[i]) {
197  continue;
198  }
199 
200  int* Aindices_i = Aview.indices[i];
201  double* Aval_i = Aview.values[i];
202  int A_len_i = Aview.numEntriesPerRow[i];
203  if (A_len_i < 1) {
204  continue;
205  }
206 
207  for(k=0; k<A_len_i; ++k) {
208  Aind[k] = (int_type) Aview.colMap->GID64(Aindices_i[k]);
209  avals[k] = Aval_i[k];
210  }
211 
212  util.Sort<int_type>(true, A_len_i, Aind, 1, &avals, 0, NULL, 0, 0);
213 
214  int_type mina = Aind[0];
215  int_type maxa = Aind[A_len_i-1];
216 
217  if (mina > max_all_b || maxa < min_all_b) {
218  continue;
219  }
220 
221  int_type global_row = (int_type) Aview.rowMap->GID64(i);
222 
223  //loop over the rows of B and form results C_ij = dot(A(i,:),B(j,:))
224  for(j=0; j<Bview.numRows; ++j) {
225  if (b_firstcol[j] > maxa || b_lastcol[j] < mina) {
226  continue;
227  }
228 
229  int* Bindices_j = Bview.indices[j];
230  int B_len_j = Bview.numEntriesPerRow[j];
231  if (B_len_j < 1) {
232  continue;
233  }
234 
235  int_type tmp;
236  int Blen = 0;
237 
238  if (Bview.remote[j]) {
239  for(k=0; k<B_len_j; ++k) {
240  tmp = (int_type) Bview.importColMap->GID64(Bindices_j[k]);
241  if (tmp < mina || tmp > maxa) {
242  continue;
243  }
244 
245  bvals[Blen] = Bview.values[j][k];
246  Bind[Blen++] = tmp;
247  }
248  }
249  else {
250  for(k=0; k<B_len_j; ++k) {
251  tmp = bcols[Bindices_j[k]];
252  if (tmp < mina || tmp > maxa) {
253  continue;
254  }
255 
256  bvals[Blen] = Bview.values[j][k];
257  Bind[Blen++] = tmp;
258  }
259  }
260 
261  if (Blen < 1) {
262  continue;
263  }
264 
265  util.Sort<int_type>(true, Blen, Bind, 1, &bvals, 0, NULL, 0, 0);
266 
267  double C_ij = sparsedot(avals, Aind, A_len_i,
268  bvals, Bind, Blen);
269 
270  if (C_ij == 0.0) {
271  continue;
272  }
273  int_type global_col = (int_type) Bview.rowMap->GID64(j);
274 
275  int err = C_filled ?
276  C.SumIntoGlobalValues(global_row, 1, &C_ij, &global_col)
277  :
278  C.InsertGlobalValues(global_row, 1, &C_ij, &global_col);
279 
280  if (err < 0) {
281  return(err);
282  }
283  if (err > 0) {
284  if (C_filled) {
285  //C.Filled()==true, and C doesn't have all the necessary nonzero
286  //locations, or global_row or global_col is out of range (less
287  //than 0 or non local).
288  std::cerr << "EpetraExt::MatrixMatrix::Multiply Warning: failed "
289  << "to insert value in result matrix at position "<<global_row
290  <<"," <<global_col<<", possibly because result matrix has a "
291  << "column-map that doesn't include column "<<global_col
292  <<" on this proc." <<std::endl;
293  return(err);
294  }
295  }
296  }
297  }
298 
299  delete [] iwork;
300  delete [] bvals;
301  delete [] b_firstcol;
302 
303  return(returnValue);
304 }
305 
306 int mult_A_Btrans(CrsMatrixStruct& Aview,
307  CrsMatrixStruct& Bview,
308  CrsWrapper& C)
309 {
310 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
311  if(Aview.rowMap->GlobalIndicesInt() &&
312  Aview.colMap->GlobalIndicesInt() &&
313  Aview.rowMap->GlobalIndicesInt() &&
314  Aview.colMap->GlobalIndicesInt()) {
315  return mult_A_Btrans<int>(Aview, Bview, C);
316  }
317  else
318 #endif
319 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
320  if(Aview.rowMap->GlobalIndicesLongLong() &&
321  Aview.colMap->GlobalIndicesLongLong() &&
322  Aview.rowMap->GlobalIndicesLongLong() &&
323  Aview.colMap->GlobalIndicesLongLong()) {
324  return mult_A_Btrans<long long>(Aview, Bview, C);
325  }
326  else
327 #endif
328  throw std::runtime_error("EpetraExt::mult_A_Btrans: GlobalIndices type unknown");
329 }
330 
331 //=========================================================================
332 //kernel method for computing the local portion of C = A^T*B
333 template<typename int_type>
335  CrsMatrixStruct& Bview,
336  CrsWrapper& C)
337 {
338  int C_firstCol = Bview.colMap->MinLID();
339  int C_lastCol = Bview.colMap->MaxLID();
340 
341  int C_firstCol_import = 0;
342  int C_lastCol_import = -1;
343 
344  if (Bview.importColMap != NULL) {
345  C_firstCol_import = Bview.importColMap->MinLID();
346  C_lastCol_import = Bview.importColMap->MaxLID();
347  }
348 
349  int C_numCols = C_lastCol - C_firstCol + 1;
350  int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
351 
352  if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
353 
354  double* C_row_i = new double[C_numCols];
355  int_type* C_colInds = new int_type[C_numCols];
356 
357  int i, j, k;
358 
359  for(j=0; j<C_numCols; ++j) {
360  C_row_i[j] = 0.0;
361  C_colInds[j] = 0;
362  }
363 
364  //To form C = A^T*B, compute a series of outer-product updates.
365  //
366  // for (ith column of A^T) {
367  // C_i = outer product of A^T(:,i) and B(i,:)
368  // Where C_i is the ith matrix update,
369  // A^T(:,i) is the ith column of A^T, and
370  // B(i,:) is the ith row of B.
371  //
372 
373  //dumpCrsMatrixStruct(Aview);
374  //dumpCrsMatrixStruct(Bview);
375  int localProc = Bview.colMap->Comm().MyPID();
376 
377  int_type* Arows = 0;
378  Aview.rowMap->MyGlobalElementsPtr(Arows);
379 
380  bool C_filled = C.Filled();
381 
382  //loop over the rows of A (which are the columns of A^T).
383  for(i=0; i<Aview.numRows; ++i) {
384 
385  int* Aindices_i = Aview.indices[i];
386  double* Aval_i = Aview.values[i];
387 
388  //we'll need to get the row of B corresponding to Arows[i],
389  //where Arows[i] is the GID of A's ith row.
390  int Bi = Bview.rowMap->LID(Arows[i]);
391  if (Bi<0) {
392  std::cout << "mult_Atrans_B ERROR, proc "<<localProc<<" needs row "
393  <<Arows[i]<<" of matrix B, but doesn't have it."<<std::endl;
394  return(-1);
395  }
396 
397  int* Bcol_inds = Bview.indices[Bi];
398  double* Bvals_i = Bview.values[Bi];
399 
400  //for each column-index Aj in the i-th row of A, we'll update
401  //global-row GID(Aj) of the result matrix C. In that row of C,
402  //we'll update column-indices given by the column-indices in the
403  //ith row of B that we're now holding (Bcol_inds).
404 
405  //First create a list of GIDs for the column-indices
406  //that we'll be updating.
407 
408  int Blen = Bview.numEntriesPerRow[Bi];
409  if (Bview.remote[Bi]) {
410  for(j=0; j<Blen; ++j) {
411  C_colInds[j] = (int_type) Bview.importColMap->GID64(Bcol_inds[j]);
412  }
413  }
414  else {
415  for(j=0; j<Blen; ++j) {
416  C_colInds[j] = (int_type) Bview.colMap->GID64(Bcol_inds[j]);
417  }
418  }
419 
420  //loop across the i-th row of A (column of A^T)
421  for(j=0; j<Aview.numEntriesPerRow[i]; ++j) {
422 
423  int Aj = Aindices_i[j];
424  double Aval = Aval_i[j];
425 
426  int_type global_row;
427  if (Aview.remote[i]) {
428  global_row = (int_type) Aview.importColMap->GID64(Aj);
429  }
430  else {
431  global_row = (int_type) Aview.colMap->GID64(Aj);
432  }
433 
434  if (!C.RowMap().MyGID(global_row)) {
435  continue;
436  }
437 
438  for(k=0; k<Blen; ++k) {
439  C_row_i[k] = Aval*Bvals_i[k];
440  }
441 
442  //
443  //Now add this row-update to C.
444  //
445 
446  int err = C_filled ?
447  C.SumIntoGlobalValues(global_row, Blen, C_row_i, C_colInds)
448  :
449  C.InsertGlobalValues(global_row, Blen, C_row_i, C_colInds);
450 
451  if (err < 0) {
452  return(err);
453  }
454  if (err > 0) {
455  if (C_filled) {
456  //C is Filled, and doesn't have all the necessary nonzero locations.
457  return(err);
458  }
459  }
460  }
461  }
462 
463  delete [] C_row_i;
464  delete [] C_colInds;
465 
466  return(0);
467 }
468 
469 int mult_Atrans_B(CrsMatrixStruct& Aview,
470  CrsMatrixStruct& Bview,
471  CrsWrapper& C)
472 {
473 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
474  if(Aview.rowMap->GlobalIndicesInt() &&
475  Aview.colMap->GlobalIndicesInt() &&
476  Aview.rowMap->GlobalIndicesInt() &&
477  Aview.colMap->GlobalIndicesInt()) {
478  return mult_Atrans_B<int>(Aview, Bview, C);
479  }
480  else
481 #endif
482 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
483  if(Aview.rowMap->GlobalIndicesLongLong() &&
484  Aview.colMap->GlobalIndicesLongLong() &&
485  Aview.rowMap->GlobalIndicesLongLong() &&
486  Aview.colMap->GlobalIndicesLongLong()) {
487  return mult_Atrans_B<long long>(Aview, Bview, C);
488  }
489  else
490 #endif
491  throw std::runtime_error("EpetraExt::mult_Atrans_B: GlobalIndices type unknown");
492 }
493 
494 //kernel method for computing the local portion of C = A^T*B^T
495 template<typename int_type>
497  CrsMatrixStruct& Bview,
498  CrsWrapper& C)
499 {
500  int C_firstCol = Aview.rowMap->MinLID();
501  int C_lastCol = Aview.rowMap->MaxLID();
502 
503  int C_firstCol_import = 0;
504  int C_lastCol_import = -1;
505 
506  if (Aview.importColMap != NULL) {
507  C_firstCol_import = Aview.importColMap->MinLID();
508  C_lastCol_import = Aview.importColMap->MaxLID();
509  }
510 
511  int C_numCols = C_lastCol - C_firstCol + 1;
512  int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
513 
514  if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
515 
516  double* dwork = new double[C_numCols];
517  int_type* iwork = new int_type[C_numCols];
518 
519  double* C_col_j = dwork;
520  int_type* C_inds = iwork;
521 
522  //std::cout << "Aview: " << std::endl;
523  //dumpCrsMatrixStruct(Aview);
524 
525  //std::cout << "Bview: " << std::endl;
526  //dumpCrsMatrixStruct(Bview);
527 
528 
529  int i, j, k;
530 
531  for(j=0; j<C_numCols; ++j) {
532  C_col_j[j] = 0.0;
533  C_inds[j] = -1;
534  }
535 
536  int_type* A_col_inds = 0;
537  Aview.colMap->MyGlobalElementsPtr(A_col_inds);
538  int_type* A_col_inds_import = 0;
539  if(Aview.importColMap)
540  Aview.importColMap->MyGlobalElementsPtr(A_col_inds_import);
541 
542  const Epetra_Map* Crowmap = &(C.RowMap());
543 
544  //To form C = A^T*B^T, we're going to execute this expression:
545  //
546  // C(i,j) = sum_k( A(k,i)*B(j,k) )
547  //
548  //Our goal, of course, is to navigate the data in A and B once, without
549  //performing searches for column-indices, etc. In other words, we avoid
550  //column-wise operations like the plague...
551 
552  int_type* Brows = 0;
553  Bview.rowMap->MyGlobalElementsPtr(Brows);
554 
555  //loop over the rows of B
556  for(j=0; j<Bview.numRows; ++j) {
557  int* Bindices_j = Bview.indices[j];
558  double* Bvals_j = Bview.values[j];
559 
560  int_type global_col = Brows[j];
561 
562  //loop across columns in the j-th row of B and for each corresponding
563  //row in A, loop across columns and accumulate product
564  //A(k,i)*B(j,k) into our partial sum quantities in C_col_j. In other
565  //words, as we stride across B(j,:), we use selected rows in A to
566  //calculate updates for column j of the result matrix C.
567 
568  for(k=0; k<Bview.numEntriesPerRow[j]; ++k) {
569 
570  int bk = Bindices_j[k];
571  double Bval = Bvals_j[k];
572 
573  int_type global_k;
574  if (Bview.remote[j]) {
575  global_k = (int_type) Bview.importColMap->GID64(bk);
576  }
577  else {
578  global_k = (int_type) Bview.colMap->GID64(bk);
579  }
580 
581  //get the corresponding row in A
582  int ak = Aview.rowMap->LID(global_k);
583  if (ak<0) {
584  continue;
585  }
586 
587  int* Aindices_k = Aview.indices[ak];
588  double* Avals_k = Aview.values[ak];
589 
590  int C_len = 0;
591 
592  if (Aview.remote[ak]) {
593  for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) {
594  C_col_j[C_len] = Avals_k[i]*Bval;
595  C_inds[C_len++] = A_col_inds_import[Aindices_k[i]];
596  }
597  }
598  else {
599  for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) {
600  C_col_j[C_len] = Avals_k[i]*Bval;
601  C_inds[C_len++] = A_col_inds[Aindices_k[i]];
602  }
603  }
604 
605  //Now loop across the C_col_j values and put non-zeros into C.
606 
607  for(i=0; i < C_len ; ++i) {
608  if (C_col_j[i] == 0.0) continue;
609 
610  int_type global_row = C_inds[i];
611  if (!Crowmap->MyGID(global_row)) {
612  continue;
613  }
614 
615  int err = C.SumIntoGlobalValues(global_row, 1, &(C_col_j[i]), &global_col);
616 
617  if (err < 0) {
618  return(err);
619  }
620  else {
621  if (err > 0) {
622  err = C.InsertGlobalValues(global_row, 1, &(C_col_j[i]), &global_col);
623  if (err < 0) {
624  return(err);
625  }
626  }
627  }
628  }
629 
630  }
631  }
632 
633  delete [] dwork;
634  delete [] iwork;
635 
636  return(0);
637 }
638 
640  CrsMatrixStruct& Bview,
641  CrsWrapper& C)
642 {
643 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
644  if(Aview.rowMap->GlobalIndicesInt() &&
645  Aview.colMap->GlobalIndicesInt() &&
646  Aview.rowMap->GlobalIndicesInt() &&
647  Aview.colMap->GlobalIndicesInt()) {
648  return mult_Atrans_Btrans<int>(Aview, Bview, C);
649  }
650  else
651 #endif
652 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
653  if(Aview.rowMap->GlobalIndicesLongLong() &&
654  Aview.colMap->GlobalIndicesLongLong() &&
655  Aview.rowMap->GlobalIndicesLongLong() &&
656  Aview.colMap->GlobalIndicesLongLong()) {
657  return mult_Atrans_Btrans<long long>(Aview, Bview, C);
658  }
659  else
660 #endif
661  throw std::runtime_error("EpetraExt::mult_Atrans_Btrans: GlobalIndices type unknown");
662 }
663 
664 // ==============================================================
665 template<typename int_type>
666 int import_and_extract_views(const Epetra_CrsMatrix& M,
667  const Epetra_Map& targetMap,
668  CrsMatrixStruct& Mview,
669  const Epetra_Import * prototypeImporter=0)
670 {
671  //The goal of this method is to populate the 'Mview' struct with views of the
672  //rows of M, including all rows that correspond to elements in 'targetMap'.
673  //
674  //If targetMap includes local elements that correspond to remotely-owned rows
675  //of M, then those remotely-owned rows will be imported into
676  //'Mview.importMatrix', and views of them will be included in 'Mview'.
677 
678  // The prototype importer, if used, has to know who owns all of the PIDs in M's rowmap.
679  // The typical use of this is to provide A's column map when I&XV is called for B, for
680  // a C = A * B multiply.
681 
682 #ifdef ENABLE_MMM_TIMINGS
683  using Teuchos::TimeMonitor;
684  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Alloc")));
685 #endif
686  Mview.deleteContents();
687 
688  Mview.origMatrix = &M;
689  const Epetra_Map& Mrowmap = M.RowMap();
690  int numProcs = Mrowmap.Comm().NumProc();
691  Mview.numRows = targetMap.NumMyElements();
692  int_type* Mrows = 0;
693  targetMap.MyGlobalElementsPtr(Mrows);
694 
695  if (Mview.numRows > 0) {
696  Mview.numEntriesPerRow = new int[Mview.numRows];
697  Mview.indices = new int*[Mview.numRows];
698  Mview.values = new double*[Mview.numRows];
699  Mview.remote = new bool[Mview.numRows];
700  }
701 
702  Mview.origRowMap = &(M.RowMap());
703  Mview.rowMap = &targetMap;
704  Mview.colMap = &(M.ColMap());
705  Mview.domainMap = &(M.DomainMap());
706  Mview.importColMap = NULL;
707  Mview.numRemote = 0;
708 
709 
710 #ifdef ENABLE_MMM_TIMINGS
711  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Extract")));
712 #endif
713 
714  int *rowptr=0, *colind=0;
715  double *vals=0;
716 
717  EPETRA_CHK_ERR( M.ExtractCrsDataPointers(rowptr,colind,vals) );
718 
719  if(Mrowmap.SameAs(targetMap)) {
720  // Short Circuit: The Row and Target Maps are the Same
721  for(int i=0; i<Mview.numRows; ++i) {
722  Mview.numEntriesPerRow[i] = rowptr[i+1]-rowptr[i];
723  Mview.indices[i] = colind + rowptr[i];
724  Mview.values[i] = vals + rowptr[i];
725  Mview.remote[i] = false;
726  }
727  return 0;
728  }
729  else if(prototypeImporter && prototypeImporter->SourceMap().SameAs(M.RowMap()) && prototypeImporter->TargetMap().SameAs(targetMap)){
730  // The prototypeImporter can tell me what is local and what is remote
731  int * PermuteToLIDs = prototypeImporter->PermuteToLIDs();
732  int * PermuteFromLIDs = prototypeImporter->PermuteFromLIDs();
733  int * RemoteLIDs = prototypeImporter->RemoteLIDs();
734  for(int i=0; i<prototypeImporter->NumSameIDs();i++){
735  Mview.numEntriesPerRow[i] = rowptr[i+1]-rowptr[i];
736  Mview.indices[i] = colind + rowptr[i];
737  Mview.values[i] = vals + rowptr[i];
738  Mview.remote[i] = false;
739  }
740  for(int i=0; i<prototypeImporter->NumPermuteIDs();i++){
741  int to = PermuteToLIDs[i];
742  int from = PermuteFromLIDs[i];
743  Mview.numEntriesPerRow[to] = rowptr[from+1]-rowptr[from];
744  Mview.indices[to] = colind + rowptr[from];
745  Mview.values[to] = vals + rowptr[from];
746  Mview.remote[to] = false;
747  }
748  for(int i=0; i<prototypeImporter->NumRemoteIDs();i++){
749  Mview.remote[RemoteLIDs[i]] = true;
750  ++Mview.numRemote;
751  }
752  }
753  else {
754  // Only LID can tell me who is local and who is remote
755  for(int i=0; i<Mview.numRows; ++i) {
756  int mlid = Mrowmap.LID(Mrows[i]);
757  if (mlid < 0) {
758  Mview.remote[i] = true;
759  ++Mview.numRemote;
760  }
761  else {
762  Mview.numEntriesPerRow[i] = rowptr[mlid+1]-rowptr[mlid];
763  Mview.indices[i] = colind + rowptr[mlid];
764  Mview.values[i] = vals + rowptr[mlid];
765  Mview.remote[i] = false;
766  }
767  }
768  }
769 
770 #ifdef ENABLE_MMM_TIMINGS
771  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Collective-0")));
772 #endif
773 
774  if (numProcs < 2) {
775  if (Mview.numRemote > 0) {
776  std::cerr << "EpetraExt::MatrixMatrix::Multiply ERROR, numProcs < 2 but "
777  << "attempting to import remote matrix rows."<<std::endl;
778  return(-1);
779  }
780 
781  //If only one processor we don't need to import any remote rows, so return.
782  return(0);
783  }
784 
785  //
786  //Now we will import the needed remote rows of M, if the global maximum
787  //value of numRemote is greater than 0.
788  //
789 
790  int globalMaxNumRemote = 0;
791  Mrowmap.Comm().MaxAll(&Mview.numRemote, &globalMaxNumRemote, 1);
792 
793  if (globalMaxNumRemote > 0) {
794 #ifdef ENABLE_MMM_TIMINGS
795  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Import-1")));
796 #endif
797  //Create a map that describes the remote rows of M that we need.
798 
799  int_type* MremoteRows = Mview.numRemote>0 ? new int_type[Mview.numRemote] : NULL;
800  int offset = 0;
801  for(int i=0; i<Mview.numRows; ++i) {
802  if (Mview.remote[i]) {
803  MremoteRows[offset++] = Mrows[i];
804  }
805  }
806 
807  LightweightMap MremoteRowMap((int_type) -1, Mview.numRemote, MremoteRows, (int_type) Mrowmap.IndexBase64());
808 
809 #ifdef ENABLE_MMM_TIMINGS
810  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Import-2")));
811 #endif
812  //Create an importer with target-map MremoteRowMap and
813  //source-map Mrowmap.
814  Epetra_Import * importer=0;
815  RemoteOnlyImport * Rimporter=0;
816  if(prototypeImporter && prototypeImporter->SourceMap().SameAs(M.RowMap()) && prototypeImporter->TargetMap().SameAs(targetMap)) {
817  Rimporter = new RemoteOnlyImport(*prototypeImporter,MremoteRowMap);
818  }
819  else if(!prototypeImporter) {
820  Epetra_Map MremoteRowMap2((int_type) -1, Mview.numRemote, MremoteRows, (int_type) Mrowmap.IndexBase64(), Mrowmap.Comm());
821  importer=new Epetra_Import(MremoteRowMap2, Mrowmap);
822  }
823  else
824  throw std::runtime_error("prototypeImporter->SourceMap() does not match M.RowMap()!");
825 
826 
827 #ifdef ENABLE_MMM_TIMINGS
828  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Import-3")));
829 #endif
830 
831  if(Mview.importMatrix) delete Mview.importMatrix;
832  if(Rimporter) Mview.importMatrix = new LightweightCrsMatrix(M,*Rimporter);
833  else Mview.importMatrix = new LightweightCrsMatrix(M,*importer);
834 
835 #ifdef ENABLE_MMM_TIMINGS
836  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Import-4")));
837 #endif
838 
839  //Finally, use the freshly imported data to fill in the gaps in our views
840  int N;
841  if (Mview.importMatrix->use_lw) {
842  N = Mview.importMatrix->RowMapLW_->NumMyElements();
843  } else {
844  N = Mview.importMatrix->RowMapEP_->NumMyElements();
845  }
846 
847  if(N > 0) {
848  rowptr = &Mview.importMatrix->rowptr_[0];
849  colind = &Mview.importMatrix->colind_[0];
850  vals = &Mview.importMatrix->vals_[0];
851  }
852 
853 
854  for(int i=0; i<Mview.numRows; ++i) {
855  if (Mview.remote[i]) {
856  int importLID = MremoteRowMap.LID(Mrows[i]);
857  Mview.numEntriesPerRow[i] = rowptr[importLID+1]-rowptr[importLID];
858  Mview.indices[i] = colind + rowptr[importLID];
859  Mview.values[i] = vals + rowptr[importLID];
860  }
861  }
862 
863 
864  int_type * MyColGIDs = 0;
865  if(Mview.importMatrix->ColMap_.NumMyElements()>0) {
866  Mview.importMatrix->ColMap_.MyGlobalElementsPtr(MyColGIDs);
867  }
868  Mview.importColMap =
869  new Epetra_Map (static_cast<int_type> (-1),
871  MyColGIDs,
872  static_cast<int_type> (Mview.importMatrix->ColMap_.IndexBase64 ()),
873  M.Comm ());
874  delete [] MremoteRows;
875 #ifdef ENABLE_MMM_TIMINGS
876  MM=Teuchos::null;
877 #endif
878 
879  // Cleanup
880  delete Rimporter;
881  delete importer;
882  }
883  return(0);
884 }
885 
886 
887 
888 
889 // ==============================================================
890 template<typename int_type>
891 int import_only(const Epetra_CrsMatrix& M,
892  const Epetra_Map& targetMap,
893  CrsMatrixStruct& Mview,
894  const Epetra_Import * prototypeImporter=0)
895 {
896  // The goal of this method to populare the Mview object with ONLY the rows of M
897  // that correspond to elements in 'targetMap.' There will be no population of the
898  // numEntriesPerRow, indices, values, remote or numRemote arrays.
899 
900 
901  // The prototype importer, if used, has to know who owns all of the PIDs in M's rowmap.
902  // The typical use of this is to provide A's column map when I&XV is called for B, for
903  // a C = A * B multiply.
904 
905 #ifdef ENABLE_MMM_TIMINGS
906  using Teuchos::TimeMonitor;
907  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Setup")));
908 #endif
909 
910  Mview.deleteContents();
911 
912  Mview.origMatrix = &M;
913  const Epetra_Map& Mrowmap = M.RowMap();
914  int numProcs = Mrowmap.Comm().NumProc();
915  Mview.numRows = targetMap.NumMyElements();
916 
917  Mview.origRowMap = &(M.RowMap());
918  Mview.rowMap = &targetMap;
919  Mview.colMap = &(M.ColMap());
920  Mview.domainMap = &(M.DomainMap());
921  Mview.importColMap = NULL;
922 
923  int i;
924  int numRemote =0;
925  int N = Mview.rowMap->NumMyElements();
926 
927  if(Mrowmap.SameAs(targetMap)) {
928  numRemote = 0;
929  Mview.targetMapToOrigRow.resize(N);
930  for(i=0;i<N; i++) Mview.targetMapToOrigRow[i]=i;
931  return 0;
932  }
933  else if(prototypeImporter && prototypeImporter->SourceMap().SameAs(M.RowMap()) && prototypeImporter->TargetMap().SameAs(targetMap)){
934  numRemote = prototypeImporter->NumRemoteIDs();
935 
936  Mview.targetMapToOrigRow.resize(N); Mview.targetMapToOrigRow.assign(N,-1);
937  Mview.targetMapToImportRow.resize(N); Mview.targetMapToImportRow.assign(N,-1);
938 
939  const int * PermuteToLIDs = prototypeImporter->PermuteToLIDs();
940  const int * PermuteFromLIDs = prototypeImporter->PermuteFromLIDs();
941  const int * RemoteLIDs = prototypeImporter->RemoteLIDs();
942 
943  for(i=0; i<prototypeImporter->NumSameIDs();i++)
944  Mview.targetMapToOrigRow[i] = i;
945 
946  // NOTE: These are reversed on purpose since we're doing a revere map.
947  for(i=0; i<prototypeImporter->NumPermuteIDs();i++)
948  Mview.targetMapToOrigRow[PermuteToLIDs[i]] = PermuteFromLIDs[i];
949 
950  for(i=0; i<prototypeImporter->NumRemoteIDs();i++)
951  Mview.targetMapToImportRow[RemoteLIDs[i]] = i;
952 
953  }
954  else
955  throw std::runtime_error("import_only: This routine only works if you either have the right map or no prototypeImporter");
956 
957  if (numProcs < 2) {
958  if (Mview.numRemote > 0) {
959  std::cerr << "EpetraExt::MatrixMatrix::Multiply ERROR, numProcs < 2 but "
960  << "attempting to import remote matrix rows."<<std::endl;
961  return(-1);
962  }
963 
964  //If only one processor we don't need to import any remote rows, so return.
965  return(0);
966  }
967 
968 #ifdef ENABLE_MMM_TIMINGS
969  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Import-1")));
970 #endif
971  const int * RemoteLIDs = prototypeImporter->RemoteLIDs();
972 
973  //Create a map that describes the remote rows of M that we need.
974  int_type* MremoteRows = numRemote>0 ? new int_type[prototypeImporter->NumRemoteIDs()] : 0;
975  for(i=0; i<prototypeImporter->NumRemoteIDs(); i++)
976  MremoteRows[i] = (int_type) targetMap.GID64(RemoteLIDs[i]);
977 
978  LightweightMap MremoteRowMap((int_type) -1, numRemote, MremoteRows, (int_type)Mrowmap.IndexBase64());
979 
980 #ifdef ENABLE_MMM_TIMINGS
981  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Import-2")));
982 #endif
983  //Create an importer with target-map MremoteRowMap and
984  //source-map Mrowmap.
985  RemoteOnlyImport * Rimporter=0;
986  Rimporter = new RemoteOnlyImport(*prototypeImporter,MremoteRowMap);
987 
988 #ifdef ENABLE_MMM_TIMINGS
989  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Import-3")));
990 #endif
991 
992  Mview.importMatrix = new LightweightCrsMatrix(M,*Rimporter);
993 
994 #ifdef ENABLE_MMM_TIMINGS
995  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Import-4")));
996 #endif
997 
998  // Cleanup
999  delete Rimporter;
1000  delete [] MremoteRows;
1001 
1002  return(0);
1003 }
1004 
1005 
1006 
1007 
1008 //=========================================================================
1009 template<typename int_type>
1010 int form_map_union(const Epetra_Map* map1,
1011  const Epetra_Map* map2,
1012  const Epetra_Map*& mapunion)
1013 {
1014  //form the union of two maps
1015 
1016  if (map1 == NULL) {
1017  mapunion = new Epetra_Map(*map2);
1018  return(0);
1019  }
1020 
1021  if (map2 == NULL) {
1022  mapunion = new Epetra_Map(*map1);
1023  return(0);
1024  }
1025 
1026  int map1_len = map1->NumMyElements();
1027  int_type* map1_elements = 0;
1028  map1->MyGlobalElementsPtr(map1_elements);
1029  int map2_len = map2->NumMyElements();
1030  int_type* map2_elements = 0;
1031  map2->MyGlobalElementsPtr(map2_elements);
1032 
1033  int_type* union_elements = new int_type[map1_len+map2_len];
1034 
1035  int map1_offset = 0, map2_offset = 0, union_offset = 0;
1036 
1037  while(map1_offset < map1_len && map2_offset < map2_len) {
1038  int_type map1_elem = map1_elements[map1_offset];
1039  int_type map2_elem = map2_elements[map2_offset];
1040 
1041  if (map1_elem < map2_elem) {
1042  union_elements[union_offset++] = map1_elem;
1043  ++map1_offset;
1044  }
1045  else if (map1_elem > map2_elem) {
1046  union_elements[union_offset++] = map2_elem;
1047  ++map2_offset;
1048  }
1049  else {
1050  union_elements[union_offset++] = map1_elem;
1051  ++map1_offset;
1052  ++map2_offset;
1053  }
1054  }
1055 
1056  int i;
1057  for(i=map1_offset; i<map1_len; ++i) {
1058  union_elements[union_offset++] = map1_elements[i];
1059  }
1060 
1061  for(i=map2_offset; i<map2_len; ++i) {
1062  union_elements[union_offset++] = map2_elements[i];
1063  }
1064 
1065  mapunion = new Epetra_Map((int_type) -1, union_offset, union_elements,
1066  (int_type) map1->IndexBase64(), map1->Comm());
1067 
1068  delete [] union_elements;
1069 
1070  return(0);
1071 }
1072 
1073 //=========================================================================
1074 template<typename int_type>
1075 Epetra_Map* Tfind_rows_containing_cols(const Epetra_CrsMatrix& M,
1076  const Epetra_Map& column_map)
1077 {
1078  //The goal of this function is to find all rows in the matrix M that contain
1079  //column-indices which are in 'column_map'. A map containing those rows is
1080  //returned. (The returned map will then be used as the source row-map for
1081  //importing those rows into an overlapping distribution.)
1082 
1083  std::pair<int_type,int_type> my_col_range = get_col_range<int_type>(column_map);
1084 
1085  const Epetra_Comm& Comm = M.RowMap().Comm();
1086  int num_procs = Comm.NumProc();
1087  int my_proc = Comm.MyPID();
1088  std::vector<int> send_procs;
1089  send_procs.reserve(num_procs-1);
1090  std::vector<int_type> col_ranges;
1091  col_ranges.reserve(2*(num_procs-1));
1092  for(int p=0; p<num_procs; ++p) {
1093  if (p == my_proc) continue;
1094  send_procs.push_back(p);
1095  col_ranges.push_back(my_col_range.first);
1096  col_ranges.push_back(my_col_range.second);
1097  }
1098 
1099  Epetra_Distributor* distor = Comm.CreateDistributor();
1100 
1101  int num_recv_procs = 0;
1102  int num_send_procs = send_procs.size();
1103  bool deterministic = true;
1104  int* send_procs_ptr = send_procs.size()>0 ? &send_procs[0] : NULL;
1105  distor->CreateFromSends(num_send_procs, send_procs_ptr, deterministic, num_recv_procs);
1106 
1107  int len_import_chars = 0;
1108  char* import_chars = NULL;
1109 
1110  char* export_chars = col_ranges.size()>0 ? reinterpret_cast<char*>(&col_ranges[0]) : NULL;
1111  int err = distor->Do(export_chars, 2*sizeof(int_type), len_import_chars, import_chars);
1112  if (err != 0) {
1113  std::cout << "ERROR returned from Distributor::Do."<<std::endl;
1114  }
1115 
1116  int_type* IntImports = reinterpret_cast<int_type*>(import_chars);
1117  int num_import_pairs = len_import_chars/(2*sizeof(int_type));
1118  int offset = 0;
1119  std::vector<int> send_procs2;
1120  send_procs2.reserve(num_procs);
1121  std::vector<int_type> proc_col_ranges;
1122  std::pair<int_type,int_type> M_col_range = get_col_range<int_type>(M);
1123  for(int i=0; i<num_import_pairs; ++i) {
1124  int_type first_col = IntImports[offset++];
1125  int_type last_col = IntImports[offset++];
1126  if (first_col <= M_col_range.second && last_col >= M_col_range.first) {
1127  send_procs2.push_back(send_procs[i]);
1128  proc_col_ranges.push_back(first_col);
1129  proc_col_ranges.push_back(last_col);
1130  }
1131  }
1132 
1133  std::vector<int_type> send_rows;
1134  std::vector<int> rows_per_send_proc;
1135  pack_outgoing_rows(M, proc_col_ranges, send_rows, rows_per_send_proc);
1136 
1137  Epetra_Distributor* distor2 = Comm.CreateDistributor();
1138 
1139  int* send_procs2_ptr = send_procs2.size()>0 ? &send_procs2[0] : NULL;
1140  num_recv_procs = 0;
1141  err = distor2->CreateFromSends(send_procs2.size(), send_procs2_ptr, deterministic, num_recv_procs);
1142 
1143  export_chars = send_rows.size()>0 ? reinterpret_cast<char*>(&send_rows[0]) : NULL;
1144  int* rows_per_send_proc_ptr = rows_per_send_proc.size()>0 ? &rows_per_send_proc[0] : NULL;
1145  len_import_chars = 0;
1146  err = distor2->Do(export_chars, (int)sizeof(int_type), rows_per_send_proc_ptr, len_import_chars, import_chars);
1147 
1148  int_type* recvd_row_ints = reinterpret_cast<int_type*>(import_chars);
1149  int num_recvd_rows = len_import_chars/sizeof(int_type);
1150 
1151  Epetra_Map recvd_rows((int_type) -1, num_recvd_rows, recvd_row_ints, (int_type) column_map.IndexBase64(), Comm);
1152 
1153  delete distor;
1154  delete distor2;
1155  delete [] import_chars;
1156 
1157  Epetra_Map* result_map = NULL;
1158 
1159  err = form_map_union<int_type>(&(M.RowMap()), &recvd_rows, (const Epetra_Map*&)result_map);
1160  if (err != 0) {
1161  return(NULL);
1162  }
1163 
1164  return(result_map);
1165 }
1166 
1167 Epetra_Map* find_rows_containing_cols(const Epetra_CrsMatrix& M,
1168  const Epetra_Map& column_map)
1169 {
1170 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
1171  if(M.RowMatrixRowMap().GlobalIndicesInt() && column_map.GlobalIndicesInt()) {
1172  return Tfind_rows_containing_cols<int>(M, column_map);
1173  }
1174  else
1175 #endif
1176 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
1177  if(M.RowMatrixRowMap().GlobalIndicesLongLong() && column_map.GlobalIndicesLongLong()) {
1178  return Tfind_rows_containing_cols<long long>(M, column_map);
1179  }
1180  else
1181 #endif
1182  throw std::runtime_error("EpetraExt::find_rows_containing_cols: GlobalIndices type unknown");
1183 }
1184 
1185 //=========================================================================
1186 template<typename int_type>
1187 int MatrixMatrix::TMultiply(const Epetra_CrsMatrix& A,
1188  bool transposeA,
1189  const Epetra_CrsMatrix& B,
1190  bool transposeB,
1191  Epetra_CrsMatrix& C,
1192  bool call_FillComplete_on_result)
1193 {
1194  // DEBUG
1195  // bool NewFlag=!C.IndicesAreLocal() && !C.IndicesAreGlobal();
1196 #ifdef ENABLE_MMM_TIMINGS
1197  using Teuchos::TimeMonitor;
1198  Teuchos::RCP<TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM All Setup")));
1199 #endif
1200  // end DEBUG
1201 
1202  //
1203  //This method forms the matrix-matrix product C = op(A) * op(B), where
1204  //op(A) == A if transposeA is false,
1205  //op(A) == A^T if transposeA is true,
1206  //and similarly for op(B).
1207  //
1208 
1209  //A and B should already be Filled.
1210  //(Should we go ahead and call FillComplete() on them if necessary?
1211  // or error out? For now, we choose to error out.)
1212  if (!A.Filled() || !B.Filled()) {
1213  EPETRA_CHK_ERR(-1);
1214  }
1215 
1216  // Is the C matrix new?
1217  bool NewFlag=!C.IndicesAreLocal() && !C.IndicesAreGlobal();
1218 
1219  //We're going to refer to the different combinations of op(A) and op(B)
1220  //as scenario 1 through 4.
1221 
1222  int scenario = 1;//A*B
1223  if (transposeB && !transposeA) scenario = 2;//A*B^T
1224  if (transposeA && !transposeB) scenario = 3;//A^T*B
1225  if (transposeA && transposeB) scenario = 4;//A^T*B^T
1226  if(NewFlag && transposeA && !transposeB) scenario = 5; // A^T*B, newmatrix
1227 
1228  //now check size compatibility
1229  long long Aouter = transposeA ? A.NumGlobalCols64() : A.NumGlobalRows64();
1230  long long Bouter = transposeB ? B.NumGlobalRows64() : B.NumGlobalCols64();
1231  long long Ainner = transposeA ? A.NumGlobalRows64() : A.NumGlobalCols64();
1232  long long Binner = transposeB ? B.NumGlobalCols64() : B.NumGlobalRows64();
1233  if (Ainner != Binner) {
1234  std::cerr << "MatrixMatrix::Multiply: ERROR, inner dimensions of op(A) and op(B) "
1235  << "must match for matrix-matrix product. op(A) is "
1236  <<Aouter<<"x"<<Ainner << ", op(B) is "<<Binner<<"x"<<Bouter<<std::endl;
1237  return(-1);
1238  }
1239 
1240  //The result matrix C must at least have a row-map that reflects the
1241  //correct row-size. Don't check the number of columns because rectangular
1242  //matrices which were constructed with only one map can still end up
1243  //having the correct capacity and dimensions when filled.
1244  if (Aouter > C.NumGlobalRows64()) {
1245  std::cerr << "MatrixMatrix::Multiply: ERROR, dimensions of result C must "
1246  << "match dimensions of op(A) * op(B). C has "<<C.NumGlobalRows64()
1247  << " rows, should have at least "<<Aouter << std::endl;
1248  return(-1);
1249  }
1250 
1251  //It doesn't matter whether C is already Filled or not. If it is already
1252  //Filled, it must have space allocated for the positions that will be
1253  //referenced in forming C = op(A)*op(B). If it doesn't have enough space,
1254  //we'll error out later when trying to store result values.
1255 
1256  //We're going to need to import remotely-owned sections of A and/or B
1257  //if more than 1 processor is performing this run, depending on the scenario.
1258  int numProcs = A.Comm().NumProc();
1259 
1260  //If we are to use the transpose of A and/or B, we'll need to be able to
1261  //access, on the local processor, all rows that contain column-indices in
1262  //the domain-map.
1263  const Epetra_Map* domainMap_A = &(A.DomainMap());
1264  const Epetra_Map* domainMap_B = &(B.DomainMap());
1265 
1266  const Epetra_Map* rowmap_A = &(A.RowMap());
1267  const Epetra_Map* rowmap_B = &(B.RowMap());
1268 
1269  //Declare some 'work-space' maps which may be created depending on
1270  //the scenario, and which will be deleted before exiting this function.
1271  const Epetra_Map* workmap1 = NULL;
1272  const Epetra_Map* workmap2 = NULL;
1273  const Epetra_Map* mapunion1 = NULL;
1274 
1275  //Declare a couple of structs that will be used to hold views of the data
1276  //of A and B, to be used for fast access during the matrix-multiplication.
1277  CrsMatrixStruct Aview;
1278  CrsMatrixStruct Atransview;
1279  CrsMatrixStruct Bview;
1280  Teuchos::RCP<Epetra_CrsMatrix> Atrans;
1281 
1282  const Epetra_Map* targetMap_A = rowmap_A;
1283  const Epetra_Map* targetMap_B = rowmap_B;
1284 
1285 #ifdef ENABLE_MMM_TIMINGS
1286  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM All I&X")));
1287 #endif
1288  if (numProcs > 1) {
1289  //If op(A) = A^T, find all rows of A that contain column-indices in the
1290  //local portion of the domain-map. (We'll import any remote rows
1291  //that fit this criteria onto the local processor.)
1292  if (scenario == 3 || scenario == 4) {
1293  workmap1 = Tfind_rows_containing_cols<int_type>(A, *domainMap_A);
1294  targetMap_A = workmap1;
1295  }
1296  }
1297  if (scenario == 5) {
1298  targetMap_A = &(A.ColMap());
1299  }
1300 
1301  //Now import any needed remote rows and populate the Aview struct.
1302  if(scenario==1 && call_FillComplete_on_result) {
1303  EPETRA_CHK_ERR(import_only<int_type>(A,*targetMap_A,Aview));
1304  }
1305  else if (scenario == 5){
1306  // Perform a local transpose of A and store that in Atransview
1307  EpetraExt::RowMatrix_Transpose at(const_cast<Epetra_Map *>(targetMap_A),false);
1308  Epetra_CrsMatrix * Anonconst = const_cast<Epetra_CrsMatrix *>(&A);
1309  Atrans = Teuchos::rcp(at.CreateTransposeLocal(*Anonconst));
1310  import_only<int_type>(*Atrans,*targetMap_A,Atransview);
1311  }
1312  else {
1313  EPETRA_CHK_ERR( import_and_extract_views<int_type>(A, *targetMap_A, Aview));
1314  }
1315 
1316 
1317  // NOTE: Next up is to switch to import_only for B as well, and then modify the THREE SerialCores
1318  // to add a Acol2Brow and Acol2Bimportrow array for in-algorithm lookups.
1319 
1320 
1321  // Make sure B's views are consistent with A even in serial.
1322  const Epetra_Map* colmap_op_A = NULL;
1323  if(scenario==1 || numProcs > 1){
1324  if (transposeA && scenario == 3) {
1325  colmap_op_A = targetMap_A;
1326  }
1327  else {
1328  colmap_op_A = &(A.ColMap());
1329  }
1330  targetMap_B = colmap_op_A;
1331  }
1332  if(scenario==5) targetMap_B = &(B.RowMap());
1333 
1334 
1335  if (numProcs > 1) {
1336  //If op(B) = B^T, find all rows of B that contain column-indices in the
1337  //local-portion of the domain-map, or in the column-map of op(A).
1338  //We'll import any remote rows that fit this criteria onto the
1339  //local processor.
1340  if (transposeB) {
1341  EPETRA_CHK_ERR( form_map_union<int_type>(colmap_op_A, domainMap_B, mapunion1) );
1342  workmap2 = Tfind_rows_containing_cols<int_type>(B, *mapunion1);
1343  targetMap_B = workmap2;
1344  }
1345  }
1346 
1347  //Now import any needed remote rows and populate the Bview struct.
1348  if((scenario==1 && call_FillComplete_on_result) || scenario==5) {
1349  EPETRA_CHK_ERR(import_only<int_type>(B,*targetMap_B,Bview,A.Importer()));
1350  }
1351  else {
1352  EPETRA_CHK_ERR( import_and_extract_views<int_type>(B, *targetMap_B, Bview) );
1353  }
1354 
1355 #ifdef ENABLE_MMM_TIMINGS
1356  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM All Multiply")));
1357 #endif
1358 
1359  // Zero if filled
1360  if(C.Filled()) C.PutScalar(0.0);
1361 
1362  //Now call the appropriate method to perform the actual multiplication.
1363  CrsWrapper_Epetra_CrsMatrix ecrsmat(C);
1364 
1365  switch(scenario) {
1366  case 1: EPETRA_CHK_ERR( mult_A_B(A,Aview,B,Bview,C,call_FillComplete_on_result) );
1367  break;
1368  case 2: EPETRA_CHK_ERR( mult_A_Btrans(Aview, Bview, ecrsmat) );
1369  break;
1370  case 3: EPETRA_CHK_ERR( mult_Atrans_B(Aview, Bview, ecrsmat) );
1371  break;
1372  case 4: EPETRA_CHK_ERR( mult_Atrans_Btrans(Aview, Bview, ecrsmat) );
1373  break;
1374  case 5: EPETRA_CHK_ERR( mult_AT_B_newmatrix(Atransview, Bview, C) );
1375  break;
1376  }
1377 
1378 
1379  if (scenario != 1 && call_FillComplete_on_result && scenario != 5) {
1380  //We'll call FillComplete on the C matrix before we exit, and give
1381  //it a domain-map and a range-map.
1382  //The domain-map will be the domain-map of B, unless
1383  //op(B)==transpose(B), in which case the range-map of B will be used.
1384  //The range-map will be the range-map of A, unless
1385  //op(A)==transpose(A), in which case the domain-map of A will be used.
1386 
1387  const Epetra_Map* domainmap =
1388  transposeB ? &(B.RangeMap()) : &(B.DomainMap());
1389 
1390  const Epetra_Map* rangemap =
1391  transposeA ? &(A.DomainMap()) : &(A.RangeMap());
1392 
1393  if (!C.Filled()) {
1394  EPETRA_CHK_ERR( C.FillComplete(*domainmap, *rangemap) );
1395  }
1396  }
1397 
1398  //Finally, delete the objects that were potentially created
1399  //during the course of importing remote sections of A and B.
1400 
1401  delete mapunion1; mapunion1 = NULL;
1402  delete workmap1; workmap1 = NULL;
1403  delete workmap2; workmap2 = NULL;
1404 
1405  return(0);
1406 }
1407 
1408 int MatrixMatrix::Multiply(const Epetra_CrsMatrix& A,
1409  bool transposeA,
1410  const Epetra_CrsMatrix& B,
1411  bool transposeB,
1412  Epetra_CrsMatrix& C,
1413  bool call_FillComplete_on_result)
1414 {
1415 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
1416  if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
1417  return TMultiply<int>(A, transposeA, B, transposeB, C, call_FillComplete_on_result);
1418  }
1419  else
1420 #endif
1421 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
1422  if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
1423  return TMultiply<long long>(A, transposeA, B, transposeB, C, call_FillComplete_on_result);
1424  }
1425  else
1426 #endif
1427  throw std::runtime_error("EpetraExt::MatrixMatrix::Add: GlobalIndices type unknown");
1428 }
1429 
1430 //=========================================================================
1431 template<typename int_type>
1432 int MatrixMatrix::TAdd(const Epetra_CrsMatrix& A,
1433  bool transposeA,
1434  double scalarA,
1435  Epetra_CrsMatrix& B,
1436  double scalarB )
1437 {
1438  //
1439  //This method forms the matrix-matrix sum B = scalarA * op(A) + scalarB * B, where
1440 
1441  //A should already be Filled. It doesn't matter whether B is
1442  //already Filled, but if it is, then its graph must already contain
1443  //all nonzero locations that will be referenced in forming the
1444  //sum.
1445 
1446  if (!A.Filled() ) {
1447  std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() is false, it is required to be true. (Result matrix B is not required to be Filled())."<<std::endl;
1448  EPETRA_CHK_ERR(-1);
1449  }
1450 
1451  //explicit tranpose A formed as necessary
1452  Epetra_CrsMatrix * Aprime = 0;
1453  EpetraExt::RowMatrix_Transpose * Atrans = 0;
1454  if( transposeA )
1455  {
1456  Atrans = new EpetraExt::RowMatrix_Transpose();
1457  Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A)))));
1458  }
1459  else
1460  Aprime = const_cast<Epetra_CrsMatrix*>(&A);
1461 
1462  int MaxNumEntries = EPETRA_MAX( A.MaxNumEntries(), B.MaxNumEntries() );
1463  int A_NumEntries, B_NumEntries;
1464  int_type * A_Indices = new int_type[MaxNumEntries];
1465  double * A_Values = new double[MaxNumEntries];
1466  int* B_Indices_local;
1467  int_type* B_Indices_global;
1468  double* B_Values;
1469 
1470  int NumMyRows = B.NumMyRows();
1471  int_type Row;
1472  int err;
1473  int ierr = 0;
1474 
1475  if( scalarA )
1476  {
1477  //Loop over B's rows and sum into
1478  for( int i = 0; i < NumMyRows; ++i )
1479  {
1480  Row = (int_type) B.GRID64(i);
1481  EPETRA_CHK_ERR( Aprime->ExtractGlobalRowCopy( Row, MaxNumEntries, A_NumEntries, A_Values, A_Indices ) );
1482 
1483  if (scalarB != 1.0) {
1484  if (!B.Filled()) {
1485  EPETRA_CHK_ERR( B.ExtractGlobalRowView( Row, B_NumEntries,
1486  B_Values, B_Indices_global));
1487  }
1488  else {
1489  EPETRA_CHK_ERR( B.ExtractMyRowView( i, B_NumEntries,
1490  B_Values, B_Indices_local));
1491  }
1492 
1493  for(int jj=0; jj<B_NumEntries; ++jj) {
1494  B_Values[jj] = scalarB*B_Values[jj];
1495  }
1496  }
1497 
1498  if( scalarA != 1.0 ) {
1499  for( int j = 0; j < A_NumEntries; ++j ) A_Values[j] *= scalarA;
1500  }
1501 
1502  if( B.Filled() ) {//Sum In Values
1503  err = B.SumIntoGlobalValues( Row, A_NumEntries, A_Values, A_Indices );
1504  assert( err >= 0 );
1505  if (err < 0) ierr = err;
1506  }
1507  else {
1508  err = B.InsertGlobalValues( Row, A_NumEntries, A_Values, A_Indices );
1509  assert( err == 0 || err == 1 || err == 3 );
1510  if (err < 0) ierr = err;
1511  }
1512  }
1513  }
1514  else {
1515  EPETRA_CHK_ERR( B.Scale(scalarB) );
1516  }
1517 
1518  delete [] A_Indices;
1519  delete [] A_Values;
1520 
1521  if( Atrans ) delete Atrans;
1522 
1523  return(ierr);
1524 }
1525 
1526 int MatrixMatrix::Add(const Epetra_CrsMatrix& A,
1527  bool transposeA,
1528  double scalarA,
1529  Epetra_CrsMatrix& B,
1530  double scalarB )
1531 {
1532 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
1533  if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
1534  return TAdd<int>(A, transposeA, scalarA, B, scalarB);
1535  }
1536  else
1537 #endif
1538 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
1539  if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
1540  return TAdd<long long>(A, transposeA, scalarA, B, scalarB);
1541  }
1542  else
1543 #endif
1544  throw std::runtime_error("EpetraExt::MatrixMatrix::Add: GlobalIndices type unknown");
1545 }
1546 
1547 template<typename int_type>
1548 int MatrixMatrix::TAdd(const Epetra_CrsMatrix& A,
1549  bool transposeA,
1550  double scalarA,
1551  const Epetra_CrsMatrix & B,
1552  bool transposeB,
1553  double scalarB,
1554  Epetra_CrsMatrix * & C)
1555 {
1556  //
1557  //This method forms the matrix-matrix sum C = scalarA * op(A) + scalarB * op(B), where
1558 
1559  //A and B should already be Filled. C should be an empty pointer.
1560 
1561  if (!A.Filled() || !B.Filled() ) {
1562  std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() or B.Filled() is false,"
1563  << "they are required to be true. (Result matrix C should be an empty pointer)" << std::endl;
1564  EPETRA_CHK_ERR(-1);
1565  }
1566 
1567  Epetra_CrsMatrix * Aprime = 0, * Bprime=0;
1568  EpetraExt::RowMatrix_Transpose * Atrans = 0,* Btrans = 0;
1569 
1570  //explicit tranpose A formed as necessary
1571  if( transposeA ) {
1572  Atrans = new EpetraExt::RowMatrix_Transpose();
1573  Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A)))));
1574  }
1575  else
1576  Aprime = const_cast<Epetra_CrsMatrix*>(&A);
1577 
1578  //explicit tranpose B formed as necessary
1579  if( transposeB ) {
1580  Btrans = new EpetraExt::RowMatrix_Transpose();
1581  Bprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Btrans)(const_cast<Epetra_CrsMatrix&>(B)))));
1582  }
1583  else
1584  Bprime = const_cast<Epetra_CrsMatrix*>(&B);
1585 
1586  // allocate or zero the new matrix
1587  if(C!=0)
1588  C->PutScalar(0.0);
1589  else
1590  C = new Epetra_CrsMatrix(Copy,Aprime->RowMap(),0);
1591 
1592  // build arrays for easy resuse
1593  int ierr = 0;
1594  Epetra_CrsMatrix * Mat[] = { Aprime,Bprime};
1595  double scalar[] = { scalarA, scalarB};
1596 
1597  // do a loop over each matrix to add: A reordering might be more efficient
1598  for(int k=0;k<2;k++) {
1599  int MaxNumEntries = Mat[k]->MaxNumEntries();
1600  int NumEntries;
1601  int_type * Indices = new int_type[MaxNumEntries];
1602  double * Values = new double[MaxNumEntries];
1603 
1604  int NumMyRows = Mat[k]->NumMyRows();
1605  int err;
1606  int_type Row;
1607 
1608  //Loop over rows and sum into C
1609  for( int i = 0; i < NumMyRows; ++i ) {
1610  Row = (int_type) Mat[k]->GRID64(i);
1611  EPETRA_CHK_ERR( Mat[k]->ExtractGlobalRowCopy( Row, MaxNumEntries, NumEntries, Values, Indices));
1612 
1613  if( scalar[k] != 1.0 )
1614  for( int j = 0; j < NumEntries; ++j ) Values[j] *= scalar[k];
1615 
1616  if(C->Filled()) { // Sum in values
1617  err = C->SumIntoGlobalValues( Row, NumEntries, Values, Indices );
1618  if (err < 0) ierr = err;
1619  } else { // just add it to the unfilled CRS Matrix
1620  err = C->InsertGlobalValues( Row, NumEntries, Values, Indices );
1621  if (err < 0) ierr = err;
1622  }
1623  }
1624 
1625  delete [] Indices;
1626  delete [] Values;
1627  }
1628 
1629  if( Atrans ) delete Atrans;
1630  if( Btrans ) delete Btrans;
1631 
1632  return(ierr);
1633 }
1634 
1635 int MatrixMatrix::Add(const Epetra_CrsMatrix& A,
1636  bool transposeA,
1637  double scalarA,
1638  const Epetra_CrsMatrix & B,
1639  bool transposeB,
1640  double scalarB,
1641  Epetra_CrsMatrix * & C)
1642 {
1643 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
1644  if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
1645  return TAdd<int>(A, transposeA, scalarA, B, transposeB, scalarB, C);
1646  }
1647  else
1648 #endif
1649 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
1650  if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
1651  return TAdd<long long>(A, transposeA, scalarA, B, transposeB, scalarB, C);
1652  }
1653  else
1654 #endif
1655  throw std::runtime_error("EpetraExt::MatrixMatrix::Add: GlobalIndices type unknown");
1656 }
1657 
1658 
1659 
1660 //=========================================================================
1661 template<typename int_type>
1662 int MatrixMatrix::TJacobi(double omega,
1663  const Epetra_Vector & Dinv,
1664  const Epetra_CrsMatrix& A,
1665  const Epetra_CrsMatrix& B,
1666  Epetra_CrsMatrix& C,
1667  bool call_FillComplete_on_result)
1668 {
1669 #ifdef ENABLE_MMM_TIMINGS
1670  using Teuchos::TimeMonitor;
1671  Teuchos::RCP<TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi All Setup")));
1672 #endif
1673 
1674  //A and B should already be Filled.
1675  if (!A.Filled() || !B.Filled()) {
1676  EPETRA_CHK_ERR(-1);
1677  }
1678 
1679  //now check size compatibility
1680  long long Aouter = A.NumGlobalRows64();
1681  long long Bouter = B.NumGlobalCols64();
1682  long long Ainner = A.NumGlobalCols64();
1683  long long Binner = B.NumGlobalRows64();
1684  long long Dlen = Dinv.GlobalLength64();
1685  if (Ainner != Binner) {
1686  std::cerr << "MatrixMatrix::Jacobi: ERROR, inner dimensions of A and B "
1687  << "must match for matrix-matrix product. A is "
1688  <<Aouter<<"x"<<Ainner << ", B is "<<Binner<<"x"<<Bouter<<std::endl;
1689  return(-1);
1690  }
1691 
1692  //The result matrix C must at least have a row-map that reflects the
1693  //correct row-size. Don't check the number of columns because rectangular
1694  //matrices which were constructed with only one map can still end up
1695  //having the correct capacity and dimensions when filled.
1696  if (Aouter > C.NumGlobalRows64()) {
1697  std::cerr << "MatrixMatrix::Jacobi: ERROR, dimensions of result C must "
1698  << "match dimensions of A * B. C has "<<C.NumGlobalRows64()
1699  << " rows, should have at least "<<Aouter << std::endl;
1700  return(-1);
1701  }
1702 
1703  // Check against the D matrix
1704  if(Dlen != Aouter) {
1705  std::cerr << "MatrixMatrix::Jacboi: ERROR, dimensions of result D must "
1706  << "match dimensions of A's rows. D has "<< Dlen
1707  << " rows, should have " << Aouter << std::endl;
1708  return(-1);
1709  }
1710 
1711  if(!A.RowMap().SameAs(B.RowMap()) || !A.RowMap().SameAs(Dinv.Map())) {
1712  std::cerr << "MatrixMatrix::Jacboi: ERROR, RowMap of A must match RowMap of B "
1713  << "and Map of D."<<std::endl;
1714  return(-1);
1715  }
1716 
1717  //It doesn't matter whether C is already Filled or not. If it is already
1718  //Filled, it must have space allocated for the positions that will be
1719  //referenced in forming C. If it doesn't have enough space,
1720  //we'll error out later when trying to store result values.
1721 
1722  //We're going to need to import remotely-owned sections of A and/or B
1723  //if more than 1 processor is performing this run, depending on the scenario.
1724  int numProcs = A.Comm().NumProc();
1725 
1726  // Maps
1727  const Epetra_Map* rowmap_A = &(A.RowMap());
1728  const Epetra_Map* rowmap_B = &(B.RowMap());
1729 
1730 
1731 
1732  //Declare some 'work-space' maps which may be created depending on
1733  //the scenario, and which will be deleted before exiting this function.
1734  const Epetra_Map* workmap1 = NULL;
1735  const Epetra_Map* workmap2 = NULL;
1736  const Epetra_Map* mapunion1 = NULL;
1737 
1738  //Declare a couple of structs that will be used to hold views of the data
1739  //of A and B, to be used for fast access during the matrix-multiplication.
1740  CrsMatrixStruct Aview;
1741  CrsMatrixStruct Bview;
1742 
1743  const Epetra_Map* targetMap_A = rowmap_A;
1744  const Epetra_Map* targetMap_B = rowmap_B;
1745 
1746 #ifdef ENABLE_MMM_TIMINGS
1747  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi All I&X")));
1748 #endif
1749 
1750  //Now import any needed remote rows and populate the Aview struct.
1751  if(call_FillComplete_on_result) {
1752  EPETRA_CHK_ERR(import_only<int_type>(A,*targetMap_A,Aview));
1753  }
1754  else {
1755  EPETRA_CHK_ERR( import_and_extract_views<int_type>(A, *targetMap_A, Aview));
1756  }
1757 
1758  // NOTE: Next up is to switch to import_only for B as well, and then modify the THREE SerialCores
1759  // to add a Acol2Brow and Acol2Bimportrow array for in-algorithm lookups.
1760 
1761  // Make sure B's views are consistent with A even in serial.
1762  const Epetra_Map* colmap_op_A = NULL;
1763  if(numProcs > 1){
1764  colmap_op_A = &(A.ColMap());
1765  targetMap_B = colmap_op_A;
1766  }
1767 
1768  //Now import any needed remote rows and populate the Bview struct.
1769  if(call_FillComplete_on_result) {
1770  EPETRA_CHK_ERR(import_only<int_type>(B,*targetMap_B,Bview,A.Importer()));
1771  }
1772  else {
1773  EPETRA_CHK_ERR( import_and_extract_views<int_type>(B, *targetMap_B, Bview) );
1774  }
1775 
1776 #ifdef ENABLE_MMM_TIMINGS
1777  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi All Multiply")));
1778 #endif
1779 
1780  // Zero if filled
1781  if(C.Filled()) C.PutScalar(0.0);
1782 
1783  //Now call the appropriate method to perform the actual multiplication.
1784  CrsWrapper_Epetra_CrsMatrix ecrsmat(C);
1785  EPETRA_CHK_ERR( jacobi_A_B(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result) );
1786 
1787  //Finally, delete the objects that were potentially created
1788  //during the course of importing remote sections of A and B.
1789  delete mapunion1; mapunion1 = NULL;
1790  delete workmap1; workmap1 = NULL;
1791  delete workmap2; workmap2 = NULL;
1792 
1793  return(0);
1794 }
1795 
1796 
1797 
1798 int MatrixMatrix::Jacobi(double omega,
1799  const Epetra_Vector & Dinv,
1800  const Epetra_CrsMatrix& A,
1801  const Epetra_CrsMatrix& B,
1802  Epetra_CrsMatrix& C,
1803  bool call_FillComplete_on_result)
1804 {
1805 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
1806  if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
1807  return TJacobi<int>(omega, Dinv, A, B, C, call_FillComplete_on_result);
1808  }
1809  else
1810 #endif
1811 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
1812  if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
1813  return TJacobi<long long>(omega, Dinv, A, B, C, call_FillComplete_on_result);
1814  }
1815  else
1816 #endif
1817  throw std::runtime_error("EpetraExt::MatrixMatrix::Jacobi: GlobalIndices type unknown");
1818 }
1819 
1820 
1821 
1822 
1823 
1824 
1825 
1826 
1827 
1828 
1829 } // namespace EpetraExt
1830 
LightweightCrsMatrix * importMatrix
int mult_Atrans_Btrans(CrsMatrixStruct &Aview, CrsMatrixStruct &Bview, CrsWrapper &C)
std::vector< int > targetMapToOrigRow
virtual int SumIntoGlobalValues(int GlobalRow, int NumEntries, double *Values, int *Indices)=0
void MyGlobalElementsPtr(int *&MyGlobalElementList) const
static int Multiply(const Epetra_CrsMatrix &A, bool transposeA, const Epetra_CrsMatrix &B, bool transposeB, Epetra_CrsMatrix &C, bool call_FillComplete_on_result=true)
Given Epetra_CrsMatrix objects A, B and C, form the product C = A*B.
void pack_outgoing_rows(const Epetra_CrsMatrix &mtx, const std::vector< int > &proc_col_ranges, std::vector< int > &send_rows, std::vector< int > &rows_per_send_proc)
virtual bool Filled()=0
virtual int InsertGlobalValues(int GlobalRow, int NumEntries, double *Values, int *Indices)=0
const Epetra_CrsMatrix * origMatrix
Transform to form the explicit transpose of a Epetra_RowMatrix.
EpetraExt::BlockCrsMatrix: A class for constructing a distributed block matrix.
Epetra_Map * find_rows_containing_cols(const Epetra_CrsMatrix &M, const Epetra_Map &column_map)
int mult_A_Btrans(CrsMatrixStruct &Aview, CrsMatrixStruct &Bview, CrsWrapper &C)
int form_map_union(const Epetra_Map *map1, const Epetra_Map *map2, const Epetra_Map *&mapunion)
int import_and_extract_views(const Epetra_CrsMatrix &M, const Epetra_Map &targetMap, CrsMatrixStruct &Mview, const Epetra_Import *prototypeImporter=0)
std::vector< int > targetMapToImportRow
virtual const Epetra_Map & RowMap() const =0
int mult_Atrans_B(CrsMatrixStruct &Aview, CrsMatrixStruct &Bview, CrsWrapper &C)
static int Add(const Epetra_CrsMatrix &A, bool transposeA, double scalarA, Epetra_CrsMatrix &B, double scalarB)
Given Epetra_CrsMatrix objects A and B, form the sum B = a*A + b*B.
long long IndexBase64() const
double sparsedot(double *u, int_type *u_ind, int u_len, double *v, int_type *v_ind, int v_len)
Method for internal use...
static int Jacobi(double omega, const Epetra_Vector &Dinv, const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, Epetra_CrsMatrix &C, bool call_FillComplete_on_result=true)
Given Epetra_CrsMatrix objects A, B and C, and Epetra_Vector Dinv, form the product C = (I-omega * Di...
int import_only(const Epetra_CrsMatrix &M, const Epetra_Map &targetMap, CrsMatrixStruct &Mview, const Epetra_Import *prototypeImporter=0)
Epetra_Map * Tfind_rows_containing_cols(const Epetra_CrsMatrix &M, const Epetra_Map &column_map)
Epetra_CrsMatrix * CreateTransposeLocal(OriginalTypeRef orig)
Local-only transpose operator. Don&#39;t use this unless you&#39;re sure you know what you&#39;re doing...
const Epetra_BlockMap * importColMap