EpetraExt  Development
EpetraExt_MatrixMatrix_mult_A_B.cpp
Go to the documentation of this file.
1 //@HEADER
2 // ***********************************************************************
3 //
4 // EpetraExt: Epetra Extended - Linear Algebra Services Package
5 // Copyright (2011) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 
42 #include <EpetraExt_ConfigDefs.h>
43 #include <EpetraExt_MatrixMatrix.h>
44 
45 #include <EpetraExt_MMHelpers.h>
46 
48 
49 #include <Epetra_Export.h>
50 #include <Epetra_Import.h>
51 #include <Epetra_Util.h>
52 #include <Epetra_Map.h>
53 #include <Epetra_Comm.h>
54 #include <Epetra_CrsMatrix.h>
55 #include <Epetra_Vector.h>
56 #include <Epetra_Directory.h>
57 #include <Epetra_HashTable.h>
58 #include <Epetra_Distributor.h>
59 #include <Epetra_IntSerialDenseVector.h>
60 
61 #ifdef HAVE_VECTOR
62 #include <vector>
63 #endif
64 
65 #ifdef HAVE_MPI
66 #include <Epetra_MpiDistributor.h>
67 #endif
68 
69 
70 #include <Teuchos_TimeMonitor.hpp>
71 
72 namespace EpetraExt {
73 
74 /*****************************************************************************/
75 /*****************************************************************************/
76 /*****************************************************************************/
77 static inline int C_estimate_nnz(const Epetra_CrsMatrix & A, const Epetra_CrsMatrix &B){
78  // Follows the NZ estimate in ML's ml_matmatmult.c
79  int Aest=(A.NumMyRows()>0)? A.NumMyNonzeros()/A.NumMyRows():100;
80  int Best=(B.NumMyRows()>0)? B.NumMyNonzeros()/B.NumMyRows():100;
81 
82  int nnzperrow=(int)(sqrt((double)Aest) + sqrt((double)Best) - 1);
83  nnzperrow*=nnzperrow;
84 
85  return (int)(A.NumMyRows()*nnzperrow*0.75 + 100);
86 }
87 
88 // Commented out unused, file-local function.
89 #if 0
90 /*****************************************************************************/
91 /*****************************************************************************/
92 /*****************************************************************************/
93 static inline int auto_resize(std::vector<int> &x,int num_new){
94  int newsize=x.size() + EPETRA_MAX((int)x.size(),num_new);
95  x.resize(newsize);
96  return newsize;
97 }
98 #endif // 0
99 
100 /*****************************************************************************/
101 /*****************************************************************************/
102 /*****************************************************************************/
103 template<typename int_type>
104 int aztecoo_and_ml_compatible_map_union(const Epetra_CrsMatrix &B, const LightweightCrsMatrix &Bimport, Epetra_Map*& unionmap, std::vector<int>& Cremotepids,
105  std::vector<int> &Bcols2Ccols, std::vector<int> &Icols2Ccols)
106 {
107 #ifdef HAVE_MPI
108 
109 #ifdef ENABLE_MMM_TIMINGS
110  using Teuchos::TimeMonitor;
111  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 1")));
112 #endif
113 
114  // So we need to merge the ColMap of B and the TargetMap of Bimport in an AztecOO/Ifpack/ML compliant order.
115  Epetra_Util util;
116  int i,j,MyPID = B.Comm().MyPID(), NumProc = B.Comm().NumProc();
117  int Bstart=0, Istart=0, Cstart=0,Pstart=0;
118 
119  const Epetra_Map & BColMap = B.ColMap();
120  const Epetra_Map & DomainMap = B.DomainMap();
121  const LightweightMap & IColMap = Bimport.ColMap_;
122 
123  int Nb = BColMap.NumMyElements();
124  int_type * Bgids = 0;
125  BColMap.MyGlobalElementsPtr(Bgids);
126  int Ni = IColMap.NumMyElements();
127  int_type * Igids = 0;
128  if(Ni>0)
129  IColMap.MyGlobalElementsPtr(Igids);
130 
131  if((int)Bcols2Ccols.size() != Nb) Bcols2Ccols.resize(Nb);
132  if((int)Icols2Ccols.size() != Ni) Icols2Ccols.resize(Ni);
133 
134  // Since we're getting called, we know we have to be using an MPI implementation of Epetra.
135  // Which means we should have an MpiDistributor for both B and Bimport.
136  // Unless all of B's columns are owned by the calling proc (e.g. MueLu for A*Ptent w/ uncoupled aggregation)
137  Epetra_MpiDistributor *Distor=0;
138  if(B.Importer()) {
139  Distor=dynamic_cast<Epetra_MpiDistributor*>(&B.Importer()->Distributor());
140  if(!Distor) EPETRA_CHK_ERR(-2);
141  }
142 
143  // **********************
144  // Stage 1: Get the owning PIDs
145  // **********************
146  // Note: if B doesn't have an importer, the calling proc owns all its colids...
147  std::vector<int> Bpids(Nb), Ipids(Ni);
148  if(B.Importer()) {EPETRA_CHK_ERR(util.GetPids(*B.Importer(),Bpids,true));}
149  else Bpids.assign(Nb,-1);
150 
151  if(Ni != (int) Bimport.ColMapOwningPIDs_.size()) {
152  EPETRA_CHK_ERR(-21);
153  }
154  for(i=0;i<Ni;i++){
155  Ipids[i] = (Bimport.ColMapOwningPIDs_[i]==MyPID)?(-1):(Bimport.ColMapOwningPIDs_[i]);
156  }
157 
158  // **********************
159  // Stage 2: Allocate memory (make things too big)
160  // **********************
161  int Csize=Nb+Ni;
162  int Psize=Nb+Ni;
163  std::vector<int_type> Cgids(Csize);
164  Cremotepids.resize(Psize);
165 
166 #ifdef ENABLE_MMM_TIMINGS
167  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 2")));
168 #endif
169 
170  // **********************
171  // Stage 3: Local Unknowns
172  // **********************
173  if(!B.Importer() || (B.Importer()->NumSameIDs() == DomainMap.NumMyElements())) {
174  // B's colmap has all of the domain elements. We can just copy those into the start of the array.
175  DomainMap.MyGlobalElements(Cgids.size() ? &Cgids[0] : 0);
176  Cstart=DomainMap.NumMyElements();
177  Bstart=DomainMap.NumMyElements();
178 
179  for(i=0; i<DomainMap.NumMyElements(); i++) Bcols2Ccols[i] = i;
180  }
181  else {
182  // There are more entries in the DomainMap than B's ColMap. So we stream through both B and Bimport for the copy.
183  int NumDomainElements = DomainMap.NumMyElements();
184  for(i = 0; i < NumDomainElements; i++) {
185  int_type GID = (int_type) DomainMap.GID64(i);
186  int LID = BColMap.LID(GID);
187  // B has this guy
188  if(LID!=-1) {
189  Bcols2Ccols[LID]=Cstart;
190  Cgids[Cstart] = GID;
191  Cstart++;
192  Bstart++;
193  }
194  else {
195  // B import has this guy
196  LID = IColMap.LID(GID);
197  if(LID!=-1) {
198  Icols2Ccols[LID]=Cstart;
199  Cgids[Cstart] = GID;
200  Cstart++;
201  }
202  }
203  }
204  }
205 
206  // Now advance Ilast to the last owned unknown in Bimport
207  for(i=0,j=0; i<Ni && Ipids[i]==-1; i++) {
208  while(Cgids[j]!=Igids[i]) j++;
209  Icols2Ccols[i]=j;
210  }
211  Istart=i;
212 
213 
214 #ifdef ENABLE_MMM_TIMINGS
215  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 3")));
216 #endif
217 
218 
219  // **********************
220  // Stage 4: Processor-by-processor set_union
221  // **********************
222  // NOTE: Intial sizes for Btemp/Itemp from B's distributor. This should be exact for Btemp and a decent guess for Itemp
223  int initial_temp_length = 0;
224  const int * lengths_from=0;
225  if(Distor) {
226  lengths_from= Distor->LengthsFrom();
227  for(i=0; i < Distor->NumReceives(); i++) initial_temp_length += lengths_from[i];
228  }
229  else initial_temp_length=100;
230 
231  std::vector<int_type> Btemp(initial_temp_length), Itemp(initial_temp_length);
232  std::vector<int> Btemp2(initial_temp_length), Itemp2(initial_temp_length);
233 
234 
235  while (Bstart < Nb || Istart < Ni) {
236  int Bproc=NumProc+1, Iproc=NumProc+1, Cproc;
237 
238  // Find the next active processor ID
239  if(Bstart < Nb) Bproc=Bpids[Bstart];
240  if(Istart < Ni) Iproc=Ipids[Istart];
241 
242  Cproc = (Bproc < Iproc)?Bproc:Iproc;
243 
244  if(Bproc == Cproc && Iproc != Cproc) {
245  // Only B has this processor. Copy the data.
246  // B: Find the beginning of the next processor
247  for(i=Bstart; i<Nb && Bpids[i]==Bproc; i++) {}
248  int Bnext=i;
249 
250  // Copy data to C
251  int tCsize = Bnext-Bstart;
252  if(Btemp.size() < (size_t)tCsize) {Btemp2.resize(tCsize);}
253 
254  for(i=Bstart; i<Bnext; i++) {
255  Cremotepids[i-Bstart+Pstart] = Cproc;
256  Cgids[i-Bstart+Cstart] = Bgids[i];
257  Btemp2[i-Bstart] = i;
258  }
259 
260  // Sort & record reindexing
261  int *Bptr2 = Btemp2.size() ? &Btemp2[0] : 0;
262  util.Sort(true, tCsize, &Cgids[Cstart], 0, 0, 1, &Bptr2, 0, 0);
263 
264  for(i=0, j=Cstart; i<tCsize; i++){
265  while(Cgids[j] != Bgids[Btemp2[i]]) j++;
266  Bcols2Ccols[Btemp2[i]] = j;
267  }
268  Cstart+=tCsize;
269  Pstart+=tCsize;
270  Bstart=Bnext;
271  }
272  else if(Bproc != Cproc && Iproc == Cproc) {
273  // Only I has this processor. Copy the data.
274  // I: Find the beginning of the next processor
275  for(i=Istart; i<Ni && Ipids[i]==Iproc; i++) {}
276  int Inext=i;
277 
278  // Copy data to C
279  int tCsize = Inext-Istart;
280  if(Itemp.size() < (size_t)tCsize) {Itemp2.resize(tCsize);}
281 
282  for(i=Istart; i<Inext; i++) {
283  Cremotepids[i-Istart+Pstart] = Cproc;
284  Cgids[i-Istart+Cstart] = Igids[i];
285  Itemp2[i-Istart] = i;
286  }
287 
288  // Sort & record reindexing
289  int *Iptr2 = Itemp2.size() ? &Itemp2[0] : 0;
290  util.Sort(true, tCsize, &Cgids[Cstart], 0, 0, 1, &Iptr2, 0, 0);
291 
292  for(i=0, j=Cstart; i<tCsize; i++){
293  while(Cgids[j] != Igids[Itemp2[i]]) j++;
294  Icols2Ccols[Itemp2[i]] = j;
295  }
296  Cstart+=tCsize;
297  Pstart+=tCsize;
298  Istart=Inext;
299  }
300  else {
301  // Both B and I have this processor, so we need to do a set_union. So we need to sort.
302  int Bnext, Inext;
303  // B: Find the beginning of the next processor
304  for(i=Bstart; i<Nb && Bpids[i]==Bproc; i++) {}
305  Bnext=i;
306 
307  // I: Find the beginning of the next processor
308  for(i=Istart; i<Ni && Ipids[i]==Iproc; i++) {}
309  Inext=i;
310 
311  // Copy data to temp
312  int tBsize = Bnext-Bstart;
313  int tIsize = Inext-Istart;
314  // int tCsize = tBsize+tIsize;
315  if(Btemp.size() < (size_t)tBsize) {Btemp.resize(tBsize); Btemp2.resize(tBsize);}
316  if(Itemp.size() < (size_t)tIsize) {Itemp.resize(tIsize); Itemp2.resize(tIsize);}
317 
318  for(i=Bstart; i<Bnext; i++) {Btemp[i-Bstart]=Bgids[i]; Btemp2[i-Bstart]=i;}
319  for(i=Istart; i<Inext; i++) {Itemp[i-Istart]=Igids[i]; Itemp2[i-Istart]=i;}
320 
321  // Sort & set_union
322  int *Bptr2 = Btemp2.size() ? &Btemp2[0] : 0; int *Iptr2 = Itemp2.size() ? &Itemp2[0] : 0;
323  util.Sort(true, tBsize, Btemp.size() ? &Btemp[0] : 0, 0, 0, 1, &Bptr2, 0, 0);
324  util.Sort(true, tIsize, Itemp.size() ? &Itemp[0] : 0, 0, 0, 1, &Iptr2, 0, 0);
325  typename std::vector<int_type>::iterator mycstart = Cgids.begin()+Cstart;
326  typename std::vector<int_type>::iterator last_el=std::set_union(Btemp.begin(),Btemp.begin()+tBsize,Itemp.begin(),Itemp.begin()+tIsize,mycstart);
327 
328  for(i=0, j=Cstart; i<tBsize; i++){
329  while(Cgids[j] != Bgids[Btemp2[i]]) j++;
330  Bcols2Ccols[Btemp2[i]] = j;
331  }
332 
333  for(i=0, j=Cstart; i<tIsize; i++){
334  while(Cgids[j] != Igids[Itemp2[i]]) j++;
335  Icols2Ccols[Itemp2[i]] = j;
336  }
337 
338  for(i=Pstart; i<(last_el - mycstart) + Pstart; i++) Cremotepids[i]=Cproc;
339  Cstart = (last_el - mycstart) + Cstart;
340  Pstart = (last_el - mycstart) + Pstart;
341  Bstart=Bnext;
342  Istart=Inext;
343  }
344  } // end while
345 
346 #ifdef ENABLE_MMM_TIMINGS
347  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 4")));
348 #endif
349 
350  // Resize the RemotePIDs down
351  Cremotepids.resize(Pstart);
352 
353  // **********************
354  // Stage 5: Call constructor
355  // **********************
356  // Make the map
357  unionmap=new Epetra_Map((int_type) -1,Cstart,Cgids.size() ? &Cgids[0] : 0, (int_type) B.ColMap().IndexBase64(),
358  B.Comm(),B.ColMap().DistributedGlobal(),(int_type) B.ColMap().MinAllGID64(),(int_type) B.ColMap().MaxAllGID64());
359  return 0;
360 #else
361  return -1;
362 #endif
363  }
364 
365 /*****************************************************************************/
366 /*****************************************************************************/
367 /*****************************************************************************/
368 // Provide a "resize" operation for double*'s.
369 inline void resize_doubles(int nold,int nnew,double*& d){
370  if(nnew > nold){
371  double *tmp = new double[nnew];
372  for(int i=0; i<nold; i++)
373  tmp[i]=d[i];
374  delete [] d;
375  d=tmp;
376  }
377 }
378 
379 
380 /*****************************************************************************/
381 /*****************************************************************************/
382 /*****************************************************************************/
383 template<typename int_type>
384 int mult_A_B_newmatrix(const Epetra_CrsMatrix & A,
385  const Epetra_CrsMatrix & B,
386  const CrsMatrixStruct& Bview,
387  std::vector<int> & Bcol2Ccol,
388  std::vector<int> & Bimportcol2Ccol,
389  std::vector<int>& Cremotepids,
390  Epetra_CrsMatrix& C
391 ){
392 #ifdef ENABLE_MMM_TIMINGS
393  using Teuchos::TimeMonitor;
394  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix SerialCore")));
395 #endif
396 
397  // *****************************
398  // Improved Parallel Gustavson in Local IDs
399  // *****************************
400  const Epetra_Map * colmap_C = &(C.ColMap());
401 
402 
403  int NumMyDiagonals=0; // Counter to speed up ESFC
404 
405  int m=A.NumMyRows();
406  int n=colmap_C->NumMyElements();
407  int i,j,k;
408 
409  // DataPointers for A
410  int *Arowptr, *Acolind;
411  double *Avals;
412  EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
413 
414  // DataPointers for B, Bimport
415  int *Browptr, *Bcolind;
416  double *Bvals;
417  EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
418 
419  int *Irowptr=0, *Icolind=0;
420  double *Ivals=0;
421  if(Bview.importMatrix){
422  Irowptr = &Bview.importMatrix->rowptr_[0];
423  Icolind = (Bview.importMatrix->colind_.size()>0)?(&Bview.importMatrix->colind_[0]):0;
424  Ivals = (Bview.importMatrix->vals_.size()>0)?(&Bview.importMatrix->vals_[0]):0;
425  }
426 
427  // MemorySetup: If somebody else is sharing this C's graphdata, make a new one.
428  // This is needed because I'm about to walk all over the CrsGrapData...
429  C.ExpertMakeUniqueCrsGraphData();
430 
431  // The status array will contain the index into colind where this entry was last deposited.
432  // c_status[i] < CSR_ip - not in the row yet.
433  // c_status[i] >= CSR_ip, this is the entry where you can find the data
434  // We start with this filled with -1's indicating that there are no entries yet.
435  std::vector<int> c_status(n, -1);
436 
437  // Classic csr assembly (low memory edition)
438  int CSR_alloc=C_estimate_nnz(A,B);
439  if(CSR_alloc < n) CSR_alloc = n;
440  int CSR_ip=0,OLD_ip=0;
441  Epetra_IntSerialDenseVector & CSR_rowptr = C.ExpertExtractIndexOffset();
442  Epetra_IntSerialDenseVector & CSR_colind = C.ExpertExtractIndices();
443  double *& CSR_vals = C.ExpertExtractValues();
444 
445  CSR_rowptr.Resize(m+1);
446  CSR_colind.Resize(CSR_alloc);
447  resize_doubles(0,CSR_alloc,CSR_vals);
448 
449  // Static Profile stuff
450  std::vector<int> NumEntriesPerRow(m);
451 
452  // For each row of A/C
453  for(i=0; i<m; i++){
454  bool found_diagonal=false;
455  CSR_rowptr[i]=CSR_ip;
456 
457  for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
458  int Ak = Acolind[k];
459  double Aval = Avals[k];
460  if(Aval==0) continue;
461 
462  if(Bview.targetMapToOrigRow[Ak] != -1){
463  // Local matrix
464  int Bk = Bview.targetMapToOrigRow[Ak];
465  for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
466  int Cj=Bcol2Ccol[Bcolind[j]];
467 
468  if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;}
469 
470  if(c_status[Cj]<OLD_ip){
471  // New entry
472  c_status[Cj]=CSR_ip;
473  CSR_colind[CSR_ip]=Cj;
474  CSR_vals[CSR_ip]=Aval*Bvals[j];
475  CSR_ip++;
476  }
477  else
478  CSR_vals[c_status[Cj]]+=Aval*Bvals[j];
479  }
480  }
481  else{
482  // Remote matrix
483  int Ik = Bview.targetMapToImportRow[Ak];
484  for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
485  int Cj=Bimportcol2Ccol[Icolind[j]];
486 
487  if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;}
488 
489  if(c_status[Cj]<OLD_ip){
490  // New entry
491  c_status[Cj]=CSR_ip;
492  CSR_colind[CSR_ip]=Cj;
493  CSR_vals[CSR_ip]=Aval*Ivals[j];
494  CSR_ip++;
495  }
496  else
497  CSR_vals[c_status[Cj]]+=Aval*Ivals[j];
498  }
499  }
500  }
501  NumEntriesPerRow[i]=CSR_ip-CSR_rowptr[i];
502 
503  // Resize for next pass if needed
504  if(CSR_ip + n > CSR_alloc){
505  resize_doubles(CSR_alloc,2*CSR_alloc,CSR_vals);
506  CSR_alloc*=2;
507  CSR_colind.Resize(CSR_alloc);
508  }
509  OLD_ip=CSR_ip;
510  }
511 
512  CSR_rowptr[m]=CSR_ip;
513 
514 #ifdef ENABLE_MMM_TIMINGS
515  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix Final Sort")));
516 #endif
517 
518  // Sort the entries
519  Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
520 
521 #ifdef ENABLE_MMM_TIMINGS
522  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix Fast IE")));
523 #endif
524 
525  // Do a fast build of C's importer
526  Epetra_Import * Cimport=0;
527  int *RemotePIDs = Cremotepids.size()?&Cremotepids[0]:0;
528  int NumExports=0;
529  int *ExportLIDs=0, *ExportPIDs=0;
530  if(Bview.importMatrix) {
531  NumExports = Bview.importMatrix->ExportLIDs_.size();
532  ExportLIDs = Bview.importMatrix->ExportLIDs_.size()?&Bview.importMatrix->ExportLIDs_[0]:0;
533  ExportPIDs = Bview.importMatrix->ExportPIDs_.size()?&Bview.importMatrix->ExportPIDs_[0]:0;
534  }
535  else if(B.Importer()) {
536  // Grab the exports from B proper
537  NumExports = B.Importer()->NumExportIDs();
538  ExportLIDs = B.Importer()->ExportLIDs();
539  ExportPIDs = B.Importer()->ExportPIDs();
540  }
541 
542 
543  if(B.Importer() && C.ColMap().SameAs(B.ColMap()))
544  Cimport = new Epetra_Import(*B.Importer()); // Because the domain maps are the same
545  else if(!C.ColMap().SameAs(B.DomainMap()))
546  Cimport = new Epetra_Import(C.ColMap(),B.DomainMap(),Cremotepids.size(),RemotePIDs,NumExports,ExportLIDs,ExportPIDs);
547 
548 
549 #ifdef ENABLE_MMM_TIMINGS
550  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix ESFC")));
551 #endif
552 
553  // Update the CrsGraphData
554  C.ExpertStaticFillComplete(B.DomainMap(),A.RangeMap(),Cimport,0,NumMyDiagonals);
555 
556  return 0;
557 }
558 
559 
560 
561 
562 /*****************************************************************************/
563 /*****************************************************************************/
564 /*****************************************************************************/
565 template<typename int_type>
566 int mult_A_B_reuse(const Epetra_CrsMatrix & A,
567  const Epetra_CrsMatrix & B,
568  CrsMatrixStruct& Bview,
569  std::vector<int> & Bcol2Ccol,
570  std::vector<int> & Bimportcol2Ccol,
571  Epetra_CrsMatrix& C){
572 
573 #ifdef ENABLE_MMM_TIMINGS
574  using Teuchos::TimeMonitor;
575  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse SerialCore")));
576 #endif
577 
578  // *****************************
579  // Improved Parallel Gustavson in Local IDs
580  // *****************************
581  const Epetra_Map * colmap_C = &(C.ColMap());
582 
583  int m=A.NumMyRows();
584  int n=colmap_C->NumMyElements();
585  int i,j,k;
586 
587  // DataPointers for A
588  int *Arowptr, *Acolind;
589  double *Avals;
590  EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
591 
592  // DataPointers for B, Bimport
593  int *Browptr, *Bcolind;
594  double *Bvals;
595  EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
596 
597  int *Irowptr=0, *Icolind=0;
598  double *Ivals=0;
599  if(Bview.importMatrix){
600  Irowptr = &Bview.importMatrix->rowptr_[0];
601  Icolind = &Bview.importMatrix->colind_[0];
602  Ivals = &Bview.importMatrix->vals_[0];
603  }
604 
605  // DataPointers for C
606  int *CSR_rowptr, *CSR_colind;
607  double *CSR_vals;
608  EPETRA_CHK_ERR(C.ExtractCrsDataPointers(CSR_rowptr,CSR_colind,CSR_vals));
609 
610 
611  // The status array will contain the index into colind where this dude was last deposited.
612  // c_status[i] < CSR_ip - not in the row yet.
613  // c_status[i] >= CSR_ip, this is the entry where you can find the data
614  // We start with this filled with -1's indicating that there are no entries yet.
615  std::vector<int> c_status(n, -1);
616 
617  // Classic csr assembly
618  int CSR_alloc=CSR_rowptr[m] - CSR_rowptr[0];
619  int CSR_ip=0,OLD_ip=0;
620 
621  // For each row of A/C
622  for(i=0; i<m; i++){
623  for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
624  int Ak=Acolind[k];
625  double Aval = Avals[k];
626  if(Aval==0) continue;
627 
628  if(Bview.targetMapToOrigRow[Ak] != -1){
629  // Local matrix
630  int Bk = Bview.targetMapToOrigRow[Ak];
631  for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
632  int Cj=Bcol2Ccol[Bcolind[j]];
633 
634  if(c_status[Cj]<OLD_ip){
635  // New entry
636  if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-13);
637  c_status[Cj]=CSR_ip;
638  CSR_colind[CSR_ip]=Cj;
639  CSR_vals[CSR_ip]=Aval*Bvals[j];
640  CSR_ip++;
641  }
642  else
643  CSR_vals[c_status[Cj]]+=Aval*Bvals[j];
644  }
645  }
646  else{
647  // Remote matrix
648  int Ik = Bview.targetMapToImportRow[Ak];
649  for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
650  int Cj=Bimportcol2Ccol[Icolind[j]];
651 
652  if(c_status[Cj]<OLD_ip){
653  // New entry
654  if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-14);
655  c_status[Cj]=CSR_ip;
656  CSR_colind[CSR_ip]=Cj;
657  CSR_vals[CSR_ip]=Aval*Ivals[j];
658  CSR_ip++;
659  }
660  else
661  CSR_vals[c_status[Cj]]+=Aval*Ivals[j];
662  }
663  }
664  }
665  OLD_ip=CSR_ip;
666  }
667 
668 #ifdef ENABLE_MMM_TIMINGS
669  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse Final Sort")));
670 #endif
671 
672  // Sort the entries
673  Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
674 
675  return 0;
676 }
677 
678 
679 
680 /*****************************************************************************/
681 /*****************************************************************************/
682 /*****************************************************************************/
683 //kernel method for computing the local portion of C = A*B
684 template<typename int_type>
685  int mult_A_B_general(const Epetra_CrsMatrix & A,
686  CrsMatrixStruct & Aview,
687  const Epetra_CrsMatrix & B,
688  CrsMatrixStruct& Bview,
689  Epetra_CrsMatrix& C,
690  bool call_FillComplete_on_result)
691 {
692  int C_firstCol = Bview.colMap->MinLID();
693  int C_lastCol = Bview.colMap->MaxLID();
694 
695  int C_firstCol_import = 0;
696  int C_lastCol_import = -1;
697 
698  int_type* bcols = 0;
699  Bview.colMap->MyGlobalElementsPtr(bcols);
700  int_type* bcols_import = NULL;
701  if (Bview.importMatrix != NULL) {
702  C_firstCol_import = Bview.importMatrix->ColMap_.MinLID();
703  C_lastCol_import = Bview.importMatrix->ColMap_.MaxLID();
704  Bview.importMatrix->ColMap_.MyGlobalElementsPtr(bcols_import);
705  }
706 
707  int C_numCols = C_lastCol - C_firstCol + 1;
708  int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
709 
710  if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
711 
712  // Allocate workspace memory
713  double* dwork = new double[C_numCols];
714  int_type* iwork = new int_type[C_numCols];
715  int_type *c_cols=iwork;
716  double *c_vals=dwork;
717  int *c_index=new int[C_numCols];
718 
719  int i, j, k;
720  bool C_filled=C.Filled();
721 
722  // DataPointers for A
723  int *Arowptr, *Acolind;
724  double *Avals;
725  EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
726 
727  //To form C = A*B we're going to execute this expression:
728  //
729  // C(i,j) = sum_k( A(i,k)*B(k,j) )
730  //
731  //Our goal, of course, is to navigate the data in A and B once, without
732  //performing searches for column-indices, etc.
733 
734  // Mark indices as empty w/ -1
735  for(k=0;k<C_numCols;k++) c_index[k]=-1;
736 
737  //loop over the rows of A.
738  for(i=0; i<A.NumMyRows(); ++i) {
739 
740  int_type global_row = (int_type) Aview.rowMap->GID64(i);
741 
742  //loop across the i-th row of A and for each corresponding row
743  //in B, loop across colums and accumulate product
744  //A(i,k)*B(k,j) into our partial sum quantities C_row_i. In other words,
745  //as we stride across B(k,:) we're calculating updates for row i of the
746  //result matrix C.
747 
748  /* Outline of the revised, ML-inspired algorithm
749 
750  C_{i,j} = \sum_k A_{i,k} B_{k,j}
751 
752  This algorithm uses a "middle product" formulation, with the loop ordering of
753  i, k, j. This means we compute a row of C at a time, but compute partial sums of
754  each entry in row i until we finish the k loop.
755 
756  This algorithm also has a few twists worth documenting.
757 
758  1) The first major twist involves the c_index, c_cols and c_vals arrays. The arrays c_cols
759  and c_vals store the *local* column index and values accumulator respectively. These
760  arrays are allocated to a size equal to the max number of local columns in C, namely C_numcols.
761  The value c_current tells us how many non-zeros we currently have in this row.
762 
763  So how do we take a LCID and find the right accumulator? This is where the c_index array
764  comes in. At the start (and stop) and the i loop, c_index is filled with -1's. Now
765  whenever we find a LCID in the k loop, we first loop at c_index[lcid]. If this value is
766  -1 we haven't seen this entry yet. In which case we add the appropriate stuff to c_cols
767  and c_vals and then set c_index[lcid] to the location of the accumulator (c_current before
768  we increment it). If the value is NOT -1, this tells us the location in the c_vals/c_cols
769  arrays (namely c_index[lcid]) where our accumulator lives.
770 
771  This trick works because we're working with local ids. We can then loop from 0 to c_current
772  and reset c_index to -1's when we're done, only touching the arrays that have changed.
773  While we're at it, we can switch to the global ids so we can call [Insert|SumInto]GlobalValues.
774  Thus, the effect of this trick is to avoid passes over the index array.
775 
776  2) The second major twist involves handling the remote and local components of B separately.
777  (ML doesn't need to do this, because its local ordering scheme is consistent between the local
778  and imported components of B.) Since they have different column maps, they have inconsistent
779  local column ids. This means the "second twist" won't work as stated on both matrices at the
780  same time. While this could be handled any number of ways, I have chosen to do the two parts
781  of B separately to make the code easier to read (and reduce the memory footprint of the MMM).
782  */
783 
784  // Local matrix: Zero Current counts for matrix
785  int c_current=0;
786 
787  // Local matrix: Do the "middle product"
788  for(k=Arowptr[i]; k<Arowptr[i+1]; ++k) {
789  int Ak = Acolind[k];
790  double Aval = Avals[k];
791  // We're skipping remote entries on this pass.
792  if(Bview.remote[Ak] || Aval==0) continue;
793 
794  int* Bcol_inds = Bview.indices[Ak];
795  double* Bvals_k = Bview.values[Ak];
796 
797  for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
798  int col=Bcol_inds[j];
799  if(c_index[col]<0){
800  // We haven't seen this entry before; add it. (In ML, on
801  // the first pass, you haven't seen any of the entries
802  // before, so they are added without the check. Not sure
803  // how much more efficient that would be; depends on branch
804  // prediction. We've favored code readability here.)
805  c_cols[c_current]=col;
806  c_vals[c_current]=Aval*Bvals_k[j];
807  c_index[col]=c_current;
808  c_current++;
809  }
810  else{
811  // We've already seen this entry; accumulate it.
812  c_vals[c_index[col]]+=Aval*Bvals_k[j];
813  }
814  }
815  }
816  // Local matrix: Reset c_index and switch c_cols to GIDs
817  for(k=0; k<c_current; k++){
818  c_index[c_cols[k]]=-1;
819  c_cols[k]=bcols[c_cols[k]]; // Switch from local to global IDs.
820  }
821  // Local matrix: Insert.
822  //
823  // We should check global error results after the algorithm is
824  // through. It's probably safer just to let the algorithm run all
825  // the way through before doing this, since otherwise we have to
826  // remember to free all allocations carefully.
827  //
828  // FIXME (mfh 27 Mar 2015) This code collects error codes, but
829  // doesn't do anything with them. This results in build warnings
830  // (set but unused variable). Thus, I'm commenting out error code
831  // collection for now.
832 #if 0
833  int err = C_filled ?
834  C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols)
835  :
836  C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);
837 #else
838  if (C_filled) {
839  C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols);
840  } else {
841  C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);
842  }
843 #endif // 0
844 
845  // Remote matrix: Zero current counts again for matrix
846  c_current=0;
847 
848  // Remote matrix: Do the "middle product"
849  for(k=Arowptr[i]; k<Arowptr[i+1]; ++k) {
850  int Ak = Acolind[k];
851  double Aval = Avals[k];
852  // We're skipping local entries on this pass.
853  if(!Bview.remote[Ak] || Aval==0) continue;
854 
855  int* Bcol_inds = Bview.indices[Ak];
856  double* Bvals_k = Bview.values[Ak];
857 
858  for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
859  int col=Bcol_inds[j];
860  if(c_index[col]<0){
861  c_cols[c_current]=col;
862  c_vals[c_current]=Aval*Bvals_k[j];
863  c_index[col]=c_current;
864  c_current++;
865  }
866  else{
867  c_vals[c_index[col]]+=Aval*Bvals_k[j];
868  }
869  }
870  }
871  // Remote matrix: Reset c_index and switch c_cols to GIDs
872  for(k=0; k<c_current; k++){
873  c_index[c_cols[k]]=-1;
874  c_cols[k]=bcols_import[c_cols[k]];
875  }
876  // Remove matrix: Insert
877  //
878  // See above (on error handling).
879 #if 0
880  err = C_filled ?
881  C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols)
882  :
883  C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);
884 #else
885  if (C_filled) {
886  C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols);
887  } else {
888  C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);
889  }
890 #endif // 0
891  }
892 
893  // Since Multiply won't do this
894  if(call_FillComplete_on_result)
895  C.FillComplete(B.DomainMap(),A.RangeMap());
896 
897  delete [] dwork;
898  delete [] iwork;
899  delete [] c_index;
900  return(0);
901 }
902 
903 
904 
905 
906 
907 /*****************************************************************************/
908 /*****************************************************************************/
909 /*****************************************************************************/
910 template<typename int_type>
911 int MatrixMatrix::Tmult_A_B(const Epetra_CrsMatrix & A,
912  CrsMatrixStruct & Aview,
913  const Epetra_CrsMatrix & B,
914  CrsMatrixStruct& Bview,
915  Epetra_CrsMatrix& C,
916  bool call_FillComplete_on_result){
917 
918  int i,rv;
919  Epetra_Map* mapunion = 0;
920  const Epetra_Map * colmap_B = &(B.ColMap());
921  const Epetra_Map * colmap_C = &(C.ColMap());
922 
923  std::vector<int> Cremotepids;
924  std::vector<int> Bcol2Ccol(B.ColMap().NumMyElements());
925  std::vector<int> Bimportcol2Ccol;
926  if(Bview.importMatrix) Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
927 
928 #ifdef ENABLE_MMM_TIMINGS
929  using Teuchos::TimeMonitor;
930  Teuchos::RCP<Teuchos::TimeMonitor> MM;
931 #endif
932 
933  // If the user doesn't want us to call FillComplete, use the general routine
934  if(!call_FillComplete_on_result) {
935 #ifdef ENABLE_MMM_TIMINGS
936  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM General Multiply")));
937 #endif
938  rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,false);
939  return rv;
940  }
941 
942  // Is this a "clean" matrix
943  bool NewFlag=!C.IndicesAreLocal() && !C.IndicesAreGlobal();
944 
945  // Does ExtractCrsDataPointers work?
946  int *C_rowptr, *C_colind;
947  double * C_vals;
948  C.ExtractCrsDataPointers(C_rowptr,C_colind,C_vals);
949  bool ExtractFailFlag=!C_rowptr || !C_colind || !C_vals;
950 
951  // It's a new matrix that hasn't been fill-completed, use the general routine
952  if(!NewFlag && ExtractFailFlag){
953 #ifdef ENABLE_MMM_TIMINGS
954  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM General Multiply")));
955 #endif
956  rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,call_FillComplete_on_result);
957  return rv;
958  }
959 
960 
961 #ifdef ENABLE_MMM_TIMINGS
962  if(NewFlag) MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix CMap")));
963  else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse CMap")));
964 #endif
965 
966  // If new, build & clobber a colmap for C
967  if(NewFlag){
968  if(Bview.importMatrix) {
969  EPETRA_CHK_ERR( aztecoo_and_ml_compatible_map_union<int_type>(B,*Bview.importMatrix,mapunion,Cremotepids,Bcol2Ccol,Bimportcol2Ccol) );
970  EPETRA_CHK_ERR( C.ReplaceColMap(*mapunion) );
971  }
972  else {
973  EPETRA_CHK_ERR( C.ReplaceColMap(B.ColMap()) );
974  for(i=0;i<colmap_B->NumMyElements();i++) Bcol2Ccol[i]=i;
975 
976  // Copy B's remote list (if any)
977  if(B.Importer())
978  EPETRA_CHK_ERR( Epetra_Util::GetRemotePIDs(*B.Importer(),Cremotepids));
979  }
980  }
981 
982 #ifdef ENABLE_MMM_TIMINGS
983  if(NewFlag) MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix Lookups")));
984  else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse Lookups")));
985 #endif
986 
987  // ********************************************
988  // Setup Bcol2Ccol / Bimportcol2Ccol lookups
989  // ********************************************
990  // Note: If we ran the map_union, we have this information already
991 
992  if(!NewFlag) {
993  if(colmap_B->SameAs(*colmap_C)){
994  // Maps are the same: Use local IDs as the hash
995  for(i=0;i<colmap_B->NumMyElements();i++)
996  Bcol2Ccol[i]=i;
997  }
998  else {
999  // Maps are not the same: Use the map's hash
1000  for(i=0;i<colmap_B->NumMyElements();i++){
1001  Bcol2Ccol[i]=colmap_C->LID((int_type) colmap_B->GID64(i));
1002  if(Bcol2Ccol[i]==-1) EPETRA_CHK_ERR(-11);
1003  }
1004  }
1005 
1006  if(Bview.importMatrix){
1007  Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
1008  for(i=0;i<Bview.importMatrix->ColMap_.NumMyElements();i++){
1009  Bimportcol2Ccol[i]=colmap_C->LID((int_type) Bview.importMatrix->ColMap_.GID64(i));
1010  if(Bimportcol2Ccol[i]==-1) EPETRA_CHK_ERR(-12);
1011  }
1012 
1013  }
1014  }
1015 #ifdef ENABLE_MMM_TIMINGS
1016  MM=Teuchos::null;
1017 #endif
1018 
1019  // Call the appropriate core routine
1020  if(NewFlag) {
1021  EPETRA_CHK_ERR(mult_A_B_newmatrix<int_type>(A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,Cremotepids,C));
1022  }
1023  else {
1024  // This always has a real map
1025  EPETRA_CHK_ERR(mult_A_B_reuse<int_type>(A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,C));
1026  }
1027 
1028  // Cleanup
1029  delete mapunion;
1030  return 0;
1031 }
1032 
1033 
1034 /*****************************************************************************/
1035 /*****************************************************************************/
1036 /*****************************************************************************/
1037 int MatrixMatrix::mult_A_B(const Epetra_CrsMatrix & A,
1038  CrsMatrixStruct & Aview,
1039  const Epetra_CrsMatrix & B,
1040  CrsMatrixStruct& Bview,
1041  Epetra_CrsMatrix& C,
1042  bool call_FillComplete_on_result){
1043 
1044 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
1045  if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
1046  return Tmult_A_B<int>(A, Aview, B, Bview, C, call_FillComplete_on_result);
1047  }
1048  else
1049 #endif
1050 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
1051  if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
1052  return Tmult_A_B<long long>(A, Aview, B, Bview, C, call_FillComplete_on_result);
1053  }
1054  else
1055 #endif
1056  throw "EpetraExt::MatrixMatrix::mult_A_B: GlobalIndices type unknown";
1057 }
1058 
1059 
1060 /*****************************************************************************/
1061 /*****************************************************************************/
1062 /*****************************************************************************/
1063 template<typename int_type>
1064 int jacobi_A_B_reuse(double omega,
1065  const Epetra_Vector & Dinv,
1066  const Epetra_CrsMatrix & A,
1067  const Epetra_CrsMatrix & B,
1068  CrsMatrixStruct& Bview,
1069  std::vector<int> & Bcol2Ccol,
1070  std::vector<int> & Bimportcol2Ccol,
1071  Epetra_CrsMatrix& C){
1072 
1073 #ifdef ENABLE_MMM_TIMINGS
1074  using Teuchos::TimeMonitor;
1075  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse SerialCore")));
1076 #endif
1077 
1078  // *****************************
1079  // Improved Parallel Gustavson in Local IDs
1080  // *****************************
1081  const Epetra_Map * colmap_C = &(C.ColMap());
1082 
1083  int m=A.NumMyRows();
1084  int n=colmap_C->NumMyElements();
1085  int i,j,k;
1086 
1087  // DataPointers for A
1088  int *Arowptr, *Acolind;
1089  double *Avals;
1090  EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
1091 
1092  // DataPointers for B, Bimport
1093  int *Browptr, *Bcolind;
1094  double *Bvals;
1095  EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
1096 
1097  int *Irowptr=0, *Icolind=0;
1098  double *Ivals=0;
1099  if(Bview.importMatrix){
1100  Irowptr = &Bview.importMatrix->rowptr_[0];
1101  Icolind = &Bview.importMatrix->colind_[0];
1102  Ivals = &Bview.importMatrix->vals_[0];
1103  }
1104 
1105  // Data pointer for Dinv
1106  const double *Dvals = Dinv.Values();
1107 
1108  // DataPointers for C
1109  int *CSR_rowptr, *CSR_colind;
1110  double *CSR_vals;
1111  EPETRA_CHK_ERR(C.ExtractCrsDataPointers(CSR_rowptr,CSR_colind,CSR_vals));
1112 
1113 
1114  // The status array will contain the index into colind where this dude was last deposited.
1115  // c_status[i] < CSR_ip - not in the row yet.
1116  // c_status[i] >= CSR_ip, this is the entry where you can find the data
1117  // We start with this filled with -1's indicating that there are no entries yet.
1118  std::vector<int> c_status(n, -1);
1119 
1120  // Classic csr assembly
1121  int CSR_alloc=CSR_rowptr[m] - CSR_rowptr[0];
1122  int CSR_ip=0,OLD_ip=0;
1123 
1124  // For each row of C
1125  for(i=0; i<m; i++){
1126  double Dval = Dvals[i];
1127 
1128  // Entries of B
1129  for(k=Browptr[i]; k<Browptr[i+1]; k++){
1130  // int Bk = Bcolind[k];
1131  double Bval = Bvals[k];
1132  if(Bval==0) continue;
1133  int Ck=Bcol2Ccol[Bcolind[k]];
1134 
1135  // Assume no repeated entries in B
1136  c_status[Ck]=CSR_ip;
1137  CSR_colind[CSR_ip]=Ck;
1138  CSR_vals[CSR_ip]= Bvals[k];
1139  CSR_ip++;
1140  }
1141 
1142  // Entries of -omega * Dinv * A * B
1143  for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
1144  int Ak=Acolind[k];
1145  double Aval = Avals[k];
1146  if(Aval==0) continue;
1147 
1148  if(Bview.targetMapToOrigRow[Ak] != -1){
1149  // Local matrix
1150  int Bk = Bview.targetMapToOrigRow[Ak];
1151  for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
1152  int Cj=Bcol2Ccol[Bcolind[j]];
1153 
1154  if(c_status[Cj]<OLD_ip){
1155  // New entry
1156  if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-13);
1157  c_status[Cj]=CSR_ip;
1158  CSR_colind[CSR_ip]=Cj;
1159  CSR_vals[CSR_ip]= - omega * Dval * Aval * Bvals[j];
1160  CSR_ip++;
1161  }
1162  else
1163  CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Bvals[j];
1164  }
1165  }
1166  else{
1167  // Remote matrix
1168  int Ik = Bview.targetMapToImportRow[Ak];
1169  for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
1170  int Cj=Bimportcol2Ccol[Icolind[j]];
1171 
1172  if(c_status[Cj]<OLD_ip){
1173  // New entry
1174  if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-14);
1175  c_status[Cj]=CSR_ip;
1176  CSR_colind[CSR_ip]=Cj;
1177  CSR_vals[CSR_ip]= - omega * Dval * Aval * Ivals[j];
1178  CSR_ip++;
1179  }
1180  else
1181  CSR_vals[c_status[Cj]]-=omega * Dval * Aval * Ivals[j];
1182  }
1183  }
1184  }
1185  OLD_ip=CSR_ip;
1186  }
1187 
1188 #ifdef ENABLE_MMM_TIMINGS
1189  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse Final Sort")));
1190 #endif
1191  // Sort the entries
1192  Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
1193 
1194  return 0;
1195 }
1196 
1197 
1198 /*****************************************************************************/
1199 /*****************************************************************************/
1200 /*****************************************************************************/
1201 template<typename int_type>
1202 int jacobi_A_B_newmatrix(double omega,
1203  const Epetra_Vector & Dinv,
1204  const Epetra_CrsMatrix & A,
1205  const Epetra_CrsMatrix & B,
1206  CrsMatrixStruct& Bview,
1207  std::vector<int> & Bcol2Ccol,
1208  std::vector<int> & Bimportcol2Ccol,
1209  std::vector<int>& Cremotepids,
1210  Epetra_CrsMatrix& C)
1211 {
1212 #ifdef ENABLE_MMM_TIMINGS
1213  using Teuchos::TimeMonitor;
1214  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix SerialCore")));
1215 #endif
1216 
1217  // *****************************
1218  // Improved Parallel Gustavson in Local IDs
1219  // *****************************
1220  const Epetra_Map * colmap_C = &(C.ColMap());
1221  int NumMyDiagonals=0; // Counter to speed up ESFC
1222 
1223  int m=A.NumMyRows();
1224  int n=colmap_C->NumMyElements();
1225  int i,j,k;
1226 
1227  // DataPointers for A
1228  int *Arowptr, *Acolind;
1229  double *Avals;
1230  EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
1231 
1232  // DataPointers for B, Bimport
1233  int *Browptr, *Bcolind;
1234  double *Bvals;
1235  EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
1236 
1237  // Data pointer for Dinv
1238  const double *Dvals = Dinv.Values();
1239 
1240  int *Irowptr=0, *Icolind=0;
1241  double *Ivals=0;
1242  if(Bview.importMatrix){
1243  Irowptr = &Bview.importMatrix->rowptr_[0];
1244  Icolind = (Bview.importMatrix->colind_.size()>0)?(&Bview.importMatrix->colind_[0]):0;
1245  Ivals = (Bview.importMatrix->vals_.size()>0)?(&Bview.importMatrix->vals_[0]):0;
1246  }
1247 
1248  // MemorySetup: If somebody else is sharing this C's graphdata, make a new one.
1249  // This is needed because I'm about to walk all over the CrsGrapData...
1250  C.ExpertMakeUniqueCrsGraphData();
1251 
1252  // The status array will contain the index into colind where this entry was last deposited.
1253  // c_status[i] < CSR_ip - not in the row yet.
1254  // c_status[i] >= CSR_ip, this is the entry where you can find the data
1255  // We start with this filled with -1's indicating that there are no entries yet.
1256  std::vector<int> c_status(n, -1);
1257 
1258  // Classic csr assembly (low memory edition)
1259  int CSR_alloc=C_estimate_nnz(A,B);
1260  if(CSR_alloc < B.NumMyNonzeros()) CSR_alloc = B.NumMyNonzeros(); // update for Jacobi
1261  int CSR_ip=0,OLD_ip=0;
1262  Epetra_IntSerialDenseVector & CSR_rowptr = C.ExpertExtractIndexOffset();
1263  Epetra_IntSerialDenseVector & CSR_colind = C.ExpertExtractIndices();
1264  double *& CSR_vals = C.ExpertExtractValues();
1265 
1266  CSR_rowptr.Resize(m+1);
1267  CSR_colind.Resize(CSR_alloc);
1268  resize_doubles(0,CSR_alloc,CSR_vals);
1269 
1270  // Static Profile stuff
1271  std::vector<int> NumEntriesPerRow(m);
1272 
1273  // For each row of C
1274  for(i=0; i<m; i++){
1275  bool found_diagonal=false;
1276  CSR_rowptr[i]=CSR_ip;
1277  double Dval = Dvals[i];
1278 
1279  // Entries of B
1280  for(k=Browptr[i]; k<Browptr[i+1]; k++){
1281  //int Bk = Bcolind[k];
1282  double Bval = Bvals[k];
1283  if(Bval==0) continue;
1284  int Ck=Bcol2Ccol[Bcolind[k]];
1285 
1286  // Assume no repeated entries in B
1287  c_status[Ck]=CSR_ip;
1288  CSR_colind[CSR_ip]=Ck;
1289  CSR_vals[CSR_ip]= Bvals[k];
1290  CSR_ip++;
1291  }
1292 
1293  // Entries of -omega * Dinv * A * B
1294  for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
1295  int Ak = Acolind[k];
1296  double Aval = Avals[k];
1297  if(Aval==0) continue;
1298 
1299  if(Bview.targetMapToOrigRow[Ak] != -1){
1300  // Local matrix
1301  int Bk = Bview.targetMapToOrigRow[Ak];
1302  for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
1303  int Cj=Bcol2Ccol[Bcolind[j]];
1304 
1305  if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;}
1306 
1307  if(c_status[Cj]<OLD_ip){
1308  // New entry
1309  c_status[Cj]=CSR_ip;
1310  CSR_colind[CSR_ip]=Cj;
1311  CSR_vals[CSR_ip]= - omega * Dval* Aval * Bvals[j];
1312  CSR_ip++;
1313  }
1314  else
1315  CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Bvals[j];
1316  }
1317  }
1318  else{
1319  // Remote matrix
1320  int Ik = Bview.targetMapToImportRow[Ak];
1321  for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
1322  int Cj=Bimportcol2Ccol[Icolind[j]];
1323 
1324  if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;}
1325 
1326  if(c_status[Cj]<OLD_ip){
1327  // New entry
1328  c_status[Cj]=CSR_ip;
1329  CSR_colind[CSR_ip]=Cj;
1330  CSR_vals[CSR_ip]= - omega * Dval * Aval * Ivals[j];
1331  CSR_ip++;
1332  }
1333  else
1334  CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Ivals[j];
1335  }
1336  }
1337  }
1338  NumEntriesPerRow[i]=CSR_ip-CSR_rowptr[i];
1339 
1340  // Resize for next pass if needed
1341  if(CSR_ip + n > CSR_alloc){
1342  resize_doubles(CSR_alloc,2*CSR_alloc,CSR_vals);
1343  CSR_alloc*=2;
1344  CSR_colind.Resize(CSR_alloc);
1345  }
1346  OLD_ip=CSR_ip;
1347  }
1348 
1349  CSR_rowptr[m]=CSR_ip;
1350 
1351 #ifdef ENABLE_MMM_TIMINGS
1352  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix Final Sort")));
1353 #endif
1354 
1355  // Sort the entries
1356  Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
1357 
1358 #ifdef ENABLE_MMM_TIMINGS
1359  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix Fast IE")));
1360 #endif
1361 
1362  // Do a fast build of C's importer
1363  Epetra_Import * Cimport=0;
1364  int *RemotePIDs = Cremotepids.size()?&Cremotepids[0]:0;
1365  int NumExports=0;
1366  int *ExportLIDs=0, *ExportPIDs=0;
1367  if(Bview.importMatrix) {
1368  NumExports = Bview.importMatrix->ExportLIDs_.size();
1369  ExportLIDs = Bview.importMatrix->ExportLIDs_.size()?&Bview.importMatrix->ExportLIDs_[0]:0;
1370  ExportPIDs = Bview.importMatrix->ExportPIDs_.size()?&Bview.importMatrix->ExportPIDs_[0]:0;
1371  }
1372  else if(B.Importer()) {
1373  // Grab the exports from B proper
1374  NumExports = B.Importer()->NumExportIDs();
1375  ExportLIDs = B.Importer()->ExportLIDs();
1376  ExportPIDs = B.Importer()->ExportPIDs();
1377  }
1378 
1379  if(!C.ColMap().SameAs(B.DomainMap()))
1380  Cimport = new Epetra_Import(C.ColMap(),B.DomainMap(),Cremotepids.size(),RemotePIDs,NumExports,ExportLIDs,ExportPIDs);
1381 
1382 #ifdef ENABLE_MMM_TIMINGS
1383  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix ESFC")));
1384 #endif
1385 
1386  // Update the CrsGraphData
1387  C.ExpertStaticFillComplete(B.DomainMap(),A.RangeMap(),Cimport,0,NumMyDiagonals);
1388 
1389  return 0;
1390 }
1391 
1392 
1393 
1394 /*****************************************************************************/
1395 /*****************************************************************************/
1396 /*****************************************************************************/
1397 template<typename int_type>
1398 int MatrixMatrix::Tjacobi_A_B(double omega,
1399  const Epetra_Vector & Dinv,
1400  const Epetra_CrsMatrix & A,
1401  CrsMatrixStruct & Aview,
1402  const Epetra_CrsMatrix & B,
1403  CrsMatrixStruct& Bview,
1404  Epetra_CrsMatrix& C,
1405  bool call_FillComplete_on_result){
1406  int i,rv;
1407  Epetra_Map* mapunion = 0;
1408  const Epetra_Map * colmap_B = &(B.ColMap());
1409  const Epetra_Map * colmap_C = &(C.ColMap());
1410 
1411  std::vector<int> Cremotepids;
1412  std::vector<int> Bcol2Ccol(B.ColMap().NumMyElements());
1413  std::vector<int> Bimportcol2Ccol;
1414  if(Bview.importMatrix) Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
1415 
1416 #ifdef ENABLE_MMM_TIMINGS
1417  using Teuchos::TimeMonitor;
1418  Teuchos::RCP<Teuchos::TimeMonitor> MM;
1419 #endif
1420 
1421  // If the user doesn't want us to call FillComplete, use the general routine
1422  if(!call_FillComplete_on_result) {
1423 #ifdef ENABLE_MMM_TIMINGS
1424  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi General Multiply")));
1425 #endif
1426  throw std::runtime_error("jacobi_A_B_general not implemented");
1427  // rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,false);
1428  return rv;
1429  }
1430 
1431  // Is this a "clean" matrix
1432  bool NewFlag=!C.IndicesAreLocal() && !C.IndicesAreGlobal();
1433 
1434  // Does ExtractCrsDataPointers work?
1435  int *C_rowptr, *C_colind;
1436  double * C_vals;
1437  C.ExtractCrsDataPointers(C_rowptr,C_colind,C_vals);
1438  bool ExtractFailFlag=!C_rowptr || !C_colind || !C_vals;
1439 
1440  // It's a new matrix that hasn't been fill-completed, use the general routine
1441  if(!NewFlag && ExtractFailFlag){
1442 #ifdef ENABLE_MMM_TIMINGS
1443  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi General Multiply")));
1444 #endif
1445  throw std::runtime_error("jacobi_A_B_general not implemented");
1446  // rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,call_FillComplete_on_result);
1447  return rv;
1448  }
1449 
1450 #ifdef ENABLE_MMM_TIMINGS
1451  if(NewFlag) MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Newmatrix CMap")));
1452  else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse CMap")));
1453 #endif
1454 
1455  // If new, build & clobber a colmap for C
1456  if(NewFlag){
1457  if(Bview.importMatrix) {
1458  EPETRA_CHK_ERR( aztecoo_and_ml_compatible_map_union<int_type>(B,*Bview.importMatrix,mapunion,Cremotepids,Bcol2Ccol,Bimportcol2Ccol) );
1459  EPETRA_CHK_ERR( C.ReplaceColMap(*mapunion) );
1460  }
1461  else {
1462  EPETRA_CHK_ERR( C.ReplaceColMap(B.ColMap()) );
1463  for(i=0;i<colmap_B->NumMyElements();i++) Bcol2Ccol[i]=i;
1464 
1465  // Copy B's remote list (if any)
1466  if(B.Importer())
1467  EPETRA_CHK_ERR( Epetra_Util::GetRemotePIDs(*B.Importer(),Cremotepids));
1468  }
1469  }
1470 
1471 #ifdef ENABLE_MMM_TIMINGS
1472  if(NewFlag) MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Newmatrix Lookups")));
1473  else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse Lookups")));
1474 #endif
1475 
1476  // ********************************************
1477  // Setup Bcol2Ccol / Bimportcol2Ccol lookups
1478  // ********************************************
1479  // Note: If we ran the map_union, we have this information already
1480 
1481  if(!NewFlag) {
1482  if(colmap_B->SameAs(*colmap_C)){
1483  // Maps are the same: Use local IDs as the hash
1484  for(i=0;i<colmap_B->NumMyElements();i++)
1485  Bcol2Ccol[i]=i;
1486  }
1487  else {
1488  // Maps are not the same: Use the map's hash
1489  for(i=0;i<colmap_B->NumMyElements();i++){
1490  Bcol2Ccol[i]=colmap_C->LID((int_type) colmap_B->GID64(i));
1491  if(Bcol2Ccol[i]==-1) EPETRA_CHK_ERR(-11);
1492  }
1493  }
1494 
1495  if(Bview.importMatrix){
1496  Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
1497  for(i=0;i<Bview.importMatrix->ColMap_.NumMyElements();i++){
1498  Bimportcol2Ccol[i]=colmap_C->LID((int_type) Bview.importMatrix->ColMap_.GID64(i));
1499  if(Bimportcol2Ccol[i]==-1) EPETRA_CHK_ERR(-12);
1500  }
1501 
1502  }
1503  }
1504 
1505 
1506  // Call the appropriate core routine
1507  if(NewFlag) {
1508  EPETRA_CHK_ERR(jacobi_A_B_newmatrix<int_type>(omega,Dinv,A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,Cremotepids,C));
1509  }
1510  else {
1511  // This always has a real map
1512  EPETRA_CHK_ERR(jacobi_A_B_reuse<int_type>(omega,Dinv,A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,C));
1513  }
1514 
1515  // Cleanup
1516  delete mapunion;
1517  return 0;
1518 }
1519 
1520 
1521 /*****************************************************************************/
1522 /*****************************************************************************/
1523 /*****************************************************************************/
1524 int MatrixMatrix::jacobi_A_B(double omega,
1525  const Epetra_Vector & Dinv,
1526  const Epetra_CrsMatrix & A,
1527  CrsMatrixStruct & Aview,
1528  const Epetra_CrsMatrix & B,
1529  CrsMatrixStruct& Bview,
1530  Epetra_CrsMatrix& C,
1531  bool call_FillComplete_on_result)
1532 {
1533 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
1534  if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
1535  return Tjacobi_A_B<int>(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result);
1536  }
1537  else
1538 #endif
1539 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
1540  if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
1541  return Tjacobi_A_B<long long>(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result);
1542  }
1543  else
1544 #endif
1545  throw "EpetraExt::MatrixMatrix::jacobi_A_B: GlobalIndices type unknown";
1546 }
1547 
1548 
1549 
1550 /*****************************************************************************/
1551 /*****************************************************************************/
1552 /*****************************************************************************/
1553 template<typename int_type>
1554 int MatrixMatrix::Tmult_AT_B_newmatrix(const CrsMatrixStruct & Atransview, const CrsMatrixStruct & Bview, Epetra_CrsMatrix & C) {
1555  using Teuchos::RCP;
1556  using Teuchos::rcp;
1557 
1558 #ifdef ENABLE_MMM_TIMINGS
1559  using Teuchos::TimeMonitor;
1560  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM-T Transpose")));
1561 #endif
1562 
1563 
1564  /*************************************************************/
1565  /* 2/3) Call mult_A_B_newmatrix w/ fillComplete */
1566  /*************************************************************/
1567 #ifdef ENABLE_MMM_TIMINGS
1568  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM-T AB-core")));
1569 #endif
1570  RCP<Epetra_CrsMatrix> Ctemp;
1571 
1572  // If Atrans has no Exporter, we can use C instead of having to create a temp matrix
1573  bool needs_final_export = Atransview.origMatrix->Exporter() != 0;
1574  if(needs_final_export)
1575  Ctemp = rcp(new Epetra_CrsMatrix(Copy,Atransview.origMatrix->RowMap(),Bview.origMatrix->ColMap(),0));
1576  else {
1577  EPETRA_CHK_ERR( C.ReplaceColMap(Bview.origMatrix->ColMap()) );
1578  Ctemp = rcp(&C,false);// don't allow deallocation
1579  }
1580 
1581  // Multiply
1582  std::vector<int> Bcol2Ccol(Bview.origMatrix->NumMyCols());
1583  for(int i=0; i<Bview.origMatrix->NumMyCols(); i++)
1584  Bcol2Ccol[i]=i;
1585  std::vector<int> Bimportcol2Ccol,Cremotepids;
1586  if(Bview.origMatrix->Importer())
1587  EPETRA_CHK_ERR( Epetra_Util::GetRemotePIDs(*Bview.origMatrix->Importer(),Cremotepids));
1588 
1589  EPETRA_CHK_ERR(mult_A_B_newmatrix<int_type>(*Atransview.origMatrix,*Bview.origMatrix,Bview,
1590  Bcol2Ccol,Bimportcol2Ccol,Cremotepids,
1591  *Ctemp));
1592 
1593  /*************************************************************/
1594  /* 4) ExportAndFillComplete matrix (if needed) */
1595  /*************************************************************/
1596 #ifdef ENABLE_MMM_TIMINGS
1597  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM-T ESFC")));
1598 #endif
1599 
1600  if(needs_final_export)
1601  C.FusedExport(*Ctemp,*Ctemp->Exporter(),&Bview.origMatrix->DomainMap(),&Atransview.origMatrix->RangeMap(),false);
1602 
1603  return 0;
1604 }
1605 
1606 
1607 
1608 /*****************************************************************************/
1609 /*****************************************************************************/
1610 /*****************************************************************************/
1611 int MatrixMatrix::mult_AT_B_newmatrix(const CrsMatrixStruct & Atransview, const CrsMatrixStruct & Bview, Epetra_CrsMatrix & C) {
1612 
1613 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
1614  if(Atransview.origMatrix->RowMap().GlobalIndicesInt() && Bview.origMatrix->RowMap().GlobalIndicesInt()) {
1615  return Tmult_AT_B_newmatrix<int>(Atransview,Bview,C);
1616  }
1617  else
1618 #endif
1619 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
1620  if(Atransview.origMatrix->RowMap().GlobalIndicesLongLong() && Bview.origMatrix->RowMap().GlobalIndicesLongLong()) {
1621  return Tmult_AT_B_newmatrix<long long>(Atransview,Bview,C);
1622  }
1623  else
1624 #endif
1625  throw "EpetraExt::MatrixMatrix::mult_AT_B_newmatrix: GlobalIndices type unknown";
1626 }
1627 
1628 
1629 
1630 }//namespace EpetraExt
LightweightCrsMatrix * importMatrix
int mult_A_B_newmatrix(const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, const CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, std::vector< int > &Cremotepids, Epetra_CrsMatrix &C)
std::vector< int > targetMapToOrigRow
void MyGlobalElementsPtr(int *&MyGlobalElementList) const
long long GID64(int LID) const
int mult_A_B_reuse(const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, Epetra_CrsMatrix &C)
const Epetra_CrsMatrix * origMatrix
EpetraExt::BlockCrsMatrix: A class for constructing a distributed block matrix.
void resize_doubles(int nold, int nnew, double *&d)
std::vector< int > targetMapToImportRow
static int C_estimate_nnz(const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B)
int jacobi_A_B_reuse(double omega, const Epetra_Vector &Dinv, const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, Epetra_CrsMatrix &C)
int aztecoo_and_ml_compatible_map_union(const Epetra_CrsMatrix &B, const LightweightCrsMatrix &Bimport, Epetra_Map *&unionmap, std::vector< int > &Cremotepids, std::vector< int > &Bcols2Ccols, std::vector< int > &Icols2Ccols)
int jacobi_A_B_newmatrix(double omega, const Epetra_Vector &Dinv, const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, std::vector< int > &Cremotepids, Epetra_CrsMatrix &C)
int mult_A_B_general(const Epetra_CrsMatrix &A, CrsMatrixStruct &Aview, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, Epetra_CrsMatrix &C, bool call_FillComplete_on_result)