49 #include <Epetra_Export.h> 50 #include <Epetra_Import.h> 51 #include <Epetra_Util.h> 52 #include <Epetra_Map.h> 53 #include <Epetra_Comm.h> 54 #include <Epetra_CrsMatrix.h> 55 #include <Epetra_Vector.h> 56 #include <Epetra_Directory.h> 57 #include <Epetra_HashTable.h> 58 #include <Epetra_Distributor.h> 59 #include <Epetra_IntSerialDenseVector.h> 66 #include <Epetra_MpiDistributor.h> 70 #include <Teuchos_TimeMonitor.hpp> 77 static inline int C_estimate_nnz(
const Epetra_CrsMatrix & A,
const Epetra_CrsMatrix &B){
79 int Aest=(A.NumMyRows()>0)? A.NumMyNonzeros()/A.NumMyRows():100;
80 int Best=(B.NumMyRows()>0)? B.NumMyNonzeros()/B.NumMyRows():100;
82 int nnzperrow=(int)(sqrt((
double)Aest) + sqrt((
double)Best) - 1);
85 return (
int)(A.NumMyRows()*nnzperrow*0.75 + 100);
93 static inline int auto_resize(std::vector<int> &x,
int num_new){
94 int newsize=x.size() + EPETRA_MAX((
int)x.size(),num_new);
103 template<
typename int_type>
105 std::vector<int> &Bcols2Ccols, std::vector<int> &Icols2Ccols)
109 #ifdef ENABLE_MMM_TIMINGS 110 using Teuchos::TimeMonitor;
111 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM M5 CMap 1")));
116 int i,j,MyPID = B.Comm().MyPID(), NumProc = B.Comm().NumProc();
117 int Bstart=0, Istart=0, Cstart=0,Pstart=0;
119 const Epetra_Map & BColMap = B.ColMap();
120 const Epetra_Map & DomainMap = B.DomainMap();
124 int_type * Bgids = 0;
125 BColMap.MyGlobalElementsPtr(Bgids);
127 int_type * Igids = 0;
131 if((
int)Bcols2Ccols.size() != Nb) Bcols2Ccols.resize(Nb);
132 if((
int)Icols2Ccols.size() != Ni) Icols2Ccols.resize(Ni);
137 Epetra_MpiDistributor *Distor=0;
139 Distor=
dynamic_cast<Epetra_MpiDistributor*
>(&B.Importer()->Distributor());
140 if(!Distor) EPETRA_CHK_ERR(-2);
147 std::vector<int> Bpids(Nb), Ipids(Ni);
148 if(B.Importer()) {EPETRA_CHK_ERR(util.GetPids(*B.Importer(),Bpids,
true));}
149 else Bpids.assign(Nb,-1);
163 std::vector<int_type> Cgids(Csize);
164 Cremotepids.resize(Psize);
166 #ifdef ENABLE_MMM_TIMINGS 167 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM M5 CMap 2")));
173 if(!B.Importer() || (B.Importer()->NumSameIDs() == DomainMap.NumMyElements())) {
175 DomainMap.MyGlobalElements(Cgids.size() ? &Cgids[0] : 0);
176 Cstart=DomainMap.NumMyElements();
177 Bstart=DomainMap.NumMyElements();
179 for(i=0; i<DomainMap.NumMyElements(); i++) Bcols2Ccols[i] = i;
183 int NumDomainElements = DomainMap.NumMyElements();
184 for(i = 0; i < NumDomainElements; i++) {
185 int_type GID = (int_type) DomainMap.GID64(i);
186 int LID = BColMap.LID(GID);
189 Bcols2Ccols[LID]=Cstart;
196 LID = IColMap.
LID(GID);
198 Icols2Ccols[LID]=Cstart;
207 for(i=0,j=0; i<Ni && Ipids[i]==-1; i++) {
208 while(Cgids[j]!=Igids[i]) j++;
214 #ifdef ENABLE_MMM_TIMINGS 215 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM M5 CMap 3")));
223 int initial_temp_length = 0;
224 const int * lengths_from=0;
226 lengths_from= Distor->LengthsFrom();
227 for(i=0; i < Distor->NumReceives(); i++) initial_temp_length += lengths_from[i];
229 else initial_temp_length=100;
231 std::vector<int_type> Btemp(initial_temp_length), Itemp(initial_temp_length);
232 std::vector<int> Btemp2(initial_temp_length), Itemp2(initial_temp_length);
235 while (Bstart < Nb || Istart < Ni) {
236 int Bproc=NumProc+1, Iproc=NumProc+1, Cproc;
239 if(Bstart < Nb) Bproc=Bpids[Bstart];
240 if(Istart < Ni) Iproc=Ipids[Istart];
242 Cproc = (Bproc < Iproc)?Bproc:Iproc;
244 if(Bproc == Cproc && Iproc != Cproc) {
247 for(i=Bstart; i<Nb && Bpids[i]==Bproc; i++) {}
251 int tCsize = Bnext-Bstart;
252 if(Btemp.size() < (size_t)tCsize) {Btemp2.resize(tCsize);}
254 for(i=Bstart; i<Bnext; i++) {
255 Cremotepids[i-Bstart+Pstart] = Cproc;
256 Cgids[i-Bstart+Cstart] = Bgids[i];
257 Btemp2[i-Bstart] = i;
261 int *Bptr2 = Btemp2.size() ? &Btemp2[0] : 0;
262 util.Sort(
true, tCsize, &Cgids[Cstart], 0, 0, 1, &Bptr2, 0, 0);
264 for(i=0, j=Cstart; i<tCsize; i++){
265 while(Cgids[j] != Bgids[Btemp2[i]]) j++;
266 Bcols2Ccols[Btemp2[i]] = j;
272 else if(Bproc != Cproc && Iproc == Cproc) {
275 for(i=Istart; i<Ni && Ipids[i]==Iproc; i++) {}
279 int tCsize = Inext-Istart;
280 if(Itemp.size() < (size_t)tCsize) {Itemp2.resize(tCsize);}
282 for(i=Istart; i<Inext; i++) {
283 Cremotepids[i-Istart+Pstart] = Cproc;
284 Cgids[i-Istart+Cstart] = Igids[i];
285 Itemp2[i-Istart] = i;
289 int *Iptr2 = Itemp2.size() ? &Itemp2[0] : 0;
290 util.Sort(
true, tCsize, &Cgids[Cstart], 0, 0, 1, &Iptr2, 0, 0);
292 for(i=0, j=Cstart; i<tCsize; i++){
293 while(Cgids[j] != Igids[Itemp2[i]]) j++;
294 Icols2Ccols[Itemp2[i]] = j;
304 for(i=Bstart; i<Nb && Bpids[i]==Bproc; i++) {}
308 for(i=Istart; i<Ni && Ipids[i]==Iproc; i++) {}
312 int tBsize = Bnext-Bstart;
313 int tIsize = Inext-Istart;
315 if(Btemp.size() < (size_t)tBsize) {Btemp.resize(tBsize); Btemp2.resize(tBsize);}
316 if(Itemp.size() < (size_t)tIsize) {Itemp.resize(tIsize); Itemp2.resize(tIsize);}
318 for(i=Bstart; i<Bnext; i++) {Btemp[i-Bstart]=Bgids[i]; Btemp2[i-Bstart]=i;}
319 for(i=Istart; i<Inext; i++) {Itemp[i-Istart]=Igids[i]; Itemp2[i-Istart]=i;}
322 int *Bptr2 = Btemp2.size() ? &Btemp2[0] : 0;
int *Iptr2 = Itemp2.size() ? &Itemp2[0] : 0;
323 util.Sort(
true, tBsize, Btemp.size() ? &Btemp[0] : 0, 0, 0, 1, &Bptr2, 0, 0);
324 util.Sort(
true, tIsize, Itemp.size() ? &Itemp[0] : 0, 0, 0, 1, &Iptr2, 0, 0);
325 typename std::vector<int_type>::iterator mycstart = Cgids.begin()+Cstart;
326 typename std::vector<int_type>::iterator last_el=std::set_union(Btemp.begin(),Btemp.begin()+tBsize,Itemp.begin(),Itemp.begin()+tIsize,mycstart);
328 for(i=0, j=Cstart; i<tBsize; i++){
329 while(Cgids[j] != Bgids[Btemp2[i]]) j++;
330 Bcols2Ccols[Btemp2[i]] = j;
333 for(i=0, j=Cstart; i<tIsize; i++){
334 while(Cgids[j] != Igids[Itemp2[i]]) j++;
335 Icols2Ccols[Itemp2[i]] = j;
338 for(i=Pstart; i<(last_el - mycstart) + Pstart; i++) Cremotepids[i]=Cproc;
339 Cstart = (last_el - mycstart) + Cstart;
340 Pstart = (last_el - mycstart) + Pstart;
346 #ifdef ENABLE_MMM_TIMINGS 347 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM M5 CMap 4")));
351 Cremotepids.resize(Pstart);
357 unionmap=
new Epetra_Map((int_type) -1,Cstart,Cgids.size() ? &Cgids[0] : 0, (int_type) B.ColMap().IndexBase64(),
358 B.Comm(),B.ColMap().DistributedGlobal(),(int_type) B.ColMap().MinAllGID64(),(int_type) B.ColMap().MaxAllGID64());
371 double *tmp =
new double[nnew];
372 for(
int i=0; i<nold; i++)
383 template<
typename int_type>
385 const Epetra_CrsMatrix & B,
387 std::vector<int> & Bcol2Ccol,
388 std::vector<int> & Bimportcol2Ccol,
389 std::vector<int>& Cremotepids,
392 #ifdef ENABLE_MMM_TIMINGS 393 using Teuchos::TimeMonitor;
394 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix SerialCore")));
400 const Epetra_Map * colmap_C = &(C.ColMap());
403 int NumMyDiagonals=0;
406 int n=colmap_C->NumMyElements();
410 int *Arowptr, *Acolind;
412 EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
415 int *Browptr, *Bcolind;
417 EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
419 int *Irowptr=0, *Icolind=0;
429 C.ExpertMakeUniqueCrsGraphData();
435 std::vector<int> c_status(n, -1);
439 if(CSR_alloc < n) CSR_alloc = n;
440 int CSR_ip=0,OLD_ip=0;
441 Epetra_IntSerialDenseVector & CSR_rowptr = C.ExpertExtractIndexOffset();
442 Epetra_IntSerialDenseVector & CSR_colind = C.ExpertExtractIndices();
443 double *& CSR_vals = C.ExpertExtractValues();
445 CSR_rowptr.Resize(m+1);
446 CSR_colind.Resize(CSR_alloc);
450 std::vector<int> NumEntriesPerRow(m);
454 bool found_diagonal=
false;
455 CSR_rowptr[i]=CSR_ip;
457 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
459 double Aval = Avals[k];
460 if(Aval==0)
continue;
465 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
466 int Cj=Bcol2Ccol[Bcolind[j]];
468 if(Cj==i && !found_diagonal) {found_diagonal=
true; NumMyDiagonals++;}
470 if(c_status[Cj]<OLD_ip){
473 CSR_colind[CSR_ip]=Cj;
474 CSR_vals[CSR_ip]=Aval*Bvals[j];
478 CSR_vals[c_status[Cj]]+=Aval*Bvals[j];
484 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
485 int Cj=Bimportcol2Ccol[Icolind[j]];
487 if(Cj==i && !found_diagonal) {found_diagonal=
true; NumMyDiagonals++;}
489 if(c_status[Cj]<OLD_ip){
492 CSR_colind[CSR_ip]=Cj;
493 CSR_vals[CSR_ip]=Aval*Ivals[j];
497 CSR_vals[c_status[Cj]]+=Aval*Ivals[j];
501 NumEntriesPerRow[i]=CSR_ip-CSR_rowptr[i];
504 if(CSR_ip + n > CSR_alloc){
507 CSR_colind.Resize(CSR_alloc);
512 CSR_rowptr[m]=CSR_ip;
514 #ifdef ENABLE_MMM_TIMINGS 515 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix Final Sort")));
519 Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
521 #ifdef ENABLE_MMM_TIMINGS 522 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix Fast IE")));
526 Epetra_Import * Cimport=0;
527 int *RemotePIDs = Cremotepids.size()?&Cremotepids[0]:0;
529 int *ExportLIDs=0, *ExportPIDs=0;
535 else if(B.Importer()) {
537 NumExports = B.Importer()->NumExportIDs();
538 ExportLIDs = B.Importer()->ExportLIDs();
539 ExportPIDs = B.Importer()->ExportPIDs();
543 if(B.Importer() && C.ColMap().SameAs(B.ColMap()))
544 Cimport =
new Epetra_Import(*B.Importer());
545 else if(!C.ColMap().SameAs(B.DomainMap()))
546 Cimport =
new Epetra_Import(C.ColMap(),B.DomainMap(),Cremotepids.size(),RemotePIDs,NumExports,ExportLIDs,ExportPIDs);
549 #ifdef ENABLE_MMM_TIMINGS 550 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix ESFC")));
554 C.ExpertStaticFillComplete(B.DomainMap(),A.RangeMap(),Cimport,0,NumMyDiagonals);
565 template<
typename int_type>
567 const Epetra_CrsMatrix & B,
569 std::vector<int> & Bcol2Ccol,
570 std::vector<int> & Bimportcol2Ccol,
571 Epetra_CrsMatrix& C){
573 #ifdef ENABLE_MMM_TIMINGS 574 using Teuchos::TimeMonitor;
575 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Reuse SerialCore")));
581 const Epetra_Map * colmap_C = &(C.ColMap());
584 int n=colmap_C->NumMyElements();
588 int *Arowptr, *Acolind;
590 EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
593 int *Browptr, *Bcolind;
595 EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
597 int *Irowptr=0, *Icolind=0;
606 int *CSR_rowptr, *CSR_colind;
608 EPETRA_CHK_ERR(C.ExtractCrsDataPointers(CSR_rowptr,CSR_colind,CSR_vals));
615 std::vector<int> c_status(n, -1);
618 int CSR_alloc=CSR_rowptr[m] - CSR_rowptr[0];
619 int CSR_ip=0,OLD_ip=0;
623 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
625 double Aval = Avals[k];
626 if(Aval==0)
continue;
631 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
632 int Cj=Bcol2Ccol[Bcolind[j]];
634 if(c_status[Cj]<OLD_ip){
636 if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-13);
638 CSR_colind[CSR_ip]=Cj;
639 CSR_vals[CSR_ip]=Aval*Bvals[j];
643 CSR_vals[c_status[Cj]]+=Aval*Bvals[j];
649 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
650 int Cj=Bimportcol2Ccol[Icolind[j]];
652 if(c_status[Cj]<OLD_ip){
654 if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-14);
656 CSR_colind[CSR_ip]=Cj;
657 CSR_vals[CSR_ip]=Aval*Ivals[j];
661 CSR_vals[c_status[Cj]]+=Aval*Ivals[j];
668 #ifdef ENABLE_MMM_TIMINGS 669 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Reuse Final Sort")));
673 Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
684 template<
typename int_type>
687 const Epetra_CrsMatrix & B,
690 bool call_FillComplete_on_result)
692 int C_firstCol = Bview.
colMap->MinLID();
693 int C_lastCol = Bview.
colMap->MaxLID();
695 int C_firstCol_import = 0;
696 int C_lastCol_import = -1;
699 Bview.
colMap->MyGlobalElementsPtr(bcols);
700 int_type* bcols_import = NULL;
707 int C_numCols = C_lastCol - C_firstCol + 1;
708 int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
710 if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
713 double* dwork =
new double[C_numCols];
714 int_type* iwork =
new int_type[C_numCols];
715 int_type *c_cols=iwork;
716 double *c_vals=dwork;
717 int *c_index=
new int[C_numCols];
720 bool C_filled=C.Filled();
723 int *Arowptr, *Acolind;
725 EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
735 for(k=0;k<C_numCols;k++) c_index[k]=-1;
738 for(i=0; i<A.NumMyRows(); ++i) {
740 int_type global_row = (int_type) Aview.
rowMap->GID64(i);
788 for(k=Arowptr[i]; k<Arowptr[i+1]; ++k) {
790 double Aval = Avals[k];
792 if(Bview.
remote[Ak] || Aval==0)
continue;
794 int* Bcol_inds = Bview.
indices[Ak];
795 double* Bvals_k = Bview.
values[Ak];
798 int col=Bcol_inds[j];
805 c_cols[c_current]=col;
806 c_vals[c_current]=Aval*Bvals_k[j];
807 c_index[col]=c_current;
812 c_vals[c_index[col]]+=Aval*Bvals_k[j];
817 for(k=0; k<c_current; k++){
818 c_index[c_cols[k]]=-1;
819 c_cols[k]=bcols[c_cols[k]];
834 C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols)
836 C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);
839 C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols);
841 C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);
849 for(k=Arowptr[i]; k<Arowptr[i+1]; ++k) {
851 double Aval = Avals[k];
853 if(!Bview.
remote[Ak] || Aval==0)
continue;
855 int* Bcol_inds = Bview.
indices[Ak];
856 double* Bvals_k = Bview.
values[Ak];
859 int col=Bcol_inds[j];
861 c_cols[c_current]=col;
862 c_vals[c_current]=Aval*Bvals_k[j];
863 c_index[col]=c_current;
867 c_vals[c_index[col]]+=Aval*Bvals_k[j];
872 for(k=0; k<c_current; k++){
873 c_index[c_cols[k]]=-1;
874 c_cols[k]=bcols_import[c_cols[k]];
881 C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols)
883 C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);
886 C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols);
888 C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);
894 if(call_FillComplete_on_result)
895 C.FillComplete(B.DomainMap(),A.RangeMap());
910 template<
typename int_type>
911 int MatrixMatrix::Tmult_A_B(
const Epetra_CrsMatrix & A,
913 const Epetra_CrsMatrix & B,
916 bool call_FillComplete_on_result){
919 Epetra_Map* mapunion = 0;
920 const Epetra_Map * colmap_B = &(B.ColMap());
921 const Epetra_Map * colmap_C = &(C.ColMap());
923 std::vector<int> Cremotepids;
924 std::vector<int> Bcol2Ccol(B.ColMap().NumMyElements());
925 std::vector<int> Bimportcol2Ccol;
928 #ifdef ENABLE_MMM_TIMINGS 929 using Teuchos::TimeMonitor;
930 Teuchos::RCP<Teuchos::TimeMonitor> MM;
934 if(!call_FillComplete_on_result) {
935 #ifdef ENABLE_MMM_TIMINGS 936 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM General Multiply")));
938 rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,
false);
943 bool NewFlag=!C.IndicesAreLocal() && !C.IndicesAreGlobal();
946 int *C_rowptr, *C_colind;
948 C.ExtractCrsDataPointers(C_rowptr,C_colind,C_vals);
949 bool ExtractFailFlag=!C_rowptr || !C_colind || !C_vals;
952 if(!NewFlag && ExtractFailFlag){
953 #ifdef ENABLE_MMM_TIMINGS 954 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM General Multiply")));
956 rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,call_FillComplete_on_result);
961 #ifdef ENABLE_MMM_TIMINGS 962 if(NewFlag) MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix CMap")));
963 else MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Reuse CMap")));
969 EPETRA_CHK_ERR( aztecoo_and_ml_compatible_map_union<int_type>(B,*Bview.
importMatrix,mapunion,Cremotepids,Bcol2Ccol,Bimportcol2Ccol) );
970 EPETRA_CHK_ERR( C.ReplaceColMap(*mapunion) );
973 EPETRA_CHK_ERR( C.ReplaceColMap(B.ColMap()) );
974 for(i=0;i<colmap_B->NumMyElements();i++) Bcol2Ccol[i]=i;
978 EPETRA_CHK_ERR( Epetra_Util::GetRemotePIDs(*B.Importer(),Cremotepids));
982 #ifdef ENABLE_MMM_TIMINGS 983 if(NewFlag) MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Newmatrix Lookups")));
984 else MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM Reuse Lookups")));
993 if(colmap_B->SameAs(*colmap_C)){
995 for(i=0;i<colmap_B->NumMyElements();i++)
1000 for(i=0;i<colmap_B->NumMyElements();i++){
1001 Bcol2Ccol[i]=colmap_C->LID((int_type) colmap_B->GID64(i));
1002 if(Bcol2Ccol[i]==-1) EPETRA_CHK_ERR(-11);
1010 if(Bimportcol2Ccol[i]==-1) EPETRA_CHK_ERR(-12);
1015 #ifdef ENABLE_MMM_TIMINGS 1021 EPETRA_CHK_ERR(mult_A_B_newmatrix<int_type>(A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,Cremotepids,C));
1025 EPETRA_CHK_ERR(mult_A_B_reuse<int_type>(A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,C));
1037 int MatrixMatrix::mult_A_B(
const Epetra_CrsMatrix & A,
1039 const Epetra_CrsMatrix & B,
1041 Epetra_CrsMatrix& C,
1042 bool call_FillComplete_on_result){
1044 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES 1045 if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
1046 return Tmult_A_B<int>(A, Aview, B, Bview, C, call_FillComplete_on_result);
1050 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES 1051 if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
1052 return Tmult_A_B<long long>(A, Aview, B, Bview, C, call_FillComplete_on_result);
1056 throw "EpetraExt::MatrixMatrix::mult_A_B: GlobalIndices type unknown";
1063 template<
typename int_type>
1065 const Epetra_Vector & Dinv,
1066 const Epetra_CrsMatrix & A,
1067 const Epetra_CrsMatrix & B,
1069 std::vector<int> & Bcol2Ccol,
1070 std::vector<int> & Bimportcol2Ccol,
1071 Epetra_CrsMatrix& C){
1073 #ifdef ENABLE_MMM_TIMINGS 1074 using Teuchos::TimeMonitor;
1075 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Reuse SerialCore")));
1081 const Epetra_Map * colmap_C = &(C.ColMap());
1083 int m=A.NumMyRows();
1084 int n=colmap_C->NumMyElements();
1088 int *Arowptr, *Acolind;
1090 EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
1093 int *Browptr, *Bcolind;
1095 EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
1097 int *Irowptr=0, *Icolind=0;
1106 const double *Dvals = Dinv.Values();
1109 int *CSR_rowptr, *CSR_colind;
1111 EPETRA_CHK_ERR(C.ExtractCrsDataPointers(CSR_rowptr,CSR_colind,CSR_vals));
1118 std::vector<int> c_status(n, -1);
1121 int CSR_alloc=CSR_rowptr[m] - CSR_rowptr[0];
1122 int CSR_ip=0,OLD_ip=0;
1126 double Dval = Dvals[i];
1129 for(k=Browptr[i]; k<Browptr[i+1]; k++){
1131 double Bval = Bvals[k];
1132 if(Bval==0)
continue;
1133 int Ck=Bcol2Ccol[Bcolind[k]];
1136 c_status[Ck]=CSR_ip;
1137 CSR_colind[CSR_ip]=Ck;
1138 CSR_vals[CSR_ip]= Bvals[k];
1143 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
1145 double Aval = Avals[k];
1146 if(Aval==0)
continue;
1151 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
1152 int Cj=Bcol2Ccol[Bcolind[j]];
1154 if(c_status[Cj]<OLD_ip){
1156 if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-13);
1157 c_status[Cj]=CSR_ip;
1158 CSR_colind[CSR_ip]=Cj;
1159 CSR_vals[CSR_ip]= - omega * Dval * Aval * Bvals[j];
1163 CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Bvals[j];
1169 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
1170 int Cj=Bimportcol2Ccol[Icolind[j]];
1172 if(c_status[Cj]<OLD_ip){
1174 if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-14);
1175 c_status[Cj]=CSR_ip;
1176 CSR_colind[CSR_ip]=Cj;
1177 CSR_vals[CSR_ip]= - omega * Dval * Aval * Ivals[j];
1181 CSR_vals[c_status[Cj]]-=omega * Dval * Aval * Ivals[j];
1188 #ifdef ENABLE_MMM_TIMINGS 1189 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Reuse Final Sort")));
1192 Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
1201 template<
typename int_type>
1203 const Epetra_Vector & Dinv,
1204 const Epetra_CrsMatrix & A,
1205 const Epetra_CrsMatrix & B,
1207 std::vector<int> & Bcol2Ccol,
1208 std::vector<int> & Bimportcol2Ccol,
1209 std::vector<int>& Cremotepids,
1210 Epetra_CrsMatrix& C)
1212 #ifdef ENABLE_MMM_TIMINGS 1213 using Teuchos::TimeMonitor;
1214 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi NewMatrix SerialCore")));
1220 const Epetra_Map * colmap_C = &(C.ColMap());
1221 int NumMyDiagonals=0;
1223 int m=A.NumMyRows();
1224 int n=colmap_C->NumMyElements();
1228 int *Arowptr, *Acolind;
1230 EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
1233 int *Browptr, *Bcolind;
1235 EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
1238 const double *Dvals = Dinv.Values();
1240 int *Irowptr=0, *Icolind=0;
1250 C.ExpertMakeUniqueCrsGraphData();
1256 std::vector<int> c_status(n, -1);
1260 if(CSR_alloc < B.NumMyNonzeros()) CSR_alloc = B.NumMyNonzeros();
1261 int CSR_ip=0,OLD_ip=0;
1262 Epetra_IntSerialDenseVector & CSR_rowptr = C.ExpertExtractIndexOffset();
1263 Epetra_IntSerialDenseVector & CSR_colind = C.ExpertExtractIndices();
1264 double *& CSR_vals = C.ExpertExtractValues();
1266 CSR_rowptr.Resize(m+1);
1267 CSR_colind.Resize(CSR_alloc);
1271 std::vector<int> NumEntriesPerRow(m);
1275 bool found_diagonal=
false;
1276 CSR_rowptr[i]=CSR_ip;
1277 double Dval = Dvals[i];
1280 for(k=Browptr[i]; k<Browptr[i+1]; k++){
1282 double Bval = Bvals[k];
1283 if(Bval==0)
continue;
1284 int Ck=Bcol2Ccol[Bcolind[k]];
1287 c_status[Ck]=CSR_ip;
1288 CSR_colind[CSR_ip]=Ck;
1289 CSR_vals[CSR_ip]= Bvals[k];
1294 for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
1295 int Ak = Acolind[k];
1296 double Aval = Avals[k];
1297 if(Aval==0)
continue;
1302 for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
1303 int Cj=Bcol2Ccol[Bcolind[j]];
1305 if(Cj==i && !found_diagonal) {found_diagonal=
true; NumMyDiagonals++;}
1307 if(c_status[Cj]<OLD_ip){
1309 c_status[Cj]=CSR_ip;
1310 CSR_colind[CSR_ip]=Cj;
1311 CSR_vals[CSR_ip]= - omega * Dval* Aval * Bvals[j];
1315 CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Bvals[j];
1321 for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
1322 int Cj=Bimportcol2Ccol[Icolind[j]];
1324 if(Cj==i && !found_diagonal) {found_diagonal=
true; NumMyDiagonals++;}
1326 if(c_status[Cj]<OLD_ip){
1328 c_status[Cj]=CSR_ip;
1329 CSR_colind[CSR_ip]=Cj;
1330 CSR_vals[CSR_ip]= - omega * Dval * Aval * Ivals[j];
1334 CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Ivals[j];
1338 NumEntriesPerRow[i]=CSR_ip-CSR_rowptr[i];
1341 if(CSR_ip + n > CSR_alloc){
1344 CSR_colind.Resize(CSR_alloc);
1349 CSR_rowptr[m]=CSR_ip;
1351 #ifdef ENABLE_MMM_TIMINGS 1352 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi NewMatrix Final Sort")));
1356 Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
1358 #ifdef ENABLE_MMM_TIMINGS 1359 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi NewMatrix Fast IE")));
1363 Epetra_Import * Cimport=0;
1364 int *RemotePIDs = Cremotepids.size()?&Cremotepids[0]:0;
1366 int *ExportLIDs=0, *ExportPIDs=0;
1372 else if(B.Importer()) {
1374 NumExports = B.Importer()->NumExportIDs();
1375 ExportLIDs = B.Importer()->ExportLIDs();
1376 ExportPIDs = B.Importer()->ExportPIDs();
1379 if(!C.ColMap().SameAs(B.DomainMap()))
1380 Cimport =
new Epetra_Import(C.ColMap(),B.DomainMap(),Cremotepids.size(),RemotePIDs,NumExports,ExportLIDs,ExportPIDs);
1382 #ifdef ENABLE_MMM_TIMINGS 1383 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi NewMatrix ESFC")));
1387 C.ExpertStaticFillComplete(B.DomainMap(),A.RangeMap(),Cimport,0,NumMyDiagonals);
1397 template<
typename int_type>
1398 int MatrixMatrix::Tjacobi_A_B(
double omega,
1399 const Epetra_Vector & Dinv,
1400 const Epetra_CrsMatrix & A,
1402 const Epetra_CrsMatrix & B,
1404 Epetra_CrsMatrix& C,
1405 bool call_FillComplete_on_result){
1407 Epetra_Map* mapunion = 0;
1408 const Epetra_Map * colmap_B = &(B.ColMap());
1409 const Epetra_Map * colmap_C = &(C.ColMap());
1411 std::vector<int> Cremotepids;
1412 std::vector<int> Bcol2Ccol(B.ColMap().NumMyElements());
1413 std::vector<int> Bimportcol2Ccol;
1416 #ifdef ENABLE_MMM_TIMINGS 1417 using Teuchos::TimeMonitor;
1418 Teuchos::RCP<Teuchos::TimeMonitor> MM;
1422 if(!call_FillComplete_on_result) {
1423 #ifdef ENABLE_MMM_TIMINGS 1424 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi General Multiply")));
1426 throw std::runtime_error(
"jacobi_A_B_general not implemented");
1432 bool NewFlag=!C.IndicesAreLocal() && !C.IndicesAreGlobal();
1435 int *C_rowptr, *C_colind;
1437 C.ExtractCrsDataPointers(C_rowptr,C_colind,C_vals);
1438 bool ExtractFailFlag=!C_rowptr || !C_colind || !C_vals;
1441 if(!NewFlag && ExtractFailFlag){
1442 #ifdef ENABLE_MMM_TIMINGS 1443 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi General Multiply")));
1445 throw std::runtime_error(
"jacobi_A_B_general not implemented");
1450 #ifdef ENABLE_MMM_TIMINGS 1451 if(NewFlag) MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Newmatrix CMap")));
1452 else MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Reuse CMap")));
1458 EPETRA_CHK_ERR( aztecoo_and_ml_compatible_map_union<int_type>(B,*Bview.
importMatrix,mapunion,Cremotepids,Bcol2Ccol,Bimportcol2Ccol) );
1459 EPETRA_CHK_ERR( C.ReplaceColMap(*mapunion) );
1462 EPETRA_CHK_ERR( C.ReplaceColMap(B.ColMap()) );
1463 for(i=0;i<colmap_B->NumMyElements();i++) Bcol2Ccol[i]=i;
1467 EPETRA_CHK_ERR( Epetra_Util::GetRemotePIDs(*B.Importer(),Cremotepids));
1471 #ifdef ENABLE_MMM_TIMINGS 1472 if(NewFlag) MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Newmatrix Lookups")));
1473 else MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: Jacobi Reuse Lookups")));
1482 if(colmap_B->SameAs(*colmap_C)){
1484 for(i=0;i<colmap_B->NumMyElements();i++)
1489 for(i=0;i<colmap_B->NumMyElements();i++){
1490 Bcol2Ccol[i]=colmap_C->LID((int_type) colmap_B->GID64(i));
1491 if(Bcol2Ccol[i]==-1) EPETRA_CHK_ERR(-11);
1499 if(Bimportcol2Ccol[i]==-1) EPETRA_CHK_ERR(-12);
1508 EPETRA_CHK_ERR(jacobi_A_B_newmatrix<int_type>(omega,Dinv,A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,Cremotepids,C));
1512 EPETRA_CHK_ERR(jacobi_A_B_reuse<int_type>(omega,Dinv,A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,C));
1524 int MatrixMatrix::jacobi_A_B(
double omega,
1525 const Epetra_Vector & Dinv,
1526 const Epetra_CrsMatrix & A,
1528 const Epetra_CrsMatrix & B,
1530 Epetra_CrsMatrix& C,
1531 bool call_FillComplete_on_result)
1533 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES 1534 if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
1535 return Tjacobi_A_B<int>(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result);
1539 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES 1540 if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
1541 return Tjacobi_A_B<long long>(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result);
1545 throw "EpetraExt::MatrixMatrix::jacobi_A_B: GlobalIndices type unknown";
1553 template<
typename int_type>
1558 #ifdef ENABLE_MMM_TIMINGS 1559 using Teuchos::TimeMonitor;
1560 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM-T Transpose")));
1567 #ifdef ENABLE_MMM_TIMINGS 1568 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM-T AB-core")));
1570 RCP<Epetra_CrsMatrix> Ctemp;
1573 bool needs_final_export = Atransview.
origMatrix->Exporter() != 0;
1574 if(needs_final_export)
1575 Ctemp = rcp(
new Epetra_CrsMatrix(Copy,Atransview.
origMatrix->RowMap(),Bview.
origMatrix->ColMap(),0));
1577 EPETRA_CHK_ERR( C.ReplaceColMap(Bview.
origMatrix->ColMap()) );
1578 Ctemp = rcp(&C,
false);
1582 std::vector<int> Bcol2Ccol(Bview.
origMatrix->NumMyCols());
1583 for(
int i=0; i<Bview.
origMatrix->NumMyCols(); i++)
1585 std::vector<int> Bimportcol2Ccol,Cremotepids;
1587 EPETRA_CHK_ERR( Epetra_Util::GetRemotePIDs(*Bview.
origMatrix->Importer(),Cremotepids));
1590 Bcol2Ccol,Bimportcol2Ccol,Cremotepids,
1596 #ifdef ENABLE_MMM_TIMINGS 1597 MM = Teuchos::rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"EpetraExt: MMM-T ESFC")));
1600 if(needs_final_export)
1601 C.FusedExport(*Ctemp,*Ctemp->Exporter(),&Bview.
origMatrix->DomainMap(),&Atransview.
origMatrix->RangeMap(),
false);
1613 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES 1614 if(Atransview.
origMatrix->RowMap().GlobalIndicesInt() && Bview.
origMatrix->RowMap().GlobalIndicesInt()) {
1615 return Tmult_AT_B_newmatrix<int>(Atransview,Bview,C);
1619 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES 1620 if(Atransview.
origMatrix->RowMap().GlobalIndicesLongLong() && Bview.
origMatrix->RowMap().GlobalIndicesLongLong()) {
1621 return Tmult_AT_B_newmatrix<long long>(Atransview,Bview,C);
1625 throw "EpetraExt::MatrixMatrix::mult_AT_B_newmatrix: GlobalIndices type unknown";
LightweightCrsMatrix * importMatrix
int mult_A_B_newmatrix(const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, const CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, std::vector< int > &Cremotepids, Epetra_CrsMatrix &C)
std::vector< int > targetMapToOrigRow
void MyGlobalElementsPtr(int *&MyGlobalElementList) const
const Epetra_Map * rowMap
long long GID64(int LID) const
int mult_A_B_reuse(const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, Epetra_CrsMatrix &C)
std::vector< int > colind_
std::vector< int > ExportPIDs_
const Epetra_CrsMatrix * origMatrix
EpetraExt::BlockCrsMatrix: A class for constructing a distributed block matrix.
void resize_doubles(int nold, int nnew, double *&d)
std::vector< int > targetMapToImportRow
int NumMyElements() const
static int C_estimate_nnz(const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B)
int jacobi_A_B_reuse(double omega, const Epetra_Vector &Dinv, const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, Epetra_CrsMatrix &C)
std::vector< double > vals_
int aztecoo_and_ml_compatible_map_union(const Epetra_CrsMatrix &B, const LightweightCrsMatrix &Bimport, Epetra_Map *&unionmap, std::vector< int > &Cremotepids, std::vector< int > &Bcols2Ccols, std::vector< int > &Icols2Ccols)
int jacobi_A_B_newmatrix(double omega, const Epetra_Vector &Dinv, const Epetra_CrsMatrix &A, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, std::vector< int > &Bcol2Ccol, std::vector< int > &Bimportcol2Ccol, std::vector< int > &Cremotepids, Epetra_CrsMatrix &C)
std::vector< int > ColMapOwningPIDs_
std::vector< int > ExportLIDs_
std::vector< int > rowptr_
const Epetra_Map * colMap
int mult_A_B_general(const Epetra_CrsMatrix &A, CrsMatrixStruct &Aview, const Epetra_CrsMatrix &B, CrsMatrixStruct &Bview, Epetra_CrsMatrix &C, bool call_FillComplete_on_result)