MueLu  Version of the Day
MueLu_AlgebraicPermutationStrategy_def.hpp
Go to the documentation of this file.
1 /*
2  * MueLu_AlgebraicPermutationStrategy_def.hpp
3  *
4  * Created on: Feb 20, 2013
5  * Author: tobias
6  */
7 
8 #ifndef MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
9 #define MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
10 
11 #include <queue>
12 
13 #include <Xpetra_MultiVector.hpp>
14 #include <Xpetra_Matrix.hpp>
15 #include <Xpetra_CrsGraph.hpp>
16 #include <Xpetra_Vector.hpp>
17 #include <Xpetra_VectorFactory.hpp>
18 #include <Xpetra_CrsMatrixWrap.hpp>
19 #include <Xpetra_Export.hpp>
20 #include <Xpetra_ExportFactory.hpp>
21 #include <Xpetra_Import.hpp>
22 #include <Xpetra_ImportFactory.hpp>
23 #include <Xpetra_MatrixMatrix.hpp>
24 
25 #include "MueLu_Utilities.hpp"
27 
28 namespace MueLu {
29 
30  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
31  void AlgebraicPermutationStrategy<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BuildPermutation(const Teuchos::RCP<Matrix> & A, const Teuchos::RCP<const Map> permRowMap, Level & currentLevel, const FactoryBase* genFactory) const {
32 
33  const Teuchos::RCP< const Teuchos::Comm< int > > comm = A->getRowMap()->getComm();
34  int numProcs = comm->getSize();
35  int myRank = comm->getRank();
36 
37  size_t nDofsPerNode = 1;
38  if (A->IsView("stridedMaps")) {
39  Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps");
40  nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize();
41  }
42 
43  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidates;
44  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > keepDiagonalEntries;
45  std::vector<Scalar> Weights;
46 
47  // loop over all local rows in matrix A and keep diagonal entries if corresponding
48  // matrix rows are not contained in permRowMap
49  for (size_t row = 0; row < A->getRowMap()->getNodeNumElements(); row++) {
50  GlobalOrdinal grow = A->getRowMap()->getGlobalElement(row);
51 
52  if(permRowMap->isNodeGlobalElement(grow) == true) continue;
53 
54  size_t nnz = A->getNumEntriesInLocalRow(row);
55 
56  // extract local row information from matrix
57  Teuchos::ArrayView<const LocalOrdinal> indices;
58  Teuchos::ArrayView<const Scalar> vals;
59  A->getLocalRowView(row, indices, vals);
60 
61  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
62 
63  // find column entry with max absolute value
64  GlobalOrdinal gMaxValIdx = 0;
65  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
66  MT norm1 = Teuchos::ScalarTraits<MT>::zero ();
67  MT maxVal = Teuchos::ScalarTraits<MT>::zero ();
68  for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
69  norm1 += Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
70  if(Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]) > maxVal) {
71  maxVal = Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
72  gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
73  }
74  }
75 
76  if(grow == gMaxValIdx) // only keep row/col pair if it's diagonal dominant!!!
77  keepDiagonalEntries.push_back(std::make_pair(grow,grow));
78  }
79 
81  // handle rows that are marked to be relevant for permutations
82  for (size_t row = 0; row < permRowMap->getNodeNumElements(); row++) {
83  GlobalOrdinal grow = permRowMap->getGlobalElement(row);
84  LocalOrdinal lArow = A->getRowMap()->getLocalElement(grow);
85  size_t nnz = A->getNumEntriesInLocalRow(lArow);
86 
87  // extract local row information from matrix
88  Teuchos::ArrayView<const LocalOrdinal> indices;
89  Teuchos::ArrayView<const Scalar> vals;
90  A->getLocalRowView(lArow, indices, vals);
91 
92  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
93 
94  // find column entry with max absolute value
95  GlobalOrdinal gMaxValIdx = 0;
96  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
97  MT norm1 = Teuchos::ScalarTraits<MT>::zero ();
98  MT maxVal = Teuchos::ScalarTraits<MT>::zero ();
99  for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
100  norm1 += Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
101  if(Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]) > maxVal) {
102  maxVal = Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
103  gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
104  }
105  }
106 
107  if(maxVal > Teuchos::ScalarTraits<MT>::zero ()) { // keep only max Entries \neq 0.0
108  permutedDiagCandidates.push_back(std::make_pair(grow,gMaxValIdx));
109  Weights.push_back(maxVal/(norm1*Teuchos::as<Scalar>(nnz)));
110  } else {
111  std::cout << "ATTENTION: row " << grow << " has only zero entries -> singular matrix!" << std::endl;
112  }
113 
114  }
115 
116  // sort Weights in descending order
117  std::vector<int> permutation;
118  sortingPermutation(Weights,permutation);
119 
120  // create new vector with exactly one possible entry for each column
121 
122  // each processor which requests the global column id gcid adds 1 to gColVec
123  // gColVec will be summed up over all processors and communicated to gDomVec
124  // which is based on the non-overlapping domain map of A.
125 
126  Teuchos::RCP<Vector> gColVec = VectorFactory::Build(A->getColMap());
127  Teuchos::RCP<Vector> gDomVec = VectorFactory::Build(A->getDomainMap());
128  gColVec->putScalar(0.0);
129  gDomVec->putScalar(0.0);
130 
131  // put in all keep diagonal entries
132  for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = keepDiagonalEntries.begin(); p != keepDiagonalEntries.end(); ++p) {
133  gColVec->sumIntoGlobalValue((*p).second,1.0);
134  }
135 
136  Teuchos::RCP<Export> exporter = ExportFactory::Build(gColVec->getMap(), gDomVec->getMap());
137  gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD); // communicate blocked gcolids to all procs
138  gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT);
139 
140  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidatesFiltered; // TODO reserve memory
141  std::map<GlobalOrdinal, Scalar> gColId2Weight;
142 
143  Teuchos::ArrayRCP< Scalar > ddata = gColVec->getDataNonConst(0);
144  for(size_t i = 0; i < permutedDiagCandidates.size(); ++i) {
145  // loop over all candidates
146  std::pair<GlobalOrdinal, GlobalOrdinal> pp = permutedDiagCandidates[permutation[i]];
147  GlobalOrdinal grow = pp.first;
148  GlobalOrdinal gcol = pp.second;
149 
150  LocalOrdinal lcol = A->getColMap()->getLocalElement(gcol);
151  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
152  if(Teuchos::ScalarTraits<Scalar>::real (ddata[lcol]) > Teuchos::ScalarTraits<MT>::zero ()){
153  continue; // skip lcol: column already handled by another row
154  }
155 
156  // mark column as already taken
157  ddata[lcol] += Teuchos::ScalarTraits<Scalar>::one ();
158 
159  permutedDiagCandidatesFiltered.push_back(std::make_pair(grow,gcol));
160  gColId2Weight[gcol] = Weights[permutation[i]];
161  }
162 
163  // communicate how often each column index is requested by the different procs
164  gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD);
165  gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT); // probably not needed // TODO check me
166 
167  //*****************************************************************************************
168  // first communicate ALL global ids of column indices which are requested by more
169  // than one proc to all other procs
170  // detect which global col indices are requested by more than one proc
171  // and store them in the multipleColRequests vector
172  std::vector<GlobalOrdinal> multipleColRequests; // store all global column indices from current processor that are also
173  // requested by another processor. This is possible, since they are stored
174  // in gDomVec which is based on the nonoverlapping domain map. That is, each
175  // global col id is handled by exactly one proc.
176  std::queue<GlobalOrdinal> unusedColIdx; // unused column indices on current processor
177 
178  for(size_t sz = 0; sz<gDomVec->getLocalLength(); ++sz) {
179  Teuchos::ArrayRCP< const Scalar > arrDomVec = gDomVec->getData(0);
180  //
181  // FIXME (mfh 30 Oct 2015) I _think_ it's OK to check just the
182  // real part, because this is a count. (Shouldn't MueLu use
183  // mag_type instead of Scalar here to save space?)
184  //
185  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
186  if(Teuchos::ScalarTraits<Scalar>::real (arrDomVec[sz]) > Teuchos::ScalarTraits<MT>::one ()) {
187  multipleColRequests.push_back(gDomVec->getMap()->getGlobalElement(sz));
188  } else if(Teuchos::ScalarTraits<Scalar>::real (arrDomVec[sz]) ==
189  Teuchos::ScalarTraits<MT>::zero ()) {
190  unusedColIdx.push(gDomVec->getMap()->getGlobalElement(sz));
191  }
192  }
193 
194  // communicate the global number of column indices which are requested by more than one proc
195  LocalOrdinal localMultColRequests = Teuchos::as<LocalOrdinal>(multipleColRequests.size());
196  LocalOrdinal globalMultColRequests = 0;
197 
198  // sum up all entries in multipleColRequests over all processors
199  MueLu_sumAll(gDomVec->getMap()->getComm(), (LocalOrdinal)localMultColRequests, globalMultColRequests);
200 
201  if(globalMultColRequests > 0) {
202  // special handling: two processors request the same global column id.
203  // decide which processor gets it
204 
205  // distribute number of multipleColRequests to all processors
206  // each processor stores how many column ids for exchange are handled by the cur proc
207  std::vector<GlobalOrdinal> numMyMultColRequests(numProcs,0);
208  std::vector<GlobalOrdinal> numGlobalMultColRequests(numProcs,0);
209  numMyMultColRequests[myRank] = localMultColRequests;
210  Teuchos::reduceAll(*comm,Teuchos::REDUCE_MAX,numProcs,&numMyMultColRequests[0],&numGlobalMultColRequests[0]);
211 
212  // communicate multipleColRequests entries to all processors
213  int nMyOffset = 0;
214  for (int i=0; i<myRank-1; i++)
215  nMyOffset += numGlobalMultColRequests[i]; // calculate offset to store the weights on the corresponding place in procOverlappingWeights
216 
217  GlobalOrdinal zero=0;
218  std::vector<GlobalOrdinal> procMultRequestedColIds(globalMultColRequests,zero);
219  std::vector<GlobalOrdinal> global_procMultRequestedColIds(globalMultColRequests,zero);
220 
221  // loop over all local column GIDs that are also requested by other procs
222  for(size_t i = 0; i < multipleColRequests.size(); i++) {
223  procMultRequestedColIds[nMyOffset + i] = multipleColRequests[i]; // all weights are > 0 ?
224  }
225 
226  // template ordinal, package (double)
227  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(globalMultColRequests), &procMultRequestedColIds[0], &global_procMultRequestedColIds[0]);
228 
229  // loop over global_procOverlappingWeights and eliminate wrong entries...
230  for (size_t k = 0; k<global_procMultRequestedColIds.size(); k++) {
231  GlobalOrdinal globColId = global_procMultRequestedColIds[k];
232 
233  std::vector<Scalar> MyWeightForColId(numProcs,0);
234  std::vector<Scalar> GlobalWeightForColId(numProcs,0);
235 
236  if(gColVec->getMap()->isNodeGlobalElement(globColId)) {
237  MyWeightForColId[myRank] = gColId2Weight[globColId];
238  } else {
239  MyWeightForColId[myRank] = 0.0;
240  }
241 
242  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &MyWeightForColId[0], &GlobalWeightForColId[0]);
243 
244  if(gColVec->getMap()->isNodeGlobalElement(globColId)) {
245  // note: 2 procs could have the same weight for a column index.
246  // pick the first one.
247  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
248  MT winnerValue = 0.0;
249  int winnerProcRank = 0;
250  for (int proc = 0; proc < numProcs; proc++) {
251  if(Teuchos::ScalarTraits<Scalar>::real (GlobalWeightForColId[proc]) > winnerValue) {
252  winnerValue = Teuchos::ScalarTraits<Scalar>::real (GlobalWeightForColId[proc]);
253  winnerProcRank = proc;
254  }
255  }
256 
257  // winnerProcRank is the winner for handling globColId.
258  // winnerProcRank is unique (even if two procs have the same weight for a column index)
259 
260  if(myRank != winnerProcRank) {
261  // remove corresponding entry from permutedDiagCandidatesFiltered
262  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = permutedDiagCandidatesFiltered.begin();
263  while(p != permutedDiagCandidatesFiltered.end() )
264  {
265  if((*p).second == globColId)
266  p = permutedDiagCandidatesFiltered.erase(p);
267  else
268  p++;
269  }
270  }
271 
272  } // end if isNodeGlobalElement
273  } // end loop over global_procOverlappingWeights and eliminate wrong entries...
274  } // end if globalMultColRequests > 0
275 
276  // put together all pairs:
277  //size_t sizeRowColPairs = keepDiagonalEntries.size() + permutedDiagCandidatesFiltered.size();
278  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > RowColPairs;
279  RowColPairs.insert( RowColPairs.end(), keepDiagonalEntries.begin(), keepDiagonalEntries.end());
280  RowColPairs.insert( RowColPairs.end(), permutedDiagCandidatesFiltered.begin(), permutedDiagCandidatesFiltered.end());
281 
282 #ifdef DEBUG_OUTPUT
283  //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
284  // plausibility check
285  gColVec->putScalar(0.0);
286  gDomVec->putScalar(0.0);
287  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator pl = RowColPairs.begin();
288  while(pl != RowColPairs.end() )
289  {
290  //GlobalOrdinal ik = (*pl).first;
291  GlobalOrdinal jk = (*pl).second;
292 
293  gColVec->sumIntoGlobalValue(jk,1.0);
294  pl++;
295  }
296  gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD);
297  for(size_t sz = 0; sz<gDomVec->getLocalLength(); ++sz) {
298  Teuchos::ArrayRCP< const Scalar > arrDomVec = gDomVec->getData(0);
299  if(arrDomVec[sz] > 1.0) {
300  GetOStream(Runtime0) << "RowColPairs has multiple column [" << sz << "]=" << arrDomVec[sz] << std::endl;
301  } else if(arrDomVec[sz] == 0.0) {
302  GetOStream(Runtime0) << "RowColPairs has empty column [" << sz << "]=" << arrDomVec[sz] << std::endl;
303  }
304  }
305  //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
306 #endif
307 
309  // assumption: on each processor RowColPairs now contains
310  // a valid set of (row,column) pairs, where the row entries
311  // are a subset of the processor's rows and the column entries
312  // are unique throughout all processors.
313  // Note: the RowColPairs are only defined for a subset of all rows,
314  // so there might be rows without an entry in RowColPairs.
315  // It can be, that some rows seem to be missing in RowColPairs, since
316  // the entry in that row with maximum absolute value has been reserved
317  // by another row already (e.g. as already diagonal dominant row outside
318  // of perRowMap).
319  // In fact, the RowColPairs vector only defines the (row,column) pairs
320  // that will be definitely moved to the diagonal after permutation.
321 
322 #ifdef DEBUG_OUTPUT
323  // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p) {
324  // std::cout << "proc: " << myRank << " r/c: " << (*p).first << "/" << (*p).second << std::endl;
325  // }
326  // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p)
327  // {
329  // std::cout << (*p).first +1 << " " << (*p).second+1 << std::endl;
330  // }
331  // std::cout << "\n";
332 #endif
333 
334  // vectors to store permutation information
335  Teuchos::RCP<Vector> Pperm = VectorFactory::Build(A->getRowMap());
336  Teuchos::RCP<Vector> Qperm = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
337  Teuchos::RCP<Vector> lQperm = VectorFactory::Build(A->getColMap()); // local variant (based on column map)
338 
339  Teuchos::ArrayRCP< Scalar > PpermData = Pperm->getDataNonConst(0);
340  Teuchos::ArrayRCP< Scalar > QpermData = Qperm->getDataNonConst(0);
341 
342  Pperm->putScalar(0.0);
343  Qperm->putScalar(0.0);
344  lQperm->putScalar(0.0);
345 
346  // setup exporter for Qperm
347  Teuchos::RCP<Export> QpermExporter = ExportFactory::Build(lQperm->getMap(), Qperm->getMap());
348 
349  Teuchos::RCP<Vector> RowIdStatus = VectorFactory::Build(A->getRowMap());
350  Teuchos::RCP<Vector> ColIdStatus = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
351  Teuchos::RCP<Vector> lColIdStatus = VectorFactory::Build(A->getColMap()); // local variant (based on column map)
352  Teuchos::RCP<Vector> ColIdUsed = VectorFactory::Build(A->getDomainMap()); // mark column ids to be already in use
353  Teuchos::ArrayRCP< Scalar > RowIdStatusArray = RowIdStatus->getDataNonConst(0);
354  Teuchos::ArrayRCP< Scalar > ColIdStatusArray = ColIdStatus->getDataNonConst(0);
355  Teuchos::ArrayRCP< Scalar > lColIdStatusArray = lColIdStatus->getDataNonConst(0);
356  Teuchos::ArrayRCP< Scalar > ColIdUsedArray = ColIdUsed->getDataNonConst(0); // not sure about this
357  RowIdStatus->putScalar(0.0);
358  ColIdStatus->putScalar(0.0);
359  lColIdStatus->putScalar(0.0);
360  ColIdUsed->putScalar(0.0); // no column ids are used
361 
362  // count wide-range permutations
363  // a wide-range permutation is defined as a permutation of rows/columns which do not
364  // belong to the same node
365  LocalOrdinal lWideRangeRowPermutations = 0;
366  GlobalOrdinal gWideRangeRowPermutations = 0;
367  LocalOrdinal lWideRangeColPermutations = 0;
368  GlobalOrdinal gWideRangeColPermutations = 0;
369 
370  // run 1: mark all "identity" permutations
371  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = RowColPairs.begin();
372  while(p != RowColPairs.end() )
373  {
374  GlobalOrdinal ik = (*p).first;
375  GlobalOrdinal jk = (*p).second;
376 
377  LocalOrdinal lik = A->getRowMap()->getLocalElement(ik);
378  LocalOrdinal ljk = A->getColMap()->getLocalElement(jk);
379 
380  if(RowIdStatusArray[lik] == 0.0) {
381  RowIdStatusArray[lik] = 1.0; // use this row id
382  lColIdStatusArray[ljk] = 1.0; // use this column id
383  Pperm->replaceLocalValue(lik, ik);
384  lQperm->replaceLocalValue(ljk, ik); // use column map
385  ColIdUsed->replaceGlobalValue(ik,1.0); // ik is now used
386  p = RowColPairs.erase(p);
387 
388  // detect wide range permutations
389  if(floor(ik/nDofsPerNode) != floor(jk/nDofsPerNode)) {
390  lWideRangeColPermutations++;
391  }
392  }
393  else
394  p++;
395  }
396 
397  // communicate column map -> domain map
398  Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX);
399  ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX);
400 
401  // plausibility check
402  if(RowColPairs.size()>0) GetOStream(Warnings0) << "MueLu::PermutationFactory: There are Row/Col pairs left!!!" << std::endl; // TODO fix me
403 
404  // close Pperm
405 
406  // count, how many row permutations are missing on current proc
407  size_t cntFreeRowIdx = 0;
408  std::queue<GlobalOrdinal> qFreeGRowIdx; // store global row ids of "free" rows
409  for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
410  if(RowIdStatusArray[lik] == 0.0) {
411  cntFreeRowIdx++;
412  qFreeGRowIdx.push(RowIdStatus->getMap()->getGlobalElement(lik));
413  }
414  }
415 
416  // fix Pperm
417  for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
418  if(RowIdStatusArray[lik] == 0.0) {
419  RowIdStatusArray[lik] = 1.0; // use this row id
420  Pperm->replaceLocalValue(lik, qFreeGRowIdx.front());
421  // detect wide range permutations
422  if(floor(qFreeGRowIdx.front()/nDofsPerNode) != floor(RowIdStatus->getMap()->getGlobalElement(lik)/nDofsPerNode)) {
423  lWideRangeRowPermutations++;
424  }
425  qFreeGRowIdx.pop();
426  }
427  }
428 
429  // close Qperm (free permutation entries in Qperm)
430  size_t cntFreeColIdx = 0;
431  std::queue<GlobalOrdinal> qFreeGColIdx; // store global column ids of "free" available columns
432  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
433  if(ColIdStatusArray[ljk] == 0.0) {
434  cntFreeColIdx++;
435  qFreeGColIdx.push(ColIdStatus->getMap()->getGlobalElement(ljk));
436  }
437  }
438 
439  size_t cntUnusedColIdx = 0;
440  std::queue<GlobalOrdinal> qUnusedGColIdx; // store global column ids of "free" available columns
441  for (size_t ljk = 0; ljk < ColIdUsed->getLocalLength(); ++ljk) {
442  if(ColIdUsedArray[ljk] == 0.0) {
443  cntUnusedColIdx++;
444  qUnusedGColIdx.push(ColIdUsed->getMap()->getGlobalElement(ljk));
445  }
446  }
447 
448  // fix Qperm with local entries
449  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
450  // stop if no (local) unused column idx are left
451  if(cntUnusedColIdx == 0) break;
452 
453  if(ColIdStatusArray[ljk] == 0.0) {
454  ColIdStatusArray[ljk] = 1.0; // use this row id
455  Qperm->replaceLocalValue(ljk, qUnusedGColIdx.front()); // loop over ColIdStatus (lives on domain map)
456  ColIdUsed->replaceGlobalValue(qUnusedGColIdx.front(),1.0); // ljk is now used, too
457  // detect wide range permutations
458  if(floor(qUnusedGColIdx.front()/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) {
459  lWideRangeColPermutations++;
460  }
461  qUnusedGColIdx.pop();
462  cntUnusedColIdx--;
463  cntFreeColIdx--;
464  }
465  }
466 
467  //Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX); // no export necessary, since changes only locally
468  //ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX);
469 
470  // count, how many unused column idx are needed on current processor
471  // to complete Qperm
472  cntFreeColIdx = 0;
473  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { // TODO avoid this loop
474  if(ColIdStatusArray[ljk] == 0.0) {
475  cntFreeColIdx++;
476  }
477  }
478 
479  GlobalOrdinal global_cntFreeColIdx = 0;
480  LocalOrdinal local_cntFreeColIdx = cntFreeColIdx;
481  MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntFreeColIdx), global_cntFreeColIdx);
482 #ifdef DEBUG_OUTPUT
483  std::cout << "global # of empty column idx entries in Qperm: " << global_cntFreeColIdx << std::endl;
484 #endif
485 
486  // avoid global communication if possible
487  if(global_cntFreeColIdx > 0) {
488 
489  // 1) count how many unused column ids are left
490  GlobalOrdinal global_cntUnusedColIdx = 0;
491  LocalOrdinal local_cntUnusedColIdx = cntUnusedColIdx;
492  MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntUnusedColIdx), global_cntUnusedColIdx);
493 #ifdef DEBUG_OUTPUT
494  std::cout << "global # of unused column idx: " << global_cntUnusedColIdx << std::endl;
495 #endif
496 
497  // 2) communicate how many unused column ids are available on procs
498  std::vector<LocalOrdinal> local_UnusedColIdxOnProc (numProcs);
499  std::vector<LocalOrdinal> global_UnusedColIdxOnProc(numProcs);
500  local_UnusedColIdxOnProc[myRank] = local_cntUnusedColIdx;
501  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &local_UnusedColIdxOnProc[0], &global_UnusedColIdxOnProc[0]);
502 
503 #ifdef DEBUG_OUTPUT
504  std::cout << "PROC " << myRank << " global num unused indices per proc: ";
505  for (size_t ljk = 0; ljk < global_UnusedColIdxOnProc.size(); ++ljk) {
506  std::cout << " " << global_UnusedColIdxOnProc[ljk];
507  }
508  std::cout << std::endl;
509 #endif
510 
511  // 3) build array of length global_cntUnusedColIdx to globally replicate unused column idx
512  std::vector<GlobalOrdinal> local_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
513  std::vector<GlobalOrdinal> global_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
514  GlobalOrdinal global_cntUnusedColIdxStartIter = 0;
515  for(int proc=0; proc<myRank; proc++) {
516  global_cntUnusedColIdxStartIter += global_UnusedColIdxOnProc[proc];
517  }
518  for(GlobalOrdinal k = global_cntUnusedColIdxStartIter; k < global_cntUnusedColIdxStartIter+local_cntUnusedColIdx; k++) {
519  local_UnusedColIdxVector[k] = qUnusedGColIdx.front();
520  qUnusedGColIdx.pop();
521  }
522  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(global_cntUnusedColIdx), &local_UnusedColIdxVector[0], &global_UnusedColIdxVector[0]);
523 #ifdef DEBUG_OUTPUT
524  std::cout << "PROC " << myRank << " global UnusedGColIdx: ";
525  for (size_t ljk = 0; ljk < global_UnusedColIdxVector.size(); ++ljk) {
526  std::cout << " " << global_UnusedColIdxVector[ljk];
527  }
528  std::cout << std::endl;
529 #endif
530 
531 
532 
533  // 4) communicate, how many column idx are needed on each processor
534  // to complete Qperm
535  std::vector<LocalOrdinal> local_EmptyColIdxOnProc (numProcs);
536  std::vector<LocalOrdinal> global_EmptyColIdxOnProc(numProcs);
537  local_EmptyColIdxOnProc[myRank] = local_cntFreeColIdx;
538  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &local_EmptyColIdxOnProc[0], &global_EmptyColIdxOnProc[0]);
539 
540 #ifdef DEBUG_OUTPUT
541  std::cout << "PROC " << myRank << " global num of needed column indices: ";
542  for (size_t ljk = 0; ljk < global_EmptyColIdxOnProc.size(); ++ljk) {
543  std::cout << " " << global_EmptyColIdxOnProc[ljk];
544  }
545  std::cout << std::endl;
546 #endif
547 
548  // 5) determine first index in global_UnusedColIdxVector for unused column indices,
549  // that are marked to be used by this processor
550  GlobalOrdinal global_UnusedColStartIdx = 0;
551  for(int proc=0; proc<myRank; proc++) {
552  global_UnusedColStartIdx += global_EmptyColIdxOnProc[proc];
553  }
554 
555 #ifdef DEBUG_OUTPUT
556  GetOStream(Statistics0) << "PROC " << myRank << " is allowd to use the following column gids: ";
557  for(GlobalOrdinal k = global_UnusedColStartIdx; k < global_UnusedColStartIdx + Teuchos::as<GlobalOrdinal>(cntFreeColIdx); k++) {
558  GetOStream(Statistics0) << global_UnusedColIdxVector[k] << " ";
559  }
560  GetOStream(Statistics0) << std::endl;
561 #endif
562 
563  // 6.) fix Qperm with global entries
564  GlobalOrdinal array_iter = 0;
565  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
566 
567  if(ColIdStatusArray[ljk] == 0.0) {
568  ColIdStatusArray[ljk] = 1.0; // use this row id
569  Qperm->replaceLocalValue(ljk, global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]);
570  ColIdUsed->replaceGlobalValue(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter],1.0);
571  // detect wide range permutations
572  if(floor(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) {
573  lWideRangeColPermutations++;
574  }
575  array_iter++;
576  //cntUnusedColIdx--; // check me
577  }
578  }
579  } // end if global_cntFreeColIdx > 0
581 
582 
583  // create new empty Matrix
584  Teuchos::RCP<CrsMatrixWrap> permPTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1,Xpetra::StaticProfile));
585  Teuchos::RCP<CrsMatrixWrap> permQTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1,Xpetra::StaticProfile));
586 
587  for(size_t row=0; row<A->getNodeNumRows(); row++) {
588  // FIXME (mfh 30 Oct 2015): Teuchos::as doesn't know how to
589  // convert from complex Scalar to GO, so we have to take the real
590  // part first. I think that's the right thing to do in this case.
591  Teuchos::ArrayRCP<GlobalOrdinal> indoutP(1,Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(PpermData[row]))); // column idx for Perm^T
592  Teuchos::ArrayRCP<GlobalOrdinal> indoutQ(1,Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(QpermData[row]))); // column idx for Qperm
593  Teuchos::ArrayRCP<Scalar> valout(1,Teuchos::ScalarTraits<Scalar>::one());
594  permPTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutP.view(0,indoutP.size()), valout.view(0,valout.size()));
595  permQTmatrix->insertGlobalValues (A->getRowMap()->getGlobalElement(row), indoutQ.view(0,indoutQ.size()), valout.view(0,valout.size()));
596  }
597 
598  permPTmatrix->fillComplete();
599  permQTmatrix->fillComplete();
600 
601  Teuchos::RCP<Matrix> permPmatrix = Utilities::Transpose(*permPTmatrix, true);
602 
603  for(size_t row=0; row<permPTmatrix->getNodeNumRows(); row++) {
604  if(permPTmatrix->getNumEntriesInLocalRow(row) != 1)
605  GetOStream(Warnings0) <<"#entries in row " << row << " of permPTmatrix is " << permPTmatrix->getNumEntriesInLocalRow(row) << std::endl;
606  if(permPmatrix->getNumEntriesInLocalRow(row) != 1)
607  GetOStream(Warnings0) <<"#entries in row " << row << " of permPmatrix is " << permPmatrix->getNumEntriesInLocalRow(row) << std::endl;
608  if(permQTmatrix->getNumEntriesInLocalRow(row) != 1)
609  GetOStream(Warnings0) <<"#entries in row " << row << " of permQmatrix is " << permQTmatrix->getNumEntriesInLocalRow(row) << std::endl;
610  }
611 
612  // build permP * A * permQT
613  Teuchos::RCP<Matrix> ApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*A, false, *permQTmatrix, false, GetOStream(Statistics2));
614  Teuchos::RCP<Matrix> permPApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*permPmatrix, false, *ApermQt, false, GetOStream(Statistics2));
615 
616  /*
617  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("A.mat", *A);
618  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permP.mat", *permPmatrix);
619  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permQt.mat", *permQTmatrix);
620  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permPApermQt.mat", *permPApermQt);
621  */
622  // build scaling matrix
623  Teuchos::RCP<Vector> diagVec = VectorFactory::Build(permPApermQt->getRowMap(),true);
624  Teuchos::RCP<Vector> invDiagVec = VectorFactory::Build(permPApermQt->getRowMap(),true);
625  Teuchos::ArrayRCP< const Scalar > diagVecData = diagVec->getData(0);
626  Teuchos::ArrayRCP< Scalar > invDiagVecData = invDiagVec->getDataNonConst(0);
627 
628  permPApermQt->getLocalDiagCopy(*diagVec);
629  for(size_t i = 0; i<diagVec->getMap()->getNodeNumElements(); ++i) {
630  if(diagVecData[i] != 0.0)
631  invDiagVecData[i] = Teuchos::ScalarTraits<Scalar>::one () / diagVecData[i];
632  else {
633  invDiagVecData[i] = Teuchos::ScalarTraits<Scalar>::one ();
634  GetOStream(Statistics0) << "MueLu::PermutationFactory: found zero on diagonal in row " << i << std::endl;
635  }
636  }
637 
638  Teuchos::RCP<CrsMatrixWrap> diagScalingOp = Teuchos::rcp(new CrsMatrixWrap(permPApermQt->getRowMap(),1,Xpetra::StaticProfile));
639 
640  for(size_t row=0; row<A->getNodeNumRows(); row++) {
641  Teuchos::ArrayRCP<GlobalOrdinal> indout(1,permPApermQt->getRowMap()->getGlobalElement(row)); // column idx for Perm^T
642  Teuchos::ArrayRCP<Scalar> valout(1,invDiagVecData[row]);
643  diagScalingOp->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indout.view(0,indout.size()), valout.view(0,valout.size()));
644  }
645  diagScalingOp->fillComplete();
646 
647  Teuchos::RCP<Matrix> scaledA = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*diagScalingOp, false, *permPApermQt, false, GetOStream(Statistics2));
648  currentLevel.Set("A", Teuchos::rcp_dynamic_cast<Matrix>(scaledA), genFactory/*this*/);
649 
650  currentLevel.Set("permA", Teuchos::rcp_dynamic_cast<Matrix>(permPApermQt), genFactory/*this*/); // TODO careful with this!!!
651  currentLevel.Set("permP", Teuchos::rcp_dynamic_cast<Matrix>(permPmatrix), genFactory/*this*/);
652  currentLevel.Set("permQT", Teuchos::rcp_dynamic_cast<Matrix>(permQTmatrix), genFactory/*this*/);
653  currentLevel.Set("permScaling", Teuchos::rcp_dynamic_cast<Matrix>(diagScalingOp), genFactory/*this*/);
654 
656  // count zeros on diagonal in P -> number of row permutations
657  Teuchos::RCP<Vector> diagPVec = VectorFactory::Build(permPmatrix->getRowMap(),true);
658  permPmatrix->getLocalDiagCopy(*diagPVec);
659  Teuchos::ArrayRCP< const Scalar > diagPVecData = diagPVec->getData(0);
660  LocalOrdinal lNumRowPermutations = 0;
661  GlobalOrdinal gNumRowPermutations = 0;
662  for(size_t i = 0; i<diagPVec->getMap()->getNodeNumElements(); ++i) {
663  if(diagPVecData[i] == 0.0) {
664  lNumRowPermutations++;
665  }
666  }
667 
668  // sum up all entries in multipleColRequests over all processors
669  MueLu_sumAll(diagPVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumRowPermutations), gNumRowPermutations);
670 
672  // count zeros on diagonal in Q^T -> number of column permutations
673  Teuchos::RCP<Vector> diagQTVec = VectorFactory::Build(permQTmatrix->getRowMap(),true);
674  permQTmatrix->getLocalDiagCopy(*diagQTVec);
675  Teuchos::ArrayRCP< const Scalar > diagQTVecData = diagQTVec->getData(0);
676  LocalOrdinal lNumColPermutations = 0;
677  GlobalOrdinal gNumColPermutations = 0;
678  for(size_t i = 0; i<diagQTVec->getMap()->getNodeNumElements(); ++i) {
679  if(diagQTVecData[i] == 0.0) {
680  lNumColPermutations++;
681  }
682  }
683 
684  // sum up all entries in multipleColRequests over all processors
685  MueLu_sumAll(diagQTVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumColPermutations), gNumColPermutations);
686 
687  currentLevel.Set("#RowPermutations", gNumRowPermutations, genFactory/*this*/);
688  currentLevel.Set("#ColPermutations", gNumColPermutations, genFactory/*this*/);
689  currentLevel.Set("#WideRangeRowPermutations", gWideRangeRowPermutations, genFactory/*this*/);
690  currentLevel.Set("#WideRangeColPermutations", gWideRangeColPermutations, genFactory/*this*/);
691 
692  GetOStream(Statistics0) << "#Row permutations/max possible permutations: " << gNumRowPermutations << "/" << diagPVec->getMap()->getGlobalNumElements() << std::endl;
693  GetOStream(Statistics0) << "#Column permutations/max possible permutations: " << gNumColPermutations << "/" << diagQTVec->getMap()->getGlobalNumElements() << std::endl;
694  GetOStream(Runtime1) << "#wide range row permutations: " << gWideRangeRowPermutations << " #wide range column permutations: " << gWideRangeColPermutations << std::endl;
695 }
696 
697 } // namespace MueLu
698 
699 #endif /* MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_ */
Important warning messages (one line)
#define MueLu_sumAll(rcpComm, in, out)
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string())
void sortingPermutation(const std::vector< Scalar > &values, std::vector< LocalOrdinal > &v)
One-liner description of what is happening.
Namespace for MueLu classes and methods.
Print even more statistics.
Base class for factories (e.g., R, P, and A_coarse).
Print statistics that do not involve significant additional computation.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
void BuildPermutation(const Teuchos::RCP< Matrix > &A, const Teuchos::RCP< const Map > permRowMap, Level &currentLevel, const FactoryBase *genFactory) const
build permutation operators
Exception throws to report errors in the internal logical of the program.
Description of what is happening (more verbose)