Ifpack2 Templated Preconditioning Package  Version 1.0
Ifpack2_DenseContainer_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Tempated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_DENSECONTAINER_DEF_HPP
44 #define IFPACK2_DENSECONTAINER_DEF_HPP
45 
46 #include "Tpetra_CrsMatrix.hpp"
47 #include "Teuchos_LAPACK.hpp"
48 #include "Tpetra_Experimental_BlockMultiVector.hpp"
49 
50 #ifdef HAVE_MPI
51 # include <mpi.h>
52 # include "Teuchos_DefaultMpiComm.hpp"
53 #else
54 # include "Teuchos_DefaultSerialComm.hpp"
55 #endif // HAVE_MPI
56 
57 
58 namespace Ifpack2 {
59 
60 template<class MatrixType, class LocalScalarType>
62 DenseContainer (const Teuchos::RCP<const row_matrix_type>& matrix,
63  const Teuchos::ArrayView<const local_ordinal_type>& localRows) :
64  Container<MatrixType> (matrix, localRows),
65  hasBlockCrsMatrix_(false),
66  numRows_ (localRows.size ()),
67  diagBlock_ (numRows_, numRows_),
68  ipiv_ (numRows_, 0)
69 {
70  using Teuchos::Array;
71  using Teuchos::ArrayView;
72  using Teuchos::RCP;
73  using Teuchos::rcp;
74  using Teuchos::toString;
75  typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
76  typedef typename ArrayView<const local_ordinal_type>::size_type size_type;
77  TEUCHOS_TEST_FOR_EXCEPTION(
78  ! matrix->hasColMap (), std::invalid_argument, "Ifpack2::DenseContainer: "
79  "The constructor's input matrix must have a column Map.");
80 
81  // If we have a blockCRS matrix, we need more memory for diagBlock
82  Teuchos::RCP<const block_crs_matrix_type> global_bcrs =
83  Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (matrix);
84  if (!global_bcrs.is_null ()) {
85  hasBlockCrsMatrix_ = true;
86 
87  // Get the block size
88  blockSize_ = global_bcrs->getBlockSize();
89 
90  // Resize diagBlock
91  diagBlock_.shape(numRows_*blockSize_,numRows_*blockSize_);
92  ipiv_.resize(numRows_*blockSize_);
93  }
94 
95  // Check whether the input set of local row indices is correct.
96  const map_type& rowMap = * (matrix->getRowMap ());
97  const size_type numRows = localRows.size ();
98  bool rowIndicesValid = true;
99  Array<local_ordinal_type> invalidLocalRowIndices;
100  for (size_type i = 0; i < numRows; ++i) {
101  if (! rowMap.isNodeLocalElement (localRows[i])) {
102  rowIndicesValid = false;
103  invalidLocalRowIndices.push_back (localRows[i]);
104  break;
105  }
106  }
107  TEUCHOS_TEST_FOR_EXCEPTION(
108  ! rowIndicesValid, std::invalid_argument, "Ifpack2::DenseContainer: "
109  "On process " << rowMap.getComm ()->getRank () << " of "
110  << rowMap.getComm ()->getSize () << ", in the given set of local row "
111  "indices localRows = " << toString (localRows) << ", the following "
112  "entries are not valid local row indices on the calling process: "
113  << toString (invalidLocalRowIndices) << ".");
114 
115 #ifdef HAVE_MPI
116  RCP<const Teuchos::Comm<int> > localComm =
117  rcp (new Teuchos::MpiComm<int> (MPI_COMM_SELF));
118 #else
119  RCP<const Teuchos::Comm<int> > localComm =
120  rcp (new Teuchos::SerialComm<int> ());
121 #endif // HAVE_MPI
122 
123  // FIXME (mfh 25 Aug 2013) What if the matrix's row Map has a
124  // different index base than zero?
125  const global_ordinal_type indexBase = 0;
126  localMap_ = rcp (new map_type (numRows_, indexBase, localComm));
127 }
128 
129 template<class MatrixType, class LocalScalarType>
131 DenseContainer (const Teuchos::RCP<const row_matrix_type>& matrix,
132  const Teuchos::ArrayView<const local_ordinal_type>& localRows) :
133  Container<MatrixType> (matrix, localRows)
134 {
135  TEUCHOS_TEST_FOR_EXCEPTION
136  (true, std::logic_error, "Ifpack2::DenseContainer: Not implemented for "
137  "LocalScalarType = " << Teuchos::TypeNameTraits<LocalScalarType>::name ()
138  << ".");
139 }
140 
141 template<class MatrixType, class LocalScalarType>
143 {}
144 
145 template<class MatrixType, class LocalScalarType>
147 {}
148 
149 
150 template<class MatrixType, class LocalScalarType>
152 setParameters (const Teuchos::ParameterList& /* List */)
153 {
154  // the solver doesn't currently take any parameters
155 }
156 
157 template<class MatrixType, class LocalScalarType>
159 setParameters (const Teuchos::ParameterList& /* List */)
160 {
161  // the solver doesn't currently take any parameters
162 }
163 
164 
165 template<class MatrixType, class LocalScalarType>
166 void
169 {
170  using Teuchos::null;
171  using Teuchos::rcp;
172 
173  // We assume that if you called this method, you intend to recompute
174  // everything.
175  IsInitialized_ = false;
176  IsComputed_ = false;
177 
178  // Fill the diagonal block and LU permutation array with zeros.
179  diagBlock_.putScalar (Teuchos::ScalarTraits<local_scalar_type>::zero ());
180  std::fill (ipiv_.begin (), ipiv_.end (), 0);
181 
182  IsInitialized_ = true;
183 }
184 
185 template<class MatrixType, class LocalScalarType>
186 void
189 {}
190 
191 template<class MatrixType, class LocalScalarType>
192 void
195 {
196  // FIXME: I am commenting this out because it breaks block CRS support
197 // TEUCHOS_TEST_FOR_EXCEPTION(
198 // static_cast<size_t> (ipiv_.size ()) != numRows_, std::logic_error,
199 // "Ifpack2::DenseContainer::compute: ipiv_ array has the wrong size. "
200 // "Please report this bug to the Ifpack2 developers.");
201 
202  IsComputed_ = false;
203  if (! this->isInitialized ()) {
204  this->initialize ();
205  }
206 
207  // Extract the submatrix.
208  extract (this->getMatrix ()); // extract the submatrix
209  factor (); // factor the submatrix
210 
211  IsComputed_ = true;
212 }
213 
214 template<class MatrixType, class LocalScalarType>
215 void
218 {}
219 
220 template<class MatrixType, class LocalScalarType>
221 void
223 factor ()
224 {
225  Teuchos::LAPACK<int, local_scalar_type> lapack;
226  int INFO = 0;
227  lapack.GETRF (diagBlock_.numRows (), diagBlock_.numCols (),
228  diagBlock_.values (), diagBlock_.stride (),
229  ipiv_.getRawPtr (), &INFO);
230  // INFO < 0 is a bug.
231  TEUCHOS_TEST_FOR_EXCEPTION(
232  INFO < 0, std::logic_error, "Ifpack2::DenseContainer::factor: "
233  "LAPACK's _GETRF (LU factorization with partial pivoting) was called "
234  "incorrectly. INFO = " << INFO << " < 0. "
235  "Please report this bug to the Ifpack2 developers.");
236  // INFO > 0 means the matrix is singular. This is probably an issue
237  // either with the choice of rows the rows we extracted, or with the
238  // input matrix itself.
239  TEUCHOS_TEST_FOR_EXCEPTION(
240  INFO > 0, std::runtime_error, "Ifpack2::DenseContainer::factor: "
241  "LAPACK's _GETRF (LU factorization with partial pivoting) reports that the "
242  "computed U factor is exactly singular. U(" << INFO << "," << INFO << ") "
243  "(one-based index i) is exactly zero. This probably means that the input "
244  "matrix has a singular diagonal block.");
245 }
246 
247 template<class MatrixType, class LocalScalarType>
248 void
250 factor ()
251 {}
252 
253 template<class MatrixType, class LocalScalarType>
254 void
256 applyImplBlockCrs (const local_mv_type& X,
257  local_mv_type& Y,
258  Teuchos::ETransp mode,
259  LocalScalarType alpha,
260  LocalScalarType beta) const
261 {
262  using Teuchos::ArrayRCP;
263  using Teuchos::RCP;
264  using Teuchos::rcp;
265  using Teuchos::rcpFromRef;
266 
267  TEUCHOS_TEST_FOR_EXCEPTION(
268  X.getLocalLength () != Y.getLocalLength (),
269  std::logic_error, "Ifpack2::DenseContainer::applyImpl: X and Y have "
270  "incompatible dimensions (" << X.getLocalLength () << " resp. "
271  << Y.getLocalLength () << "). Please report this bug to "
272  "the Ifpack2 developers.");
273  TEUCHOS_TEST_FOR_EXCEPTION(
274  localMap_->getNodeNumElements()*blockSize_ != X.getLocalLength (),
275  std::logic_error, "Ifpack2::DenseContainer::applyImpl: The inverse "
276  "operator and X have incompatible dimensions (" <<
277  localMap_->getNodeNumElements()*blockSize_ << " resp. "
278  << X.getLocalLength () << "). Please report this bug to "
279  "the Ifpack2 developers.");
280  TEUCHOS_TEST_FOR_EXCEPTION(
281  localMap_->getNodeNumElements()*blockSize_ != Y.getLocalLength (),
282  std::logic_error, "Ifpack2::DenseContainer::applyImpl: The inverse "
283  "operator and Y have incompatible dimensions (" <<
284  localMap_->getNodeNumElements()*blockSize_ << " resp. "
285  << Y.getLocalLength () << "). Please report this bug to "
286  "the Ifpack2 developers.");
287  TEUCHOS_TEST_FOR_EXCEPTION(
288  X.getLocalLength () != static_cast<size_t> (diagBlock_.numRows ()),
289  std::logic_error, "Ifpack2::DenseContainer::applyImpl: The input "
290  "multivector X has incompatible dimensions from those of the "
291  "inverse operator (" << X.getLocalLength () << " vs. "
292  << (mode == Teuchos::NO_TRANS ? diagBlock_.numCols () : diagBlock_.numRows ())
293  << "). Please report this bug to the Ifpack2 developers.");
294  TEUCHOS_TEST_FOR_EXCEPTION(
295  X.getLocalLength () != static_cast<size_t> (diagBlock_.numRows ()),
296  std::logic_error, "Ifpack2::DenseContainer::applyImpl: The output "
297  "multivector Y has incompatible dimensions from those of the "
298  "inverse operator (" << Y.getLocalLength () << " vs. "
299  << (mode == Teuchos::NO_TRANS ? diagBlock_.numRows () : diagBlock_.numCols ())
300  << "). Please report this bug to the Ifpack2 developers.");
301 
302  typedef Teuchos::ScalarTraits<local_scalar_type> STS;
303  const int numVecs = static_cast<int> (X.getNumVectors ());
304  if (alpha == STS::zero ()) { // don't need to solve the linear system
305  if (beta == STS::zero ()) {
306  // Use BLAS AXPY semantics for beta == 0: overwrite, clobbering
307  // any Inf or NaN values in Y (rather than multiplying them by
308  // zero, resulting in NaN values).
309  Y.putScalar (STS::zero ());
310  }
311  else { // beta != 0
312  Y.scale (beta);
313  }
314  }
315  else { // alpha != 0; must solve the linear system
316  Teuchos::LAPACK<int, local_scalar_type> lapack;
317  // If beta is nonzero or Y is not constant stride, we have to use
318  // a temporary output multivector. It gets a (deep) copy of X,
319  // since GETRS overwrites its (multi)vector input with its output.
320  RCP<local_mv_type> Y_tmp;
321  if (beta == STS::zero () ){
322  Tpetra::deep_copy (Y, X);
323  Y_tmp = rcpFromRef (Y);
324  }
325  else {
326  Y_tmp = rcp (new local_mv_type (X, Teuchos::Copy));
327  }
328  const int Y_stride = static_cast<int> (Y_tmp->getStride ());
329  ArrayRCP<local_scalar_type> Y_view = Y_tmp->get1dViewNonConst ();
330  local_scalar_type* const Y_ptr = Y_view.getRawPtr ();
331 
332  int INFO = 0;
333  const char trans =
334  (mode == Teuchos::CONJ_TRANS ? 'C' : (mode == Teuchos::TRANS ? 'T' : 'N'));
335  lapack.GETRS (trans, diagBlock_.numRows (), numVecs,
336  diagBlock_.values (), diagBlock_.stride (),
337  ipiv_.getRawPtr (), Y_ptr, Y_stride, &INFO);
338  TEUCHOS_TEST_FOR_EXCEPTION(
339  INFO != 0, std::runtime_error, "Ifpack2::DenseContainer::applyImpl: "
340  "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
341  "failed with INFO = " << INFO << " != 0.");
342 
343  if (beta != STS::zero ()) {
344  Y.update (alpha, *Y_tmp, beta);
345  }
346  else if (! Y.isConstantStride ()) {
347  Tpetra::deep_copy (Y, *Y_tmp);
348  }
349  }
350 }
351 
352 template<class MatrixType, class LocalScalarType>
353 void
355 applyImpl (const local_mv_type& X,
356  local_mv_type& Y,
357  Teuchos::ETransp mode,
358  LocalScalarType alpha,
359  LocalScalarType beta) const
360 {
361  using Teuchos::ArrayRCP;
362  using Teuchos::RCP;
363  using Teuchos::rcp;
364  using Teuchos::rcpFromRef;
365 
366  if(hasBlockCrsMatrix_) {
367  applyImplBlockCrs(X,Y,mode,alpha,beta);
368  return;
369  }
370 
371  TEUCHOS_TEST_FOR_EXCEPTION(
372  X.getLocalLength () != Y.getLocalLength (),
373  std::logic_error, "Ifpack2::DenseContainer::applyImpl: X and Y have "
374  "incompatible dimensions (" << X.getLocalLength () << " resp. "
375  << Y.getLocalLength () << "). Please report this bug to "
376  "the Ifpack2 developers.");
377  TEUCHOS_TEST_FOR_EXCEPTION(
378  localMap_->getNodeNumElements () != X.getLocalLength (),
379  std::logic_error, "Ifpack2::DenseContainer::applyImpl: The inverse "
380  "operator and X have incompatible dimensions (" <<
381  localMap_->getNodeNumElements () << " resp. "
382  << X.getLocalLength () << "). Please report this bug to "
383  "the Ifpack2 developers.");
384  TEUCHOS_TEST_FOR_EXCEPTION(
385  localMap_->getNodeNumElements () != Y.getLocalLength (),
386  std::logic_error, "Ifpack2::DenseContainer::applyImpl: The inverse "
387  "operator and Y have incompatible dimensions (" <<
388  localMap_->getNodeNumElements () << " resp. "
389  << Y.getLocalLength () << "). Please report this bug to "
390  "the Ifpack2 developers.");
391  TEUCHOS_TEST_FOR_EXCEPTION(
392  X.getLocalLength () != static_cast<size_t> (diagBlock_.numRows ()),
393  std::logic_error, "Ifpack2::DenseContainer::applyImpl: The input "
394  "multivector X has incompatible dimensions from those of the "
395  "inverse operator (" << X.getLocalLength () << " vs. "
396  << (mode == Teuchos::NO_TRANS ? diagBlock_.numCols () : diagBlock_.numRows ())
397  << "). Please report this bug to the Ifpack2 developers.");
398  TEUCHOS_TEST_FOR_EXCEPTION(
399  X.getLocalLength () != static_cast<size_t> (diagBlock_.numRows ()),
400  std::logic_error, "Ifpack2::DenseContainer::applyImpl: The output "
401  "multivector Y has incompatible dimensions from those of the "
402  "inverse operator (" << Y.getLocalLength () << " vs. "
403  << (mode == Teuchos::NO_TRANS ? diagBlock_.numRows () : diagBlock_.numCols ())
404  << "). Please report this bug to the Ifpack2 developers.");
405 
406  typedef Teuchos::ScalarTraits<local_scalar_type> STS;
407  const int numVecs = static_cast<int> (X.getNumVectors ());
408  if (alpha == STS::zero ()) { // don't need to solve the linear system
409  if (beta == STS::zero ()) {
410  // Use BLAS AXPY semantics for beta == 0: overwrite, clobbering
411  // any Inf or NaN values in Y (rather than multiplying them by
412  // zero, resulting in NaN values).
413  Y.putScalar (STS::zero ());
414  }
415  else { // beta != 0
416  Y.scale (beta);
417  }
418  }
419  else { // alpha != 0; must solve the linear system
420  Teuchos::LAPACK<int, local_scalar_type> lapack;
421  // If beta is nonzero or Y is not constant stride, we have to use
422  // a temporary output multivector. It gets a (deep) copy of X,
423  // since GETRS overwrites its (multi)vector input with its output.
424  RCP<local_mv_type> Y_tmp;
425  if (beta == STS::zero () ){
426  Tpetra::deep_copy (Y, X);
427  Y_tmp = rcpFromRef (Y);
428  }
429  else {
430  Y_tmp = rcp (new local_mv_type (X, Teuchos::Copy));
431  }
432  const int Y_stride = static_cast<int> (Y_tmp->getStride ());
433  ArrayRCP<local_scalar_type> Y_view = Y_tmp->get1dViewNonConst ();
434  local_scalar_type* const Y_ptr = Y_view.getRawPtr ();
435 
436  int INFO = 0;
437  const char trans =
438  (mode == Teuchos::CONJ_TRANS ? 'C' : (mode == Teuchos::TRANS ? 'T' : 'N'));
439  lapack.GETRS (trans, diagBlock_.numRows (), numVecs,
440  diagBlock_.values (), diagBlock_.stride (),
441  ipiv_.getRawPtr (), Y_ptr, Y_stride, &INFO);
442  TEUCHOS_TEST_FOR_EXCEPTION(
443  INFO != 0, std::runtime_error, "Ifpack2::DenseContainer::applyImpl: "
444  "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
445  "failed with INFO = " << INFO << " != 0.");
446 
447  if (beta != STS::zero ()) {
448  Y.update (alpha, *Y_tmp, beta);
449  }
450  else if (! Y.isConstantStride ()) {
451  Tpetra::deep_copy (Y, *Y_tmp);
452  }
453  }
454 }
455 
456 template<class MatrixType, class LocalScalarType>
457 void
459 applyImplBlockCrs (const local_mv_type& /* X */,
460  local_mv_type& /* Y */,
461  Teuchos::ETransp /* mode */,
462  LocalScalarType /* alpha */,
463  LocalScalarType /* beta */) const
464 {}
465 
466 template<class MatrixType, class LocalScalarType>
467 void
469 applyImpl (const local_mv_type& /* X */,
470  local_mv_type& /* Y */,
471  Teuchos::ETransp /* mode */,
472  LocalScalarType /* alpha */,
473  LocalScalarType /* beta */) const
474 {}
475 
476 
477 template<class MatrixType, class LocalScalarType>
478 void
480 applyBlockCrs (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
481  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
482  Teuchos::ETransp mode,
483  scalar_type alpha,
484  scalar_type beta) const
485 {
486  using Teuchos::ArrayView;
487  using Teuchos::ArrayRCP;
488  using Teuchos::as;
489  using Teuchos::RCP;
490  using Teuchos::rcp;
491 
492  // The local operator might have a different Scalar type than
493  // MatrixType. This means that we might have to convert X and Y to
494  // the Tpetra::MultiVector specialization that the local operator
495  // wants. This class' X_ and Y_ internal fields are of the right
496  // type for the local operator, so we can use those as targets.
497 
498  const char prefix[] = "Ifpack2::DenseContainer::weightedApply: ";
499  TEUCHOS_TEST_FOR_EXCEPTION(
500  ! IsComputed_, std::runtime_error, prefix << "You must have called the "
501  "compute() method before you may call this method. You may call "
502  "apply() as many times as you want after calling compute() once, "
503  "but you must have called compute() at least once first.");
504  const size_t numVecs = X.getNumVectors ();
505  TEUCHOS_TEST_FOR_EXCEPTION(
506  numVecs != Y.getNumVectors (), std::runtime_error,
507  prefix << "X and Y have different numbers of vectors (columns). X has "
508  << X.getNumVectors () << ", but Y has " << X.getNumVectors () << ".");
509 
510  if (numVecs == 0) {
511  return; // done! nothing to do
512  }
513 
514  // The local operator works on a permuted subset of the local parts
515  // of X and Y. The subset and permutation are defined by the index
516  // array returned by getLocalRows(). If the permutation is trivial
517  // and the subset is exactly equal to the local indices, then we
518  // could use the local parts of X and Y exactly, without needing to
519  // permute. Otherwise, we have to use temporary storage to permute
520  // X and Y. For now, we always use temporary storage.
521  //
522  // Create temporary permuted versions of the input and output.
523  // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
524  // store the permuted versions of X resp. Y. Note that X_local has
525  // the domain Map of the operator, which may be a permuted subset of
526  // the local Map corresponding to X.getMap(). Similarly, Y_local
527  // has the range Map of the operator, which may be a permuted subset
528  // of the local Map corresponding to Y.getMap(). numRows_ here
529  // gives the number of rows in the row Map of the local Inverse_
530  // operator.
531  //
532  // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
533  // here that the row Map and the range Map of that operator are
534  // the same.
535  //
536  // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
537  // really belongs in Tpetra.
538 
539  if (blockX_.is_null ()) {
540  blockX_ = rcp (new local_block_mv_type (*localMap_, blockSize_, numVecs));
541  }
542  local_mv_type X_local = blockX_->getMultiVectorView();
543  TEUCHOS_TEST_FOR_EXCEPTION(
544  X_local.getLocalLength () != numRows_*blockSize_, std::logic_error,
545  "Ifpack2::DenseContainer::apply: "
546  "X_local has length " << X_local.getLocalLength () << ", which does "
547  "not match numRows_*blockSize_ = " << numRows_*blockSize_ << ". Please report this bug to "
548  "the Ifpack2 developers.");
549  ArrayView<const local_ordinal_type> localRows = this->getLocalRows();
550 
551  // Gather x
552  for (size_t j = 0; j < numVecs; ++j) {
553  ArrayRCP<const scalar_type> X_in_j = X.getData (j);
554  ArrayRCP<local_scalar_type> X_out_j = X_local.getDataNonConst (j);
555 
556  for (size_t i = 0; i < numRows_; ++i) {
557  const size_t i_perm = localRows[i];
558  for (int k = 0; k < blockSize_; k++)
559  X_out_j[i*blockSize_+k] = X_in_j[i_perm*blockSize_+k];
560  }
561  }
562 
563  // We must gather the contents of the output multivector Y even on
564  // input to applyImpl(), since the inverse operator might use it as
565  // an initial guess for a linear solve. We have no way of knowing
566  // whether it does or does not.
567 
568  if (blockY_.is_null ()) {
569  blockY_ = rcp (new local_block_mv_type (*localMap_, blockSize_, numVecs));
570  }
571  local_mv_type Y_local = blockY_->getMultiVectorView();
572  TEUCHOS_TEST_FOR_EXCEPTION(
573  Y_local.getLocalLength () != numRows_*blockSize_, std::logic_error,
574  "Ifpack2::DenseContainer::apply: "
575  "Y_local has length " << X_local.getLocalLength () << ", which does "
576  "not match numRows_ = " << numRows_*blockSize_ << ". Please report this bug to "
577  "the Ifpack2 developers.");
578 
579  // gather Y
580  for (size_t j = 0; j < numVecs; ++j) {
581  ArrayRCP<const scalar_type> Y_in_j = Y.getData (j);
582  ArrayRCP<local_scalar_type> Y_out_j = Y_local.getDataNonConst (j);
583 
584  for (size_t i = 0; i < numRows_; ++i) {
585  const size_t i_perm = localRows[i];
586  for (int k = 0; k < blockSize_; k++)
587  Y_out_j[i*blockSize_+k] = Y_in_j[i_perm*blockSize_+k];
588  }
589  }
590 
591  // Apply the local operator:
592  // Y_local := beta*Y_local + alpha*M^{-1}*X_local
593  this->applyImpl (X_local, Y_local, mode, as<local_scalar_type> (alpha),
594  as<local_scalar_type> (beta));
595 
596  // Scatter the permuted subset output vector Y_local back into the
597  // original output multivector Y.
598  for (size_t j = 0; j < numVecs; ++j) {
599  ArrayRCP<local_scalar_type> Y_in_j = Y.getDataNonConst (j);
600  ArrayRCP<const scalar_type> Y_out_j = Y_local.getData (j);
601 
602  for (size_t i = 0; i < numRows_; ++i) {
603  const size_t i_perm = localRows[i];
604  for (int k = 0; k < blockSize_; k++)
605  Y_in_j[i_perm*blockSize_+k] = Y_out_j[i*blockSize_+k];
606  }
607  }
608 }
609 
610 
611 template<class MatrixType, class LocalScalarType>
612 void
614 apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
615  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
616  Teuchos::ETransp mode,
617  scalar_type alpha,
618  scalar_type beta) const
619 {
620  using Teuchos::ArrayView;
621  using Teuchos::as;
622  using Teuchos::RCP;
623  using Teuchos::rcp;
624 
625  // if we have a block CRS matrix, call the appropriate method
626  if(hasBlockCrsMatrix_) {
627  applyBlockCrs(X,Y,mode,alpha,beta);
628  return;
629  }
630 
631  // The local operator might have a different Scalar type than
632  // MatrixType. This means that we might have to convert X and Y to
633  // the Tpetra::MultiVector specialization that the local operator
634  // wants. This class' X_ and Y_ internal fields are of the right
635  // type for the local operator, so we can use those as targets.
636 
637  // Tpetra::MultiVector specialization corresponding to the global operator.
638  typedef Tpetra::MultiVector<scalar_type,
640 
641  const char prefix[] = "Ifpack2::DenseContainer::weightedApply: ";
642  TEUCHOS_TEST_FOR_EXCEPTION(
643  ! IsComputed_, std::runtime_error, prefix << "You must have called the "
644  "compute() method before you may call this method. You may call "
645  "apply() as many times as you want after calling compute() once, "
646  "but you must have called compute() at least once first.");
647  const size_t numVecs = X.getNumVectors ();
648  TEUCHOS_TEST_FOR_EXCEPTION(
649  numVecs != Y.getNumVectors (), std::runtime_error,
650  prefix << "X and Y have different numbers of vectors (columns). X has "
651  << X.getNumVectors () << ", but Y has " << X.getNumVectors () << ".");
652 
653  if (numVecs == 0) {
654  return; // done! nothing to do
655  }
656 
657  // The local operator works on a permuted subset of the local parts
658  // of X and Y. The subset and permutation are defined by the index
659  // array returned by getLocalRows(). If the permutation is trivial
660  // and the subset is exactly equal to the local indices, then we
661  // could use the local parts of X and Y exactly, without needing to
662  // permute. Otherwise, we have to use temporary storage to permute
663  // X and Y. For now, we always use temporary storage.
664  //
665  // Create temporary permuted versions of the input and output.
666  // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
667  // store the permuted versions of X resp. Y. Note that X_local has
668  // the domain Map of the operator, which may be a permuted subset of
669  // the local Map corresponding to X.getMap(). Similarly, Y_local
670  // has the range Map of the operator, which may be a permuted subset
671  // of the local Map corresponding to Y.getMap(). numRows_ here
672  // gives the number of rows in the row Map of the local Inverse_
673  // operator.
674  //
675  // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
676  // here that the row Map and the range Map of that operator are
677  // the same.
678  //
679  // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
680  // really belongs in Tpetra.
681 
682  if (X_.is_null ()) {
683  X_ = rcp (new local_mv_type (localMap_, numVecs));
684  }
685  RCP<local_mv_type> X_local = X_;
686  TEUCHOS_TEST_FOR_EXCEPTION(
687  X_local->getLocalLength () != numRows_, std::logic_error,
688  "Ifpack2::DenseContainer::apply: "
689  "X_local has length " << X_local->getLocalLength () << ", which does "
690  "not match numRows_ = " << numRows_ << ". Please report this bug to "
691  "the Ifpack2 developers.");
692  ArrayView<const local_ordinal_type> localRows = this->getLocalRows ();
693 
695  mvgs.gather (*X_local, X, localRows);
696 
697  // We must gather the contents of the output multivector Y even on
698  // input to applyImpl(), since the inverse operator might use it as
699  // an initial guess for a linear solve. We have no way of knowing
700  // whether it does or does not.
701 
702  if (Y_.is_null ()) {
703  Y_ = rcp (new local_mv_type (localMap_, numVecs));
704  }
705  RCP<local_mv_type> Y_local = Y_;
706  TEUCHOS_TEST_FOR_EXCEPTION(
707  Y_local->getLocalLength () != numRows_, std::logic_error,
708  "Ifpack2::DenseContainer::apply: "
709  "Y_local has length " << X_local->getLocalLength () << ", which does "
710  "not match numRows_ = " << numRows_ << ". Please report this bug to "
711  "the Ifpack2 developers.");
712  mvgs.gather (*Y_local, Y, localRows);
713 
714  // Apply the local operator:
715  // Y_local := beta*Y_local + alpha*M^{-1}*X_local
716  this->applyImpl (*X_local, *Y_local, mode, as<local_scalar_type> (alpha),
717  as<local_scalar_type> (beta));
718 
719  // Scatter the permuted subset output vector Y_local back into the
720  // original output multivector Y.
721  mvgs.scatter (Y, *Y_local, localRows);
722 }
723 
724 template<class MatrixType, class LocalScalarType>
725 void
727 applyBlockCrs (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& /* X */,
728  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& /* Y */,
729  Teuchos::ETransp /* mode */,
730  scalar_type /* alpha */,
731  scalar_type /* beta */) const
732 {}
733 
734 template<class MatrixType, class LocalScalarType>
735 void
737 apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& /* X */,
738  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& /* Y */,
739  Teuchos::ETransp /* mode */,
740  scalar_type /* alpha */,
741  scalar_type /* beta */) const
742 {}
743 
744 template<class MatrixType, class LocalScalarType>
746 weightedApply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
747  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
748  const Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& D,
749  Teuchos::ETransp mode,
750  scalar_type alpha,
751  scalar_type beta) const
752 {
753  using Teuchos::ArrayRCP;
754  using Teuchos::ArrayView;
755  using Teuchos::Range1D;
756  using Teuchos::RCP;
757  using Teuchos::rcp;
758  using Teuchos::rcp_const_cast;
759  using std::endl;
760  typedef Teuchos::ScalarTraits<scalar_type> STS;
761 
762  // The local operator template parameter might have a different
763  // Scalar type than MatrixType. This means that we might have to
764  // convert X and Y to the Tpetra::MultiVector specialization that
765  // the local operator wants. This class' X_ and Y_ internal fields
766  // are of the right type for the local operator, so we can use those
767  // as targets.
768 
769  // Tpetra::MultiVector specialization corresponding to the global operator.
770  typedef Tpetra::MultiVector<scalar_type,
772  // Tpetra::Vector specialization corresponding to the local
773  // operator. We will use this for the subset permutation of the
774  // diagonal scaling D.
775  typedef Tpetra::Vector<local_scalar_type, local_ordinal_type,
776  global_ordinal_type, node_type> local_vec_type;
777 
778  const char prefix[] = "Ifpack2::DenseContainer::weightedApply: ";
779  TEUCHOS_TEST_FOR_EXCEPTION(
780  ! IsComputed_, std::runtime_error, prefix << "You must have called the "
781  "compute() method before you may call this method. You may call "
782  "weightedApply() as many times as you want after calling compute() once, "
783  "but you must have called compute() at least once first.");
784  const size_t numVecs = X.getNumVectors ();
785  TEUCHOS_TEST_FOR_EXCEPTION(
786  numVecs != Y.getNumVectors (), std::runtime_error,
787  prefix << "X and Y have different numbers of vectors (columns). X has "
788  << X.getNumVectors () << ", but Y has " << X.getNumVectors () << ".");
789 
790  if (numVecs == 0) {
791  return; // done! nothing to do
792  }
793 
794  // The local operator works on a permuted subset of the local parts
795  // of X and Y. The subset and permutation are defined by the index
796  // array returned by getLocalRows(). If the permutation is trivial
797  // and the subset is exactly equal to the local indices, then we
798  // could use the local parts of X and Y exactly, without needing to
799  // permute. Otherwise, we have to use temporary storage to permute
800  // X and Y. For now, we always use temporary storage.
801  //
802  // Create temporary permuted versions of the input and output.
803  // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
804  // store the permuted versions of X resp. Y. Note that X_local has
805  // the domain Map of the operator, which may be a permuted subset of
806  // the local Map corresponding to X.getMap(). Similarly, Y_local
807  // has the range Map of the operator, which may be a permuted subset
808  // of the local Map corresponding to Y.getMap(). numRows_ here
809  // gives the number of rows in the row Map of the local operator.
810  //
811  // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
812  // here that the row Map and the range Map of that operator are
813  // the same.
814  //
815  // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
816  // really belongs in Tpetra.
817 
818  if (X_.is_null ()) {
819  X_ = rcp (new local_mv_type (localMap_, numVecs));
820  }
821  RCP<local_mv_type> X_local = X_;
822  TEUCHOS_TEST_FOR_EXCEPTION(
823  X_local->getLocalLength () != numRows_, std::logic_error,
824  "Ifpack2::DenseContainer::weightedApply: "
825  "X_local has length " << X_local->getLocalLength () << ", which does "
826  "not match numRows_ = " << numRows_ << ". Please report this bug to "
827  "the Ifpack2 developers.");
828  ArrayView<const local_ordinal_type> localRows = this->getLocalRows ();
829 
831  mvgs.gather (*X_local, X, localRows);
832 
833  // We must gather the output multivector Y even on input to
834  // applyImpl(), since the local operator might use it as an initial
835  // guess for a linear solve. We have no way of knowing whether it
836  // does or does not.
837 
838  if (Y_.is_null ()) {
839  Y_ = rcp (new local_mv_type (localMap_, numVecs));
840  }
841  RCP<local_mv_type> Y_local = Y_;
842  TEUCHOS_TEST_FOR_EXCEPTION(
843  Y_local->getLocalLength () != numRows_, std::logic_error,
844  "Ifpack2::DenseContainer::weightedApply: "
845  "Y_local has length " << X_local->getLocalLength () << ", which does "
846  "not match numRows_ = " << numRows_ << ". Please report this bug to "
847  "the Ifpack2 developers.");
848  mvgs.gather (*Y_local, Y, localRows);
849 
850  // Apply the diagonal scaling D to the input X. It's our choice
851  // whether the result has the original input Map of X, or the
852  // permuted subset Map of X_local. If the latter, we also need to
853  // gather D into the permuted subset Map. We choose the latter, to
854  // save memory and computation. Thus, we do the following:
855  //
856  // 1. Gather D into a temporary vector D_local.
857  // 2. Create a temporary X_scaled to hold diag(D_local) * X_local.
858  // 3. Compute X_scaled := diag(D_loca) * X_local.
859 
860  local_vec_type D_local (localMap_);
861  TEUCHOS_TEST_FOR_EXCEPTION(
862  D_local.getLocalLength () != numRows_, std::logic_error,
863  "Ifpack2::DenseContainer::weightedApply: "
864  "D_local has length " << D_local.getLocalLength () << ", which does "
865  "not match numRows_ = " << numRows_ << ". Please report this bug to "
866  "the Ifpack2 developers.");
867  mvgs.gather (D_local, D, localRows);
868  local_mv_type X_scaled (localMap_, numVecs);
869  X_scaled.elementWiseMultiply (STS::one (), D_local, *X_local, STS::zero ());
870 
871  // Y_temp will hold the result of M^{-1}*X_scaled. If beta == 0, we
872  // can write the result of Inverse_->apply() directly to Y_local, so
873  // Y_temp may alias Y_local. Otherwise, if beta != 0, we need
874  // temporary storage for M^{-1}*X_scaled, so Y_temp must be
875  // different than Y_local.
876  RCP<local_mv_type> Y_temp;
877  if (beta == STS::zero ()) {
878  Y_temp = Y_local;
879  } else {
880  Y_temp = rcp (new local_mv_type (localMap_, numVecs));
881  }
882 
883  // Apply the local operator: Y_temp := M^{-1} * X_scaled
884  this->applyImpl (X_scaled, *Y_temp, mode, STS::one (), STS::zero ());
885  // Y_local := beta * Y_local + alpha * diag(D_local) * Y_temp.
886  //
887  // Note that we still use the permuted subset scaling D_local here,
888  // because Y_temp has the same permuted subset Map. That's good, in
889  // fact, because it's a subset: less data to read and multiply.
890  Y_local->elementWiseMultiply (alpha, D_local, *Y_temp, beta);
891 
892  // Copy the permuted subset output vector Y_local into the original
893  // output multivector Y.
894  mvgs.scatter (Y, *Y_local, localRows);
895 }
896 
897 template<class MatrixType, class LocalScalarType>
898 void
900 weightedApply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& /* X */,
901  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& /* Y */,
902  const Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& /* D */,
903  Teuchos::ETransp /* mode */,
904  scalar_type /* alpha */,
905  scalar_type /* beta */ ) const
906 {
907 }
908 
909 template<class MatrixType, class LocalScalarType>
910 std::ostream&
912 print (std::ostream& os) const
913 {
914  Teuchos::FancyOStream fos (Teuchos::rcpFromRef (os));
915  fos.setOutputToRootOnly (0);
916  this->describe (fos);
917  return os;
918 }
919 
920 template<class MatrixType, class LocalScalarType>
921 std::ostream&
923 print (std::ostream& os) const
924 {
925  return os;
926 }
927 
928 template<class MatrixType, class LocalScalarType>
929 std::string
931 description () const
932 {
933  std::ostringstream oss;
934  oss << Teuchos::Describable::description();
935  if (isInitialized()) {
936  if (isComputed()) {
937  oss << "{status = initialized, computed";
938  }
939  else {
940  oss << "{status = initialized, not computed";
941  }
942  }
943  else {
944  oss << "{status = not initialized, not computed";
945  }
946 
947  oss << "}";
948  return oss.str();
949 }
950 
951 template<class MatrixType, class LocalScalarType>
952 std::string
954 description () const
955 {
956  return "";
957 }
958 
959 template<class MatrixType, class LocalScalarType>
960 void
962 describe (Teuchos::FancyOStream& os,
963  const Teuchos::EVerbosityLevel verbLevel) const
964 {
965  using std::endl;
966  if(verbLevel==Teuchos::VERB_NONE) return;
967  os << "================================================================================" << endl;
968  os << "Ifpack2::DenseContainer" << endl;
969  os << "Number of rows = " << numRows_ << endl;
970  os << "isInitialized() = " << IsInitialized_ << endl;
971  os << "isComputed() = " << IsComputed_ << endl;
972  os << "================================================================================" << endl;
973  os << endl;
974 }
975 
976 template<class MatrixType, class LocalScalarType>
977 void
979 describe (Teuchos::FancyOStream& os,
980  const Teuchos::EVerbosityLevel verbLevel) const
981 {}
982 
983 
984 template<class MatrixType, class LocalScalarType>
985 void
987 extractBlockCrs (const Teuchos::RCP<const block_crs_matrix_type>& globalMatrix)
988 {
989  using Teuchos::Array;
990  using Teuchos::ArrayView;
991  using Teuchos::toString;
992  typedef local_ordinal_type LO;
993  typedef global_ordinal_type GO;
994  typedef Tpetra::Map<LO, GO, node_type> map_type;
995  const size_t inputMatrixNumRows = globalMatrix->getNodeNumRows ();
996  // We only use the rank of the calling process and the number of MPI
997  // processes for generating error messages. Extraction itself is
998  // entirely local to each participating MPI process.
999  const int myRank = globalMatrix->getRowMap ()->getComm ()->getRank ();
1000  const int numProcs = globalMatrix->getRowMap ()->getComm ()->getSize ();
1001 
1002  // Sanity check that the local row indices to extract fall within
1003  // the valid range of local row indices for the input matrix.
1004  ArrayView<const LO> localRows = this->getLocalRows ();
1005  for (size_t j = 0; j < numRows_; ++j) {
1006  TEUCHOS_TEST_FOR_EXCEPTION(
1007  localRows[j] < 0 ||
1008  static_cast<size_t> (localRows[j]) >= inputMatrixNumRows,
1009  std::runtime_error, "Ifpack2::DenseContainer::extract: On process " <<
1010  myRank << " of " << numProcs << ", localRows[j=" << j << "] = " <<
1011  localRows[j] << ", which is out of the valid range of local row indices "
1012  "indices [0, " << (inputMatrixNumRows - 1) << "] for the input matrix.");
1013  }
1014 
1015  // Convert the local row indices we want into local column indices.
1016  // For every local row ii_local = localRows[i] we take, we also want
1017  // to take the corresponding column. To find the corresponding
1018  // column, we use the row Map to convert the local row index
1019  // ii_local into a global index ii_global, and then use the column
1020  // Map to convert ii_global into a local column index jj_local. If
1021  // the input matrix doesn't have a column Map, we need to be using
1022  // global indices anyway...
1023 
1024  // We use the domain Map to exclude off-process global entries.
1025  Teuchos::RCP<const map_type> globalRowMap = globalMatrix->getRowMap ();
1026  Teuchos::RCP<const map_type> globalColMap = globalMatrix->getColMap ();
1027  Teuchos::RCP<const map_type> globalDomMap = globalMatrix->getDomainMap ();
1028 
1029  bool rowIndsValid = true;
1030  bool colIndsValid = true;
1031  Array<LO> localCols (numRows_);
1032  // For error messages, collect the sets of invalid row indices and
1033  // invalid column indices. They are otherwise not useful.
1034  Array<LO> invalidLocalRowInds;
1035  Array<GO> invalidGlobalColInds;
1036  for (size_t i = 0; i < numRows_; ++i) {
1037  // ii_local is the (local) row index we want to look up.
1038  const LO ii_local = localRows[i];
1039  // Find the global index jj_global corresponding to ii_local.
1040  // Global indices are the same (rather, are required to be the
1041  // same) in all three Maps, which is why we use jj (suggesting a
1042  // column index, which is how we will use it below).
1043  const GO jj_global = globalRowMap->getGlobalElement (ii_local);
1044  if (jj_global == Teuchos::OrdinalTraits<GO>::invalid ()) {
1045  // If ii_local is not a local index in the row Map on the
1046  // calling process, that means localRows is incorrect. We've
1047  // already checked for this in the constructor, but we might as
1048  // well check again here, since it's cheap to do so (just an
1049  // integer comparison, since we need jj_global anyway).
1050  rowIndsValid = false;
1051  invalidLocalRowInds.push_back (ii_local);
1052  break;
1053  }
1054  // Exclude "off-process" entries: that is, those in the column Map
1055  // on this process that are not in the domain Map on this process.
1056  if (globalDomMap->isNodeGlobalElement (jj_global)) {
1057  // jj_global is not an off-process entry. Look up its local
1058  // index in the column Map; we want to extract this column index
1059  // from the input matrix. If jj_global is _not_ in the column
1060  // Map on the calling process, that could mean that the column
1061  // in question is empty on this process. That would be bad for
1062  // solving linear systems with the extract submatrix. We could
1063  // solve the resulting singular linear systems in a minimum-norm
1064  // least-squares sense, but for now we simply raise an exception.
1065  const LO jj_local = globalColMap->getLocalElement (jj_global);
1066  if (jj_local == Teuchos::OrdinalTraits<local_ordinal_type>::invalid ()) {
1067  colIndsValid = false;
1068  invalidGlobalColInds.push_back (jj_global);
1069  break;
1070  }
1071  localCols[i] = jj_local;
1072  }
1073  }
1074  TEUCHOS_TEST_FOR_EXCEPTION(
1075  ! rowIndsValid, std::logic_error, "Ifpack2::DenseContainer::extract: "
1076  "On process " << myRank << ", at least one row index in the set of local "
1077  "row indices given to the constructor is not a valid local row index in "
1078  "the input matrix's row Map on this process. This should be impossible "
1079  "because the constructor checks for this case. Here is the complete set "
1080  "of invalid local row indices: " << toString (invalidLocalRowInds) << ". "
1081  "Please report this bug to the Ifpack2 developers.");
1082  TEUCHOS_TEST_FOR_EXCEPTION(
1083  ! colIndsValid, std::runtime_error, "Ifpack2::DenseContainer::extract: "
1084  "On process " << myRank << ", "
1085  "At least one row index in the set of row indices given to the constructor "
1086  "does not have a corresponding column index in the input matrix's column "
1087  "Map. This probably means that the column(s) in question is/are empty on "
1088  "this process, which would make the submatrix to extract structurally "
1089  "singular. Here is the compete set of invalid global column indices: "
1090  << toString (invalidGlobalColInds) << ".");
1091 
1092  diagBlock_.putScalar (Teuchos::ScalarTraits<local_scalar_type>::zero ());
1093 
1094  const size_t maxNumEntriesInRow = globalMatrix->getNodeMaxNumRowEntries ();
1095  Array<local_ordinal_type> ind (maxNumEntriesInRow);
1096 
1097  const local_ordinal_type INVALID =
1098  Teuchos::OrdinalTraits<local_ordinal_type>::invalid ();
1099 
1100  Array<scalar_type> val (maxNumEntriesInRow*blockSize_*blockSize_);
1101  for (size_t i = 0; i < numRows_; ++i) {
1102  const local_ordinal_type localRow = localRows[i];
1103  size_t numEntries;
1104  globalMatrix->getLocalRowCopy (localRow, ind (), val (), numEntries);
1105 
1106  for (size_t k = 0; k < numEntries; ++k) {
1107  const local_ordinal_type localCol = ind[k];
1108 
1109  // Skip off-process elements
1110  //
1111  // FIXME (mfh 24 Aug 2013) This assumes the following:
1112  //
1113  // 1. The column and row Maps begin with the same set of
1114  // on-process entries, in the same order. That is,
1115  // on-process row and column indices are the same.
1116  // 2. All off-process indices in the column Map of the input
1117  // matrix occur after that initial set.
1118  if (localCol >= 0 && static_cast<size_t> (localCol) < inputMatrixNumRows) {
1119  // for local column IDs, look for each ID in the list
1120  // of columns hosted by this object
1121  local_ordinal_type jj = INVALID;
1122  for (size_t kk = 0; kk < numRows_; ++kk) {
1123  if (localRows[kk] == localCol) {
1124  jj = kk;
1125  }
1126  }
1127  if (jj != INVALID) {
1128 // std::cout << "k: " << k << " and localCol: " << localCol << " and jj: " << jj << std::endl;
1129  // copy entire diagonal block
1130  for(local_ordinal_type c=0; c<blockSize_; c++)
1131  {
1132  for(local_ordinal_type r=0; r<blockSize_; r++)
1133  {
1134  diagBlock_(blockSize_*i+r,blockSize_*jj+c) = val[k*blockSize_*blockSize_+r+c*blockSize_];
1135  }
1136  }
1137  }
1138  }
1139  }
1140  }
1141 // std::cout << "diagonal block: " << diagBlock_ << std::endl;
1142 }
1143 
1144 
1145 template<class MatrixType, class LocalScalarType>
1146 void
1148 extract (const Teuchos::RCP<const row_matrix_type>& globalMatrix)
1149 {
1150  using Teuchos::Array;
1151  using Teuchos::ArrayView;
1152  using Teuchos::toString;
1153  typedef local_ordinal_type LO;
1154  typedef global_ordinal_type GO;
1155  typedef Tpetra::Map<LO, GO, node_type> map_type;
1156  const size_t inputMatrixNumRows = globalMatrix->getNodeNumRows ();
1157  // We only use the rank of the calling process and the number of MPI
1158  // processes for generating error messages. Extraction itself is
1159  // entirely local to each participating MPI process.
1160  const int myRank = globalMatrix->getRowMap ()->getComm ()->getRank ();
1161  const int numProcs = globalMatrix->getRowMap ()->getComm ()->getSize ();
1162 
1163  // If this is a block CRS matrix, call the appropriate function
1164  if(hasBlockCrsMatrix_) {
1165  extractBlockCrs(Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (globalMatrix));
1166  return;
1167  }
1168 
1169  // Sanity check that the local row indices to extract fall within
1170  // the valid range of local row indices for the input matrix.
1171  ArrayView<const LO> localRows = this->getLocalRows ();
1172  for (size_t j = 0; j < numRows_; ++j) {
1173  TEUCHOS_TEST_FOR_EXCEPTION(
1174  localRows[j] < 0 ||
1175  static_cast<size_t> (localRows[j]) >= inputMatrixNumRows,
1176  std::runtime_error, "Ifpack2::DenseContainer::extract: On process " <<
1177  myRank << " of " << numProcs << ", localRows[j=" << j << "] = " <<
1178  localRows[j] << ", which is out of the valid range of local row indices "
1179  "indices [0, " << (inputMatrixNumRows - 1) << "] for the input matrix.");
1180  }
1181 
1182  // Convert the local row indices we want into local column indices.
1183  // For every local row ii_local = localRows[i] we take, we also want
1184  // to take the corresponding column. To find the corresponding
1185  // column, we use the row Map to convert the local row index
1186  // ii_local into a global index ii_global, and then use the column
1187  // Map to convert ii_global into a local column index jj_local. If
1188  // the input matrix doesn't have a column Map, we need to be using
1189  // global indices anyway...
1190 
1191  // We use the domain Map to exclude off-process global entries.
1192  const map_type& globalRowMap = * (globalMatrix->getRowMap ());
1193  const map_type& globalColMap = * (globalMatrix->getColMap ());
1194  const map_type& globalDomMap = * (globalMatrix->getDomainMap ());
1195 
1196  bool rowIndsValid = true;
1197  bool colIndsValid = true;
1198  Array<LO> localCols (numRows_);
1199  // For error messages, collect the sets of invalid row indices and
1200  // invalid column indices. They are otherwise not useful.
1201  Array<LO> invalidLocalRowInds;
1202  Array<GO> invalidGlobalColInds;
1203  for (size_t i = 0; i < numRows_; ++i) {
1204  // ii_local is the (local) row index we want to look up.
1205  const LO ii_local = localRows[i];
1206  // Find the global index jj_global corresponding to ii_local.
1207  // Global indices are the same (rather, are required to be the
1208  // same) in all three Maps, which is why we use jj (suggesting a
1209  // column index, which is how we will use it below).
1210  const GO jj_global = globalRowMap.getGlobalElement (ii_local);
1211  if (jj_global == Teuchos::OrdinalTraits<GO>::invalid ()) {
1212  // If ii_local is not a local index in the row Map on the
1213  // calling process, that means localRows is incorrect. We've
1214  // already checked for this in the constructor, but we might as
1215  // well check again here, since it's cheap to do so (just an
1216  // integer comparison, since we need jj_global anyway).
1217  rowIndsValid = false;
1218  invalidLocalRowInds.push_back (ii_local);
1219  break;
1220  }
1221  // Exclude "off-process" entries: that is, those in the column Map
1222  // on this process that are not in the domain Map on this process.
1223  if (globalDomMap.isNodeGlobalElement (jj_global)) {
1224  // jj_global is not an off-process entry. Look up its local
1225  // index in the column Map; we want to extract this column index
1226  // from the input matrix. If jj_global is _not_ in the column
1227  // Map on the calling process, that could mean that the column
1228  // in question is empty on this process. That would be bad for
1229  // solving linear systems with the extract submatrix. We could
1230  // solve the resulting singular linear systems in a minimum-norm
1231  // least-squares sense, but for now we simply raise an exception.
1232  const LO jj_local = globalColMap.getLocalElement (jj_global);
1233  if (jj_local == Teuchos::OrdinalTraits<local_ordinal_type>::invalid ()) {
1234  colIndsValid = false;
1235  invalidGlobalColInds.push_back (jj_global);
1236  break;
1237  }
1238  localCols[i] = jj_local;
1239  }
1240  }
1241  TEUCHOS_TEST_FOR_EXCEPTION(
1242  ! rowIndsValid, std::logic_error, "Ifpack2::DenseContainer::extract: "
1243  "On process " << myRank << ", at least one row index in the set of local "
1244  "row indices given to the constructor is not a valid local row index in "
1245  "the input matrix's row Map on this process. This should be impossible "
1246  "because the constructor checks for this case. Here is the complete set "
1247  "of invalid local row indices: " << toString (invalidLocalRowInds) << ". "
1248  "Please report this bug to the Ifpack2 developers.");
1249  TEUCHOS_TEST_FOR_EXCEPTION(
1250  ! colIndsValid, std::runtime_error, "Ifpack2::DenseContainer::extract: "
1251  "On process " << myRank << ", "
1252  "At least one row index in the set of row indices given to the constructor "
1253  "does not have a corresponding column index in the input matrix's column "
1254  "Map. This probably means that the column(s) in question is/are empty on "
1255  "this process, which would make the submatrix to extract structurally "
1256  "singular. Here is the compete set of invalid global column indices: "
1257  << toString (invalidGlobalColInds) << ".");
1258 
1259  diagBlock_.putScalar (Teuchos::ScalarTraits<local_scalar_type>::zero ());
1260 
1261  const size_t maxNumEntriesInRow = globalMatrix->getNodeMaxNumRowEntries ();
1262  Array<local_ordinal_type> ind (maxNumEntriesInRow);
1263 
1264  const local_ordinal_type INVALID =
1265  Teuchos::OrdinalTraits<local_ordinal_type>::invalid ();
1266 
1267  Array<scalar_type> val (maxNumEntriesInRow);
1268  for (size_t i = 0; i < numRows_; ++i) {
1269  const local_ordinal_type localRow = localRows[i];
1270  size_t numEntries;
1271  globalMatrix->getLocalRowCopy (localRow, ind (), val (), numEntries);
1272 
1273  for (size_t k = 0; k < numEntries; ++k) {
1274  const local_ordinal_type localCol = ind[k];
1275 
1276  // Skip off-process elements
1277  //
1278  // FIXME (mfh 24 Aug 2013) This assumes the following:
1279  //
1280  // 1. The column and row Maps begin with the same set of
1281  // on-process entries, in the same order. That is,
1282  // on-process row and column indices are the same.
1283  // 2. All off-process indices in the column Map of the input
1284  // matrix occur after that initial set.
1285  if (localCol >= 0 && static_cast<size_t> (localCol) < inputMatrixNumRows) {
1286  // for local column IDs, look for each ID in the list
1287  // of columns hosted by this object
1288  local_ordinal_type jj = INVALID;
1289  for (size_t kk = 0; kk < numRows_; ++kk) {
1290  if (localRows[kk] == localCol) {
1291  jj = kk;
1292  }
1293  }
1294  if (jj != INVALID) {
1295  diagBlock_ (i, jj) += val[k]; // ???
1296  }
1297  }
1298  }
1299  }
1300 }
1301 
1302 template<class MatrixType, class LocalScalarType>
1303 void
1305 extractBlockCrs (const Teuchos::RCP<const block_crs_matrix_type>& /* globalMatrix */)
1306 {}
1307 
1308 template<class MatrixType, class LocalScalarType>
1309 void
1311 extract (const Teuchos::RCP<const row_matrix_type>& /* globalMatrix */)
1312 {}
1313 
1314 } // namespace Ifpack2
1315 
1316 // There's no need to instantiate for CrsMatrix too. All Ifpack2
1317 // preconditioners can and should do dynamic casts if they need a type
1318 // more specific than RowMatrix.
1319 
1320 #define IFPACK2_DENSECONTAINER_INSTANT(S,LO,GO,N) \
1321  template class Ifpack2::DenseContainer< Tpetra::RowMatrix<S, LO, GO, N>, S >;
1322 
1323 #endif // IFPACK2_SPARSECONTAINER_HPP
DenseContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::ArrayView< const local_ordinal_type > &localRows)
Constructor.
Definition: Ifpack2_DenseContainer_def.hpp:62
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input (global) matrix.
Definition: Ifpack2_DenseContainer_decl.hpp:365
Store and solve a local dense linear problem.
Definition: Ifpack2_DenseContainer_decl.hpp:109
LocalScalarType local_scalar_type
The second template parameter of this class.
Definition: Ifpack2_DenseContainer_decl.hpp:358
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input (global) matrix.
Definition: Ifpack2_DenseContainer_decl.hpp:135
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input (global) matrix.
Definition: Ifpack2_DenseContainer_decl.hpp:363
LocalScalarType local_scalar_type
The second template parameter of this class.
Definition: Ifpack2_DenseContainer_decl.hpp:128
MatrixType::node_type node_type
The Node type of the input (global) matrix.
Definition: Ifpack2_DenseContainer_decl.hpp:137
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input (global) matrix.
Definition: Ifpack2_DenseContainer_decl.hpp:133
Interface for creating and solving a local linear problem.
Definition: Ifpack2_Container.hpp:103
BELOS_DEPRECATED const char * toString(const StatusType status)
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:72
Implementation detail of Ifpack2::Container subclasses.
Definition: Ifpack2_Details_MultiVectorLocalGatherScatter.hpp:85