Ifpack2 Templated Preconditioning Package  Version 1.0
Ifpack2_Experimental_RBILUK_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Tempated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
44 #define IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
45 
46 #include <Tpetra_Experimental_BlockMultiVector.hpp>
47 #include<Ifpack2_OverlappingRowMatrix.hpp>
48 #include<Ifpack2_LocalFilter.hpp>
49 #include <Ifpack2_Experimental_RBILUK.hpp>
50 #include <Ifpack2_Utilities.hpp>
51 #include <Ifpack2_RILUK.hpp>
52 
53 namespace Ifpack2 {
54 
55 namespace Experimental {
56 
57 template<class MatrixType>
58 RBILUK<MatrixType>::RBILUK (const Teuchos::RCP<const row_matrix_type>& Matrix_in)
59  : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in) ),
60  A_(Matrix_in),
61  A_block_(Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(Matrix_in))
62 {}
63 
64 template<class MatrixType>
65 RBILUK<MatrixType>::RBILUK (const Teuchos::RCP<const block_crs_matrix_type>& Matrix_in)
66  : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in) ),
67  A_block_(Matrix_in)
68 {}
69 
70 
71 template<class MatrixType>
73 
74 
75 template<class MatrixType>
76 void
77 RBILUK<MatrixType>::setMatrix (const Teuchos::RCP<const block_crs_matrix_type>& A)
78 {
79  // FIXME (mfh 04 Nov 2015) What about A_? When does that get (re)set?
80 
81  // It's legal for A to be null; in that case, you may not call
82  // initialize() until calling setMatrix() with a nonnull input.
83  // Regardless, setting the matrix invalidates any previous
84  // factorization.
85  if (A.getRawPtr () != A_block_.getRawPtr ())
86  {
87  this->isAllocated_ = false;
88  this->isInitialized_ = false;
89  this->isComputed_ = false;
90  this->Graph_ = Teuchos::null;
91  L_block_ = Teuchos::null;
92  U_block_ = Teuchos::null;
93  D_block_ = Teuchos::null;
94  A_block_ = A;
95  }
96 }
97 
98 
99 
100 template<class MatrixType>
101 const typename RBILUK<MatrixType>::block_crs_matrix_type&
103 {
104  TEUCHOS_TEST_FOR_EXCEPTION(
105  L_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getL: The L factor "
106  "is null. Please call initialize() (and preferably also compute()) "
107  "before calling this method. If the input matrix has not yet been set, "
108  "you must first call setMatrix() with a nonnull input matrix before you "
109  "may call initialize() or compute().");
110  return *L_block_;
111 }
112 
113 
114 template<class MatrixType>
115 const typename RBILUK<MatrixType>::block_crs_matrix_type&
117 {
118  TEUCHOS_TEST_FOR_EXCEPTION(
119  D_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getD: The D factor "
120  "(of diagonal entries) is null. Please call initialize() (and "
121  "preferably also compute()) before calling this method. If the input "
122  "matrix has not yet been set, you must first call setMatrix() with a "
123  "nonnull input matrix before you may call initialize() or compute().");
124  return *D_block_;
125 }
126 
127 
128 template<class MatrixType>
129 const typename RBILUK<MatrixType>::block_crs_matrix_type&
131 {
132  TEUCHOS_TEST_FOR_EXCEPTION(
133  U_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getU: The U factor "
134  "is null. Please call initialize() (and preferably also compute()) "
135  "before calling this method. If the input matrix has not yet been set, "
136  "you must first call setMatrix() with a nonnull input matrix before you "
137  "may call initialize() or compute().");
138  return *U_block_;
139 }
140 
141 template<class MatrixType>
143 {
144  using Teuchos::null;
145  using Teuchos::rcp;
146 
147  if (! this->isAllocated_) {
148  // Deallocate any existing storage. This avoids storing 2x
149  // memory, since RCP op= does not deallocate until after the
150  // assignment.
151  this->L_ = null;
152  this->U_ = null;
153  this->D_ = null;
154  L_block_ = null;
155  U_block_ = null;
156  D_block_ = null;
157 
158  // Allocate Matrix using ILUK graphs
159  L_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getL_Graph (), blockSize_) );
160  U_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getU_Graph (), blockSize_) );
161  D_block_ = rcp(new block_crs_matrix_type(*(Ifpack2::Details::computeDiagonalGraph(*(this->Graph_->getOverlapGraph()))),
162  blockSize_) );
163  L_block_->setAllToScalar (STM::zero ()); // Zero out L and U matrices
164  U_block_->setAllToScalar (STM::zero ());
165  D_block_->setAllToScalar (STM::zero ());
166 
167  }
168  this->isAllocated_ = true;
169 }
170 
171 template<class MatrixType>
172 Teuchos::RCP<const typename RBILUK<MatrixType>::block_crs_matrix_type>
174  return A_block_;
175 }
176 
177 template<class MatrixType>
179 {
180  using Teuchos::RCP;
181  using Teuchos::rcp;
182  using Teuchos::rcp_dynamic_cast;
183  const char prefix[] = "Ifpack2::Experimental::RBILUK::initialize: ";
184 
185  // FIXME (mfh 04 Nov 2015) Apparently it's OK for A_ to be null.
186  // That probably means that this preconditioner was created with a
187  // BlockCrsMatrix directly, so it doesn't need the LocalFilter.
188 
189  // TEUCHOS_TEST_FOR_EXCEPTION
190  // (A_.is_null (), std::runtime_error, prefix << "The matrix (A_, the "
191  // "RowMatrix) is null. Please call setMatrix() with a nonnull input "
192  // "before calling this method.");
193 
194  if (A_block_.is_null ()) {
195  // FIXME (mfh 04 Nov 2015) Why does the input have to be a
196  // LocalFilter? Why can't we just take a regular matrix, and
197  // apply a LocalFilter only if necessary, like other "local"
198  // Ifpack2 preconditioners already do?
199  RCP<const LocalFilter<row_matrix_type> > filteredA =
200  rcp_dynamic_cast<const LocalFilter<row_matrix_type> >(A_);
201  TEUCHOS_TEST_FOR_EXCEPTION
202  (filteredA.is_null (), std::runtime_error, prefix <<
203  "Cannot cast to filtered matrix.");
204  RCP<const OverlappingRowMatrix<row_matrix_type> > overlappedA =
205  rcp_dynamic_cast<const OverlappingRowMatrix<row_matrix_type> > (filteredA->getUnderlyingMatrix ());
206  if (! overlappedA.is_null ()) {
207  A_block_ = rcp_dynamic_cast<const block_crs_matrix_type>(overlappedA->getUnderlyingMatrix());
208  } else {
209  //If there is no overlap, filteredA could be the block CRS matrix
210  A_block_ = rcp_dynamic_cast<const block_crs_matrix_type>(filteredA->getUnderlyingMatrix());
211  }
212  }
213 
214  TEUCHOS_TEST_FOR_EXCEPTION
215  (A_block_.is_null (), std::runtime_error, prefix << "The matrix (A_block_, "
216  "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
217  "input before calling this method.");
218  TEUCHOS_TEST_FOR_EXCEPTION
219  (! A_block_->isFillComplete (), std::runtime_error, prefix << "The matrix "
220  "(A_block_, the BlockCrsMatrix) is not fill complete. You may not invoke "
221  "initialize() or compute() with this matrix until the matrix is fill "
222  "complete. Note: BlockCrsMatrix is fill complete if and only if its "
223  "underlying graph is fill complete.");
224 
225  blockSize_ = A_block_->getBlockSize();
226 
227  Teuchos::Time timer ("RBILUK::initialize");
228  { // Start timing
229  Teuchos::TimeMonitor timeMon (timer);
230 
231  // Calling initialize() means that the user asserts that the graph
232  // of the sparse matrix may have changed. We must not just reuse
233  // the previous graph in that case.
234  //
235  // Regarding setting isAllocated_ to false: Eventually, we may want
236  // some kind of clever memory reuse strategy, but it's always
237  // correct just to blow everything away and start over.
238  this->isInitialized_ = false;
239  this->isAllocated_ = false;
240  this->isComputed_ = false;
241  this->Graph_ = Teuchos::null;
242 
243  typedef Tpetra::CrsGraph<local_ordinal_type,
245  node_type> crs_graph_type;
246 
247  RCP<const crs_graph_type> matrixCrsGraph = Teuchos::rcpFromRef(A_block_->getCrsGraph() );
248  this->Graph_ = rcp (new Ifpack2::IlukGraph<crs_graph_type> (matrixCrsGraph,
249  this->LevelOfFill_, 0));
250 
251  this->Graph_->initialize ();
252  allocate_L_and_U_blocks ();
253  initAllValues (*A_block_);
254  } // Stop timing
255 
256  this->isInitialized_ = true;
257  this->numInitialize_ += 1;
258  this->initializeTime_ += timer.totalElapsedTime ();
259 }
260 
261 
262 template<class MatrixType>
263 void
265 initAllValues (const block_crs_matrix_type& A)
266 {
267  //using Teuchos::ArrayRCP;
268  using Teuchos::RCP;
269  typedef Tpetra::Map<local_ordinal_type,global_ordinal_type,node_type> map_type;
270 
271  local_ordinal_type NumIn = 0, NumL = 0, NumU = 0;
272  bool DiagFound = false;
273  size_t NumNonzeroDiags = 0;
274  size_t MaxNumEntries = A.getNodeMaxNumRowEntries();
275  local_ordinal_type blockMatSize = blockSize_*blockSize_;
276 
277  // First check that the local row map ordering is the same as the local portion of the column map.
278  // The extraction of the strictly lower/upper parts of A, as well as the factorization,
279  // implicitly assume that this is the case.
280  Teuchos::ArrayView<const global_ordinal_type> rowGIDs = A.getRowMap()->getNodeElementList();
281  Teuchos::ArrayView<const global_ordinal_type> colGIDs = A.getColMap()->getNodeElementList();
282  bool gidsAreConsistentlyOrdered=true;
283  global_ordinal_type indexOfInconsistentGID=0;
284  for (global_ordinal_type i=0; i<rowGIDs.size(); ++i) {
285  if (rowGIDs[i] != colGIDs[i]) {
286  gidsAreConsistentlyOrdered=false;
287  indexOfInconsistentGID=i;
288  break;
289  }
290  }
291  TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered==false, std::runtime_error,
292  "The ordering of the local GIDs in the row and column maps is not the same"
293  << std::endl << "at index " << indexOfInconsistentGID
294  << ". Consistency is required, as all calculations are done with"
295  << std::endl << "local indexing.");
296 
297  // Allocate temporary space for extracting the strictly
298  // lower and upper parts of the matrix A.
299  Teuchos::Array<local_ordinal_type> LI(MaxNumEntries);
300  Teuchos::Array<local_ordinal_type> UI(MaxNumEntries);
301  Teuchos::Array<scalar_type> LV(MaxNumEntries*blockMatSize);
302  Teuchos::Array<scalar_type> UV(MaxNumEntries*blockMatSize);
303 
304  Teuchos::Array<scalar_type> diagValues(blockMatSize);
305 
306  L_block_->setAllToScalar (STM::zero ()); // Zero out L and U matrices
307  U_block_->setAllToScalar (STM::zero ());
308  D_block_->setAllToScalar (STM::zero ()); // Set diagonal values to zero
309 
310  RCP<const map_type> rowMap = L_block_->getRowMap ();
311 
312  // First we copy the user's matrix into L and U, regardless of fill level.
313  // It is important to note that L and U are populated using local indices.
314  // This means that if the row map GIDs are not monotonically increasing
315  // (i.e., permuted or gappy), then the strictly lower (upper) part of the
316  // matrix is not the one that you would get if you based L (U) on GIDs.
317  // This is ok, as the *order* of the GIDs in the rowmap is a better
318  // expression of the user's intent than the GIDs themselves.
319 
320  for (size_t myRow=0; myRow<A.getNodeNumRows(); ++myRow) {
321  local_ordinal_type local_row = myRow;
322 
323  //TODO JJH 4April2014 An optimization is to use getLocalRowView. Not all matrices support this,
324  // we'd need to check via the Tpetra::RowMatrix method supportsRowViews().
325  const local_ordinal_type * InI = 0;
326  scalar_type * InV = 0;
327  A.getLocalRowView(local_row, InI, InV, NumIn);
328 
329  // Split into L and U (we don't assume that indices are ordered).
330 
331  NumL = 0;
332  NumU = 0;
333  DiagFound = false;
334 
335  for (local_ordinal_type j = 0; j < NumIn; ++j) {
336  const local_ordinal_type k = InI[j];
337  const local_ordinal_type blockOffset = blockMatSize*j;
338 
339  if (k == local_row) {
340  DiagFound = true;
341  // Store perturbed diagonal in Tpetra::Vector D_
342  for (local_ordinal_type jj = 0; jj < blockMatSize; ++jj)
343  diagValues[jj] = this->Rthresh_ * InV[blockOffset+jj] + IFPACK2_SGN(InV[blockOffset+jj]) * this->Athresh_;
344  D_block_->replaceLocalValues(local_row, &InI[j], diagValues.getRawPtr(), 1);
345  }
346  else if (k < 0) { // Out of range
347  TEUCHOS_TEST_FOR_EXCEPTION(
348  true, std::runtime_error, "Ifpack2::RILUK::initAllValues: current "
349  "GID k = " << k << " < 0. I'm not sure why this is an error; it is "
350  "probably an artifact of the undocumented assumptions of the "
351  "original implementation (likely copied and pasted from Ifpack). "
352  "Nevertheless, the code I found here insisted on this being an error "
353  "state, so I will throw an exception here.");
354  }
355  else if (k < local_row) {
356  LI[NumL] = k;
357  const local_ordinal_type LBlockOffset = NumL*blockMatSize;
358  for (local_ordinal_type jj = 0; jj < blockMatSize; ++jj)
359  LV[LBlockOffset+jj] = InV[blockOffset+jj];
360  NumL++;
361  }
362  else if (Teuchos::as<size_t>(k) <= rowMap->getNodeNumElements()) {
363  UI[NumU] = k;
364  const local_ordinal_type UBlockOffset = NumU*blockMatSize;
365  for (local_ordinal_type jj = 0; jj < blockMatSize; ++jj)
366  UV[UBlockOffset+jj] = InV[blockOffset+jj];
367  NumU++;
368  }
369  }
370 
371  // Check in things for this row of L and U
372 
373  if (DiagFound) {
374  ++NumNonzeroDiags;
375  } else
376  {
377  for (local_ordinal_type jj = 0; jj < blockSize_; ++jj)
378  diagValues[jj*(blockSize_+1)] = this->Athresh_;
379  D_block_->replaceLocalValues(local_row, &local_row, diagValues.getRawPtr(), 1);
380  }
381 
382  if (NumL) {
383  L_block_->replaceLocalValues(local_row, &LI[0], &LV[0], NumL);
384  }
385 
386  if (NumU) {
387  U_block_->replaceLocalValues(local_row, &UI[0], &UV[0], NumU);
388  }
389  }
390 
391  this->isInitialized_ = true;
392 }
393 
394 template<class MatrixType>
396 {
397  typedef impl_scalar_type IST;
398  const char prefix[] = "Ifpack2::Experimental::RBILUK::compute: ";
399 
400  // initialize() checks this too, but it's easier for users if the
401  // error shows them the name of the method that they actually
402  // called, rather than the name of some internally called method.
403  TEUCHOS_TEST_FOR_EXCEPTION
404  (A_block_.is_null (), std::runtime_error, prefix << "The matrix (A_block_, "
405  "the BlockCrsMatrix) is null. Please call setMatrix() with a nonnull "
406  "input before calling this method.");
407  TEUCHOS_TEST_FOR_EXCEPTION
408  (! A_block_->isFillComplete (), std::runtime_error, prefix << "The matrix "
409  "(A_block_, the BlockCrsMatrix) is not fill complete. You may not invoke "
410  "initialize() or compute() with this matrix until the matrix is fill "
411  "complete. Note: BlockCrsMatrix is fill complete if and only if its "
412  "underlying graph is fill complete.");
413 
414  if (! this->isInitialized ()) {
415  initialize (); // Don't count this in the compute() time
416  }
417 
418 
419 
420  Teuchos::Time timer ("RBILUK::compute");
421  { // Start timing
422  this->isComputed_ = false;
423 
424  // MinMachNum should be officially defined, for now pick something a little
425  // bigger than IEEE underflow value
426 
427 // const scalar_type MinDiagonalValue = STS::rmin ();
428 // const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
429  initAllValues (*A_block_);
430 
431  size_t NumIn;
432  local_ordinal_type NumL, NumU, NumURead;
433 
434  // Get Maximum Row length
435  const size_t MaxNumEntries =
436  L_block_->getNodeMaxNumRowEntries () + U_block_->getNodeMaxNumRowEntries () + 1;
437 
438  const local_ordinal_type blockMatSize = blockSize_*blockSize_;
439 
440  // FIXME (mfh 08 Nov 2015) We need to move away from expressing
441  // these strides explicitly, in order to Kokkos-ize
442  // BlockCrsMatrix.
443  const local_ordinal_type rowStride = blockSize_;
444  const local_ordinal_type colStride = 1;
445 
446  Teuchos::Array<int> ipiv_teuchos(blockSize_);
447  Kokkos::View<int*, Kokkos::HostSpace,
448  Kokkos::MemoryUnmanaged> ipiv (ipiv_teuchos.getRawPtr (), blockSize_);
449  Teuchos::Array<IST> work_teuchos(blockSize_);
450  Kokkos::View<IST*, Kokkos::HostSpace,
451  Kokkos::MemoryUnmanaged> work (work_teuchos.getRawPtr (), blockSize_);
452 
453  size_t num_cols = U_block_->getColMap()->getNodeNumElements();
454  Teuchos::Array<int> colflag(num_cols);
455 
456  Teuchos::Array<impl_scalar_type> diagMod(blockMatSize,STM::zero());
457  little_block_type diagModBlock(&diagMod[0], blockSize_, rowStride, colStride);
458  Teuchos::Array<impl_scalar_type> matTmpArray(blockMatSize,STM::zero());
459  little_block_type matTmp(&matTmpArray[0], blockSize_, rowStride, colStride);
460  Teuchos::Array<impl_scalar_type> multiplierArray(blockMatSize, STM::zero());
461  little_block_type multiplier(&multiplierArray[0], blockSize_, rowStride, colStride);
462 
463 // Teuchos::ArrayRCP<scalar_type> DV = D_->get1dViewNonConst(); // Get view of diagonal
464 
465  // Now start the factorization.
466 
467  // Need some integer workspace and pointers
468  local_ordinal_type NumUU;
469  for (size_t j = 0; j < num_cols; ++j) {
470  colflag[j] = -1;
471  }
472  Teuchos::Array<local_ordinal_type> InI(MaxNumEntries, 0);
473  Teuchos::Array<scalar_type> InV(MaxNumEntries*blockMatSize,STM::zero());
474 
475  const local_ordinal_type numLocalRows = L_block_->getNodeNumRows ();
476  for (local_ordinal_type local_row = 0; local_row < numLocalRows; ++local_row) {
477 
478  // Fill InV, InI with current row of L, D and U combined
479 
480  NumIn = MaxNumEntries;
481  const local_ordinal_type * colValsL;
482  scalar_type * valsL;
483 
484  L_block_->getLocalRowView(local_row, colValsL, valsL, NumL);
485  for (local_ordinal_type j = 0; j < NumL; ++j)
486  {
487  const local_ordinal_type matOffset = blockMatSize*j;
488  little_block_type lmat(&valsL[matOffset],blockSize_,rowStride, colStride);
489  little_block_type lmatV(&InV[matOffset],blockSize_,rowStride, colStride);
490  //lmatV.assign(lmat);
491  Tpetra::Experimental::COPY (lmat, lmatV);
492  InI[j] = colValsL[j];
493  }
494 
495  little_block_type dmat = D_block_->getLocalBlock(local_row, local_row);
496  little_block_type dmatV(&InV[NumL*blockMatSize], blockSize_, rowStride, colStride);
497  //dmatV.assign(dmat);
498  Tpetra::Experimental::COPY (dmat, dmatV);
499  InI[NumL] = local_row;
500 
501  const local_ordinal_type * colValsU;
502  scalar_type * valsU;
503  U_block_->getLocalRowView(local_row, colValsU, valsU, NumURead);
504  NumU = 0;
505  for (local_ordinal_type j = 0; j < NumURead; ++j)
506  {
507  if (!(colValsU[j] < numLocalRows)) continue;
508  InI[NumL+1+j] = colValsU[j];
509  const local_ordinal_type matOffset = blockMatSize*(NumL+1+j);
510  little_block_type umat(&valsU[blockMatSize*j], blockSize_, rowStride, colStride);
511  little_block_type umatV(&InV[matOffset], blockSize_, rowStride, colStride);
512  //umatV.assign(umat);
513  Tpetra::Experimental::COPY (umat, umatV);
514  NumU += 1;
515  }
516  NumIn = NumL+NumU+1;
517 
518  // Set column flags
519  for (size_t j = 0; j < NumIn; ++j) {
520  colflag[InI[j]] = j;
521  }
522 
523 
524  scalar_type diagmod = STM::zero (); // Off-diagonal accumulator
525  Tpetra::Experimental::deep_copy (diagModBlock, diagmod);
526 
527  for (local_ordinal_type jj = 0; jj < NumL; ++jj) {
528  local_ordinal_type j = InI[jj];
529  little_block_type currentVal(&InV[jj*blockMatSize], blockSize_, rowStride, colStride); // current_mults++;
530  //multiplier.assign(currentVal);
531  Tpetra::Experimental::COPY (currentVal, multiplier);
532 
533  const little_block_type dmatInverse = D_block_->getLocalBlock(j,j);
534  // alpha = 1, beta = 0
535  Tpetra::Experimental::GEMM ("N", "N", STS::one (), currentVal, dmatInverse,
536  STS::zero (), matTmp);
537  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (currentVal.ptr_on_device ()), reinterpret_cast<impl_scalar_type*> (dmatInverse.ptr_on_device ()), reinterpret_cast<impl_scalar_type*> (matTmp.ptr_on_device ()), blockSize_);
538  //currentVal.assign(matTmp);
539  Tpetra::Experimental::COPY (matTmp, currentVal);
540 
541  const local_ordinal_type * UUI;
542  scalar_type * UUV;
543  U_block_->getLocalRowView(j, UUI, UUV, NumUU);
544 
545  if (this->RelaxValue_ == STM::zero ()) {
546  for (local_ordinal_type k = 0; k < NumUU; ++k) {
547  if (!(UUI[k] < numLocalRows)) continue;
548  const int kk = colflag[UUI[k]];
549  if (kk > -1) {
550  little_block_type kkval(&InV[kk*blockMatSize], blockSize_, rowStride, colStride);
551  little_block_type uumat(&UUV[k*blockMatSize], blockSize_, rowStride, colStride);
552  Tpetra::Experimental::GEMM ("N", "N", -STM::one (), multiplier, uumat,
553  STM::one (), kkval);
554  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*> (multiplier.ptr_on_device ()), reinterpret_cast<impl_scalar_type*> (uumat.ptr_on_device ()), reinterpret_cast<impl_scalar_type*> (kkval.ptr_on_device ()), blockSize_, -STM::one(), STM::one());
555  }
556  }
557  }
558  else {
559  for (local_ordinal_type k = 0; k < NumUU; ++k) {
560  if (!(UUI[k] < numLocalRows)) continue;
561  const int kk = colflag[UUI[k]];
562  little_block_type uumat(&UUV[k*blockMatSize], blockSize_, rowStride, colStride);
563  if (kk > -1) {
564  little_block_type kkval(&InV[kk*blockMatSize], blockSize_, rowStride, colStride);
565  Tpetra::Experimental::GEMM ("N", "N", -STM::one (), multiplier, uumat,
566  STM::one (), kkval);
567  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.ptr_on_device ()), reinterpret_cast<impl_scalar_type*>(uumat.ptr_on_device ()), reinterpret_cast<impl_scalar_type*>(kkval.ptr_on_device ()), blockSize_, -STM::one(), STM::one());
568  }
569  else {
570  Tpetra::Experimental::GEMM ("N", "N", -STM::one (), multiplier, uumat,
571  STM::one (), diagModBlock);
572  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.ptr_on_device ()), reinterpret_cast<impl_scalar_type*>(uumat.ptr_on_device ()), reinterpret_cast<impl_scalar_type*>(diagModBlock.ptr_on_device ()), blockSize_, -STM::one(), STM::one());
573  }
574  }
575  }
576  }
577  if (NumL) {
578  // Replace current row of L
579  L_block_->replaceLocalValues (local_row, InI.getRawPtr (), InV.getRawPtr (), NumL);
580  }
581 
582  // dmat.assign(dmatV);
583  Tpetra::Experimental::COPY (dmatV, dmat);
584 
585  if (this->RelaxValue_ != STM::zero ()) {
586  //dmat.update(this->RelaxValue_, diagModBlock);
587  Tpetra::Experimental::AXPY (this->RelaxValue_, diagModBlock, dmat);
588  }
589 
590 // if (STS::magnitude (DV[i]) > STS::magnitude (MaxDiagonalValue)) {
591 // if (STS::real (DV[i]) < STM::zero ()) {
592 // DV[i] = -MinDiagonalValue;
593 // }
594 // else {
595 // DV[i] = MinDiagonalValue;
596 // }
597 // }
598 // else
599  {
600  int lapackInfo = 0;
601  for (int k = 0; k < blockSize_; ++k) {
602  ipiv[k] = 0;
603  }
604 
605  Tpetra::Experimental::GETF2 (dmat, ipiv, lapackInfo);
606  //lapack.GETRF(blockSize_, blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), &lapackInfo);
607  TEUCHOS_TEST_FOR_EXCEPTION(
608  lapackInfo != 0, std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
609  "lapackInfo = " << lapackInfo << " which indicates an error in the factorization GETRF.");
610 
611  Tpetra::Experimental::GETRI (dmat, ipiv, work, lapackInfo);
612  //lapack.GETRI(blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), work.getRawPtr(), lwork, &lapackInfo);
613  TEUCHOS_TEST_FOR_EXCEPTION(
614  lapackInfo != 0, std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
615  "lapackInfo = " << lapackInfo << " which indicates an error in the matrix inverse GETRI.");
616  }
617 
618  for (local_ordinal_type j = 0; j < NumU; ++j) {
619  little_block_type currentVal(&InV[(NumL+1+j)*blockMatSize], blockSize_, rowStride, colStride); // current_mults++;
620  // scale U by the diagonal inverse
621  Tpetra::Experimental::GEMM ("N", "N", STS::one (), dmat, currentVal,
622  STS::zero (), matTmp);
623  //blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(dmat.ptr_on_device ()), reinterpret_cast<impl_scalar_type*>(currentVal.ptr_on_device ()), reinterpret_cast<impl_scalar_type*>(matTmp.ptr_on_device ()), blockSize_);
624  //currentVal.assign(matTmp);
625  Tpetra::Experimental::COPY (matTmp, currentVal);
626  }
627 
628  if (NumU) {
629  // Replace current row of L and U
630  U_block_->replaceLocalValues (local_row, &InI[NumL+1], &InV[blockMatSize*(NumL+1)], NumU);
631  }
632 
633  // Reset column flags
634  for (size_t j = 0; j < num_cols; ++j) {
635  colflag[j] = -1;
636  }
637  }
638 
639  } // Stop timing
640 
641  this->isComputed_ = true;
642  this->numCompute_ += 1;
643  this->computeTime_ += timer.totalElapsedTime ();
644 }
645 
646 
647 template<class MatrixType>
648 void
650 apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
651  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
652  Teuchos::ETransp mode,
653  scalar_type alpha,
654  scalar_type beta) const
655 {
656  using Teuchos::RCP;
657  typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
659  typedef Tpetra::MultiVector<scalar_type,
661 
662  TEUCHOS_TEST_FOR_EXCEPTION(
663  A_block_.is_null (), std::runtime_error, "Ifpack2::Experimental::RBILUK::apply: The matrix is "
664  "null. Please call setMatrix() with a nonnull input, then initialize() "
665  "and compute(), before calling this method.");
666  TEUCHOS_TEST_FOR_EXCEPTION(
667  ! this->isComputed (), std::runtime_error,
668  "Ifpack2::Experimental::RBILUK::apply: If you have not yet called compute(), "
669  "you must call compute() before calling this method.");
670  TEUCHOS_TEST_FOR_EXCEPTION(
671  X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
672  "Ifpack2::Experimental::RBILUK::apply: X and Y do not have the same number of columns. "
673  "X.getNumVectors() = " << X.getNumVectors ()
674  << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
675  TEUCHOS_TEST_FOR_EXCEPTION(
676  STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
677  "Ifpack2::Experimental::RBILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
678  "complex Scalar type. Please talk to the Ifpack2 developers to get this "
679  "fixed. There is a FIXME in this file about this very issue.");
680 
681  const local_ordinal_type blockMatSize = blockSize_*blockSize_;
682 
683  const local_ordinal_type rowStride = blockSize_;
684  const local_ordinal_type colStride = 1;
685 
686  BMV yBlock (Y, * (A_block_->getGraph ()->getDomainMap ()), blockSize_);
687  const BMV xBlock (X, * (A_block_->getColMap ()), blockSize_);
688 
689  Teuchos::Array<scalar_type> lclarray(blockSize_);
690  little_vec_type lclvec(&lclarray[0], blockSize_, colStride);
691  const scalar_type one = STM::one ();
692  const scalar_type zero = STM::zero ();
693 
694  Teuchos::Time timer ("RBILUK::apply");
695  { // Start timing
696  Teuchos::TimeMonitor timeMon (timer);
697  if (alpha == one && beta == zero) {
698  if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y.
699  // Start by solving L C = X for C. C must have the same Map
700  // as D. We have to use a temp multivector, since
701  // localSolve() does not allow its input and output to alias
702  // one another.
703  //
704  // FIXME (mfh 24 Jan 2014) Cache this temp multivector.
705  const local_ordinal_type numVectors = xBlock.getNumVectors();
706  BMV cBlock (* (A_block_->getGraph ()->getDomainMap ()), blockSize_, numVectors);
707  BMV rBlock (* (A_block_->getGraph ()->getDomainMap ()), blockSize_, numVectors);
708  for (local_ordinal_type imv = 0; imv < numVectors; ++imv)
709  {
710  for (size_t i = 0; i < D_block_->getNodeNumRows(); ++i)
711  {
712  local_ordinal_type local_row = i;
713  little_vec_type xval = xBlock.getLocalBlock(local_row,imv);
714  little_vec_type cval = cBlock.getLocalBlock(local_row,imv);
715  //cval.assign(xval);
716  Tpetra::Experimental::COPY (xval, cval);
717 
718  local_ordinal_type NumL;
719  const local_ordinal_type * colValsL;
720  scalar_type * valsL;
721 
722  L_block_->getLocalRowView(local_row, colValsL, valsL, NumL);
723 
724  for (local_ordinal_type j = 0; j < NumL; ++j)
725  {
726  local_ordinal_type col = colValsL[j];
727  little_vec_type prevVal = cBlock.getLocalBlock(col, imv);
728 
729  const local_ordinal_type matOffset = blockMatSize*j;
730  little_block_type lij(&valsL[matOffset],blockSize_,rowStride, colStride);
731 
732  //cval.matvecUpdate(-one, lij, prevVal);
733  Tpetra::Experimental::GEMV (-one, lij, prevVal, cval);
734  }
735  }
736  }
737 
738  // Solve D R = C. Note that D has been replaced by D^{-1} at this point.
739  D_block_->applyBlock(cBlock, rBlock);
740 
741  // Solve U Y = R.
742  for (local_ordinal_type imv = 0; imv < numVectors; ++imv)
743  {
744  const local_ordinal_type numRows = D_block_->getNodeNumRows();
745  for (local_ordinal_type i = 0; i < numRows; ++i)
746  {
747  local_ordinal_type local_row = (numRows-1)-i;
748  little_vec_type rval = rBlock.getLocalBlock(local_row,imv);
749  little_vec_type yval = yBlock.getLocalBlock(local_row,imv);
750  //yval.assign(rval);
751  Tpetra::Experimental::COPY (rval, yval);
752 
753  local_ordinal_type NumU;
754  const local_ordinal_type * colValsU;
755  scalar_type * valsU;
756 
757  U_block_->getLocalRowView(local_row, colValsU, valsU, NumU);
758 
759  for (local_ordinal_type j = 0; j < NumU; ++j)
760  {
761  local_ordinal_type col = colValsU[NumU-1-j];
762  little_vec_type prevVal = yBlock.getLocalBlock(col, imv);
763 
764  const local_ordinal_type matOffset = blockMatSize*(NumU-1-j);
765  little_block_type uij(&valsU[matOffset], blockSize_, rowStride, colStride);
766 
767  //yval.matvecUpdate(-one, uij, prevVal);
768  Tpetra::Experimental::GEMV (-one, uij, prevVal, yval);
769  }
770  }
771  }
772  }
773  else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
774  TEUCHOS_TEST_FOR_EXCEPTION(
775  true, std::runtime_error,
776  "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm. ");
777  }
778  }
779  else { // alpha != 1 or beta != 0
780  if (alpha == zero) {
781  if (beta == zero) {
782  Y.putScalar (zero);
783  } else {
784  Y.scale (beta);
785  }
786  } else { // alpha != zero
787  MV Y_tmp (Y.getMap (), Y.getNumVectors ());
788  apply (X, Y_tmp, mode);
789  Y.update (alpha, Y_tmp, beta);
790  }
791  }
792  } // Stop timing
793 
794  this->numApply_ += 1;
795  this->applyTime_ = timer.totalElapsedTime ();
796 }
797 
798 
799 template<class MatrixType>
801 {
802  std::ostringstream os;
803 
804  // Output is a valid YAML dictionary in flow style. If you don't
805  // like everything on a single line, you should call describe()
806  // instead.
807  os << "\"Ifpack2::Experimental::RBILUK\": {";
808  os << "Initialized: " << (this->isInitialized () ? "true" : "false") << ", "
809  << "Computed: " << (this->isComputed () ? "true" : "false") << ", ";
810 
811  os << "Level-of-fill: " << this->getLevelOfFill() << ", ";
812 
813  if (A_block_.is_null ()) {
814  os << "Matrix: null";
815  }
816  else {
817  os << "Global matrix dimensions: ["
818  << A_block_->getGlobalNumRows () << ", " << A_block_->getGlobalNumCols () << "]"
819  << ", Global nnz: " << A_block_->getGlobalNumEntries();
820  }
821 
822  os << "}";
823  return os.str ();
824 }
825 
826 } // namespace Experimental
827 
828 } // namespace Ifpack2
829 
830 // FIXME (mfh 26 Aug 2015) We only need to do instantiation for
831 // MatrixType = Tpetra::RowMatrix. Conversions to BlockCrsMatrix are
832 // handled internally via dynamic cast.
833 
834 #define IFPACK2_EXPERIMENTAL_RBILUK_INSTANT(S,LO,GO,N) \
835  template class Ifpack2::Experimental::RBILUK< Tpetra::Experimental::BlockCrsMatrix<S, LO, GO, N> >; \
836  template class Ifpack2::Experimental::RBILUK< Tpetra::RowMatrix<S, LO, GO, N> >;
837 
838 #endif
Ifpack2 features that are experimental. Use at your own risk.
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:178
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:145
virtual ~RBILUK()
Destructor (declared virtual for memory safety).
Definition: Ifpack2_Experimental_RBILUK_def.hpp:72
Teuchos::RCP< const block_crs_matrix_type > getBlockMatrix() const
Get the input matrix.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:173
const block_crs_matrix_type & getUBlock() const
Return the U factor of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:130
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:136
Teuchos::RCP< crs_matrix_type > L_
The L (lower triangular) factor of ILU(k).
Definition: Ifpack2_RILUK_decl.hpp:571
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:650
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition: Ifpack2_RILUK_decl.hpp:243
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:148
bool isComputed() const
Whether compute() has been called on this object.
Definition: Ifpack2_RILUK_decl.hpp:344
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:142
const block_crs_matrix_type & getDBlock() const
Return the diagonal entries of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:116
void compute()
Compute the (numeric) incomplete factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:395
Teuchos::RCP< Ifpack2::IlukGraph< Tpetra::CrsGraph< local_ordinal_type, global_ordinal_type, node_type > > > Graph_
The ILU(k) graph.
Definition: Ifpack2_RILUK_decl.hpp:559
File for utility functions.
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition: Ifpack2_IlukGraph.hpp:97
bool isInitialized() const
Whether initialize() has been called on this object.
Definition: Ifpack2_RILUK_decl.hpp:340
void setMatrix(const Teuchos::RCP< const block_crs_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:77
Definition: Ifpack2_Details_Amesos2Wrapper_decl.hpp:63
Teuchos::RCP< vec_type > D_
The diagonal entries of the ILU(k) factorization.
Definition: Ifpack2_RILUK_decl.hpp:575
const block_crs_matrix_type & getLBlock() const
Return the L factor of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:102
Sparse matrix (Tpetra::RowMatrix subclass) with ghost rows.
Definition: Ifpack2_OverlappingRowMatrix_decl.hpp:59
Access only local rows and columns of a sparse matrix.
Definition: Ifpack2_LocalFilter_decl.hpp:160
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:157
std::string description() const
A one-line description of this object.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:800
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:72
ILU(k) factorization of a given Tpetra::Experimental::BlockCrsMatrix.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:128
Teuchos::RCP< crs_matrix_type > U_
The U (upper triangular) factor of ILU(k).
Definition: Ifpack2_RILUK_decl.hpp:573