MueLu  Version of the Day
MueLu_CoalesceDropFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
47 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
48 
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
50 #include <Kokkos_CrsMatrix.hpp>
51 
53 
54 #include "MueLu_AmalgamationInfo.hpp"
55 #include "MueLu_Exceptions.hpp"
56 #include "MueLu_Level.hpp"
57 #include "MueLu_LWGraph_kokkos.hpp"
58 #include "MueLu_MasterList.hpp"
59 #include "MueLu_Monitor.hpp"
60 #include "MueLu_Utilities_kokkos.hpp"
61 
62 namespace MueLu {
63 
64  namespace { // anonymous
65 
66  template<class LocalOrdinal, class RowType>
67  class ScanFunctor {
68  public:
69  ScanFunctor(RowType rows) : rows_(rows) { }
70 
71  KOKKOS_INLINE_FUNCTION
72  void operator()(const LocalOrdinal i, LocalOrdinal& upd, const bool& final) const {
73  upd += rows_(i);
74  if (final)
75  rows_(i) = upd;
76  }
77 
78  private:
79  RowType rows_;
80  };
81 
82  }
83 
84  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
85  RCP<const ParameterList> CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::GetValidParameterList() const {
86  RCP<ParameterList> validParamList = rcp(new ParameterList());
87 
88 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
89  SET_VALID_ENTRY("aggregation: drop tol");
90  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
91  SET_VALID_ENTRY("aggregation: drop scheme");
92  {
93  typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
94  validParamList->getEntry("aggregation: drop scheme").setValidator(
95  rcp(new validatorType(Teuchos::tuple<std::string>("classical", "distance laplacian"), "aggregation: drop scheme")));
96  }
97 #undef SET_VALID_ENTRY
98  validParamList->set< bool > ("lightweight wrap", true, "Experimental option for lightweight graph access");
99 
100  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
101  validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
102  validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for Coordinates");
103 
104  return validParamList;
105  }
106 
107  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class DeviceType>
108  void CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::DeclareInput(Level &currentLevel) const {
109  Input(currentLevel, "A");
110  Input(currentLevel, "UnAmalgamationInfo");
111 
112  const ParameterList& pL = GetParameterList();
113  if (pL.get<bool>("lightweight wrap") == true) {
114  if (pL.get<std::string>("aggregation: drop scheme") == "distance laplacian")
115  Input(currentLevel, "Coordinates");
116  }
117  }
118 
119  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class DeviceType>
120  void CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::Build(Level& currentLevel) const {
121  FactoryMonitor m(*this, "Build", currentLevel);
122 
123  typedef Teuchos::ScalarTraits<SC> STS;
124  SC zero = STS::zero(), one = STS::one();
125 
126  auto A = Get< RCP<Matrix> >(currentLevel, "A");
127  LO blkSize = A->GetFixedBlockSize();
128  GO indexBase = A->getRowMap()->getIndexBase();
129 
130  auto amalInfo = Get< RCP<AmalgamationInfo> >(currentLevel, "UnAmalgamationInfo");
131 
132  const ParameterList& pL = GetParameterList();
133 
134  // bool doLightWeightWrap = pL.get<bool>("lightweight wrap");
135  GetOStream(Warnings0) << "lightweight wrap is deprecated" << std::endl;
136 
137  std::string algo = pL.get<std::string>("aggregation: drop scheme");
138 
139  double threshold = pL.get<double>("aggregation: drop tol");
140  GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
141 
142  Set(currentLevel, "Filtering", (threshold != STS::zero()));
143 
144  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
145 
146  GO numDropped = 0, numTotal = 0;
147 
148  RCP<LWGraph_kokkos> graph;
149  LO dofsPerNode = -1;
150 
151  typedef typename LWGraph_kokkos::boundary_nodes_type boundary_nodes_type;
152  boundary_nodes_type boundaryNodes;
153 
154  if (algo == "classical") {
155 
156  if (blkSize == 1 && threshold == STS::zero()) {
157  //
158  // Case 1: scalar problem without dropping
159  //
160  graph = rcp(new LWGraph_kokkos(A->getLocalMatrix().graph, A->getRowMap(), A->getColMap(), "graph of A"));
161 
162  // Detect and record rows that correspond to Dirichlet boundary conditions
163  boundaryNodes = Utilities_kokkos::DetectDirichletRows(*A, dirichletThreshold);
164  graph->SetBoundaryNodeMap(boundaryNodes);
165 
166  numTotal = A->getNodeNumEntries();
167 
168  dofsPerNode = 1;
169 
170  } else if (blkSize == 1 && threshold != STS::zero()) {
171  //
172  // Case 2: scalar problem with filtering
173  //
174  RCP<Vector> ghostedDiagVec = Utilities_kokkos::GetMatrixOverlappedDiagonal(*A);
175  auto ghostedDiag = ghostedDiagVec->template getLocalView<typename NO::device_type>();
176 
177  typedef typename Matrix::local_matrix_type local_matrix_type;
178  typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type;
179  typedef typename kokkos_graph_type::row_map_type row_map_type;
180  typedef typename kokkos_graph_type::entries_type entries_type;
181 
182  LO numRows = A->getNodeNumRows();
183  auto kokkosMatrix = A->getLocalMatrix();
184 
185  typedef Kokkos::ArithTraits<SC> ATS;
186  typedef typename ATS::magnitudeType magnitudeType;
187 
188  // Stage 1: calculate the number of remaining entries per row
189  typename row_map_type::non_const_type rows("row_map", numRows+1); // rows(0) = 0 automatically
190 
191  LO realnnz = 0;
192  Kokkos::parallel_reduce("CoalesceDropF:Build:scalar_filter:stage1_reduce", numRows, KOKKOS_LAMBDA(const LO row, LO& nnz) {
193  auto rowView = kokkosMatrix.template row<LO>(row);
194  auto length = rowView.length;
195 
196  LO rownnz = 0;
197  for (decltype(length) colID = 0; colID < length; colID++) {
198  LO col = rowView.colidx(colID);
199 
200  // Avoid square root by using squared values
201  magnitudeType aiiajj = threshold*threshold * ATS::magnitude(ghostedDiag(row, 0))*ATS::magnitude(ghostedDiag(col, 0)); // eps^2*|a_ii|*|a_jj|
202  magnitudeType aij2 = ATS::magnitude(rowView.value(colID)) * ATS::magnitude(rowView.value(colID)); // |a_ij|^2
203 
204  if (aij2 > aiiajj || row == col)
205  rownnz++;
206  }
207  rows(row+1) = rownnz;
208  nnz += rownnz;
209  }, realnnz);
210 
211  // parallel_scan (exclusive)
212  // NOTE: parallel_scan with KOKKOS_LAMBDA does not work with CUDA for now
213 #if 0
214  Kokkos::parallel_scan("CoalesceDropF:Build:scalar_filter:stage1_scan", numRows+1, KOKKOS_LAMBDA(const LO i, LO& upd, const bool& final) {
215  upd += rows(i);
216  if (final)
217  rows(i) = upd;
218  });
219 #else
220  ScanFunctor<LO,decltype(rows)> scanFunctor(rows);
221  Kokkos::parallel_scan("CoalesceDropF:Build:scalar_filter:stage1_scan", numRows+1, scanFunctor);
222 #endif
223 
224 
225  // Stage 2: fill in the column indices
226  typename boundary_nodes_type::non_const_type bndNodes("boundaryNodes", numRows);
227  typename entries_type::non_const_type cols ("entries", realnnz);
228  Kokkos::parallel_reduce("CoalesceDropF:Build:scalar_filter:stage2_reduce", numRows, KOKKOS_LAMBDA(const LO row, GO& dropped) {
229  auto rowView = kokkosMatrix.template row<LO>(row);
230  auto length = rowView.length;
231 
232  LO rownnz = 0;
233  for (decltype(length) colID = 0; colID < length; colID++) {
234  LO col = rowView.colidx(colID);
235 
236  // Avoid square root by using squared values
237  magnitudeType aiiajj = threshold*threshold * ATS::magnitude(ghostedDiag(row, 0))*ATS::magnitude(ghostedDiag(col, 0)); // eps^2*|a_ii|*|a_jj|
238  magnitudeType aij2 = ATS::magnitude(rowView.value(colID)) * ATS::magnitude(rowView.value(colID)); // |a_ij|^2
239 
240  if (aij2 > aiiajj || row == col) {
241  cols(rows(row) + rownnz) = col;
242  rownnz++;
243  } else {
244  dropped++;
245  }
246  if (rownnz == 1) {
247  // If the only element remaining after filtering is diagonal, mark node as boundary
248  // FIXME: this should really be replaced by the following
249  // if (indices.size() == 1 && indices[0] == row)
250  // boundaryNodes[row] = true;
251  // We do not do it this way now because there is no framework for distinguishing isolated
252  // and boundary nodes in the aggregation algorithms
253  bndNodes(row) = true;
254  }
255  }
256  }, numDropped);
257  boundaryNodes = bndNodes;
258 
259  kokkos_graph_type kokkosGraph(cols, rows);
260 
261  graph = rcp(new LWGraph_kokkos(kokkosGraph, A->getRowMap(), A->getColMap(), "filtered graph of A"));
262  graph->SetBoundaryNodeMap(boundaryNodes);
263 
264  numTotal = A->getNodeNumEntries();
265 
266  dofsPerNode = 1;
267 
268  } else if (blkSize > 1 && threshold == STS::zero()) {
269  //
270  // Case 3: block problem without filtering
271  //
272  throw Exceptions::RuntimeError("Block systems without filtering are not implemented");
273 
274  // Detect and record rows that correspond to Dirichlet boundary conditions
275  // boundary_nodes_type pointBoundaryNodes = Utilities_kokkos::DetectDirichletRows(*A, dirichletThreshold);
276 
277  } else if (blkSize > 1 && threshold != STS::zero()) {
278  //
279  // Case 4: block problem with filtering
280  //
281  throw Exceptions::RuntimeError("Block systems with filtering are not implemented");
282 
283  // Detect and record rows that correspond to Dirichlet boundary conditions
284  // boundary_nodes_type pointBoundaryNodes = Utilities_kokkos::DetectDirichletRows(*A, dirichletThreshold);
285  }
286 
287  } else if (algo == "distance laplacian") {
288  typedef Xpetra::MultiVector<double,LO,GO,NO> doubleMultiVector;
289 
290  auto coords = Get<RCP<doubleMultiVector> >(currentLevel, "Coordinates");
291 
292  if (A->GetFixedBlockSize() == 1 && threshold == STS::zero()) {
293  //
294  // Case 1: scalar problem without dropping
295  //
296  graph = rcp(new LWGraph_kokkos(A->getLocalMatrix().graph, A->getRowMap(), A->getColMap(), "graph of A"));
297 
298  // Detect and record rows that correspond to Dirichlet boundary conditions
299  boundaryNodes = Utilities_kokkos::DetectDirichletRows(*A, dirichletThreshold);
300  graph->SetBoundaryNodeMap(boundaryNodes);
301 
302  numTotal = A->getNodeNumEntries();
303 
304  dofsPerNode = 1;
305 
306  } else if (blkSize == 1 && threshold != STS::zero()) {
307  //
308  // Case 2: scalar problem with filtering
309  //
310  throw Exceptions::RuntimeError("not implemented");
311 
312  } else if (blkSize > 1 && threshold == STS::zero()) {
313  //
314  // Case 3: block problem without filtering
315  //
316  throw Exceptions::RuntimeError("Block systems without filtering are not implemented");
317 
318  // Detect and record rows that correspond to Dirichlet boundary conditions
319  boundary_nodes_type pointBoundaryNodes = Utilities_kokkos::DetectDirichletRows(*A, dirichletThreshold);
320 
321  } else if (blkSize > 1 && threshold != STS::zero()) {
322  //
323  // Case 4: block problem with filtering
324  //
325  throw Exceptions::RuntimeError("Block systems with filtering are not implemented");
326 
327  // Detect and record rows that correspond to Dirichlet boundary conditions
328  boundary_nodes_type pointBoundaryNodes = Utilities_kokkos::DetectDirichletRows(*A, dirichletThreshold);
329  }
330 
331  } else if (algo == "distance laplacian") {
332  typedef Xpetra::MultiVector<double,LO,GO,NO> doubleMultiVector;
333 
334  auto coords = Get<RCP<doubleMultiVector> >(currentLevel, "Coordinates");
335 
336  if (A->GetFixedBlockSize() == 1 && threshold == STS::zero()) {
337  //
338  // Case 1: scalar problem without dropping
339  //
340  graph = rcp(new LWGraph_kokkos(A->getLocalMatrix().graph, A->getRowMap(), A->getColMap(), "graph of A"));
341 
342  // Detect and record rows that correspond to Dirichlet boundary conditions
343  boundaryNodes = Utilities_kokkos::DetectDirichletRows(*A, dirichletThreshold);
344  graph->SetBoundaryNodeMap(boundaryNodes);
345 
346  numTotal = A->getNodeNumEntries();
347 
348  dofsPerNode = 1;
349 
350  } else if (blkSize == 1 && threshold != STS::zero()) {
351  //
352  // Case 2: scalar problem with filtering
353  //
354  throw Exceptions::RuntimeError("not implemented");
355 
356  } else if (blkSize > 1 && threshold == STS::zero()) {
357  //
358  // Case 3: block problem without filtering
359  //
360  throw Exceptions::RuntimeError("Block systems without filtering are not implemented");
361 
362  // Detect and record rows that correspond to Dirichlet boundary conditions
363  // boundary_nodes_type pointBoundaryNodes = Utilities_kokkos::DetectDirichletRows(*A, dirichletThreshold);
364 
365  } else if (blkSize > 1 && threshold != STS::zero()) {
366  //
367  // Case 4: block problem with filtering
368  //
369  throw Exceptions::RuntimeError("Block systems with filtering are not implemented");
370 
371  // Detect and record rows that correspond to Dirichlet boundary conditions
372  // boundary_nodes_type pointBoundaryNodes = Utilities_kokkos::DetectDirichletRows(*A, dirichletThreshold);
373  }
374 
375 
376  }
377 
378  if (GetVerbLevel() & Statistics0) {
379  GO numLocalBoundaryNodes = 0;
380  GO numGlobalBoundaryNodes = 0;
381  Kokkos::parallel_reduce("CoalesceDropF:Build:bnd", boundaryNodes.dimension_0(), KOKKOS_LAMBDA(const LO i, GO& n) {
382  if (boundaryNodes(i))
383  n++;
384  }, numLocalBoundaryNodes);
385 
386  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
387  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
388  GetOStream(Statistics0) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
389  }
390 
391  if ((GetVerbLevel() & Statistics0) && threshold != STS::zero()) {
392  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
393  GO numGlobalTotal, numGlobalDropped;
394  MueLu_sumAll(comm, numTotal, numGlobalTotal);
395  MueLu_sumAll(comm, numDropped, numGlobalDropped);
396  if (numGlobalTotal != 0) {
397  GetOStream(Statistics0) << "Number of dropped entries: "
398  << numGlobalDropped << "/" << numGlobalTotal
399  << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)" << std::endl;
400  }
401  }
402 
403  Set(currentLevel, "DofsPerNode", dofsPerNode);
404  Set(currentLevel, "Graph", graph);
405  }
406 }
407 #endif // HAVE_MUELU_KOKKOS_REFACTOR
408 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
Important warning messages (one line)
#define MueLu_sumAll(rcpComm, in, out)
One-liner description of what is happening.
Namespace for MueLu classes and methods.
Kokkos::View< const bool *, typename NO::device_type > DetectDirichletRows(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol)
void parallel_reduce(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< !Impl::is_integral< ExecPolicy >::value >::type *=0)
Print statistics that do not involve significant additional computation.
#define SET_VALID_ENTRY(name)