46 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP 47 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP 49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR 50 #include <Kokkos_CrsMatrix.hpp> 54 #include "MueLu_AmalgamationInfo.hpp" 57 #include "MueLu_LWGraph_kokkos.hpp" 60 #include "MueLu_Utilities_kokkos.hpp" 66 template<
class LocalOrdinal,
class RowType>
69 ScanFunctor(RowType rows) : rows_(rows) { }
71 KOKKOS_INLINE_FUNCTION
72 void operator()(
const LocalOrdinal i, LocalOrdinal& upd,
const bool&
final)
const {
84 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
85 RCP<const ParameterList> CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::GetValidParameterList()
const {
86 RCP<ParameterList> validParamList = rcp(
new ParameterList());
88 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) 93 typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
94 validParamList->getEntry(
"aggregation: drop scheme").setValidator(
95 rcp(
new validatorType(Teuchos::tuple<std::string>(
"classical",
"distance laplacian"),
"aggregation: drop scheme")));
97 #undef SET_VALID_ENTRY 98 validParamList->set<
bool > (
"lightweight wrap",
true,
"Experimental option for lightweight graph access");
100 validParamList->set< RCP<const FactoryBase> >(
"A", Teuchos::null,
"Generating factory of the matrix A");
101 validParamList->set< RCP<const FactoryBase> >(
"UnAmalgamationInfo", Teuchos::null,
"Generating factory for UnAmalgamationInfo");
102 validParamList->set< RCP<const FactoryBase> >(
"Coordinates", Teuchos::null,
"Generating factory for Coordinates");
104 return validParamList;
107 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
108 void CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::DeclareInput(Level ¤tLevel)
const {
109 Input(currentLevel,
"A");
110 Input(currentLevel,
"UnAmalgamationInfo");
112 const ParameterList& pL = GetParameterList();
113 if (pL.get<
bool>(
"lightweight wrap") ==
true) {
114 if (pL.get<std::string>(
"aggregation: drop scheme") ==
"distance laplacian")
115 Input(currentLevel,
"Coordinates");
119 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
120 void CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::Build(Level& currentLevel)
const {
121 FactoryMonitor m(*
this,
"Build", currentLevel);
123 typedef Teuchos::ScalarTraits<SC> STS;
124 SC zero = STS::zero(), one = STS::one();
126 auto A = Get< RCP<Matrix> >(currentLevel,
"A");
127 LO blkSize = A->GetFixedBlockSize();
128 GO indexBase = A->getRowMap()->getIndexBase();
130 auto amalInfo = Get< RCP<AmalgamationInfo> >(currentLevel,
"UnAmalgamationInfo");
132 const ParameterList& pL = GetParameterList();
135 GetOStream(
Warnings0) <<
"lightweight wrap is deprecated" << std::endl;
137 std::string algo = pL.get<std::string>(
"aggregation: drop scheme");
139 double threshold = pL.get<
double>(
"aggregation: drop tol");
140 GetOStream(
Runtime0) <<
"algorithm = \"" << algo <<
"\": threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
142 Set(currentLevel,
"Filtering", (threshold != STS::zero()));
144 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
146 GO numDropped = 0, numTotal = 0;
148 RCP<LWGraph_kokkos> graph;
151 typedef typename LWGraph_kokkos::boundary_nodes_type boundary_nodes_type;
152 boundary_nodes_type boundaryNodes;
154 if (algo ==
"classical") {
156 if (blkSize == 1 && threshold == STS::zero()) {
160 graph = rcp(
new LWGraph_kokkos(A->getLocalMatrix().graph, A->getRowMap(), A->getColMap(),
"graph of A"));
164 graph->SetBoundaryNodeMap(boundaryNodes);
166 numTotal = A->getNodeNumEntries();
170 }
else if (blkSize == 1 && threshold != STS::zero()) {
174 RCP<Vector> ghostedDiagVec = Utilities_kokkos::GetMatrixOverlappedDiagonal(*A);
175 auto ghostedDiag = ghostedDiagVec->template getLocalView<typename NO::device_type>();
177 typedef typename Matrix::local_matrix_type local_matrix_type;
178 typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type;
179 typedef typename kokkos_graph_type::row_map_type row_map_type;
180 typedef typename kokkos_graph_type::entries_type entries_type;
182 LO numRows = A->getNodeNumRows();
183 auto kokkosMatrix = A->getLocalMatrix();
185 typedef Kokkos::ArithTraits<SC> ATS;
186 typedef typename ATS::magnitudeType magnitudeType;
189 typename row_map_type::non_const_type rows(
"row_map", numRows+1);
192 Kokkos::parallel_reduce(
"CoalesceDropF:Build:scalar_filter:stage1_reduce", numRows, KOKKOS_LAMBDA(
const LO row, LO& nnz) {
193 auto rowView = kokkosMatrix.template row<LO>(row);
194 auto length = rowView.length;
197 for (decltype(length) colID = 0; colID < length; colID++) {
198 LO col = rowView.colidx(colID);
201 magnitudeType aiiajj = threshold*threshold * ATS::magnitude(ghostedDiag(row, 0))*ATS::magnitude(ghostedDiag(col, 0));
202 magnitudeType aij2 = ATS::magnitude(rowView.value(colID)) * ATS::magnitude(rowView.value(colID));
204 if (aij2 > aiiajj || row == col)
207 rows(row+1) = rownnz;
214 Kokkos::parallel_scan(
"CoalesceDropF:Build:scalar_filter:stage1_scan", numRows+1, KOKKOS_LAMBDA(
const LO i, LO& upd,
const bool&
final) {
220 ScanFunctor<LO,decltype(rows)> scanFunctor(rows);
221 Kokkos::parallel_scan(
"CoalesceDropF:Build:scalar_filter:stage1_scan", numRows+1, scanFunctor);
226 typename boundary_nodes_type::non_const_type bndNodes(
"boundaryNodes", numRows);
227 typename entries_type::non_const_type cols (
"entries", realnnz);
228 Kokkos::parallel_reduce(
"CoalesceDropF:Build:scalar_filter:stage2_reduce", numRows, KOKKOS_LAMBDA(
const LO row, GO& dropped) {
229 auto rowView = kokkosMatrix.template row<LO>(row);
230 auto length = rowView.length;
233 for (decltype(length) colID = 0; colID < length; colID++) {
234 LO col = rowView.colidx(colID);
237 magnitudeType aiiajj = threshold*threshold * ATS::magnitude(ghostedDiag(row, 0))*ATS::magnitude(ghostedDiag(col, 0));
238 magnitudeType aij2 = ATS::magnitude(rowView.value(colID)) * ATS::magnitude(rowView.value(colID));
240 if (aij2 > aiiajj || row == col) {
241 cols(rows(row) + rownnz) = col;
253 bndNodes(row) =
true;
257 boundaryNodes = bndNodes;
259 kokkos_graph_type kokkosGraph(cols, rows);
261 graph = rcp(
new LWGraph_kokkos(kokkosGraph, A->getRowMap(), A->getColMap(),
"filtered graph of A"));
262 graph->SetBoundaryNodeMap(boundaryNodes);
264 numTotal = A->getNodeNumEntries();
268 }
else if (blkSize > 1 && threshold == STS::zero()) {
272 throw Exceptions::RuntimeError(
"Block systems without filtering are not implemented");
277 }
else if (blkSize > 1 && threshold != STS::zero()) {
281 throw Exceptions::RuntimeError(
"Block systems with filtering are not implemented");
287 }
else if (algo ==
"distance laplacian") {
288 typedef Xpetra::MultiVector<double,LO,GO,NO> doubleMultiVector;
290 auto coords = Get<RCP<doubleMultiVector> >(currentLevel,
"Coordinates");
292 if (A->GetFixedBlockSize() == 1 && threshold == STS::zero()) {
296 graph = rcp(
new LWGraph_kokkos(A->getLocalMatrix().graph, A->getRowMap(), A->getColMap(),
"graph of A"));
300 graph->SetBoundaryNodeMap(boundaryNodes);
302 numTotal = A->getNodeNumEntries();
306 }
else if (blkSize == 1 && threshold != STS::zero()) {
310 throw Exceptions::RuntimeError(
"not implemented");
312 }
else if (blkSize > 1 && threshold == STS::zero()) {
316 throw Exceptions::RuntimeError(
"Block systems without filtering are not implemented");
321 }
else if (blkSize > 1 && threshold != STS::zero()) {
325 throw Exceptions::RuntimeError(
"Block systems with filtering are not implemented");
331 }
else if (algo ==
"distance laplacian") {
332 typedef Xpetra::MultiVector<double,LO,GO,NO> doubleMultiVector;
334 auto coords = Get<RCP<doubleMultiVector> >(currentLevel,
"Coordinates");
336 if (A->GetFixedBlockSize() == 1 && threshold == STS::zero()) {
340 graph = rcp(
new LWGraph_kokkos(A->getLocalMatrix().graph, A->getRowMap(), A->getColMap(),
"graph of A"));
344 graph->SetBoundaryNodeMap(boundaryNodes);
346 numTotal = A->getNodeNumEntries();
350 }
else if (blkSize == 1 && threshold != STS::zero()) {
354 throw Exceptions::RuntimeError(
"not implemented");
356 }
else if (blkSize > 1 && threshold == STS::zero()) {
360 throw Exceptions::RuntimeError(
"Block systems without filtering are not implemented");
365 }
else if (blkSize > 1 && threshold != STS::zero()) {
369 throw Exceptions::RuntimeError(
"Block systems with filtering are not implemented");
379 GO numLocalBoundaryNodes = 0;
380 GO numGlobalBoundaryNodes = 0;
381 Kokkos::parallel_reduce(
"CoalesceDropF:Build:bnd", boundaryNodes.dimension_0(), KOKKOS_LAMBDA(
const LO i, GO& n) {
382 if (boundaryNodes(i))
384 }, numLocalBoundaryNodes);
386 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
387 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
388 GetOStream(
Statistics0) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
391 if ((GetVerbLevel() &
Statistics0) && threshold != STS::zero()) {
392 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
393 GO numGlobalTotal, numGlobalDropped;
396 if (numGlobalTotal != 0) {
397 GetOStream(
Statistics0) <<
"Number of dropped entries: " 398 << numGlobalDropped <<
"/" << numGlobalTotal
399 <<
" (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) <<
"%)" << std::endl;
403 Set(currentLevel,
"DofsPerNode", dofsPerNode);
404 Set(currentLevel,
"Graph", graph);
407 #endif // HAVE_MUELU_KOKKOS_REFACTOR 408 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP Important warning messages (one line)
#define MueLu_sumAll(rcpComm, in, out)
One-liner description of what is happening.
Namespace for MueLu classes and methods.
Kokkos::View< const bool *, typename NO::device_type > DetectDirichletRows(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol)
void parallel_reduce(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< !Impl::is_integral< ExecPolicy >::value >::type *=0)
Print statistics that do not involve significant additional computation.
#define SET_VALID_ENTRY(name)