MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_CoalesceDropFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_COALESCEDROPFACTORY_DEF_HPP
47 #define MUELU_COALESCEDROPFACTORY_DEF_HPP
48 
50 #include <Xpetra_CrsGraph.hpp>
51 #include <Xpetra_ImportFactory.hpp>
52 #include <Xpetra_ExportFactory.hpp>
53 #include <Xpetra_MapFactory.hpp>
54 #include <Xpetra_Map.hpp>
55 #include <Xpetra_Matrix.hpp>
56 #include <Xpetra_MultiVectorFactory.hpp>
57 #include <Xpetra_MultiVector.hpp>
58 #include <Xpetra_StridedMap.hpp>
59 #include <Xpetra_VectorFactory.hpp>
60 #include <Xpetra_Vector.hpp>
61 
62 #include <Xpetra_IO.hpp>
63 
65 
66 #include "MueLu_AmalgamationFactory.hpp"
67 #include "MueLu_AmalgamationInfo.hpp"
68 #include "MueLu_Exceptions.hpp"
69 #include "MueLu_LWGraph.hpp"
70 
71 #include "MueLu_Level.hpp"
72 #include "MueLu_LWGraph.hpp"
73 #include "MueLu_MasterList.hpp"
74 #include "MueLu_Monitor.hpp"
75 #include "MueLu_PreDropFunctionConstVal.hpp"
76 #include "MueLu_Utilities.hpp"
77 
78 #ifdef HAVE_XPETRA_TPETRA
79 #include "Tpetra_CrsGraphTransposer.hpp"
80 #endif
81 
82 #include <algorithm>
83 #include <cstdlib>
84 #include <string>
85 
86 // If defined, read environment variables.
87 // Should be removed once we are confident that this works.
88 //#define DJS_READ_ENV_VARIABLES
89 
90 namespace MueLu {
91 
92 namespace Details {
93 template <class real_type, class LO>
94 struct DropTol {
95  DropTol() = default;
96  DropTol(DropTol const&) = default;
97  DropTol(DropTol&&) = default;
98 
99  DropTol& operator=(DropTol const&) = default;
100  DropTol& operator=(DropTol&&) = default;
101 
102  DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
103  : val{val_}
104  , diag{diag_}
105  , col{col_}
106  , drop{drop_} {}
107 
111  bool drop{true};
112 
113  // CMS: Auxillary information for debugging info
114  // real_type aux_val {Teuchos::ScalarTraits<real_type>::nan()};
115 };
116 } // namespace Details
117 
118 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
120  RCP<ParameterList> validParamList = rcp(new ParameterList());
121 
122 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
123  SET_VALID_ENTRY("aggregation: drop tol");
124  SET_VALID_ENTRY("aggregation: use ml scaling of drop tol");
125  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
126  SET_VALID_ENTRY("aggregation: greedy Dirichlet");
127  SET_VALID_ENTRY("aggregation: row sum drop tol");
128  SET_VALID_ENTRY("aggregation: drop scheme");
129  SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
130  SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
131  SET_VALID_ENTRY("aggregation: dropping may create Dirichlet");
132 
133  {
134  // "signed classical" is the Ruge-Stuben style (relative to max off-diagonal), "sign classical sa" is the signed version of the sa criterion (relative to the diagonal values)
135  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("signed classical sa", "classical", "distance laplacian", "signed classical", "block diagonal", "block diagonal classical", "block diagonal distance laplacian", "block diagonal signed classical", "block diagonal colored signed classical"))));
136  }
137  SET_VALID_ENTRY("aggregation: distance laplacian algo");
138  SET_VALID_ENTRY("aggregation: classical algo");
139  SET_VALID_ENTRY("aggregation: coloring: localize color graph");
140 #undef SET_VALID_ENTRY
141  validParamList->set<bool>("lightweight wrap", true, "Experimental option for lightweight graph access");
142 
143  validParamList->set<RCP<const FactoryBase>>("A", Teuchos::null, "Generating factory of the matrix A");
144  validParamList->set<RCP<const FactoryBase>>("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
145  validParamList->set<RCP<const FactoryBase>>("Coordinates", Teuchos::null, "Generating factory for Coordinates");
146  validParamList->set<RCP<const FactoryBase>>("BlockNumber", Teuchos::null, "Generating factory for BlockNUmber");
147 
148  return validParamList;
149 }
150 
151 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
153  : predrop_(Teuchos::null) {}
154 
155 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
157  Input(currentLevel, "A");
158  Input(currentLevel, "UnAmalgamationInfo");
159 
160  const ParameterList& pL = GetParameterList();
161  if (pL.get<bool>("lightweight wrap") == true) {
162  std::string algo = pL.get<std::string>("aggregation: drop scheme");
163  if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") {
164  Input(currentLevel, "Coordinates");
165  }
166  if (algo == "signed classical sa")
167  ;
168  else if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) {
169  Input(currentLevel, "BlockNumber");
170  }
171  }
172 }
173 
174 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
176  FactoryMonitor m(*this, "Build", currentLevel);
177 
178  typedef Teuchos::ScalarTraits<SC> STS;
179  typedef typename STS::magnitudeType real_type;
180  typedef Xpetra::MultiVector<real_type, LO, GO, NO> RealValuedMultiVector;
181  typedef Xpetra::MultiVectorFactory<real_type, LO, GO, NO> RealValuedMultiVectorFactory;
182 
183  if (predrop_ != Teuchos::null)
184  GetOStream(Parameters0) << predrop_->description();
185 
186  RCP<Matrix> realA = Get<RCP<Matrix>>(currentLevel, "A");
187  RCP<AmalgamationInfo> amalInfo = Get<RCP<AmalgamationInfo>>(currentLevel, "UnAmalgamationInfo");
188  const ParameterList& pL = GetParameterList();
189  bool doExperimentalWrap = pL.get<bool>("lightweight wrap");
190 
191  GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap << std::endl;
192  std::string algo = pL.get<std::string>("aggregation: drop scheme");
193  const bool aggregationMayCreateDirichlet = pL.get<bool>("aggregation: dropping may create Dirichlet");
194 
196  RCP<Matrix> A;
197 
198  bool use_block_algorithm = false;
199  LO interleaved_blocksize = as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
200  bool useSignedClassicalRS = false;
201  bool useSignedClassicalSA = false;
202  bool generateColoringGraph = false;
203 
204  // NOTE: If we're doing blockDiagonal, we'll not want to do rowSum twice (we'll do it
205  // in the block diagonalization). So we'll clobber the rowSumTol with -1.0 in this case
206  typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
207 
208  RCP<LocalOrdinalVector> ghostedBlockNumber;
209  ArrayRCP<const LO> g_block_id;
210 
211  if (algo == "distance laplacian") {
212  // Grab the coordinates for distance laplacian
213  Coords = Get<RCP<RealValuedMultiVector>>(currentLevel, "Coordinates");
214  A = realA;
215  } else if (algo == "signed classical sa") {
216  useSignedClassicalSA = true;
217  algo = "classical";
218  A = realA;
219  } else if (algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") {
220  useSignedClassicalRS = true;
221  // if(realA->GetFixedBlockSize() > 1) {
222  RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber");
223  // Ghost the column block numbers if we need to
224  RCP<const Import> importer = realA->getCrsGraph()->getImporter();
225  if (!importer.is_null()) {
226  SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
227  ghostedBlockNumber = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(importer->getTargetMap());
228  ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
229  } else {
230  ghostedBlockNumber = BlockNumber;
231  }
232  g_block_id = ghostedBlockNumber->getData(0);
233  // }
234  if (algo == "block diagonal colored signed classical")
235  generateColoringGraph = true;
236  algo = "classical";
237  A = realA;
238 
239  } else if (algo == "block diagonal") {
240  // Handle the "block diagonal" filtering and then leave
241  BlockDiagonalize(currentLevel, realA, false);
242  return;
243  } else if (algo == "block diagonal classical" || algo == "block diagonal distance laplacian") {
244  // Handle the "block diagonal" filtering, and then continue onward
245  use_block_algorithm = true;
246  RCP<Matrix> filteredMatrix = BlockDiagonalize(currentLevel, realA, true);
247  if (algo == "block diagonal distance laplacian") {
248  // We now need to expand the coordinates by the interleaved blocksize
249  RCP<RealValuedMultiVector> OldCoords = Get<RCP<RealValuedMultiVector>>(currentLevel, "Coordinates");
250  if (OldCoords->getLocalLength() != realA->getLocalNumRows()) {
251  LO dim = (LO)OldCoords->getNumVectors();
252  Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(), dim);
253  for (LO k = 0; k < dim; k++) {
254  ArrayRCP<const real_type> old_vec = OldCoords->getData(k);
255  ArrayRCP<real_type> new_vec = Coords->getDataNonConst(k);
256  for (LO i = 0; i < (LO)OldCoords->getLocalLength(); i++) {
257  LO new_base = i * dim;
258  for (LO j = 0; j < interleaved_blocksize; j++)
259  new_vec[new_base + j] = old_vec[i];
260  }
261  }
262  } else {
263  Coords = OldCoords;
264  }
265  algo = "distance laplacian";
266  } else if (algo == "block diagonal classical") {
267  algo = "classical";
268  }
269  // All cases
270  A = filteredMatrix;
271  rowSumTol = -1.0;
272  } else {
273  A = realA;
274  }
275 
276  // Distance Laplacian weights
277  Array<double> dlap_weights = pL.get<Array<double>>("aggregation: distance laplacian directional weights");
278  enum { NO_WEIGHTS = 0,
279  SINGLE_WEIGHTS,
280  BLOCK_WEIGHTS };
281  int use_dlap_weights = NO_WEIGHTS;
282  if (algo == "distance laplacian") {
283  LO dim = (LO)Coords->getNumVectors();
284  // If anything isn't 1.0 we need to turn on the weighting
285  bool non_unity = false;
286  for (LO i = 0; !non_unity && i < (LO)dlap_weights.size(); i++) {
287  if (dlap_weights[i] != 1.0) {
288  non_unity = true;
289  }
290  }
291  if (non_unity) {
292  LO blocksize = use_block_algorithm ? as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize")) : 1;
293  if ((LO)dlap_weights.size() == dim)
294  use_dlap_weights = SINGLE_WEIGHTS;
295  else if ((LO)dlap_weights.size() == blocksize * dim)
296  use_dlap_weights = BLOCK_WEIGHTS;
297  else {
299  "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
300  }
301  if (GetVerbLevel() & Statistics1)
302  GetOStream(Statistics1) << "Using distance laplacian weights: " << dlap_weights << std::endl;
303  }
304  }
305 
306  // decide wether to use the fast-track code path for standard maps or the somewhat slower
307  // code path for non-standard maps
308  /*bool bNonStandardMaps = false;
309  if (A->IsView("stridedMaps") == true) {
310  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
311  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
312  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
313  if (strMap->getStridedBlockId() != -1 || strMap->getOffset() > 0)
314  bNonStandardMaps = true;
315  }*/
316 
317  if (doExperimentalWrap) {
318  TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", Exceptions::RuntimeError, "Dropping function must not be provided for \"" << algo << "\" algorithm");
319  TEUCHOS_TEST_FOR_EXCEPTION(algo != "classical" && algo != "distance laplacian" && algo != "signed classical", Exceptions::RuntimeError, "\"algorithm\" must be one of (classical|distance laplacian|signed classical)");
320 
321  SC threshold;
322  // If we're doing the ML-style halving of the drop tol at each level, we do that here.
323  if (pL.get<bool>("aggregation: use ml scaling of drop tol"))
324  threshold = pL.get<double>("aggregation: drop tol") / pow(2.0, currentLevel.GetLevelID());
325  else
326  threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
327 
328  std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
329  std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
330  real_type realThreshold = STS::magnitude(threshold); // CMS: Rename this to "magnitude threshold" sometime
331 
333  // Remove this bit once we are confident that cut-based dropping works.
334 #ifdef HAVE_MUELU_DEBUG
335  int distanceLaplacianCutVerbose = 0;
336 #endif
337 #ifdef DJS_READ_ENV_VARIABLES
338  if (getenv("MUELU_DROP_TOLERANCE_MODE")) {
339  distanceLaplacianAlgoStr = std::string(getenv("MUELU_DROP_TOLERANCE_MODE"));
340  }
341 
342  if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) {
343  auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD"));
344  realThreshold = 1e-4 * tmp;
345  }
346 
347 #ifdef HAVE_MUELU_DEBUG
348  if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) {
349  distanceLaplacianCutVerbose = atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE"));
350  }
351 #endif
352 #endif
353 
355  enum decisionAlgoType { defaultAlgo,
356  unscaled_cut,
357  scaled_cut,
358  scaled_cut_symmetric };
359 
360  decisionAlgoType distanceLaplacianAlgo = defaultAlgo;
361  decisionAlgoType classicalAlgo = defaultAlgo;
362  if (algo == "distance laplacian") {
363  if (distanceLaplacianAlgoStr == "default")
364  distanceLaplacianAlgo = defaultAlgo;
365  else if (distanceLaplacianAlgoStr == "unscaled cut")
366  distanceLaplacianAlgo = unscaled_cut;
367  else if (distanceLaplacianAlgoStr == "scaled cut")
368  distanceLaplacianAlgo = scaled_cut;
369  else if (distanceLaplacianAlgoStr == "scaled cut symmetric")
370  distanceLaplacianAlgo = scaled_cut_symmetric;
371  else
372  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr << "\"");
373  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
374  } else if (algo == "classical") {
375  if (classicalAlgoStr == "default")
376  classicalAlgo = defaultAlgo;
377  else if (classicalAlgoStr == "unscaled cut")
378  classicalAlgo = unscaled_cut;
379  else if (classicalAlgoStr == "scaled cut")
380  classicalAlgo = scaled_cut;
381  else
382  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr << "\"");
383  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
384 
385  } else
386  GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
387  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
388 
389  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
390 
391  // NOTE: We don't support signed classical RS or SA with cut drop at present
392  TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalRS && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical aggregation");
393  TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassicalSA && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical sa aggregation");
394 
395  GO numDropped = 0, numTotal = 0;
396  std::string graphType = "unamalgamated"; // for description purposes only
397 
398  /* NOTE: storageblocksize (from GetStorageBlockSize()) is the size of a block in the chosen storage scheme.
399  BlockSize is the number of storage blocks that must kept together during the amalgamation process.
400 
401  Both of these quantities may be different than numPDEs (from GetFixedBlockSize()), but the following must always hold:
402 
403  numPDEs = BlockSize * storageblocksize.
404 
405  If numPDEs==1
406  Matrix is point storage (classical CRS storage). storageblocksize=1 and BlockSize=1
407  No other values makes sense.
408 
409  If numPDEs>1
410  If matrix uses point storage, then storageblocksize=1 and BlockSize=numPDEs.
411  If matrix uses block storage, with block size of n, then storageblocksize=n, and BlockSize=numPDEs/n.
412  Thus far, only storageblocksize=numPDEs and BlockSize=1 has been tested.
413  */
414  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() % A->GetStorageBlockSize() != 0, Exceptions::RuntimeError, "A->GetFixedBlockSize() needs to be a multiple of A->GetStorageBlockSize()");
415  const LO BlockSize = A->GetFixedBlockSize() / A->GetStorageBlockSize();
416 
417  /************************** RS or SA-style Classical Dropping (and variants) **************************/
418  if (algo == "classical") {
419  if (predrop_ == null) {
420  // ap: this is a hack: had to declare predrop_ as mutable
421  predrop_ = rcp(new PreDropFunctionConstVal(threshold));
422  }
423 
424  if (predrop_ != null) {
425  RCP<PreDropFunctionConstVal> predropConstVal = rcp_dynamic_cast<PreDropFunctionConstVal>(predrop_);
426  TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, Exceptions::BadCast,
427  "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed.");
428  // If a user provided a predrop function, it overwrites the XML threshold parameter
429  SC newt = predropConstVal->GetThreshold();
430  if (newt != threshold) {
431  GetOStream(Warnings0) << "switching threshold parameter from " << threshold << " (list) to " << newt << " (user function" << std::endl;
432  threshold = newt;
433  }
434  }
435  // At this points we either have
436  // (predrop_ != null)
437  // Therefore, it is sufficient to check only threshold
438  if (BlockSize == 1 && threshold == STS::zero() && !useSignedClassicalRS && !useSignedClassicalSA && A->hasCrsGraph()) {
439  // Case 1: scalar problem, no dropping => just use matrix graph
440  RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
441  // Detect and record rows that correspond to Dirichlet boundary conditions
442  auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
443  if (rowSumTol > 0.)
444  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
445 
446  graph->SetBoundaryNodeMap(boundaryNodes);
447  numTotal = A->getLocalNumEntries();
448 
449  if (GetVerbLevel() & Statistics1) {
450  GO numLocalBoundaryNodes = 0;
451  GO numGlobalBoundaryNodes = 0;
452  for (size_t i = 0; i < boundaryNodes.size(); ++i)
453  if (boundaryNodes[i])
454  numLocalBoundaryNodes++;
455  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
456  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
457  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
458  }
459 
460  Set(currentLevel, "DofsPerNode", 1);
461  Set(currentLevel, "Graph", graph);
462 
463  } else if ((BlockSize == 1 && threshold != STS::zero()) ||
464  (BlockSize == 1 && threshold == STS::zero() && !A->hasCrsGraph()) ||
465  (BlockSize == 1 && useSignedClassicalRS) ||
466  (BlockSize == 1 && useSignedClassicalSA)) {
467  // Case 2: scalar problem with dropping => record the column indices of undropped entries, but still use original
468  // graph's map information, e.g., whether index is local
469  // OR a matrix without a CrsGraph
470 
471  // allocate space for the local graph
472  typename LWGraph::row_type::non_const_type rows("rows", A->getLocalNumRows() + 1);
473  typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
474 
475  using MT = typename STS::magnitudeType;
476  RCP<Vector> ghostedDiag;
477  ArrayRCP<const SC> ghostedDiagVals;
478  ArrayRCP<const MT> negMaxOffDiagonal;
479  // RS style needs the max negative off-diagonal, SA style needs the diagonal
480  if (useSignedClassicalRS) {
481  if (ghostedBlockNumber.is_null()) {
483  if (GetVerbLevel() & Statistics1)
484  GetOStream(Statistics1) << "Calculated max point off-diagonal" << std::endl;
485  } else {
486  negMaxOffDiagonal = MueLu::Utilities<SC, LO, GO, NO>::GetMatrixMaxMinusOffDiagonal(*A, *ghostedBlockNumber);
487  if (GetVerbLevel() & Statistics1)
488  GetOStream(Statistics1) << "Calculating max block off-diagonal" << std::endl;
489  }
490  } else {
492  ghostedDiagVals = ghostedDiag->getData(0);
493  }
494  auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
495  if (rowSumTol > 0.) {
496  if (ghostedBlockNumber.is_null()) {
497  if (GetVerbLevel() & Statistics1)
498  GetOStream(Statistics1) << "Applying point row sum criterion." << std::endl;
499  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
500  } else {
501  if (GetVerbLevel() & Statistics1)
502  GetOStream(Statistics1) << "Applying block row sum criterion." << std::endl;
503  Utilities::ApplyRowSumCriterionHost(*A, *ghostedBlockNumber, rowSumTol, boundaryNodes);
504  }
505  }
506 
507  LO realnnz = 0;
508  rows(0) = 0;
509  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
510  size_t nnz = A->getNumEntriesInLocalRow(row);
511  bool rowIsDirichlet = boundaryNodes[row];
512  ArrayView<const LO> indices;
513  ArrayView<const SC> vals;
514  A->getLocalRowView(row, indices, vals);
515 
516  if (classicalAlgo == defaultAlgo) {
517  // FIXME the current predrop function uses the following
518  // FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid )
519  // FIXME but the threshold doesn't take into account the rows' diagonal entries
520  // FIXME For now, hardwiring the dropping in here
521 
522  LO rownnz = 0;
523  if (useSignedClassicalRS) {
524  // Signed classical RS style
525  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
526  LO col = indices[colID];
527  MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]);
528  MT neg_aij = -STS::real(vals[colID]);
529  /* if(row==1326) printf("A(%d,%d) = %6.4e, block = (%d,%d) neg_aij = %6.4e max_neg_aik = %6.4e\n",row,col,vals[colID],
530  g_block_id.is_null() ? -1 : g_block_id[row],
531  g_block_id.is_null() ? -1 : g_block_id[col],
532  neg_aij, max_neg_aik);*/
533  if ((!rowIsDirichlet && (g_block_id.is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) {
534  columns[realnnz++] = col;
535  rownnz++;
536  } else
537  numDropped++;
538  }
539  rows(row + 1) = realnnz;
540  } else if (useSignedClassicalSA) {
541  // Signed classical SA style
542  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
543  LO col = indices[colID];
544 
545  bool is_nonpositive = STS::real(vals[colID]) <= 0;
546  MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
547  MT aij = is_nonpositive ? STS::magnitude(vals[colID] * vals[colID]) : (-STS::magnitude(vals[colID] * vals[colID])); // + |a_ij|^2, if a_ij < 0, - |a_ij|^2 if a_ij >=0
548  /*
549  if(row==1326) printf("A(%d,%d) = %6.4e, raw_aij = %6.4e aij = %6.4e aiiajj = %6.4e\n",row,col,vals[colID],
550  vals[colID],aij, aiiajj);
551  */
552 
553  if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
554  columns(realnnz++) = col;
555  rownnz++;
556  } else
557  numDropped++;
558  }
559  rows[row + 1] = realnnz;
560  } else {
561  // Standard abs classical
562  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
563  LO col = indices[colID];
564  MT aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
565  MT aij = STS::magnitude(vals[colID] * vals[colID]); // |a_ij|^2
566 
567  if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
568  columns(realnnz++) = col;
569  rownnz++;
570  } else
571  numDropped++;
572  }
573  rows(row + 1) = realnnz;
574  }
575  } else {
576  /* Cut Algorithm */
577  // CMS
578  using DropTol = Details::DropTol<real_type, LO>;
579  std::vector<DropTol> drop_vec;
580  drop_vec.reserve(nnz);
581  const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
582  const real_type one = Teuchos::ScalarTraits<real_type>::one();
583  LO rownnz = 0;
584  // NOTE: This probably needs to be fixed for rowsum
585 
586  // find magnitudes
587  for (LO colID = 0; colID < (LO)nnz; colID++) {
588  LO col = indices[colID];
589  if (row == col) {
590  drop_vec.emplace_back(zero, one, colID, false);
591  continue;
592  }
593 
594  // Don't aggregate boundaries
595  if (boundaryNodes[colID]) continue;
596  typename STS::magnitudeType aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
597  typename STS::magnitudeType aij = STS::magnitude(vals[colID] * vals[colID]); // |a_ij|^2
598  drop_vec.emplace_back(aij, aiiajj, colID, false);
599  }
600 
601  const size_t n = drop_vec.size();
602 
603  if (classicalAlgo == unscaled_cut) {
604  std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
605  return a.val > b.val;
606  });
607 
608  bool drop = false;
609  for (size_t i = 1; i < n; ++i) {
610  if (!drop) {
611  auto const& x = drop_vec[i - 1];
612  auto const& y = drop_vec[i];
613  auto a = x.val;
614  auto b = y.val;
615  if (a > realThreshold * b) {
616  drop = true;
617 #ifdef HAVE_MUELU_DEBUG
618  if (distanceLaplacianCutVerbose) {
619  std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl;
620  }
621 #endif
622  }
623  }
624  drop_vec[i].drop = drop;
625  }
626  } else if (classicalAlgo == scaled_cut) {
627  std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
628  return a.val / a.diag > b.val / b.diag;
629  });
630  bool drop = false;
631  // printf("[%d] Scaled Cut: ",(int)row);
632  // printf("%3d(%4s) ",indices[drop_vec[0].col],"keep");
633  for (size_t i = 1; i < n; ++i) {
634  if (!drop) {
635  auto const& x = drop_vec[i - 1];
636  auto const& y = drop_vec[i];
637  auto a = x.val / x.diag;
638  auto b = y.val / y.diag;
639  if (a > realThreshold * b) {
640  drop = true;
641 
642 #ifdef HAVE_MUELU_DEBUG
643  if (distanceLaplacianCutVerbose) {
644  std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl;
645  }
646 #endif
647  }
648  // printf("%3d(%4s) ",indices[drop_vec[i].col],drop?"drop":"keep");
649  }
650  drop_vec[i].drop = drop;
651  }
652  // printf("\n");
653  }
654  std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
655  return a.col < b.col;
656  });
657 
658  for (LO idxID = 0; idxID < (LO)drop_vec.size(); idxID++) {
659  LO col = indices[drop_vec[idxID].col];
660  // don't drop diagonal
661  if (row == col) {
662  columns[realnnz++] = col;
663  rownnz++;
664  continue;
665  }
666 
667  if (!drop_vec[idxID].drop) {
668  columns[realnnz++] = col;
669  rownnz++;
670  } else {
671  numDropped++;
672  }
673  }
674  // CMS
675  rows[row + 1] = realnnz;
676  }
677  } // end for row
678 
679  numTotal = A->getLocalNumEntries();
680 
681  if (aggregationMayCreateDirichlet) {
682  // If the only element remaining after filtering is diagonal, mark node as boundary
683  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
684  if (rows[row + 1] - rows[row] <= 1)
685  boundaryNodes[row] = true;
686  }
687  }
688 
689  RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(), "thresholded graph of A"));
690  graph->SetBoundaryNodeMap(boundaryNodes);
691  if (GetVerbLevel() & Statistics1) {
692  GO numLocalBoundaryNodes = 0;
693  GO numGlobalBoundaryNodes = 0;
694  for (size_t i = 0; i < boundaryNodes.size(); ++i)
695  if (boundaryNodes(i))
696  numLocalBoundaryNodes++;
697  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
698  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
699  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
700  }
701  Set(currentLevel, "Graph", graph);
702  Set(currentLevel, "DofsPerNode", 1);
703 
704  // If we're doing signed classical, we might want to block-diagonalize *after* the dropping
705  if (generateColoringGraph) {
706  RCP<LWGraph> colorGraph;
707  RCP<const Import> importer = A->getCrsGraph()->getImporter();
708  BlockDiagonalizeGraph(graph, ghostedBlockNumber, colorGraph, importer);
709  Set(currentLevel, "Coloring Graph", colorGraph);
710  // #define CMS_DUMP
711 #ifdef CMS_DUMP
712  {
713  Xpetra::IO<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("m_regular_graph." + std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(graph)->GetCrsGraph());
714  Xpetra::IO<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("m_color_graph." + std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(colorGraph)->GetCrsGraph());
715  // int rank = graph->GetDomainMap()->getComm()->getRank();
716  // {
717  // std::ofstream ofs(std::string("m_color_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
718  // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
719  // colorGraph->print(*fancy,Debug);
720  // }
721  // {
722  // std::ofstream ofs(std::string("m_regular_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
723  // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
724  // graph->print(*fancy,Debug);
725  // }
726  }
727 #endif
728  } // end generateColoringGraph
729  } else if (BlockSize > 1 && threshold == STS::zero()) {
730  // Case 3: Multiple DOF/node problem without dropping
731  const RCP<const Map> rowMap = A->getRowMap();
732  const RCP<const Map> colMap = A->getColMap();
733 
734  graphType = "amalgamated";
735 
736  // build node row map (uniqueMap) and node column map (nonUniqueMap)
737  // the arrays rowTranslation and colTranslation contain the local node id
738  // given a local dof id. The data is calculated by the AmalgamationFactory and
739  // stored in the variable container "UnAmalgamationInfo"
740  RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
741  RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
742  Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
743  Array<LO> colTranslation = *(amalInfo->getColTranslation());
744 
745  // get number of local nodes
746  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
747 
748  // Allocate space for the local graph
749  typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
750  typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
751 
752  typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
753  Kokkos::deep_copy(amalgBoundaryNodes, false);
754 
755  // Detect and record rows that correspond to Dirichlet boundary conditions
756  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
757  // TODO the array one bigger than the number of local rows, and the last entry can
758  // TODO hold the actual number of boundary nodes. Clever, huh?
759  ArrayRCP<bool> pointBoundaryNodes;
760  pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows(*A, dirichletThreshold));
761  if (rowSumTol > 0.)
762  Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
763 
764  // extract striding information
765  LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
766  LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
767  LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
768  if (A->IsView("stridedMaps") == true) {
769  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
770  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
771  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
772  blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
773  blkId = strMap->getStridedBlockId();
774  if (blkId > -1)
775  blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
776  }
777 
778  // loop over all local nodes
779  LO realnnz = 0;
780  rows(0) = 0;
781  Array<LO> indicesExtra;
782  for (LO row = 0; row < numRows; row++) {
783  ArrayView<const LO> indices;
784  indicesExtra.resize(0);
785 
786  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
787  // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
788  // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
789  // with local ids.
790  // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
791  // node.
792  bool isBoundary = false;
793  if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
794  for (LO j = 0; j < blkPartSize; j++) {
795  if (pointBoundaryNodes[row * blkPartSize + j]) {
796  isBoundary = true;
797  break;
798  }
799  }
800  } else {
801  isBoundary = true;
802  for (LO j = 0; j < blkPartSize; j++) {
803  if (!pointBoundaryNodes[row * blkPartSize + j]) {
804  isBoundary = false;
805  break;
806  }
807  }
808  }
809 
810  // Merge rows of A
811  // The array indicesExtra contains local column node ids for the current local node "row"
812  if (!isBoundary)
813  MergeRows(*A, row, indicesExtra, colTranslation);
814  else
815  indicesExtra.push_back(row);
816  indices = indicesExtra;
817  numTotal += indices.size();
818 
819  // add the local column node ids to the full columns array which
820  // contains the local column node ids for all local node rows
821  LO nnz = indices.size(), rownnz = 0;
822  for (LO colID = 0; colID < nnz; colID++) {
823  LO col = indices[colID];
824  columns(realnnz++) = col;
825  rownnz++;
826  }
827 
828  if (rownnz == 1) {
829  // If the only element remaining after filtering is diagonal, mark node as boundary
830  // FIXME: this should really be replaced by the following
831  // if (indices.size() == 1 && indices[0] == row)
832  // boundaryNodes[row] = true;
833  // We do not do it this way now because there is no framework for distinguishing isolated
834  // and boundary nodes in the aggregation algorithms
835  amalgBoundaryNodes[row] = true;
836  }
837  rows(row + 1) = realnnz;
838  } // for (LO row = 0; row < numRows; row++)
839 
840  RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
841  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
842 
843  if (GetVerbLevel() & Statistics1) {
844  GO numLocalBoundaryNodes = 0;
845  GO numGlobalBoundaryNodes = 0;
846 
847  for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
848  if (amalgBoundaryNodes(i))
849  numLocalBoundaryNodes++;
850 
851  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
852  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
853  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
854  << " agglomerated Dirichlet nodes" << std::endl;
855  }
856 
857  Set(currentLevel, "Graph", graph);
858  Set(currentLevel, "DofsPerNode", blkSize); // full block size
859 
860  } else if (BlockSize > 1 && threshold != STS::zero()) {
861  // Case 4: Multiple DOF/node problem with dropping
862  const RCP<const Map> rowMap = A->getRowMap();
863  const RCP<const Map> colMap = A->getColMap();
864  graphType = "amalgamated";
865 
866  // build node row map (uniqueMap) and node column map (nonUniqueMap)
867  // the arrays rowTranslation and colTranslation contain the local node id
868  // given a local dof id. The data is calculated by the AmalgamationFactory and
869  // stored in the variable container "UnAmalgamationInfo"
870  RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
871  RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
872  Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
873  Array<LO> colTranslation = *(amalInfo->getColTranslation());
874 
875  // get number of local nodes
876  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
877 
878  // Allocate space for the local graph
879  typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
880  typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
881 
882  typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
883  Kokkos::deep_copy(amalgBoundaryNodes, false);
884 
885  // Detect and record rows that correspond to Dirichlet boundary conditions
886  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
887  // TODO the array one bigger than the number of local rows, and the last entry can
888  // TODO hold the actual number of boundary nodes. Clever, huh?
889  auto pointBoundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
890  if (rowSumTol > 0.)
891  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, pointBoundaryNodes);
892 
893  // extract striding information
894  LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
895  LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
896  LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
897  if (A->IsView("stridedMaps") == true) {
898  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
899  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
900  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
901  blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
902  blkId = strMap->getStridedBlockId();
903  if (blkId > -1)
904  blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
905  }
906 
907  // extract diagonal data for dropping strategy
909  const ArrayRCP<const SC> ghostedDiagVals = ghostedDiag->getData(0);
910 
911  // loop over all local nodes
912  LO realnnz = 0;
913  rows[0] = 0;
914  Array<LO> indicesExtra;
915  for (LO row = 0; row < numRows; row++) {
916  ArrayView<const LO> indices;
917  indicesExtra.resize(0);
918 
919  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
920  // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
921  // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
922  // with local ids.
923  // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
924  // node.
925  bool isBoundary = false;
926  if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
927  for (LO j = 0; j < blkPartSize; j++) {
928  if (pointBoundaryNodes[row * blkPartSize + j]) {
929  isBoundary = true;
930  break;
931  }
932  }
933  } else {
934  isBoundary = true;
935  for (LO j = 0; j < blkPartSize; j++) {
936  if (!pointBoundaryNodes[row * blkPartSize + j]) {
937  isBoundary = false;
938  break;
939  }
940  }
941  }
942 
943  // Merge rows of A
944  // The array indicesExtra contains local column node ids for the current local node "row"
945  if (!isBoundary)
946  MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation);
947  else
948  indicesExtra.push_back(row);
949  indices = indicesExtra;
950  numTotal += indices.size();
951 
952  // add the local column node ids to the full columns array which
953  // contains the local column node ids for all local node rows
954  LO nnz = indices.size(), rownnz = 0;
955  for (LO colID = 0; colID < nnz; colID++) {
956  LO col = indices[colID];
957  columns[realnnz++] = col;
958  rownnz++;
959  }
960 
961  if (rownnz == 1) {
962  // If the only element remaining after filtering is diagonal, mark node as boundary
963  // FIXME: this should really be replaced by the following
964  // if (indices.size() == 1 && indices[0] == row)
965  // boundaryNodes[row] = true;
966  // We do not do it this way now because there is no framework for distinguishing isolated
967  // and boundary nodes in the aggregation algorithms
968  amalgBoundaryNodes[row] = true;
969  }
970  rows[row + 1] = realnnz;
971  } // for (LO row = 0; row < numRows; row++)
972  // columns.resize(realnnz);
973 
974  RCP<LWGraph> graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
975  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
976 
977  if (GetVerbLevel() & Statistics1) {
978  GO numLocalBoundaryNodes = 0;
979  GO numGlobalBoundaryNodes = 0;
980 
981  for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
982  if (amalgBoundaryNodes(i))
983  numLocalBoundaryNodes++;
984 
985  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
986  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
987  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
988  << " agglomerated Dirichlet nodes" << std::endl;
989  }
990 
991  Set(currentLevel, "Graph", graph);
992  Set(currentLevel, "DofsPerNode", blkSize); // full block size
993  }
994 
995  } else if (algo == "distance laplacian") {
996  LO blkSize = A->GetFixedBlockSize();
997  GO indexBase = A->getRowMap()->getIndexBase();
998  // [*0*] : FIXME
999  // ap: somehow, if I move this line to [*1*], Belos throws an error
1000  // I'm not sure what's going on. Do we always have to Get data, if we did
1001  // DeclareInput for it?
1002  // RCP<RealValuedMultiVector> Coords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
1003 
1004  // Detect and record rows that correspond to Dirichlet boundary conditions
1005  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
1006  // TODO the array one bigger than the number of local rows, and the last entry can
1007  // TODO hold the actual number of boundary nodes. Clever, huh?
1008  auto pointBoundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
1009  if (rowSumTol > 0.)
1010  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, pointBoundaryNodes);
1011 
1012  if ((blkSize == 1) && (threshold == STS::zero())) {
1013  // Trivial case: scalar problem, no dropping. Can return original graph
1014  RCP<LWGraph> graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A"));
1015  graph->SetBoundaryNodeMap(pointBoundaryNodes);
1016  graphType = "unamalgamated";
1017  numTotal = A->getLocalNumEntries();
1018 
1019  if (GetVerbLevel() & Statistics1) {
1020  GO numLocalBoundaryNodes = 0;
1021  GO numGlobalBoundaryNodes = 0;
1022  for (size_t i = 0; i < pointBoundaryNodes.size(); ++i)
1023  if (pointBoundaryNodes(i))
1024  numLocalBoundaryNodes++;
1025  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1026  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1027  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1028  }
1029 
1030  Set(currentLevel, "DofsPerNode", blkSize);
1031  Set(currentLevel, "Graph", graph);
1032 
1033  } else {
1034  // ap: We make quite a few assumptions here; general case may be a lot different,
1035  // but much much harder to implement. We assume that:
1036  // 1) all maps are standard maps, not strided maps
1037  // 2) global indices of dofs in A are related to dofs in coordinates in a simple arithmetic
1038  // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) correspond to node i
1039  //
1040  // NOTE: Potentially, some of the code below could be simplified with UnAmalgamationInfo,
1041  // but as I totally don't understand that code, here is my solution
1042 
1043  // [*1*]: see [*0*]
1044 
1045  // Check that the number of local coordinates is consistent with the #rows in A
1046  TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getLocalNumElements() / blkSize != Coords->getLocalLength(), Exceptions::Incompatible,
1047  "Coordinate vector length (" << Coords->getLocalLength() << ") is incompatible with number of rows in A (" << A->getRowMap()->getLocalNumElements() << ") by modulo block size (" << blkSize << ").");
1048 
1049  const RCP<const Map> colMap = A->getColMap();
1050  RCP<const Map> uniqueMap, nonUniqueMap;
1051  Array<LO> colTranslation;
1052  if (blkSize == 1) {
1053  uniqueMap = A->getRowMap();
1054  nonUniqueMap = A->getColMap();
1055  graphType = "unamalgamated";
1056 
1057  } else {
1058  uniqueMap = Coords->getMap();
1059  TEUCHOS_TEST_FOR_EXCEPTION(uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible,
1060  "Different index bases for matrix and coordinates");
1061 
1062  AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, colTranslation);
1063 
1064  graphType = "amalgamated";
1065  }
1066  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getLocalNumElements());
1067 
1068  RCP<RealValuedMultiVector> ghostedCoords;
1069  RCP<Vector> ghostedLaplDiag;
1070  Teuchos::ArrayRCP<SC> ghostedLaplDiagData;
1071  if (threshold != STS::zero()) {
1072  // Get ghost coordinates
1073  RCP<const Import> importer;
1074  {
1075  SubFactoryMonitor m1(*this, "Import construction", currentLevel);
1076  if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) {
1077  GetOStream(Warnings1) << "Using existing importer from matrix graph" << std::endl;
1078  importer = realA->getCrsGraph()->getImporter();
1079  } else {
1080  GetOStream(Warnings0) << "Constructing new importer instance" << std::endl;
1081  importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
1082  }
1083  } // subtimer
1084  ghostedCoords = Xpetra::MultiVectorFactory<real_type, LO, GO, NO>::Build(nonUniqueMap, Coords->getNumVectors());
1085  {
1086  SubFactoryMonitor m1(*this, "Coordinate import", currentLevel);
1087  ghostedCoords->doImport(*Coords, *importer, Xpetra::INSERT);
1088  } // subtimer
1089 
1090  // Construct Distance Laplacian diagonal
1091  RCP<Vector> localLaplDiag = VectorFactory::Build(uniqueMap);
1092  Array<LO> indicesExtra;
1094  if (threshold != STS::zero()) {
1095  const size_t numVectors = ghostedCoords->getNumVectors();
1096  coordData.reserve(numVectors);
1097  for (size_t j = 0; j < numVectors; j++) {
1098  Teuchos::ArrayRCP<const real_type> tmpData = ghostedCoords->getData(j);
1099  coordData.push_back(tmpData);
1100  }
1101  }
1102  {
1103  SubFactoryMonitor m1(*this, "Laplacian local diagonal", currentLevel);
1104  ArrayRCP<SC> localLaplDiagData = localLaplDiag->getDataNonConst(0);
1105  for (LO row = 0; row < numRows; row++) {
1106  ArrayView<const LO> indices;
1107 
1108  if (blkSize == 1) {
1109  ArrayView<const SC> vals;
1110  A->getLocalRowView(row, indices, vals);
1111 
1112  } else {
1113  // Merge rows of A
1114  indicesExtra.resize(0);
1115  MergeRows(*A, row, indicesExtra, colTranslation);
1116  indices = indicesExtra;
1117  }
1118 
1119  LO nnz = indices.size();
1120  bool haveAddedToDiag = false;
1121  for (LO colID = 0; colID < nnz; colID++) {
1122  const LO col = indices[colID];
1123 
1124  if (row != col) {
1125  if (use_dlap_weights == SINGLE_WEIGHTS) {
1126  /*printf("[%d,%d] Unweighted Distance = %6.4e Weighted Distance = %6.4e\n",row,col,
1127  MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col),
1128  MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col));*/
1129  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1130  } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1131  int block_id = row % interleaved_blocksize;
1132  int block_start = block_id * interleaved_blocksize;
1133  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1134  } else {
1135  // printf("[%d,%d] Unweighted Distance = %6.4e\n",row,col,MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col));
1136  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1137  }
1138  haveAddedToDiag = true;
1139  }
1140  }
1141  // Deal with the situation where boundary conditions have only been enforced on rows, but not on columns.
1142  // We enforce dropping of these entries by assigning a very large number to the diagonal entries corresponding to BCs.
1143  if (!haveAddedToDiag)
1144  localLaplDiagData[row] = STS::rmax();
1145  }
1146  } // subtimer
1147  {
1148  SubFactoryMonitor m1(*this, "Laplacian distributed diagonal", currentLevel);
1149  ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
1150  ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT);
1151  ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0);
1152  } // subtimer
1153 
1154  } else {
1155  GetOStream(Runtime0) << "Skipping distance laplacian construction due to 0 threshold" << std::endl;
1156  }
1157 
1158  // NOTE: ghostedLaplDiagData might be zero if we don't actually calculate the laplacian
1159 
1160  // allocate space for the local graph
1161  typename LWGraph::row_type::non_const_type rows("rows", numRows + 1);
1162  typename LWGraph::entries_type::non_const_type columns("columns", A->getLocalNumEntries());
1163 
1164 #ifdef HAVE_MUELU_DEBUG
1165  // DEBUGGING
1166  for (LO i = 0; i < (LO)columns.size(); i++) columns[i] = -666;
1167 #endif
1168 
1169  // Extra array for if we're allowing symmetrization with cutting
1170  ArrayRCP<LO> rows_stop;
1171  bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo == scaled_cut_symmetric;
1172  if (use_stop_array)
1173  // rows_stop = typename LWGraph::row_type::non_const_type("rows_stop", numRows);
1174  rows_stop.resize(numRows);
1175 
1176  typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numRows);
1177  Kokkos::deep_copy(amalgBoundaryNodes, false);
1178 
1179  LO realnnz = 0;
1180  rows(0) = 0;
1181 
1182  Array<LO> indicesExtra;
1183  {
1184  SubFactoryMonitor m1(*this, "Laplacian dropping", currentLevel);
1186  if (threshold != STS::zero()) {
1187  const size_t numVectors = ghostedCoords->getNumVectors();
1188  coordData.reserve(numVectors);
1189  for (size_t j = 0; j < numVectors; j++) {
1190  Teuchos::ArrayRCP<const real_type> tmpData = ghostedCoords->getData(j);
1191  coordData.push_back(tmpData);
1192  }
1193  }
1194 
1195  ArrayView<const SC> vals; // CMS hackery
1196  for (LO row = 0; row < numRows; row++) {
1197  ArrayView<const LO> indices;
1198  indicesExtra.resize(0);
1199  bool isBoundary = false;
1200 
1201  if (blkSize == 1) {
1202  // ArrayView<const SC> vals;//CMS uncomment
1203  A->getLocalRowView(row, indices, vals);
1204  isBoundary = pointBoundaryNodes[row];
1205  } else {
1206  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
1207  for (LO j = 0; j < blkSize; j++) {
1208  if (!pointBoundaryNodes[row * blkSize + j]) {
1209  isBoundary = false;
1210  break;
1211  }
1212  }
1213 
1214  // Merge rows of A
1215  if (!isBoundary)
1216  MergeRows(*A, row, indicesExtra, colTranslation);
1217  else
1218  indicesExtra.push_back(row);
1219  indices = indicesExtra;
1220  }
1221  numTotal += indices.size();
1222 
1223  LO nnz = indices.size(), rownnz = 0;
1224 
1225  if (use_stop_array) {
1226  rows(row + 1) = rows(row) + nnz;
1227  realnnz = rows(row);
1228  }
1229 
1230  if (threshold != STS::zero()) {
1231  // default
1232  if (distanceLaplacianAlgo == defaultAlgo) {
1233  /* Standard Distance Laplacian */
1234  for (LO colID = 0; colID < nnz; colID++) {
1235  LO col = indices[colID];
1236 
1237  if (row == col) {
1238  columns(realnnz++) = col;
1239  rownnz++;
1240  continue;
1241  }
1242 
1243  // We do not want the distance Laplacian aggregating boundary nodes
1244  if (isBoundary) continue;
1245 
1246  SC laplVal;
1247  if (use_dlap_weights == SINGLE_WEIGHTS) {
1248  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1249  } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1250  int block_id = row % interleaved_blocksize;
1251  int block_start = block_id * interleaved_blocksize;
1252  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1253  } else {
1254  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1255  }
1256  real_type aiiajj = STS::magnitude(realThreshold * realThreshold * ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1257  real_type aij = STS::magnitude(laplVal * laplVal);
1258 
1259  if (aij > aiiajj) {
1260  columns(realnnz++) = col;
1261  rownnz++;
1262  } else {
1263  numDropped++;
1264  }
1265  }
1266  } else {
1267  /* Cut Algorithm */
1268  using DropTol = Details::DropTol<real_type, LO>;
1269  std::vector<DropTol> drop_vec;
1270  drop_vec.reserve(nnz);
1271  const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
1272  const real_type one = Teuchos::ScalarTraits<real_type>::one();
1273 
1274  // find magnitudes
1275  for (LO colID = 0; colID < nnz; colID++) {
1276  LO col = indices[colID];
1277 
1278  if (row == col) {
1279  drop_vec.emplace_back(zero, one, colID, false);
1280  continue;
1281  }
1282  // We do not want the distance Laplacian aggregating boundary nodes
1283  if (isBoundary) continue;
1284 
1285  SC laplVal;
1286  if (use_dlap_weights == SINGLE_WEIGHTS) {
1287  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(), coordData, row, col);
1288  } else if (use_dlap_weights == BLOCK_WEIGHTS) {
1289  int block_id = row % interleaved_blocksize;
1290  int block_start = block_id * interleaved_blocksize;
1291  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(dlap_weights(block_start, interleaved_blocksize), coordData, row, col);
1292  } else {
1293  laplVal = STS::one() / MueLu::Utilities<real_type, LO, GO, NO>::Distance2(coordData, row, col);
1294  }
1295 
1296  real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row] * ghostedLaplDiagData[col]);
1297  real_type aij = STS::magnitude(laplVal * laplVal);
1298 
1299  drop_vec.emplace_back(aij, aiiajj, colID, false);
1300  }
1301 
1302  const size_t n = drop_vec.size();
1303 
1304  if (distanceLaplacianAlgo == unscaled_cut) {
1305  std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1306  return a.val > b.val;
1307  });
1308 
1309  bool drop = false;
1310  for (size_t i = 1; i < n; ++i) {
1311  if (!drop) {
1312  auto const& x = drop_vec[i - 1];
1313  auto const& y = drop_vec[i];
1314  auto a = x.val;
1315  auto b = y.val;
1316  if (a > realThreshold * b) {
1317  drop = true;
1318 #ifdef HAVE_MUELU_DEBUG
1319  if (distanceLaplacianCutVerbose) {
1320  std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl;
1321  }
1322 #endif
1323  }
1324  }
1325  drop_vec[i].drop = drop;
1326  }
1327  } else if (distanceLaplacianAlgo == scaled_cut || distanceLaplacianAlgo == scaled_cut_symmetric) {
1328  std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1329  return a.val / a.diag > b.val / b.diag;
1330  });
1331 
1332  bool drop = false;
1333  for (size_t i = 1; i < n; ++i) {
1334  if (!drop) {
1335  auto const& x = drop_vec[i - 1];
1336  auto const& y = drop_vec[i];
1337  auto a = x.val / x.diag;
1338  auto b = y.val / y.diag;
1339  if (a > realThreshold * b) {
1340  drop = true;
1341 #ifdef HAVE_MUELU_DEBUG
1342  if (distanceLaplacianCutVerbose) {
1343  std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl;
1344  }
1345 #endif
1346  }
1347  }
1348  drop_vec[i].drop = drop;
1349  }
1350  }
1351 
1352  std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) {
1353  return a.col < b.col;
1354  });
1355 
1356  for (LO idxID = 0; idxID < (LO)drop_vec.size(); idxID++) {
1357  LO col = indices[drop_vec[idxID].col];
1358 
1359  // don't drop diagonal
1360  if (row == col) {
1361  columns(realnnz++) = col;
1362  rownnz++;
1363  // printf("(%d,%d) KEEP %13s matrix = %6.4e\n",row,row,"DIAGONAL",drop_vec[idxID].aux_val);
1364  continue;
1365  }
1366 
1367  if (!drop_vec[idxID].drop) {
1368  columns(realnnz++) = col;
1369  // printf("(%d,%d) KEEP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1370  rownnz++;
1371  } else {
1372  // printf("(%d,%d) DROP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1373  numDropped++;
1374  }
1375  }
1376  }
1377  } else {
1378  // Skip laplace calculation and threshold comparison for zero threshold
1379  for (LO colID = 0; colID < nnz; colID++) {
1380  LO col = indices[colID];
1381  columns(realnnz++) = col;
1382  rownnz++;
1383  }
1384  }
1385 
1386  if (rownnz == 1) {
1387  // If the only element remaining after filtering is diagonal, mark node as boundary
1388  // FIXME: this should really be replaced by the following
1389  // if (indices.size() == 1 && indices[0] == row)
1390  // boundaryNodes[row] = true;
1391  // We do not do it this way now because there is no framework for distinguishing isolated
1392  // and boundary nodes in the aggregation algorithms
1393  amalgBoundaryNodes[row] = true;
1394  }
1395 
1396  if (use_stop_array)
1397  rows_stop[row] = rownnz + rows[row];
1398  else
1399  rows[row + 1] = realnnz;
1400  } // for (LO row = 0; row < numRows; row++)
1401 
1402  } // subtimer
1403 
1404  if (use_stop_array) {
1405  // Do symmetrization of the cut matrix
1406  // NOTE: We assume nested row/column maps here
1407  for (LO row = 0; row < numRows; row++) {
1408  for (LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) {
1409  LO col = columns[colidx];
1410  if (col >= numRows) continue;
1411 
1412  bool found = false;
1413  for (LO t_col = rows(col); !found && t_col < rows_stop[col]; t_col++) {
1414  if (columns[t_col] == row)
1415  found = true;
1416  }
1417  // We didn't find the transpose buddy, so let's symmetrize, unless we'd be symmetrizing
1418  // into a Dirichlet unknown. In that case don't.
1419  if (!found && !pointBoundaryNodes[col] && Teuchos::as<typename LWGraph::row_type::value_type>(rows_stop[col]) < rows[col + 1]) {
1420  LO new_idx = rows_stop[col];
1421  // printf("(%d,%d) SYMADD entry\n",col,row);
1422  columns[new_idx] = row;
1423  rows_stop[col]++;
1424  numDropped--;
1425  }
1426  }
1427  }
1428 
1429  // Condense everything down
1430  LO current_start = 0;
1431  for (LO row = 0; row < numRows; row++) {
1432  LO old_start = current_start;
1433  for (LO col = rows(row); col < rows_stop[row]; col++) {
1434  if (current_start != col) {
1435  columns(current_start) = columns(col);
1436  }
1437  current_start++;
1438  }
1439  rows[row] = old_start;
1440  }
1441  rows(numRows) = realnnz = current_start;
1442  }
1443 
1444  RCP<LWGraph> graph;
1445  {
1446  SubFactoryMonitor m1(*this, "Build amalgamated graph", currentLevel);
1447  graph = rcp(new LWGraph(rows, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1448  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1449  } // subtimer
1450 
1451  if (GetVerbLevel() & Statistics1) {
1452  GO numLocalBoundaryNodes = 0;
1453  GO numGlobalBoundaryNodes = 0;
1454 
1455  for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1456  if (amalgBoundaryNodes(i))
1457  numLocalBoundaryNodes++;
1458 
1459  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1460  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1461  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " agglomerated Dirichlet nodes"
1462  << " using threshold " << dirichletThreshold << std::endl;
1463  }
1464 
1465  Set(currentLevel, "Graph", graph);
1466  Set(currentLevel, "DofsPerNode", blkSize);
1467  }
1468  }
1469 
1470  if ((GetVerbLevel() & Statistics1) && !(A->GetFixedBlockSize() > 1 && threshold != STS::zero())) {
1471  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1472  GO numGlobalTotal, numGlobalDropped;
1473  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1474  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1475  GetOStream(Statistics1) << "Number of dropped entries in " << graphType << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1476  if (numGlobalTotal != 0)
1477  GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1478  GetOStream(Statistics1) << std::endl;
1479  }
1480 
1481  } else {
1482  // what Tobias has implemented
1483 
1484  SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
1485  // GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1486  GetOStream(Runtime0) << "algorithm = \""
1487  << "failsafe"
1488  << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1489  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
1490 
1491  RCP<const Map> rowMap = A->getRowMap();
1492  RCP<const Map> colMap = A->getColMap();
1493 
1494  LO blockdim = 1; // block dim for fixed size blocks
1495  GO indexBase = rowMap->getIndexBase(); // index base of maps
1496  GO offset = 0;
1497 
1498  // 1) check for blocking/striding information
1499  if (A->IsView("stridedMaps") &&
1500  Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap("stridedMaps")) != Teuchos::null) {
1501  Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!)
1502  RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap());
1503  TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null, Exceptions::BadCast, "MueLu::CoalesceFactory::Build: cast to strided row map failed.");
1504  blockdim = strMap->getFixedBlockSize();
1505  offset = strMap->getOffset();
1506  oldView = A->SwitchToView(oldView);
1507  GetOStream(Statistics1) << "CoalesceDropFactory::Build():"
1508  << " found blockdim=" << blockdim << " from strided maps. offset=" << offset << std::endl;
1509  } else
1510  GetOStream(Statistics1) << "CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl;
1511 
1512  // 2) get row map for amalgamated matrix (graph of A)
1513  // with same distribution over all procs as row map of A
1514  RCP<const Map> nodeMap = amalInfo->getNodeRowMap();
1515  GetOStream(Statistics1) << "CoalesceDropFactory: nodeMap " << nodeMap->getLocalNumElements() << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl;
1516 
1517  // 3) create graph of amalgamated matrix
1518  RCP<CrsGraph> crsGraph = CrsGraphFactory::Build(nodeMap, A->getLocalMaxNumRowEntries() * blockdim);
1519 
1520  LO numRows = A->getRowMap()->getLocalNumElements();
1521  LO numNodes = nodeMap->getLocalNumElements();
1522  typename LWGraph::boundary_nodes_type amalgBoundaryNodes("amalgBoundaryNodes", numNodes);
1523  Kokkos::deep_copy(amalgBoundaryNodes, false);
1524  const ArrayRCP<int> numberDirichletRowsPerNode(numNodes, 0); // helper array counting the number of Dirichlet nodes associated with node
1525  bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid
1526 
1527  // 4) do amalgamation. generate graph of amalgamated matrix
1528  // Note, this code is much more inefficient than the leightwight implementation
1529  // Most of the work has already been done in the AmalgamationFactory
1530  for (LO row = 0; row < numRows; row++) {
1531  // get global DOF id
1532  GO grid = rowMap->getGlobalElement(row);
1533 
1534  // reinitialize boolean helper variable
1535  bIsDiagonalEntry = false;
1536 
1537  // translate grid to nodeid
1538  GO nodeId = AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase);
1539 
1540  size_t nnz = A->getNumEntriesInLocalRow(row);
1543  A->getLocalRowView(row, indices, vals);
1544 
1545  RCP<std::vector<GO>> cnodeIds = Teuchos::rcp(new std::vector<GO>); // global column block ids
1546  LO realnnz = 0;
1547  for (LO col = 0; col < Teuchos::as<LO>(nnz); col++) {
1548  GO gcid = colMap->getGlobalElement(indices[col]); // global column id
1549 
1550  if (vals[col] != STS::zero()) {
1551  GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, offset, indexBase);
1552  cnodeIds->push_back(cnodeId);
1553  realnnz++; // increment number of nnz in matrix row
1554  if (grid == gcid) bIsDiagonalEntry = true;
1555  }
1556  }
1557 
1558  if (realnnz == 1 && bIsDiagonalEntry == true) {
1559  LO lNodeId = nodeMap->getLocalElement(nodeId);
1560  numberDirichletRowsPerNode[lNodeId] += 1; // increment Dirichlet row counter associated with lNodeId
1561  if (numberDirichletRowsPerNode[lNodeId] == blockdim) // mark full Dirichlet nodes
1562  amalgBoundaryNodes[lNodeId] = true;
1563  }
1564 
1565  Teuchos::ArrayRCP<GO> arr_cnodeIds = Teuchos::arcp(cnodeIds);
1566 
1567  if (arr_cnodeIds.size() > 0)
1568  crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds());
1569  }
1570  // fill matrix graph
1571  crsGraph->fillComplete(nodeMap, nodeMap);
1572 
1573  // 5) create MueLu Graph object
1574  RCP<LWGraph> graph = rcp(new LWGraph(crsGraph, "amalgamated graph of A"));
1575 
1576  // Detect and record rows that correspond to Dirichlet boundary conditions
1577  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1578 
1579  if (GetVerbLevel() & Statistics1) {
1580  GO numLocalBoundaryNodes = 0;
1581  GO numGlobalBoundaryNodes = 0;
1582  for (size_t i = 0; i < amalgBoundaryNodes.size(); ++i)
1583  if (amalgBoundaryNodes(i))
1584  numLocalBoundaryNodes++;
1585  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1586  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1587  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1588  }
1589 
1590  // 6) store results in Level
1591  // graph->SetBoundaryNodeMap(gBoundaryNodeMap);
1592  Set(currentLevel, "DofsPerNode", blockdim);
1593  Set(currentLevel, "Graph", graph);
1594 
1595  } // if (doExperimentalWrap) ... else ...
1596 
1597 } // Build
1598 
1599 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1600 void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRows(const Matrix& A, const LO row, Array<LO>& cols, const Array<LO>& translation) const {
1601  typedef typename ArrayView<const LO>::size_type size_type;
1602 
1603  // extract striding information
1604  LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1605  if (A.IsView("stridedMaps") == true) {
1606  Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1607  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1608  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1609  if (strMap->getStridedBlockId() > -1)
1610  blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1611  }
1612 
1613  // count nonzero entries in all dof rows associated with node row
1614  size_t nnz = 0, pos = 0;
1615  for (LO j = 0; j < blkSize; j++)
1616  nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1617 
1618  if (nnz == 0) {
1619  cols.resize(0);
1620  return;
1621  }
1622 
1623  cols.resize(nnz);
1624 
1625  // loop over all local dof rows associated with local node "row"
1626  ArrayView<const LO> inds;
1627  ArrayView<const SC> vals;
1628  for (LO j = 0; j < blkSize; j++) {
1629  A.getLocalRowView(row * blkSize + j, inds, vals);
1630  size_type numIndices = inds.size();
1631 
1632  if (numIndices == 0) // skip empty dof rows
1633  continue;
1634 
1635  // cols: stores all local node ids for current local node id "row"
1636  cols[pos++] = translation[inds[0]];
1637  for (size_type k = 1; k < numIndices; k++) {
1638  LO nodeID = translation[inds[k]];
1639  // Here we try to speed up the process by reducing the size of an array
1640  // to sort. This works if the column nonzeros belonging to the same
1641  // node are stored consequently.
1642  if (nodeID != cols[pos - 1])
1643  cols[pos++] = nodeID;
1644  }
1645  }
1646  cols.resize(pos);
1647  nnz = pos;
1648 
1649  // Sort and remove duplicates
1650  std::sort(cols.begin(), cols.end());
1651  pos = 0;
1652  for (size_t j = 1; j < nnz; j++)
1653  if (cols[j] != cols[pos])
1654  cols[++pos] = cols[j];
1655  cols.resize(pos + 1);
1656 }
1657 
1658 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1659 void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP<const SC>& ghostedDiagVals, SC threshold, Array<LO>& cols, const Array<LO>& translation) const {
1660  typedef typename ArrayView<const LO>::size_type size_type;
1661  typedef Teuchos::ScalarTraits<SC> STS;
1662 
1663  // extract striding information
1664  LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1665  if (A.IsView("stridedMaps") == true) {
1666  Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1667  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1668  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1669  if (strMap->getStridedBlockId() > -1)
1670  blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1671  }
1672 
1673  // count nonzero entries in all dof rows associated with node row
1674  size_t nnz = 0, pos = 0;
1675  for (LO j = 0; j < blkSize; j++)
1676  nnz += A.getNumEntriesInLocalRow(row * blkSize + j);
1677 
1678  if (nnz == 0) {
1679  cols.resize(0);
1680  return;
1681  }
1682 
1683  cols.resize(nnz);
1684 
1685  // loop over all local dof rows associated with local node "row"
1686  ArrayView<const LO> inds;
1687  ArrayView<const SC> vals;
1688  for (LO j = 0; j < blkSize; j++) {
1689  A.getLocalRowView(row * blkSize + j, inds, vals);
1690  size_type numIndices = inds.size();
1691 
1692  if (numIndices == 0) // skip empty dof rows
1693  continue;
1694 
1695  // cols: stores all local node ids for current local node id "row"
1696  LO prevNodeID = -1;
1697  for (size_type k = 0; k < numIndices; k++) {
1698  LO dofID = inds[k];
1699  LO nodeID = translation[inds[k]];
1700 
1701  // we avoid a square root by using squared values
1702  typename STS::magnitudeType aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[dofID] * ghostedDiagVals[row * blkSize + j]); // eps^2 * |a_ii| * |a_jj|
1703  typename STS::magnitudeType aij = STS::magnitude(vals[k] * vals[k]);
1704 
1705  // check dropping criterion
1706  if (aij > aiiajj || (row * blkSize + j == dofID)) {
1707  // accept entry in graph
1708 
1709  // Here we try to speed up the process by reducing the size of an array
1710  // to sort. This works if the column nonzeros belonging to the same
1711  // node are stored consequently.
1712  if (nodeID != prevNodeID) {
1713  cols[pos++] = nodeID;
1714  prevNodeID = nodeID;
1715  }
1716  }
1717  }
1718  }
1719  cols.resize(pos);
1720  nnz = pos;
1721 
1722  // Sort and remove duplicates
1723  std::sort(cols.begin(), cols.end());
1724  pos = 0;
1725  for (size_t j = 1; j < nnz; j++)
1726  if (cols[j] != cols[pos])
1727  cols[++pos] = cols[j];
1728  cols.resize(pos + 1);
1729 
1730  return;
1731 }
1732 
1733 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1735  typedef Teuchos::ScalarTraits<SC> STS;
1736 
1737  const ParameterList& pL = GetParameterList();
1738  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
1739  const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
1740 
1741  RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector>>(currentLevel, "BlockNumber");
1742  RCP<LocalOrdinalVector> ghostedBlockNumber;
1743  GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)" << std::endl;
1744 
1745  // Ghost the column block numbers if we need to
1746  RCP<const Import> importer = A->getCrsGraph()->getImporter();
1747  if (!importer.is_null()) {
1748  SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
1749  ghostedBlockNumber = Xpetra::VectorFactory<LO, LO, GO, NO>::Build(importer->getTargetMap());
1750  ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
1751  } else {
1752  ghostedBlockNumber = BlockNumber;
1753  }
1754 
1755  // Accessors for block numbers
1756  Teuchos::ArrayRCP<const LO> row_block_number = BlockNumber->getData(0);
1757  Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1758 
1759  // allocate space for the local graph
1760  typename CrsMatrix::local_matrix_type::row_map_type::HostMirror::non_const_type rows_mat;
1761  typename LWGraph::row_type::non_const_type rows_graph;
1762  typename LWGraph::entries_type::non_const_type columns;
1763  typename CrsMatrix::local_matrix_type::values_type::HostMirror::non_const_type values;
1764  RCP<CrsMatrixWrap> crs_matrix_wrap;
1765 
1766  if (generate_matrix) {
1767  crs_matrix_wrap = rcp(new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0));
1768  rows_mat = typename CrsMatrix::local_matrix_type::row_map_type::HostMirror::non_const_type("rows_mat", A->getLocalNumRows() + 1);
1769  } else {
1770  rows_graph = typename LWGraph::row_type::non_const_type("rows_graph", A->getLocalNumRows() + 1);
1771  }
1772  columns = typename LWGraph::entries_type::non_const_type("columns", A->getLocalNumEntries());
1773  values = typename CrsMatrix::local_matrix_type::values_type::HostMirror::non_const_type("values", A->getLocalNumEntries());
1774 
1775  LO realnnz = 0;
1776  GO numDropped = 0, numTotal = 0;
1777  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
1778  LO row_block = row_block_number[row];
1779  size_t nnz = A->getNumEntriesInLocalRow(row);
1780  ArrayView<const LO> indices;
1781  ArrayView<const SC> vals;
1782  A->getLocalRowView(row, indices, vals);
1783 
1784  LO rownnz = 0;
1785  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
1786  LO col = indices[colID];
1787  LO col_block = col_block_number[col];
1788 
1789  if (row_block == col_block) {
1790  if (generate_matrix) values[realnnz] = vals[colID];
1791  columns[realnnz++] = col;
1792  rownnz++;
1793  } else
1794  numDropped++;
1795  }
1796  if (generate_matrix)
1797  rows_mat[row + 1] = realnnz;
1798  else
1799  rows_graph[row + 1] = realnnz;
1800  }
1801 
1802  auto boundaryNodes = MueLu::Utilities<SC, LO, GO, NO>::DetectDirichletRows_kokkos_host(*A, dirichletThreshold);
1803  if (rowSumTol > 0.)
1804  Utilities::ApplyRowSumCriterionHost(*A, rowSumTol, boundaryNodes);
1805 
1806  numTotal = A->getLocalNumEntries();
1807 
1808  if (GetVerbLevel() & Statistics1) {
1809  GO numLocalBoundaryNodes = 0;
1810  GO numGlobalBoundaryNodes = 0;
1811  for (size_t i = 0; i < boundaryNodes.size(); ++i)
1812  if (boundaryNodes(i))
1813  numLocalBoundaryNodes++;
1814  RCP<const Teuchos::Comm<int>> comm = A->getRowMap()->getComm();
1815  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1816  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1817 
1818  GO numGlobalTotal, numGlobalDropped;
1819  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1820  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1821  GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1822  if (numGlobalTotal != 0)
1823  GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1824  GetOStream(Statistics1) << std::endl;
1825  }
1826 
1827  Set(currentLevel, "Filtering", true);
1828 
1829  if (generate_matrix) {
1830  // NOTE: Trying to use A's Import/Export objects will cause the code to segfault back in Build() with errors on the Import
1831  // if you're using Epetra. I'm not really sure why. By using the Col==Domain and Row==Range maps, we get null Import/Export objects
1832  // here, which is legit, because we never use them anyway.
1833  if constexpr (std::is_same<typename LWGraph::row_type,
1834  typename CrsMatrix::local_matrix_type::row_map_type>::value) {
1835  crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat, columns, values);
1836  } else {
1837  auto rows_mat2 = typename CrsMatrix::local_matrix_type::row_map_type::non_const_type("rows_mat2", rows_mat.extent(0));
1838  Kokkos::deep_copy(rows_mat2, rows_mat);
1839  auto columns2 = typename CrsMatrix::local_graph_type::entries_type::non_const_type("columns2", columns.extent(0));
1840  Kokkos::deep_copy(columns2, columns);
1841  auto values2 = typename CrsMatrix::local_matrix_type::values_type::non_const_type("values2", values.extent(0));
1842  Kokkos::deep_copy(values2, values);
1843  crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat2, columns2, values2);
1844  }
1845  crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap());
1846  } else {
1847  RCP<LWGraph> graph = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), A->getRowMap(), A->getColMap(), "block-diagonalized graph of A"));
1848  graph->SetBoundaryNodeMap(boundaryNodes);
1849  Set(currentLevel, "Graph", graph);
1850  }
1851 
1852  Set(currentLevel, "DofsPerNode", 1);
1853  return crs_matrix_wrap;
1854 }
1855 
1856 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1858  TEUCHOS_TEST_FOR_EXCEPTION(ghostedBlockNumber.is_null(), Exceptions::RuntimeError, "BlockDiagonalizeGraph(): ghostedBlockNumber is null.");
1859  const ParameterList& pL = GetParameterList();
1860 
1861  const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
1862 
1863  GetOStream(Statistics1) << "Using BlockDiagonal Graph after Dropping (with provided blocking)";
1864  if (localizeColoringGraph)
1865  GetOStream(Statistics1) << ", with localization" << std::endl;
1866  else
1867  GetOStream(Statistics1) << ", without localization" << std::endl;
1868 
1869  // Accessors for block numbers
1870  Teuchos::ArrayRCP<const LO> row_block_number = ghostedBlockNumber->getData(0);
1871  Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1872 
1873  // allocate space for the local graph
1874  ArrayRCP<size_t> rows_mat;
1875  typename LWGraph::row_type::non_const_type rows_graph("rows_graph", inputGraph->GetNodeNumVertices() + 1);
1876  typename LWGraph::entries_type::non_const_type columns("columns", inputGraph->GetNodeNumEdges());
1877 
1878  LO realnnz = 0;
1879  GO numDropped = 0, numTotal = 0;
1880  const LO numRows = Teuchos::as<LO>(inputGraph->GetDomainMap()->getLocalNumElements());
1881  if (localizeColoringGraph) {
1882  for (LO row = 0; row < numRows; ++row) {
1883  LO row_block = row_block_number[row];
1884  auto indices = inputGraph->getNeighborVertices(row);
1885 
1886  LO rownnz = 0;
1887  for (LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1888  LO col = indices(colID);
1889  LO col_block = col_block_number[col];
1890 
1891  if ((row_block == col_block) && (col < numRows)) {
1892  columns(realnnz++) = col;
1893  rownnz++;
1894  } else
1895  numDropped++;
1896  }
1897  rows_graph(row + 1) = realnnz;
1898  }
1899  } else {
1900  // ghosting of boundary node map
1901  auto boundaryNodes = inputGraph->GetBoundaryNodeMap();
1903  for (size_t i = 0; i < inputGraph->GetNodeNumVertices(); i++)
1904  boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
1905  // Xpetra::IO<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Write("boundary",*boundaryNodesVector);
1907  boundaryColumnVector->doImport(*boundaryNodesVector, *importer, Xpetra::INSERT);
1908  auto boundaryColumn = boundaryColumnVector->getData(0);
1909 
1910  for (LO row = 0; row < numRows; ++row) {
1911  LO row_block = row_block_number[row];
1912  auto indices = inputGraph->getNeighborVertices(row);
1913 
1914  LO rownnz = 0;
1915  for (LO colID = 0; colID < Teuchos::as<LO>(indices.length); colID++) {
1916  LO col = indices(colID);
1917  LO col_block = col_block_number[col];
1918 
1919  if ((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) {
1920  columns(realnnz++) = col;
1921  rownnz++;
1922  } else
1923  numDropped++;
1924  }
1925  rows_graph(row + 1) = realnnz;
1926  }
1927  }
1928 
1929  numTotal = inputGraph->GetNodeNumEdges();
1930 
1931  if (GetVerbLevel() & Statistics1) {
1932  RCP<const Teuchos::Comm<int>> comm = inputGraph->GetDomainMap()->getComm();
1933  GO numGlobalTotal, numGlobalDropped;
1934  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1935  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1936  GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1937  if (numGlobalTotal != 0)
1938  GetOStream(Statistics1) << " (" << 100 * Teuchos::as<double>(numGlobalDropped) / Teuchos::as<double>(numGlobalTotal) << "%)";
1939  GetOStream(Statistics1) << std::endl;
1940  }
1941 
1942  if (localizeColoringGraph) {
1943  outputGraph = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
1944  outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
1945  } else {
1946  TEUCHOS_ASSERT(inputGraph->GetDomainMap()->lib() == Xpetra::UseTpetra);
1947 #ifdef HAVE_XPETRA_TPETRA
1948  auto outputGraph2 = rcp(new LWGraph(rows_graph, Kokkos::subview(columns, Kokkos::make_pair(0, realnnz)), inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
1949 
1950  auto tpGraph = Xpetra::toTpetra(rcp_const_cast<const CrsGraph>(outputGraph2->GetCrsGraph()));
1952  auto tpGraphSym = sym->symmetrize();
1953  auto lclGraphSym = tpGraphSym->getLocalGraphHost();
1954  auto colIndsSym = lclGraphSym.entries;
1955 
1956  auto rowsSym = tpGraphSym->getLocalRowPtrsHost();
1957  typename LWGraph::row_type::non_const_type rows_graphSym("rows_graphSym", rowsSym.size());
1958  for (size_t row = 0; row < rowsSym.size(); row++)
1959  rows_graphSym(row) = rowsSym(row);
1960  outputGraph = rcp(new LWGraph(rows_graphSym, colIndsSym, inputGraph->GetDomainMap(), Xpetra::toXpetra(tpGraphSym->getColMap()), "block-diagonalized graph of A"));
1961  outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
1962 #endif
1963  }
1964 }
1965 
1966 } // namespace MueLu
1967 
1968 #endif // MUELU_COALESCEDROPFACTORY_DEF_HPP
Important warning messages (one line)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Exception indicating invalid cast attempted.
void reserve(size_type n)
void BlockDiagonalizeGraph(const RCP< LWGraph > &inputGraph, const RCP< LocalOrdinalVector > &ghostedBlockNumber, RCP< LWGraph > &outputGraph, RCP< const Import > &importer) const
#define MueLu_sumAll(rcpComm, in, out)
static void ApplyRowSumCriterionHost(const Matrix &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, Kokkos::HostSpace > &dirichletRows)
KOKKOS_INLINE_FUNCTION void SetBoundaryNodeMap(const boundary_nodes_type bndry)
Set boolean array indicating which rows correspond to Dirichlet boundaries.
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
void setValidator(RCP< const ParameterEntryValidator > const &validator)
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
Apply Rowsum Criterion.
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static void Write(const std::string &fileName, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &M)
Print more statistics.
size_type size() const
DropTol & operator=(DropTol const &)=default
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > BlockDiagonalize(Level &currentLevel, const RCP< Matrix > &A, bool generate_matrix) const
LocalOrdinal LO
One-liner description of what is happening.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const
Return number of graph edges.
KOKKOS_INLINE_FUNCTION const boundary_nodes_type GetBoundaryNodeMap() const
Returns map with global ids of boundary nodes.
size_type size() const
Exception throws to report incompatible objects (like maps).
static Teuchos::ArrayRCP< Magnitude > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Return vector containing: max_{i=k}(-a_ik), for each for i in the matrix.
static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset, const GlobalOrdinal indexBase)
Translate global (row/column) id to global amalgamation block id.
Scalar GetThreshold() const
Return threshold value.
Additional warnings.
void resize(const size_type n, const T &val=T())
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Magnitude >::zero(), bool count_twos_as_dirichlet=false)
Detect Dirichlet rows.
#define SET_VALID_ENTRY(name)
static void AmalgamateMap(const Map &sourceMap, const Matrix &A, RCP< const Map > &amalgamatedMap, Array< LO > &translation)
Method to create merged map for systems of PDEs.
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
void resize(size_type new_size, const value_type &x=value_type())
void Build(Level &currentLevel) const
Build an object with this factory.
std::string viewLabel_t
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
iterator end()
void DeclareInput(Level &currentLevel) const
Input.
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex &#39;v&#39;.
Print class parameters.
const RCP< const Map > GetImportMap() const
Return overlapping import map (nodes).
RCP< const Tpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > toTpetra(const RCP< const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > &graph)
void push_back(const value_type &x)
void MergeRows(const Matrix &A, const LO row, Array< LO > &cols, const Array< LO > &translation) const
Method to merge rows of matrix for systems of PDEs.
RCP< const CrsGraph< int, GlobalOrdinal, Node > > toXpetra(const Epetra_CrsGraph &g)
size_type size() const
Scalar SC
const RCP< const Map > GetDomainMap() const
Lightweight MueLu representation of a compressed row storage graph.
DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
static Kokkos::View< bool *, typename Kokkos::HostSpace > DetectDirichletRows_kokkos_host(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar >> &v, LocalOrdinal i0, LocalOrdinal i1)
Squared distance between two rows in a multivector.
ParameterEntry & getEntry(const std::string &name)
bool is_null() const
iterator begin()
void MergeRowsWithDropping(const Matrix &A, const LO row, const ArrayRCP< const SC > &ghostedDiagVals, SC threshold, Array< LO > &cols, const Array< LO > &translation) const
bool is_null() const