MueLu  Version of the Day
MueLu_RefMaxwell_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_REFMAXWELL_DEF_HPP
47 #define MUELU_REFMAXWELL_DEF_HPP
48 
49 #include <sstream>
50 
51 #include "MueLu_ConfigDefs.hpp"
52 
53 #include "Xpetra_Map.hpp"
54 #include "Xpetra_MatrixMatrix.hpp"
55 #include "Xpetra_TripleMatrixMultiply.hpp"
56 #include "Xpetra_CrsMatrixUtils.hpp"
57 #include "Xpetra_MatrixUtils.hpp"
58 
60 
61 #include "MueLu_AmalgamationFactory.hpp"
62 #include "MueLu_RAPFactory.hpp"
63 #include "MueLu_ThresholdAFilterFactory.hpp"
64 #include "MueLu_TransPFactory.hpp"
65 #include "MueLu_SmootherFactory.hpp"
66 
67 #include "MueLu_CoalesceDropFactory.hpp"
68 #include "MueLu_CoarseMapFactory.hpp"
69 #include "MueLu_CoordinatesTransferFactory.hpp"
70 #include "MueLu_UncoupledAggregationFactory.hpp"
71 #include "MueLu_TentativePFactory.hpp"
72 #include "MueLu_SaPFactory.hpp"
73 #include "MueLu_AggregationExportFactory.hpp"
74 #include "MueLu_Utilities.hpp"
75 
76 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
77 #include "MueLu_AmalgamationFactory_kokkos.hpp"
78 #include "MueLu_CoalesceDropFactory_kokkos.hpp"
79 #include "MueLu_CoarseMapFactory_kokkos.hpp"
80 #include "MueLu_CoordinatesTransferFactory_kokkos.hpp"
81 #include "MueLu_UncoupledAggregationFactory_kokkos.hpp"
82 #include "MueLu_TentativePFactory_kokkos.hpp"
83 #include "MueLu_SaPFactory_kokkos.hpp"
84 #include "MueLu_Utilities_kokkos.hpp"
85 #include <Kokkos_Core.hpp>
86 #include <KokkosSparse_CrsMatrix.hpp>
87 #endif
88 
89 #include "MueLu_ZoltanInterface.hpp"
90 #include "MueLu_Zoltan2Interface.hpp"
91 #include "MueLu_RepartitionHeuristicFactory.hpp"
92 #include "MueLu_RepartitionFactory.hpp"
93 #include "MueLu_RebalanceAcFactory.hpp"
94 #include "MueLu_RebalanceTransferFactory.hpp"
95 
96 #include "MueLu_VerbosityLevel.hpp"
97 
100 
101 #ifdef HAVE_MUELU_CUDA
102 #include "cuda_profiler_api.h"
103 #endif
104 
105 
106 namespace MueLu {
107 
108  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
109  void FindNonZeros(const Teuchos::ArrayRCP<const Scalar> vals,
110  Teuchos::ArrayRCP<bool> nonzeros) {
111  TEUCHOS_ASSERT(vals.size() == nonzeros.size());
112  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitudeType;
113  const magnitudeType eps = 2.0*Teuchos::ScalarTraits<magnitudeType>::eps();
114  for(size_t i=0; i<static_cast<size_t>(vals.size()); i++) {
115  nonzeros[i] = (Teuchos::ScalarTraits<Scalar>::magnitude(vals[i]) > eps);
116  }
117  }
118 
119 
120  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
121  void DetectDirichletCols(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A,
122  const Teuchos::ArrayRCP<bool>& dirichletRows,
123  Teuchos::ArrayRCP<bool> dirichletCols,
124  Teuchos::ArrayRCP<bool> dirichletDomain) {
125  const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
126  RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > domMap = A.getDomainMap();
127  RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > rowMap = A.getRowMap();
128  RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > colMap = A.getColMap();
129  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == rowMap->getNodeNumElements());
130  TEUCHOS_ASSERT(static_cast<size_t>(dirichletCols.size()) == colMap->getNodeNumElements());
131  TEUCHOS_ASSERT(static_cast<size_t>(dirichletDomain.size()) == domMap->getNodeNumElements());
132  RCP<Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > myColsToZero = Xpetra::MultiVectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(colMap, 1, /*zeroOut=*/true);
133  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
134  for(size_t i=0; i<(size_t) dirichletRows.size(); i++) {
135  if (dirichletRows[i]) {
136  ArrayView<const LocalOrdinal> indices;
137  ArrayView<const Scalar> values;
138  A.getLocalRowView(i,indices,values);
139  for(size_t j=0; j<static_cast<size_t>(indices.size()); j++)
140  myColsToZero->replaceLocalValue(indices[j],0,one);
141  }
142  }
143 
144  RCP<Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > globalColsToZero;
145  RCP<const Xpetra::Import<LocalOrdinal,GlobalOrdinal,Node> > importer = A.getCrsGraph()->getImporter();
146  if (!importer.is_null()) {
147  globalColsToZero = Xpetra::MultiVectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(domMap, 1, /*zeroOut=*/true);
148  // export to domain map
149  globalColsToZero->doExport(*myColsToZero,*importer,Xpetra::ADD);
150  // import to column map
151  myColsToZero->doImport(*globalColsToZero,*importer,Xpetra::INSERT);
152  }
153  else
154  globalColsToZero = myColsToZero;
155 
156  FindNonZeros<Scalar,LocalOrdinal,GlobalOrdinal,Node>(globalColsToZero->getData(0),dirichletDomain);
157  FindNonZeros<Scalar,LocalOrdinal,GlobalOrdinal,Node>(myColsToZero->getData(0),dirichletCols);
158  }
159 
160 
161 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
162 
163  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
164  void FindNonZeros(const typename Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev_um vals,
166  using ATS = Kokkos::ArithTraits<Scalar>;
167  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
169  TEUCHOS_ASSERT(vals.extent(0) == nonzeros.extent(0));
170  const typename ATS::magnitudeType eps = 2.0*impl_ATS::eps();
171 
172  Kokkos::parallel_for("MueLu:RefMaxwell::FindNonZeros", range_type(0,vals.extent(0)),
173  KOKKOS_LAMBDA (const size_t i) {
174  nonzeros(i) = (impl_ATS::magnitude(vals(i,0)) > eps);
175  });
176  }
177 
178  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
179  void DetectDirichletCols(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A,
184  const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
185  RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > domMap = A.getDomainMap();
186  RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > rowMap = A.getRowMap();
187  RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > colMap = A.getColMap();
188  TEUCHOS_ASSERT(dirichletRows.extent(0) == rowMap->getNodeNumElements());
189  TEUCHOS_ASSERT(dirichletCols.extent(0) == colMap->getNodeNumElements());
190  TEUCHOS_ASSERT(dirichletDomain.extent(0) == domMap->getNodeNumElements());
191  RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > myColsToZero = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(colMap, /*zeroOut=*/true);
192  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
193  auto myColsToZeroView = myColsToZero->template getLocalView<typename Node::device_type>();
194  auto localMatrix = A.getLocalMatrix();
195  Kokkos::parallel_for("MueLu:RefMaxwell::DetectDirichletCols", range_type(0,rowMap->getNodeNumElements()),
196  KOKKOS_LAMBDA(const LocalOrdinal row) {
197  if (dirichletRows(row)) {
198  auto rowView = localMatrix.row(row);
199  auto length = rowView.length;
200 
201  for (decltype(length) colID = 0; colID < length; colID++)
202  myColsToZeroView(rowView.colidx(colID),0) = one;
203  }
204  });
205 
206  RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > globalColsToZero;
207  RCP<const Xpetra::Import<LocalOrdinal,GlobalOrdinal,Node> > importer = A.getCrsGraph()->getImporter();
208  if (!importer.is_null()) {
209  globalColsToZero = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(domMap, /*zeroOut=*/true);
210  // export to domain map
211  globalColsToZero->doExport(*myColsToZero,*importer,Xpetra::ADD);
212  // import to column map
213  myColsToZero->doImport(*globalColsToZero,*importer,Xpetra::INSERT);
214  }
215  else
216  globalColsToZero = myColsToZero;
217  FindNonZeros<Scalar,LocalOrdinal,GlobalOrdinal,Node>(globalColsToZero->template getLocalView<typename Node::device_type>(),dirichletDomain);
218  FindNonZeros<Scalar,LocalOrdinal,GlobalOrdinal,Node>(myColsToZero->template getLocalView<typename Node::device_type>(),dirichletCols);
219  }
220 
221 #endif
222 
223  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
224  Teuchos::RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getDomainMap() const {
225  return SM_Matrix_->getDomainMap();
226  }
227 
228 
229  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
230  Teuchos::RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getRangeMap() const {
231  return SM_Matrix_->getRangeMap();
232  }
233 
234 
235  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
237 
238  if (list.isType<std::string>("parameterlist: syntax") && list.get<std::string>("parameterlist: syntax") == "ml") {
239  Teuchos::ParameterList newList = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list,"refmaxwell"));
240  if(list.isSublist("refmaxwell: 11list") && list.sublist("refmaxwell: 11list").isSublist("edge matrix free: coarse"))
241  newList.sublist("refmaxwell: 11list") = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse"),"SA"));
242  if(list.isSublist("refmaxwell: 22list"))
243  newList.sublist("refmaxwell: 22list") = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list.sublist("refmaxwell: 22list"),"SA"));
244  list = newList;
245  }
246 
247  parameterList_ = list;
248  std::string verbosityLevel = parameterList_.get<std::string>("verbosity", MasterList::getDefault<std::string>("verbosity"));
250  std::string outputFilename = parameterList_.get<std::string>("output filename", MasterList::getDefault<std::string>("output filename"));
251  if (outputFilename != "")
252  VerboseObject::SetMueLuOFileStream(outputFilename);
253  if (parameterList_.isType<Teuchos::RCP<Teuchos::FancyOStream> >("output stream"))
254  VerboseObject::SetMueLuOStream(parameterList_.get<Teuchos::RCP<Teuchos::FancyOStream> >("output stream"));
255 
256  if (parameterList_.get("print initial parameters",MasterList::getDefault<bool>("print initial parameters")))
257  GetOStream(static_cast<MsgType>(Runtime1), 0) << parameterList_ << std::endl;
258  disable_addon_ = list.get("refmaxwell: disable addon", MasterList::getDefault<bool>("refmaxwell: disable addon"));
259  mode_ = list.get("refmaxwell: mode", MasterList::getDefault<std::string>("refmaxwell: mode"));
260  use_as_preconditioner_ = list.get("refmaxwell: use as preconditioner", MasterList::getDefault<bool>("refmaxwell: use as preconditioner"));
261  dump_matrices_ = list.get("refmaxwell: dump matrices", MasterList::getDefault<bool>("refmaxwell: dump matrices"));
262  implicitTranspose_ = list.get("transpose: use implicit", MasterList::getDefault<bool>("transpose: use implicit"));
263  fuseProlongationAndUpdate_ = list.get("fuse prolongation and update", MasterList::getDefault<bool>("fuse prolongation and update"));
264  syncTimers_ = list.get("sync timers", false);
265  numItersH_ = list.get("refmaxwell: num iters H", 1);
266  numIters22_ = list.get("refmaxwell: num iters 22", 1);
267 
268  if(list.isSublist("refmaxwell: 11list"))
269  precList11_ = list.sublist("refmaxwell: 11list");
270  if(!precList11_.isType<std::string>("smoother: type") && !precList11_.isType<std::string>("smoother: pre type") && !precList11_.isType<std::string>("smoother: post type")) {
271  precList11_.set("smoother: type", "CHEBYSHEV");
272  precList11_.sublist("smoother: params").set("chebyshev: degree",2);
273  precList11_.sublist("smoother: params").set("chebyshev: ratio eigenvalue",5.4);
274  precList11_.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
275  }
276 
277  if(list.isSublist("refmaxwell: 22list"))
278  precList22_ = list.sublist("refmaxwell: 22list");
279  if(!precList22_.isType<std::string>("smoother: type") && !precList22_.isType<std::string>("smoother: pre type") && !precList22_.isType<std::string>("smoother: post type")) {
280  precList22_.set("smoother: type", "CHEBYSHEV");
281  precList22_.sublist("smoother: params").set("chebyshev: degree",2);
282  precList22_.sublist("smoother: params").set("chebyshev: ratio eigenvalue",7.0);
283  precList22_.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
284  }
285 
286  if(!list.isType<std::string>("smoother: type") && !list.isType<std::string>("smoother: pre type") && !list.isType<std::string>("smoother: post type")) {
287  list.set("smoother: type", "CHEBYSHEV");
288  list.sublist("smoother: params").set("chebyshev: degree",2);
289  list.sublist("smoother: params").set("chebyshev: ratio eigenvalue",20.0);
290  list.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
291  }
292 
293  if(list.isSublist("smoother: params")) {
294  smootherList_ = list.sublist("smoother: params");
295  }
296 
297 #if !defined(HAVE_MUELU_KOKKOS_REFACTOR)
298  useKokkos_ = false;
299 #else
300 # ifdef HAVE_MUELU_SERIAL
301  if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosSerialWrapperNode).name())
302  useKokkos_ = false;
303 # endif
304 # ifdef HAVE_MUELU_OPENMP
305  if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosOpenMPWrapperNode).name())
306  useKokkos_ = true;
307 # endif
308 # ifdef HAVE_MUELU_CUDA
309  if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosCudaWrapperNode).name())
310  useKokkos_ = true;
311 # endif
312  useKokkos_ = list.get("use kokkos refactor",useKokkos_);
313 #endif
314  }
315 
316 
317  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
319 
320 #ifdef HAVE_MUELU_CUDA
321  if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStart();
322 #endif
323 
324  std::string timerLabel;
325  if (reuse)
326  timerLabel = "MueLu RefMaxwell: compute (reuse)";
327  else
328  timerLabel = "MueLu RefMaxwell: compute";
329  RCP<Teuchos::TimeMonitor> tmCompute = getTimer(timerLabel);
330 
332  // Remove explicit zeros from matrices
333 
334  bool defaultFilter = false;
335 
336  // Remove zero entries from D0 if necessary.
337  // In the construction of the prolongator we use the graph of the
338  // matrix, so zero entries mess it up.
339  if (parameterList_.get<bool>("refmaxwell: filter D0", true) && D0_Matrix_->getNodeMaxNumRowEntries()>2) {
340  Level fineLevel;
341  fineLevel.SetFactoryManager(null);
342  fineLevel.SetLevelID(0);
343  fineLevel.Set("A",D0_Matrix_);
344  fineLevel.setlib(D0_Matrix_->getDomainMap()->lib());
345  // We expect D0 to have entries +-1, so any threshold value will do.
346  RCP<ThresholdAFilterFactory> ThreshFact = rcp(new ThresholdAFilterFactory("A",1.0e-8,/*keepDiagonal=*/false,/*expectedNNZperRow=*/2));
347  fineLevel.Request("A",ThreshFact.get());
348  ThreshFact->Build(fineLevel);
349  D0_Matrix_ = fineLevel.Get< RCP<Matrix> >("A",ThreshFact.get());
350 
351  // If D0 has too many zeros, maybe SM and M1 do as well.
352  defaultFilter = true;
353  }
354 
355  if (parameterList_.get<bool>("refmaxwell: filter SM", defaultFilter)) {
356  RCP<Vector> diag = VectorFactory::Build(SM_Matrix_->getRowMap());
357  // find a reasonable absolute value threshold
358  SM_Matrix_->getLocalDiagCopy(*diag);
359  magnitudeType threshold = 1.0e-8 * diag->normInf();
360 
361  Level fineLevel;
362  fineLevel.SetFactoryManager(null);
363  fineLevel.SetLevelID(0);
364  fineLevel.Set("A",SM_Matrix_);
365  fineLevel.setlib(SM_Matrix_->getDomainMap()->lib());
366  RCP<ThresholdAFilterFactory> ThreshFact = rcp(new ThresholdAFilterFactory("A",threshold,/*keepDiagonal=*/true));
367  fineLevel.Request("A",ThreshFact.get());
368  ThreshFact->Build(fineLevel);
369  SM_Matrix_ = fineLevel.Get< RCP<Matrix> >("A",ThreshFact.get());
370  }
371 
372  if (parameterList_.get<bool>("refmaxwell: filter M1", defaultFilter)) {
373  RCP<Vector> diag = VectorFactory::Build(M1_Matrix_->getRowMap());
374  // find a reasonable absolute value threshold
375  M1_Matrix_->getLocalDiagCopy(*diag);
376  magnitudeType threshold = 1.0e-8 * diag->normInf();
377 
378  Level fineLevel;
379  fineLevel.SetFactoryManager(null);
380  fineLevel.SetLevelID(0);
381  fineLevel.Set("A",M1_Matrix_);
382  fineLevel.setlib(M1_Matrix_->getDomainMap()->lib());
383  RCP<ThresholdAFilterFactory> ThreshFact = rcp(new ThresholdAFilterFactory("A",threshold,/*keepDiagonal=*/true));
384  fineLevel.Request("A",ThreshFact.get());
385  ThreshFact->Build(fineLevel);
386  M1_Matrix_ = fineLevel.Get< RCP<Matrix> >("A",ThreshFact.get());
387  }
388 
389  if (parameterList_.get<bool>("refmaxwell: filter Ms", defaultFilter)) {
390  RCP<Vector> diag = VectorFactory::Build(Ms_Matrix_->getRowMap());
391  // find a reasonable absolute value threshold
392  Ms_Matrix_->getLocalDiagCopy(*diag);
393  magnitudeType threshold = 1.0e-8 * diag->normInf();
394 
395  Level fineLevel;
396  fineLevel.SetFactoryManager(null);
397  fineLevel.SetLevelID(0);
398  fineLevel.Set("A",Ms_Matrix_);
399  fineLevel.setlib(Ms_Matrix_->getDomainMap()->lib());
400  RCP<ThresholdAFilterFactory> ThreshFact = rcp(new ThresholdAFilterFactory("A",threshold,/*keepDiagonal=*/true));
401  fineLevel.Request("A",ThreshFact.get());
402  ThreshFact->Build(fineLevel);
403  Ms_Matrix_ = fineLevel.Get< RCP<Matrix> >("A",ThreshFact.get());
404  }
405 
406  if (IsPrint(Statistics2)) {
407  RCP<ParameterList> params = rcp(new ParameterList());;
408  params->set("printLoadBalancingInfo", true);
409  params->set("printCommInfo", true);
410  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*SM_Matrix_, "SM_Matrix", params);
411  }
412 
414  // Detect Dirichlet boundary conditions
415 
416  if (!reuse) {
417  // clean rows associated with boundary conditions
418  // Find rows with only 1 or 2 nonzero entries, record them in BCrows_.
419  // BCrows_[i] is true, iff i is a boundary row
420  // BCcols_[i] is true, iff i is a boundary column
421  int BCedgesLocal = 0;
422  int BCnodesLocal = 0;
423 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
424  if (useKokkos_) {
425  BCrowsKokkos_ = Utilities_kokkos::DetectDirichletRows(*SM_Matrix_,Teuchos::ScalarTraits<magnitudeType>::eps(),/*count_twos_as_dirichlet=*/true);
426 
427  double rowsumTol = parameterList_.get("refmaxwell: row sum drop tol",-1.0);
428  if (rowsumTol > 0.) {
429  typedef Teuchos::ScalarTraits<Scalar> STS;
430  RCP<const Map> rowmap = SM_Matrix_->getRowMap();
431  for (LO row = 0; row < Teuchos::as<LO>(SM_Matrix_->getRowMap()->getNodeNumElements()); ++row) {
432  size_t nnz = SM_Matrix_->getNumEntriesInLocalRow(row);
433  ArrayView<const LO> indices;
434  ArrayView<const SC> vals;
435  SM_Matrix_->getLocalRowView(row, indices, vals);
436 
437  SC rowsum = STS::zero();
438  SC diagval = STS::zero();
439  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
440  LO col = indices[colID];
441  if (row == col)
442  diagval = vals[colID];
443  rowsum += vals[colID];
444  }
445  if (STS::real(rowsum) > STS::magnitude(diagval) * rowsumTol)
446  BCrowsKokkos_(row) = true;
447  }
448  }
449 
450  BCcolsKokkos_ = Kokkos::View<bool*,typename Node::device_type>(Kokkos::ViewAllocateWithoutInitializing("dirichletCols"), D0_Matrix_->getColMap()->getNodeNumElements());
451  BCdomainKokkos_ = Kokkos::View<bool*,typename Node::device_type>(Kokkos::ViewAllocateWithoutInitializing("dirichletCols"), D0_Matrix_->getDomainMap()->getNodeNumElements());
452  DetectDirichletCols<Scalar,LocalOrdinal,GlobalOrdinal,Node>(*D0_Matrix_,BCrowsKokkos_,BCcolsKokkos_,BCdomainKokkos_);
453 
454  dump(BCrowsKokkos_, "BCrows.m");
455  dump(BCcolsKokkos_, "BCcols.m");
456  dump(BCdomainKokkos_, "BCdomain.m");
457 
458  for (size_t i = 0; i<BCrowsKokkos_.size(); i++)
459  if (BCrowsKokkos_(i))
460  BCedgesLocal += 1;
461  for (size_t i = 0; i<BCdomainKokkos_.size(); i++)
462  if (BCdomainKokkos_(i))
463  BCnodesLocal += 1;
464  } else
465 #endif // HAVE_MUELU_KOKKOS_REFACTOR
466  {
467  BCrows_ = Teuchos::arcp_const_cast<bool>(Utilities::DetectDirichletRows(*SM_Matrix_,Teuchos::ScalarTraits<magnitudeType>::eps(),/*count_twos_as_dirichlet=*/true));
468 
469  double rowsumTol = parameterList_.get("refmaxwell: row sum drop tol",-1.0);
470  if (rowsumTol > 0.) {
471  typedef Teuchos::ScalarTraits<Scalar> STS;
472  RCP<const Map> rowmap = SM_Matrix_->getRowMap();
473  for (LO row = 0; row < Teuchos::as<LO>(SM_Matrix_->getRowMap()->getNodeNumElements()); ++row) {
474  size_t nnz = SM_Matrix_->getNumEntriesInLocalRow(row);
475  ArrayView<const LO> indices;
476  ArrayView<const SC> vals;
477  SM_Matrix_->getLocalRowView(row, indices, vals);
478 
479  SC rowsum = STS::zero();
480  SC diagval = STS::zero();
481  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
482  LO col = indices[colID];
483  if (row == col)
484  diagval = vals[colID];
485  rowsum += vals[colID];
486  }
487  if (STS::real(rowsum) > STS::magnitude(diagval) * rowsumTol)
488  BCrows_[row] = true;
489  }
490  }
491 
492  BCcols_.resize(D0_Matrix_->getColMap()->getNodeNumElements());
493  BCdomain_.resize(D0_Matrix_->getDomainMap()->getNodeNumElements());
494  DetectDirichletCols<Scalar,LocalOrdinal,GlobalOrdinal,Node>(*D0_Matrix_,BCrows_,BCcols_,BCdomain_);
495 
496  dump(BCrows_, "BCrows.m");
497  dump(BCcols_, "BCcols.m");
498  dump(BCdomain_, "BCdomain.m");
499 
500  for (auto it = BCrows_.begin(); it != BCrows_.end(); ++it)
501  if (*it)
502  BCedgesLocal += 1;
503  for (auto it = BCdomain_.begin(); it != BCdomain_.end(); ++it)
504  if (*it)
505  BCnodesLocal += 1;
506  }
507 
508 #ifdef HAVE_MPI
509  MueLu_sumAll(SM_Matrix_->getRowMap()->getComm(), BCedgesLocal, BCedges_);
510  MueLu_sumAll(SM_Matrix_->getRowMap()->getComm(), BCnodesLocal, BCnodes_);
511 #else
512  BCedges_ = BCedgesLocal;
513  BCnodes_ = BCnodesLocal;
514 #endif
515  if (IsPrint(Statistics2)) {
516  GetOStream(Statistics2) << "MueLu::RefMaxwell::compute(): Detected " << BCedges_ << " BC rows and " << BCnodes_ << " BC columns." << std::endl;
517  }
518 
519  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<Xpetra::global_size_t>(BCedges_) >= D0_Matrix_->getRangeMap()->getGlobalNumElements(), Exceptions::RuntimeError,
520  "All edges are detected as boundary edges!");
521 
522  }
523 
525  // build nullspace if necessary
526 
527  if(Nullspace_ != null) {
528  // no need to do anything - nullspace is built
529  TEUCHOS_ASSERT(Nullspace_->getMap()->isCompatible(*(SM_Matrix_->getRowMap())));
530  }
531  else if(Nullspace_ == null && Coords_ != null) {
532  // normalize coordinates
533  Array<coordinateType> norms(Coords_->getNumVectors());
534  Coords_->norm2(norms);
535  for (size_t i=0;i<Coords_->getNumVectors();i++)
536  norms[i] = ((coordinateType)1.0)/norms[i];
537  Nullspace_ = MultiVectorFactory::Build(SM_Matrix_->getRowMap(),Coords_->getNumVectors());
538 
539  // Cast coordinates to Scalar so they can be multiplied against D0
540  Array<Scalar> normsSC(Coords_->getNumVectors());
541  for (size_t i=0;i<Coords_->getNumVectors();i++)
542  normsSC[i] = (SC) norms[i];
543 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
544  RCP<MultiVector> CoordsSC;
545  if (useKokkos_)
546  CoordsSC = Utilities_kokkos::RealValuedToScalarMultiVector(Coords_);
547  else
548  CoordsSC = Utilities::RealValuedToScalarMultiVector(Coords_);
549 #else
550  RCP<MultiVector> CoordsSC = Utilities::RealValuedToScalarMultiVector(Coords_);
551 #endif
552  D0_Matrix_->apply(*CoordsSC,*Nullspace_);
553  if (IsPrint(Statistics2)) {
554  // compute edge lengths
555  ArrayRCP<ArrayRCP<const Scalar> > localNullspace(Nullspace_->getNumVectors());
556  for (size_t i = 0; i < Nullspace_->getNumVectors(); i++)
557  localNullspace[i] = Nullspace_->getData(i);
558  coordinateType localMinLen = Teuchos::ScalarTraits<coordinateType>::rmax();
559  coordinateType localMeanLen = Teuchos::ScalarTraits<coordinateType>::zero();
560  coordinateType localMaxLen = Teuchos::ScalarTraits<coordinateType>::zero();
561  for (size_t j=0; j < Nullspace_->getMap()->getNodeNumElements(); j++) {
562  Scalar lenSC = Teuchos::ScalarTraits<Scalar>::zero();
563  for (size_t i=0; i < Nullspace_->getNumVectors(); i++)
564  lenSC += localNullspace[i][j]*localNullspace[i][j];
565  coordinateType len = sqrt(Teuchos::ScalarTraits<Scalar>::real(lenSC));
566  localMinLen = std::min(localMinLen, len);
567  localMaxLen = std::max(localMaxLen, len);
568  localMeanLen += len;
569  }
570  coordinateType minLen, maxLen, meanLen;
571 #ifdef HAVE_MPI
572  RCP<const Teuchos::Comm<int> > comm = Nullspace_->getMap()->getComm();
573  MueLu_minAll(comm, localMinLen, minLen);
574  MueLu_sumAll(comm, localMeanLen, meanLen);
575  MueLu_maxAll(comm, localMaxLen, maxLen);
576 #else
577  minLen = localMinLen;
578  meanLen = localMeanLen;
579  maxLen = localMaxLen;
580 #endif
581  meanLen /= Nullspace_->getMap()->getGlobalNumElements();
582  GetOStream(Statistics0) << "Edge length (min/mean/max): " << minLen << " / " << meanLen << " / " << maxLen << std::endl;
583  }
584  Nullspace_->scale(normsSC());
585  }
586  else {
587  GetOStream(Errors) << "MueLu::RefMaxwell::compute(): either the nullspace or the nodal coordinates must be provided." << std::endl;
588  }
589 
590  if (!reuse) {
591  // Nuke the BC edges in nullspace
592 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
593  if (useKokkos_)
594  Utilities_kokkos::ZeroDirichletRows(Nullspace_,BCrowsKokkos_);
595  else
596  Utilities::ZeroDirichletRows(Nullspace_,BCrows_);
597 #else
598  Utilities::ZeroDirichletRows(Nullspace_,BCrows_);
599 #endif
600  dump(*Nullspace_, "nullspace.m");
601  }
602 
604  // build special prolongator for (1,1)-block
605 
606  if(P11_.is_null()) {
607  // Form A_nodal = D0* Ms D0 (aka TMT_agg)
608  Level fineLevel, coarseLevel;
609  fineLevel.SetFactoryManager(null);
610  coarseLevel.SetFactoryManager(null);
611  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
612  fineLevel.SetLevelID(0);
613  coarseLevel.SetLevelID(1);
614  fineLevel.Set("A",Ms_Matrix_);
615  coarseLevel.Set("P",D0_Matrix_);
616  coarseLevel.setlib(Ms_Matrix_->getDomainMap()->lib());
617  fineLevel.setlib(Ms_Matrix_->getDomainMap()->lib());
618  coarseLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
619  fineLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
620 
621  RCP<RAPFactory> rapFact = rcp(new RAPFactory());
622  ParameterList rapList = *(rapFact->GetValidParameterList());
623  rapList.set("transpose: use implicit", true);
624  rapList.set("rap: fix zero diagonals", parameterList_.get<bool>("rap: fix zero diagonals", true));
625  rapList.set("rap: triple product", parameterList_.get<bool>("rap: triple product", false));
626  rapFact->SetParameterList(rapList);
627 
628 
629  coarseLevel.Request("A", rapFact.get());
630 
631  A_nodal_Matrix_ = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
632 
633  // Apply boundary conditions to A_nodal
634 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
635  if (useKokkos_)
636  Utilities_kokkos::ApplyOAZToMatrixRows(A_nodal_Matrix_,BCdomainKokkos_);
637  else
638 #endif
639  Utilities::ApplyOAZToMatrixRows(A_nodal_Matrix_,BCdomain_);
640  dump(*A_nodal_Matrix_, "A_nodal.m");
641 
642  // build special prolongator
643  GetOStream(Runtime0) << "RefMaxwell::compute(): building special prolongator" << std::endl;
644  buildProlongator();
645  }
646 
647 #ifdef HAVE_MPI
648  bool doRebalancing = false;
649  doRebalancing = parameterList_.get<bool>("refmaxwell: subsolves on subcommunicators", MasterList::getDefault<bool>("refmaxwell: subsolves on subcommunicators"));
650  int rebalanceStriding = parameterList_.get<int>("refmaxwell: subsolves striding", -1);
651  int numProcsAH, numProcsA22;
652 #endif
653  {
654  // build coarse grid operator for (1,1)-block
655  formCoarseMatrix();
656 
657 #ifdef HAVE_MPI
658  int numProcs = SM_Matrix_->getDomainMap()->getComm()->getSize();
659  if (doRebalancing && numProcs > 1) {
660 
661  {
662  Level level;
663  level.SetFactoryManager(null);
664  level.SetLevelID(0);
665  level.Set("A",AH_);
666 
667  auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
668  ParameterList repartheurParams;
669  repartheurParams.set("repartition: start level", 0);
670  // Setting min == target on purpose.
671  int defaultTargetRows = 10000;
672  repartheurParams.set("repartition: min rows per proc", precList11_.get<int>("repartition: target rows per proc", defaultTargetRows));
673  repartheurParams.set("repartition: target rows per proc", precList11_.get<int>("repartition: target rows per proc", defaultTargetRows));
674  repartheurParams.set("repartition: min rows per thread", precList11_.get<int>("repartition: target rows per thread", defaultTargetRows));
675  repartheurParams.set("repartition: target rows per thread", precList11_.get<int>("repartition: target rows per thread", defaultTargetRows));
676  repartheurParams.set("repartition: max imbalance", precList11_.get<double>("repartition: max imbalance", 1.1));
677  repartheurFactory->SetParameterList(repartheurParams);
678 
679  level.Request("number of partitions", repartheurFactory.get());
680  repartheurFactory->Build(level);
681  numProcsAH = level.Get<int>("number of partitions", repartheurFactory.get());
682  numProcsAH = std::min(numProcsAH,numProcs);
683  }
684 
685  {
686  Level level;
687  level.SetFactoryManager(null);
688  level.SetLevelID(0);
689 
690  level.Set("Map",D0_Matrix_->getDomainMap());
691 
692  auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
693  ParameterList repartheurParams;
694  repartheurParams.set("repartition: start level", 0);
695  repartheurParams.set("repartition: use map", true);
696  // Setting min == target on purpose.
697  int defaultTargetRows = 10000;
698  repartheurParams.set("repartition: min rows per proc", precList22_.get<int>("repartition: target rows per proc", defaultTargetRows));
699  repartheurParams.set("repartition: target rows per proc", precList22_.get<int>("repartition: target rows per proc", defaultTargetRows));
700  repartheurParams.set("repartition: min rows per thread", precList22_.get<int>("repartition: target rows per thread", defaultTargetRows));
701  repartheurParams.set("repartition: target rows per thread", precList22_.get<int>("repartition: target rows per thread", defaultTargetRows));
702  // repartheurParams.set("repartition: max imbalance", precList22_.get<double>("repartition: max imbalance", 1.1));
703  repartheurFactory->SetParameterList(repartheurParams);
704 
705  level.Request("number of partitions", repartheurFactory.get());
706  repartheurFactory->Build(level);
707  numProcsA22 = level.Get<int>("number of partitions", repartheurFactory.get());
708  numProcsA22 = std::min(numProcsA22,numProcs);
709  }
710 
711  if (rebalanceStriding >= 1) {
712  TEUCHOS_ASSERT(rebalanceStriding*numProcsAH<=numProcs);
713  TEUCHOS_ASSERT(rebalanceStriding*numProcsA22<=numProcs);
714  if (rebalanceStriding*(numProcsAH+numProcsA22)>numProcs) {
715  GetOStream(Warnings0) << "RefMaxwell::compute(): Disabling striding = " << rebalanceStriding << ", since AH needs " << numProcsAH
716  << " procs and A22 needs " << numProcsA22 << " procs."<< std::endl;
717  rebalanceStriding = -1;
718  }
719  }
720 
721  } else
722  doRebalancing = false;
723 
724  if (doRebalancing) { // rebalance AH
725  if (!reuse) {
726  RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Rebalance AH");
727 
728  Level fineLevel, coarseLevel;
729  fineLevel.SetFactoryManager(null);
730  coarseLevel.SetFactoryManager(null);
731  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
732  fineLevel.SetLevelID(0);
733  coarseLevel.SetLevelID(1);
734  coarseLevel.Set("A",AH_);
735  coarseLevel.Set("P",P11_);
736  coarseLevel.Set("Coordinates",CoordsH_);
737  coarseLevel.Set("number of partitions", numProcsAH);
738  coarseLevel.Set("repartition: heuristic target rows per process", 1000);
739 
740  coarseLevel.setlib(AH_->getDomainMap()->lib());
741  fineLevel.setlib(AH_->getDomainMap()->lib());
742  coarseLevel.setObjectLabel("RefMaxwell coarse (1,1)");
743  fineLevel.setObjectLabel("RefMaxwell coarse (1,1)");
744 
745  std::string partName = precList11_.get<std::string>("repartition: partitioner", "zoltan2");
746  RCP<Factory> partitioner;
747  if (partName == "zoltan") {
748 #ifdef HAVE_MUELU_ZOLTAN
749  partitioner = rcp(new ZoltanInterface());
750  // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
751  // partitioner->SetFactory("number of partitions", repartheurFactory);
752 #else
753  throw Exceptions::RuntimeError("Zoltan interface is not available");
754 #endif
755  } else if (partName == "zoltan2") {
756 #ifdef HAVE_MUELU_ZOLTAN2
757  partitioner = rcp(new Zoltan2Interface());
758  ParameterList partParams;
759  RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList11_.sublist("repartition: params", false)));
760  partParams.set("ParameterList", partpartParams);
761  partitioner->SetParameterList(partParams);
762  // partitioner->SetFactory("number of partitions", repartheurFactory);
763 #else
764  throw Exceptions::RuntimeError("Zoltan2 interface is not available");
765 #endif
766  }
767 
768  auto repartFactory = rcp(new RepartitionFactory());
769  ParameterList repartParams;
770  repartParams.set("repartition: print partition distribution", precList11_.get<bool>("repartition: print partition distribution", false));
771  repartParams.set("repartition: remap parts", precList11_.get<bool>("repartition: remap parts", true));
772  if (rebalanceStriding >= 1) {
773  bool acceptPart = (SM_Matrix_->getDomainMap()->getComm()->getRank() % rebalanceStriding) == 0;
774  if (SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsAH*rebalanceStriding)
775  acceptPart = false;
776  repartParams.set("repartition: remap accept partition", acceptPart);
777  }
778  repartFactory->SetParameterList(repartParams);
779  // repartFactory->SetFactory("number of partitions", repartheurFactory);
780  repartFactory->SetFactory("Partition", partitioner);
781 
782  auto newP = rcp(new RebalanceTransferFactory());
783  ParameterList newPparams;
784  newPparams.set("type", "Interpolation");
785  newPparams.set("repartition: rebalance P and R", precList11_.get<bool>("repartition: rebalance P and R", false));
786  newPparams.set("repartition: use subcommunicators", true);
787  newPparams.set("repartition: rebalance Nullspace", false);
788  newP->SetFactory("Coordinates", NoFactory::getRCP());
789  newP->SetParameterList(newPparams);
790  newP->SetFactory("Importer", repartFactory);
791 
792  auto newA = rcp(new RebalanceAcFactory());
793  ParameterList rebAcParams;
794  rebAcParams.set("repartition: use subcommunicators", true);
795  newA->SetParameterList(rebAcParams);
796  newA->SetFactory("Importer", repartFactory);
797 
798  coarseLevel.Request("P", newP.get());
799  coarseLevel.Request("Importer", repartFactory.get());
800  coarseLevel.Request("A", newA.get());
801  coarseLevel.Request("Coordinates", newP.get());
802  repartFactory->Build(coarseLevel);
803 
804  if (!precList11_.get<bool>("repartition: rebalance P and R", false))
805  ImporterH_ = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
806  P11_ = coarseLevel.Get< RCP<Matrix> >("P", newP.get());
807  AH_ = coarseLevel.Get< RCP<Matrix> >("A", newA.get());
808  CoordsH_ = coarseLevel.Get< RCP<RealValuedMultiVector> >("Coordinates", newP.get());
809 
810  } else {
811  ParameterList XpetraList;
812  XpetraList.set("Restrict Communicator",true);
813  XpetraList.set("Timer Label","MueLu RefMaxwell::RebalanceAH");
814  RCP<const Map> targetMap = ImporterH_->getTargetMap();
815  AH_ = MatrixFactory::Build(AH_, *ImporterH_, *ImporterH_, targetMap, targetMap, rcp(&XpetraList,false));
816  }
817  if (!AH_.is_null())
818  AH_->setObjectLabel("RefMaxwell coarse (1,1)");
819  }
820 #endif // HAVE_MPI
821 
822 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
823  // This should be taken out again as soon as
824  // CoalesceDropFactory_kokkos supports BlockSize > 1 and
825  // drop tol != 0.0
826  if (useKokkos_ && precList11_.isParameter("aggregation: drop tol") && precList11_.get<double>("aggregation: drop tol") != 0.0) {
827  GetOStream(Warnings0) << "RefMaxwell::compute(): Setting \"aggregation: drop tol\". to 0.0, since CoalesceDropFactory_kokkos does not "
828  << "support BlockSize > 1 and drop tol != 0.0" << std::endl;
829  precList11_.set("aggregation: drop tol", 0.0);
830  }
831 #endif
832  dump(*P11_, "P11.m");
833 
834  if (!implicitTranspose_) {
835 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
836  if (useKokkos_)
837  R11_ = Utilities_kokkos::Transpose(*P11_);
838  else
839  R11_ = Utilities::Transpose(*P11_);
840 #else
841  R11_ = Utilities::Transpose(*P11_);
842 #endif
843  dump(*R11_, "R11.m");
844  }
845 
847  if (!AH_.is_null()) {
848  dump(*AH_, "AH.m");
849  dumpCoords(*CoordsH_, "coordsH.m");
850  int oldRank = SetProcRankVerbose(AH_->getDomainMap()->getComm()->getRank());
851  if (IsPrint(Statistics2)) {
852  RCP<ParameterList> params = rcp(new ParameterList());;
853  params->set("printLoadBalancingInfo", true);
854  params->set("printCommInfo", true);
855  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*AH_, "AH", params);
856  }
857  if (!reuse) {
858  ParameterList& userParamList = precList11_.sublist("user data");
859  userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", CoordsH_);
860  HierarchyH_ = MueLu::CreateXpetraPreconditioner(AH_, precList11_);
861  } else {
862  RCP<MueLu::Level> level0 = HierarchyH_->GetLevel(0);
863  level0->Set("A", AH_);
864  HierarchyH_->SetupRe();
865  }
866  SetProcRankVerbose(oldRank);
867  }
868  VerboseObject::SetDefaultVerbLevel(verbosityLevel);
869 
870  }
871 
872  if(!reuse) {
873  GetOStream(Runtime0) << "RefMaxwell::compute(): nuking BC nodes of D0" << std::endl;
874 
875  D0_Matrix_->resumeFill();
876  Scalar replaceWith;
877  if (D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
878  replaceWith= Teuchos::ScalarTraits<SC>::eps();
879  else
880  replaceWith = Teuchos::ScalarTraits<SC>::zero();
881 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
882  if (useKokkos_) {
883  Utilities_kokkos::ZeroDirichletCols(D0_Matrix_,BCcolsKokkos_,replaceWith);
884  } else {
885  Utilities::ZeroDirichletCols(D0_Matrix_,BCcols_,replaceWith);
886  }
887 #else
888  Utilities::ZeroDirichletCols(D0_Matrix_,BCcols_,replaceWith);
889 #endif
890  D0_Matrix_->fillComplete(D0_Matrix_->getDomainMap(),D0_Matrix_->getRangeMap());
891  }
892 
894  // Build A22 = D0* SM D0 and hierarchy for A22
895  {
896  GetOStream(Runtime0) << "RefMaxwell::compute(): building MG for (2,2)-block" << std::endl;
897 
898  { // build fine grid operator for (2,2)-block, D0* SM D0 (aka TMT)
899  RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Build A22");
900 
901  Level fineLevel, coarseLevel;
902  fineLevel.SetFactoryManager(null);
903  coarseLevel.SetFactoryManager(null);
904  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
905  fineLevel.SetLevelID(0);
906  coarseLevel.SetLevelID(1);
907  fineLevel.Set("A",SM_Matrix_);
908  coarseLevel.Set("P",D0_Matrix_);
909  coarseLevel.Set("Coordinates",Coords_);
910 
911  coarseLevel.setlib(SM_Matrix_->getDomainMap()->lib());
912  fineLevel.setlib(SM_Matrix_->getDomainMap()->lib());
913  coarseLevel.setObjectLabel("RefMaxwell (2,2)");
914  fineLevel.setObjectLabel("RefMaxwell (2,2)");
915 
916  RCP<RAPFactory> rapFact = rcp(new RAPFactory());
917  ParameterList rapList = *(rapFact->GetValidParameterList());
918  rapList.set("transpose: use implicit", true);
919  rapList.set("rap: fix zero diagonals", parameterList_.get<bool>("rap: fix zero diagonals", true));
920  rapList.set("rap: triple product", parameterList_.get<bool>("rap: triple product", false));
921  rapFact->SetParameterList(rapList);
922 
923  if (!A22_AP_reuse_data_.is_null()) {
924  coarseLevel.AddKeepFlag("AP reuse data", rapFact.get());
925  coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("AP reuse data", A22_AP_reuse_data_, rapFact.get());
926  }
927  if (!A22_RAP_reuse_data_.is_null()) {
928  coarseLevel.AddKeepFlag("RAP reuse data", rapFact.get());
929  coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("RAP reuse data", A22_RAP_reuse_data_, rapFact.get());
930  }
931 
932 #ifdef HAVE_MPI
933  if (doRebalancing) {
934 
935  if (!reuse) {
936 
937  coarseLevel.Set("number of partitions", numProcsA22);
938  coarseLevel.Set("repartition: heuristic target rows per process", 1000);
939 
940  std::string partName = precList22_.get<std::string>("repartition: partitioner", "zoltan2");
941  RCP<Factory> partitioner;
942  if (partName == "zoltan") {
943 #ifdef HAVE_MUELU_ZOLTAN
944  partitioner = rcp(new ZoltanInterface());
945  partitioner->SetFactory("A", rapFact);
946  // partitioner->SetFactory("number of partitions", repartheurFactory);
947  // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
948 #else
949  throw Exceptions::RuntimeError("Zoltan interface is not available");
950 #endif
951  } else if (partName == "zoltan2") {
952 #ifdef HAVE_MUELU_ZOLTAN2
953  partitioner = rcp(new Zoltan2Interface());
954  ParameterList partParams;
955  RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList22_.sublist("repartition: params", false)));
956  partParams.set("ParameterList", partpartParams);
957  partitioner->SetParameterList(partParams);
958  partitioner->SetFactory("A", rapFact);
959  // partitioner->SetFactory("number of partitions", repartheurFactory);
960 #else
961  throw Exceptions::RuntimeError("Zoltan2 interface is not available");
962 #endif
963  }
964 
965  auto repartFactory = rcp(new RepartitionFactory());
966  ParameterList repartParams;
967  repartParams.set("repartition: print partition distribution", precList22_.get<bool>("repartition: print partition distribution", false));
968  repartParams.set("repartition: remap parts", precList22_.get<bool>("repartition: remap parts", true));
969  if (rebalanceStriding >= 1) {
970  bool acceptPart = ((SM_Matrix_->getDomainMap()->getComm()->getSize()-1-SM_Matrix_->getDomainMap()->getComm()->getRank()) % rebalanceStriding) == 0;
971  if (SM_Matrix_->getDomainMap()->getComm()->getSize()-1-SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsA22*rebalanceStriding)
972  acceptPart = false;
973  if (acceptPart)
974  TEUCHOS_ASSERT(AH_.is_null());
975  repartParams.set("repartition: remap accept partition", acceptPart);
976  } else
977  repartParams.set("repartition: remap accept partition", AH_.is_null());
978  repartFactory->SetParameterList(repartParams);
979  repartFactory->SetFactory("A", rapFact);
980  // repartFactory->SetFactory("number of partitions", repartheurFactory);
981  repartFactory->SetFactory("Partition", partitioner);
982 
983  auto newP = rcp(new RebalanceTransferFactory());
984  ParameterList newPparams;
985  newPparams.set("type", "Interpolation");
986  newPparams.set("repartition: rebalance P and R", precList22_.get<bool>("repartition: rebalance P and R", false));
987  newPparams.set("repartition: use subcommunicators", true);
988  newPparams.set("repartition: rebalance Nullspace", false);
989  newP->SetFactory("Coordinates", NoFactory::getRCP());
990  newP->SetParameterList(newPparams);
991  newP->SetFactory("Importer", repartFactory);
992 
993  auto newA = rcp(new RebalanceAcFactory());
994  ParameterList rebAcParams;
995  rebAcParams.set("repartition: use subcommunicators", true);
996  newA->SetParameterList(rebAcParams);
997  newA->SetFactory("A", rapFact);
998  newA->SetFactory("Importer", repartFactory);
999 
1000  coarseLevel.Request("P", newP.get());
1001  coarseLevel.Request("Importer", repartFactory.get());
1002  coarseLevel.Request("A", newA.get());
1003  if (precList22_.isType<std::string>("reuse: type") && precList22_.get<std::string>("reuse: type") != "none") {
1004  if (!parameterList_.get<bool>("rap: triple product", false))
1005  coarseLevel.Request("AP reuse data", rapFact.get());
1006  coarseLevel.Request("RAP reuse data", rapFact.get());
1007  }
1008  coarseLevel.Request("Coordinates", newP.get());
1009  rapFact->Build(fineLevel,coarseLevel);
1010  repartFactory->Build(coarseLevel);
1011 
1012  if (!precList22_.get<bool>("repartition: rebalance P and R", false))
1013  Importer22_ = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
1014  D0_Matrix_ = coarseLevel.Get< RCP<Matrix> >("P", newP.get());
1015  A22_ = coarseLevel.Get< RCP<Matrix> >("A", newA.get());
1016  if (precList22_.isType<std::string>("reuse: type") && precList22_.get<std::string>("reuse: type") != "none") {
1017  if (!parameterList_.get<bool>("rap: triple product", false))
1018  A22_AP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", rapFact.get());
1019  A22_RAP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("RAP reuse data", rapFact.get());
1020  }
1021  Coords_ = coarseLevel.Get< RCP<RealValuedMultiVector> >("Coordinates", newP.get());
1022  } else {
1023  coarseLevel.Request("A", rapFact.get());
1024  if (precList22_.isType<std::string>("reuse: type") && precList22_.get<std::string>("reuse: type") != "none") {
1025  coarseLevel.Request("AP reuse data", rapFact.get());
1026  coarseLevel.Request("RAP reuse data", rapFact.get());
1027  }
1028 
1029  A22_ = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
1030  if (precList22_.isType<std::string>("reuse: type") && precList22_.get<std::string>("reuse: type") != "none") {
1031  if (!parameterList_.get<bool>("rap: triple product", false))
1032  A22_AP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", rapFact.get());
1033  A22_RAP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("RAP reuse data", rapFact.get());
1034  }
1035 
1036  ParameterList XpetraList;
1037  XpetraList.set("Restrict Communicator",true);
1038  XpetraList.set("Timer Label","MueLu RefMaxwell::RebalanceA22");
1039  RCP<const Map> targetMap = Importer22_->getTargetMap();
1040  A22_ = MatrixFactory::Build(A22_, *Importer22_, *Importer22_, targetMap, targetMap, rcp(&XpetraList,false));
1041  }
1042  } else
1043 #endif // HAVE_MPI
1044  {
1045  coarseLevel.Request("A", rapFact.get());
1046  if (precList22_.isType<std::string>("reuse: type") && precList22_.get<std::string>("reuse: type") != "none") {
1047  coarseLevel.Request("AP reuse data", rapFact.get());
1048  coarseLevel.Request("RAP reuse data", rapFact.get());
1049  }
1050 
1051  A22_ = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
1052 
1053  if (precList22_.isType<std::string>("reuse: type") && precList22_.get<std::string>("reuse: type") != "none") {
1054  if (!parameterList_.get<bool>("rap: triple product", false))
1055  A22_AP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", rapFact.get());
1056  A22_RAP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("RAP reuse data", rapFact.get());
1057  }
1058  }
1059  }
1060 
1061  if (!implicitTranspose_) {
1062 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1063  if (useKokkos_)
1064  D0_T_Matrix_ = Utilities_kokkos::Transpose(*D0_Matrix_);
1065  else
1066  D0_T_Matrix_ = Utilities::Transpose(*D0_Matrix_);
1067 #else
1068  D0_T_Matrix_ = Utilities::Transpose(*D0_Matrix_);
1069 #endif
1070  }
1071 
1072  VerbLevel verbosityLevel = VerboseObject::GetDefaultVerbLevel();
1073  if (!A22_.is_null()) {
1074  dump(*A22_, "A22.m");
1075  A22_->setObjectLabel("RefMaxwell (2,2)");
1076  int oldRank = SetProcRankVerbose(A22_->getDomainMap()->getComm()->getRank());
1077  if (IsPrint(Statistics2)) {
1078  RCP<ParameterList> params = rcp(new ParameterList());;
1079  params->set("printLoadBalancingInfo", true);
1080  params->set("printCommInfo", true);
1081  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*A22_, "A22", params);
1082  }
1083  if (!reuse) {
1084  ParameterList& userParamList = precList22_.sublist("user data");
1085  userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", Coords_);
1086  // If we detected no boundary conditions, the (2,2) problem is singular.
1087  // Therefore, if we want to use a direct coarse solver, we need to fix up the nullspace.
1088  std::string coarseType = "";
1089  if (precList22_.isParameter("coarse: type")) {
1090  coarseType = precList22_.get<std::string>("coarse: type");
1091  // Transform string to "Abcde" notation
1092  std::transform(coarseType.begin(), coarseType.end(), coarseType.begin(), ::tolower);
1093  std::transform(coarseType.begin(), ++coarseType.begin(), coarseType.begin(), ::toupper);
1094  }
1095  if (BCedges_ == 0 &&
1096  (coarseType == "" ||
1097  coarseType == "Klu" ||
1098  coarseType == "Klu2") &&
1099  (!precList22_.isSublist("coarse: params") ||
1100  !precList22_.sublist("coarse: params").isParameter("fix nullspace")))
1101  precList22_.sublist("coarse: params").set("fix nullspace",true);
1102  Hierarchy22_ = MueLu::CreateXpetraPreconditioner(A22_, precList22_);
1103  } else {
1104  RCP<MueLu::Level> level0 = Hierarchy22_->GetLevel(0);
1105  level0->Set("A", A22_);
1106  Hierarchy22_->SetupRe();
1107  }
1108  SetProcRankVerbose(oldRank);
1109  }
1110  VerboseObject::SetDefaultVerbLevel(verbosityLevel);
1111 
1112  }
1113 
1114  if(!reuse) {
1115  GetOStream(Runtime0) << "RefMaxwell::compute(): nuking BC edges of D0" << std::endl;
1116 
1117  D0_Matrix_->resumeFill();
1118  Scalar replaceWith;
1119  if (D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
1120  replaceWith= Teuchos::ScalarTraits<SC>::eps();
1121  else
1122  replaceWith = Teuchos::ScalarTraits<SC>::zero();
1123 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1124  if (useKokkos_) {
1125  Utilities_kokkos::ZeroDirichletRows(D0_Matrix_,BCrowsKokkos_,replaceWith);
1126  } else {
1127  Utilities::ZeroDirichletRows(D0_Matrix_,BCrows_,replaceWith);
1128  }
1129 #else
1130  Utilities::ZeroDirichletRows(D0_Matrix_,BCrows_,replaceWith);
1131 #endif
1132  D0_Matrix_->fillComplete(D0_Matrix_->getDomainMap(),D0_Matrix_->getRangeMap());
1133  dump(*D0_Matrix_, "D0_nuked.m");
1134  }
1135 
1136  {
1137  if (parameterList_.isType<std::string>("smoother: type") &&
1138  parameterList_.get<std::string>("smoother: type") == "hiptmair" &&
1139  SM_Matrix_->getDomainMap()->lib() == Xpetra::UseTpetra &&
1140  A22_->getDomainMap()->lib() == Xpetra::UseTpetra &&
1141  D0_Matrix_->getDomainMap()->lib() == Xpetra::UseTpetra) {
1142 #if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
1143  ParameterList hiptmairPreList, hiptmairPostList, smootherPreList, smootherPostList;
1144 
1145  if (smootherList_.isSublist("smoother: pre params"))
1146  smootherPreList = smootherList_.sublist("smoother: pre params");
1147  else if (smootherList_.isSublist("smoother: params"))
1148  smootherPreList = smootherList_.sublist("smoother: params");
1149  hiptmairPreList.set("hiptmair: smoother type 1",
1150  smootherPreList.get<std::string>("hiptmair: smoother type 1", "CHEBYSHEV"));
1151  hiptmairPreList.set("hiptmair: smoother type 2",
1152  smootherPreList.get<std::string>("hiptmair: smoother type 2", "CHEBYSHEV"));
1153  if(smootherPreList.isSublist("hiptmair: smoother list 1"))
1154  hiptmairPreList.set("hiptmair: smoother list 1", smootherPreList.sublist("hiptmair: smoother list 1"));
1155  if(smootherPreList.isSublist("hiptmair: smoother list 2"))
1156  hiptmairPreList.set("hiptmair: smoother list 2", smootherPreList.sublist("hiptmair: smoother list 2"));
1157  hiptmairPreList.set("hiptmair: pre or post",
1158  smootherPreList.get<std::string>("hiptmair: pre or post", "pre"));
1159  hiptmairPreList.set("hiptmair: zero starting solution",
1160  smootherPreList.get<bool>("hiptmair: zero starting solution", true));
1161 
1162  if (smootherList_.isSublist("smoother: post params"))
1163  smootherPostList = smootherList_.sublist("smoother: post params");
1164  else if (smootherList_.isSublist("smoother: params"))
1165  smootherPostList = smootherList_.sublist("smoother: params");
1166  hiptmairPostList.set("hiptmair: smoother type 1",
1167  smootherPostList.get<std::string>("hiptmair: smoother type 1", "CHEBYSHEV"));
1168  hiptmairPostList.set("hiptmair: smoother type 2",
1169  smootherPostList.get<std::string>("hiptmair: smoother type 2", "CHEBYSHEV"));
1170  if(smootherPostList.isSublist("hiptmair: smoother list 1"))
1171  hiptmairPostList.set("hiptmair: smoother list 1", smootherPostList.sublist("hiptmair: smoother list 1"));
1172  if(smootherPostList.isSublist("hiptmair: smoother list 2"))
1173  hiptmairPostList.set("hiptmair: smoother list 2", smootherPostList.sublist("hiptmair: smoother list 2"));
1174  hiptmairPostList.set("hiptmair: pre or post",
1175  smootherPostList.get<std::string>("hiptmair: pre or post", "post"));
1176  hiptmairPostList.set("hiptmair: zero starting solution",
1177  smootherPostList.get<bool>("hiptmair: zero starting solution", false));
1178 
1179  typedef Tpetra::RowMatrix<SC, LO, GO, NO> TROW;
1180  RCP<const TROW > EdgeMatrix = Utilities::Op2NonConstTpetraRow(SM_Matrix_);
1181  RCP<const TROW > NodeMatrix = Utilities::Op2NonConstTpetraRow(A22_);
1182  RCP<const TROW > PMatrix = Utilities::Op2NonConstTpetraRow(D0_Matrix_);
1183 
1184  hiptmairPreSmoother_ = rcp( new Ifpack2::Hiptmair<TROW>(EdgeMatrix,NodeMatrix,PMatrix) );
1185  hiptmairPreSmoother_ -> setParameters(hiptmairPreList);
1186  hiptmairPreSmoother_ -> initialize();
1187  hiptmairPreSmoother_ -> compute();
1188  hiptmairPostSmoother_ = rcp( new Ifpack2::Hiptmair<TROW>(EdgeMatrix,NodeMatrix,PMatrix) );
1189  hiptmairPostSmoother_ -> setParameters(hiptmairPostList);
1190  hiptmairPostSmoother_ -> initialize();
1191  hiptmairPostSmoother_ -> compute();
1192  useHiptmairSmoothing_ = true;
1193 #else
1194  throw(Xpetra::Exceptions::RuntimeError("MueLu must be compiled with Ifpack2 for Hiptmair smoothing."));
1195 #endif // defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
1196  } else {
1197  if (parameterList_.isType<std::string>("smoother: pre type") && parameterList_.isType<std::string>("smoother: post type")) {
1198  std::string preSmootherType = parameterList_.get<std::string>("smoother: pre type");
1199  std::string postSmootherType = parameterList_.get<std::string>("smoother: post type");
1200 
1201  ParameterList preSmootherList, postSmootherList;
1202  if (parameterList_.isSublist("smoother: pre params"))
1203  preSmootherList = parameterList_.sublist("smoother: pre params");
1204  if (parameterList_.isSublist("smoother: post params"))
1205  postSmootherList = parameterList_.sublist("smoother: post params");
1206 
1207  Level level;
1208  RCP<MueLu::FactoryManagerBase> factoryHandler = rcp(new FactoryManager());
1209  level.SetFactoryManager(factoryHandler);
1210  level.SetLevelID(0);
1211  level.setObjectLabel("RefMaxwell (1,1)");
1212  level.Set("A",SM_Matrix_);
1213  level.setlib(SM_Matrix_->getDomainMap()->lib());
1214 
1215  RCP<SmootherPrototype> preSmootherPrototype = rcp(new TrilinosSmoother(preSmootherType, preSmootherList));
1216  RCP<SmootherFactory> preSmootherFact = rcp(new SmootherFactory(preSmootherPrototype));
1217 
1218  RCP<SmootherPrototype> postSmootherPrototype = rcp(new TrilinosSmoother(postSmootherType, postSmootherList));
1219  RCP<SmootherFactory> postSmootherFact = rcp(new SmootherFactory(postSmootherPrototype));
1220 
1221  level.Request("PreSmoother",preSmootherFact.get());
1222  preSmootherFact->Build(level);
1223  PreSmoother_ = level.Get<RCP<SmootherBase> >("PreSmoother",preSmootherFact.get());
1224 
1225  level.Request("PostSmoother",postSmootherFact.get());
1226  postSmootherFact->Build(level);
1227  PostSmoother_ = level.Get<RCP<SmootherBase> >("PostSmoother",postSmootherFact.get());
1228  } else {
1229  std::string smootherType = parameterList_.get<std::string>("smoother: type", "CHEBYSHEV");
1230  Level level;
1231  RCP<MueLu::FactoryManagerBase> factoryHandler = rcp(new FactoryManager());
1232  level.SetFactoryManager(factoryHandler);
1233  level.SetLevelID(0);
1234  level.setObjectLabel("RefMaxwell (1,1)");
1235  level.Set("A",SM_Matrix_);
1236  level.setlib(SM_Matrix_->getDomainMap()->lib());
1237  RCP<SmootherPrototype> smootherPrototype = rcp(new TrilinosSmoother(smootherType, smootherList_));
1238  RCP<SmootherFactory> SmootherFact = rcp(new SmootherFactory(smootherPrototype));
1239  level.Request("PreSmoother",SmootherFact.get());
1240  SmootherFact->Build(level);
1241  PreSmoother_ = level.Get<RCP<SmootherBase> >("PreSmoother",SmootherFact.get());
1242  PostSmoother_ = PreSmoother_;
1243  }
1244  useHiptmairSmoothing_ = false;
1245  }
1246  }
1247 
1248  if (!ImporterH_.is_null()) {
1249  RCP<const Import> ImporterP11 = ImportFactory::Build(ImporterH_->getTargetMap(),P11_->getColMap());
1250  rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix()->replaceDomainMapAndImporter(ImporterH_->getTargetMap(), ImporterP11);
1251  }
1252 
1253  if (!Importer22_.is_null()) {
1254  RCP<const Import> ImporterD0 = ImportFactory::Build(Importer22_->getTargetMap(),D0_Matrix_->getColMap());
1255  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(Importer22_->getTargetMap(), ImporterD0);
1256  }
1257 
1258  if ((!D0_T_Matrix_.is_null()) &&
1259  (!R11_.is_null()) &&
1260  (!rcp_dynamic_cast<CrsMatrixWrap>(D0_T_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
1261  (!rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
1262  (D0_T_Matrix_->getColMap()->lib() == Xpetra::UseTpetra) &&
1263  (R11_->getColMap()->lib() == Xpetra::UseTpetra))
1264  D0_T_R11_colMapsMatch_ = D0_T_Matrix_->getColMap()->isSameAs(*R11_->getColMap());
1265  else
1266  D0_T_R11_colMapsMatch_ = false;
1267  if (D0_T_R11_colMapsMatch_)
1268  GetOStream(Runtime0) << "RefMaxwell::compute(): D0_T and R11 have matching colMaps" << std::endl;
1269 
1270  // Allocate temporary MultiVectors for solve
1271  allocateMemory(1);
1272 
1273  if (parameterList_.isSublist("matvec params"))
1274  {
1275  RCP<ParameterList> matvecParams = rcpFromRef(parameterList_.sublist("matvec params"));
1276  setMatvecParams(*SM_Matrix_, matvecParams);
1277  setMatvecParams(*D0_Matrix_, matvecParams);
1278  setMatvecParams(*P11_, matvecParams);
1279  if (!D0_T_Matrix_.is_null()) setMatvecParams(*D0_T_Matrix_, matvecParams);
1280  if (!R11_.is_null()) setMatvecParams(*R11_, matvecParams);
1281  if (!ImporterH_.is_null()) ImporterH_->setDistributorParameters(matvecParams);
1282  if (!Importer22_.is_null()) Importer22_->setDistributorParameters(matvecParams);
1283  }
1284  if (!ImporterH_.is_null() && parameterList_.isSublist("refmaxwell: ImporterH params")){
1285  RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist("refmaxwell: ImporterH params"));
1286  ImporterH_->setDistributorParameters(importerParams);
1287  }
1288  if (!Importer22_.is_null() && parameterList_.isSublist("refmaxwell: Importer22 params")){
1289  RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist("refmaxwell: Importer22 params"));
1290  Importer22_->setDistributorParameters(importerParams);
1291  }
1292 
1293  describe(GetOStream(Runtime0));
1294 
1295 #ifdef HAVE_MUELU_CUDA
1296  if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStop();
1297 #endif
1298  }
1299 
1300 
1301  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1303  if (!R11_.is_null())
1304  P11res_ = MultiVectorFactory::Build(R11_->getRangeMap(), numVectors);
1305  else
1306  P11res_ = MultiVectorFactory::Build(P11_->getDomainMap(), numVectors);
1307  if (D0_T_R11_colMapsMatch_)
1308  D0TR11Tmp_ = MultiVectorFactory::Build(R11_->getColMap(), numVectors);
1309  if (!ImporterH_.is_null()) {
1310  P11resTmp_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), numVectors);
1311  P11xTmp_ = MultiVectorFactory::Build(ImporterH_->getSourceMap(), numVectors);
1312  P11x_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), numVectors);
1313  } else
1314  P11x_ = MultiVectorFactory::Build(P11_->getDomainMap(), numVectors);
1315  if (!D0_T_Matrix_.is_null())
1316  D0res_ = MultiVectorFactory::Build(D0_T_Matrix_->getRangeMap(), numVectors);
1317  else
1318  D0res_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), numVectors);
1319  if (!Importer22_.is_null()) {
1320  D0resTmp_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), numVectors);
1321  D0xTmp_ = MultiVectorFactory::Build(Importer22_->getSourceMap(), numVectors);
1322  D0x_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), numVectors);
1323  } else
1324  D0x_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), numVectors);
1325  residual_ = MultiVectorFactory::Build(SM_Matrix_->getDomainMap(), numVectors);
1326  }
1327 
1328 
1329  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1330  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const Matrix& A, std::string name) const {
1331  if (dump_matrices_) {
1332  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1333  Xpetra::IO<SC, LO, GO, NO>::Write(name, A);
1334  }
1335  }
1336 
1337 
1338  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1339  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const MultiVector& X, std::string name) const {
1340  if (dump_matrices_) {
1341  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1342  Xpetra::IO<SC, LO, GO, NO>::Write(name, X);
1343  }
1344  }
1345 
1346 
1347  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1349  if (dump_matrices_) {
1350  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1351  Xpetra::IO<coordinateType, LO, GO, NO>::Write(name, X);
1352  }
1353  }
1354 
1355 
1356  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1357  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const Teuchos::ArrayRCP<bool>& v, std::string name) const {
1358  if (dump_matrices_) {
1359  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1360  std::ofstream out(name);
1361  for (size_t i = 0; i < Teuchos::as<size_t>(v.size()); i++)
1362  out << v[i] << "\n";
1363  }
1364  }
1365 
1366 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1367  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1369  if (dump_matrices_) {
1370  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1371  std::ofstream out(name);
1372  auto vH = Kokkos::create_mirror_view (v);
1373  Kokkos::deep_copy(vH , v);
1374  for (size_t i = 0; i < vH.size(); i++)
1375  out << vH[i] << "\n";
1376  }
1377  }
1378 #endif
1379 
1380  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1381  Teuchos::RCP<Teuchos::TimeMonitor> RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getTimer(std::string name, RCP<const Teuchos::Comm<int> > comm) const {
1382  if (IsPrint(Timings)) {
1383  if (!syncTimers_)
1384  return Teuchos::rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer(name)));
1385  else {
1386  if (comm.is_null())
1387  return Teuchos::rcp(new Teuchos::SyncTimeMonitor(*Teuchos::TimeMonitor::getNewTimer(name), SM_Matrix_->getRowMap()->getComm().ptr()));
1388  else
1389  return Teuchos::rcp(new Teuchos::SyncTimeMonitor(*Teuchos::TimeMonitor::getNewTimer(name), comm.ptr()));
1390  }
1391  } else
1392  return Teuchos::null;
1393  }
1394 
1395 
1396  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1398  // The P11 matrix maps node based aggregrates { A_j } to edges { e_i }.
1399  //
1400  // The old implementation used
1401  // P11(i, j*dim+k) = sum_{nodes n_l in e_i intersected with A_j} 0.5 * phi_k(e_i) * P(n_l, A_j)
1402  // yet the paper gives
1403  // P11(i, j*dim+k) = sum_{nodes n_l in e_i intersected with A_j} 0.5 * phi_k(e_i)
1404  // where phi_k is the k-th nullspace vector.
1405  //
1406  // The graph of D0 contains the incidence from nodes to edges.
1407  // The nodal prolongator P maps aggregates to nodes.
1408 
1409  const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
1410  const SC SC_ONE = Teuchos::ScalarTraits<SC>::one();
1411  const Scalar half = SC_ONE / (SC_ONE + SC_ONE);
1412  size_t dim = Nullspace_->getNumVectors();
1413  size_t numLocalRows = SM_Matrix_->getNodeNumRows();
1414 
1415  // build prolongator: algorithm 1 in the reference paper
1416  // First, build nodal unsmoothed prolongator using the matrix A_nodal
1417  RCP<Matrix> P_nodal;
1418  bool read_P_from_file = parameterList_.get("refmaxwell: read_P_from_file",false);
1419  if (read_P_from_file) {
1420  // This permits to read in an ML prolongator, so that we get the same hierarchy.
1421  // (ML and MueLu typically produce different aggregates.)
1422  std::string P_filename = parameterList_.get("refmaxwell: P_filename",std::string("P.m"));
1423  std::string domainmap_filename = parameterList_.get("refmaxwell: P_domainmap_filename",std::string("domainmap_P.m"));
1424  std::string colmap_filename = parameterList_.get("refmaxwell: P_colmap_filename",std::string("colmap_P.m"));
1425  std::string coords_filename = parameterList_.get("refmaxwell: CoordsH",std::string("coordsH.m"));
1426  RCP<const Map> colmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(colmap_filename, A_nodal_Matrix_->getDomainMap()->lib(),A_nodal_Matrix_->getDomainMap()->getComm());
1427  RCP<const Map> domainmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(domainmap_filename, A_nodal_Matrix_->getDomainMap()->lib(),A_nodal_Matrix_->getDomainMap()->getComm());
1428  P_nodal = Xpetra::IO<SC, LO, GO, NO>::Read(P_filename, A_nodal_Matrix_->getDomainMap(), colmap, domainmap, A_nodal_Matrix_->getDomainMap());
1429  CoordsH_ = Xpetra::IO<typename RealValuedMultiVector::scalar_type, LO, GO, NO>::ReadMultiVector(coords_filename, domainmap);
1430  } else {
1431  Level fineLevel, coarseLevel;
1432  fineLevel.SetFactoryManager(null);
1433  coarseLevel.SetFactoryManager(null);
1434  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
1435  fineLevel.SetLevelID(0);
1436  coarseLevel.SetLevelID(1);
1437  fineLevel.Set("A",A_nodal_Matrix_);
1438  fineLevel.Set("Coordinates",Coords_);
1439  fineLevel.Set("DofsPerNode",1);
1440  coarseLevel.setlib(A_nodal_Matrix_->getDomainMap()->lib());
1441  fineLevel.setlib(A_nodal_Matrix_->getDomainMap()->lib());
1442  coarseLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
1443  fineLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
1444 
1445  LocalOrdinal NSdim = 1;
1446  RCP<MultiVector> nullSpace = MultiVectorFactory::Build(A_nodal_Matrix_->getRowMap(),NSdim);
1447  nullSpace->putScalar(SC_ONE);
1448  fineLevel.Set("Nullspace",nullSpace);
1449 
1450  RCP<Factory> amalgFact, dropFact, UncoupledAggFact, coarseMapFact, TentativePFact, Tfact, SaPFact;
1451 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1452  if (useKokkos_) {
1453  amalgFact = rcp(new AmalgamationFactory_kokkos());
1454  dropFact = rcp(new CoalesceDropFactory_kokkos());
1455  UncoupledAggFact = rcp(new UncoupledAggregationFactory_kokkos());
1456  coarseMapFact = rcp(new CoarseMapFactory_kokkos());
1457  TentativePFact = rcp(new TentativePFactory_kokkos());
1458  if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1459  SaPFact = rcp(new SaPFactory_kokkos());
1460  Tfact = rcp(new CoordinatesTransferFactory_kokkos());
1461  } else
1462 #endif
1463  {
1464  amalgFact = rcp(new AmalgamationFactory());
1465  dropFact = rcp(new CoalesceDropFactory());
1466  UncoupledAggFact = rcp(new UncoupledAggregationFactory());
1467  coarseMapFact = rcp(new CoarseMapFactory());
1468  TentativePFact = rcp(new TentativePFactory());
1469  if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1470  SaPFact = rcp(new SaPFactory());
1471  Tfact = rcp(new CoordinatesTransferFactory());
1472  }
1473  dropFact->SetFactory("UnAmalgamationInfo", amalgFact);
1474  double dropTol = parameterList_.get("aggregation: drop tol",0.0);
1475  std::string dropScheme = parameterList_.get("aggregation: drop scheme","classical");
1476  std::string distLaplAlgo = parameterList_.get("aggregation: distance laplacian algo","default");
1477  dropFact->SetParameter("aggregation: drop tol",Teuchos::ParameterEntry(dropTol));
1478  dropFact->SetParameter("aggregation: drop scheme",Teuchos::ParameterEntry(dropScheme));
1479  if (!useKokkos_)
1480  dropFact->SetParameter("aggregation: distance laplacian algo",Teuchos::ParameterEntry(distLaplAlgo));
1481 
1482  UncoupledAggFact->SetFactory("Graph", dropFact);
1483  int minAggSize = parameterList_.get("aggregation: min agg size",2);
1484  UncoupledAggFact->SetParameter("aggregation: min agg size",Teuchos::ParameterEntry(minAggSize));
1485  int maxAggSize = parameterList_.get("aggregation: max agg size",-1);
1486  UncoupledAggFact->SetParameter("aggregation: max agg size",Teuchos::ParameterEntry(maxAggSize));
1487 
1488  coarseMapFact->SetFactory("Aggregates", UncoupledAggFact);
1489 
1490  TentativePFact->SetFactory("Aggregates", UncoupledAggFact);
1491  TentativePFact->SetFactory("UnAmalgamationInfo", amalgFact);
1492  TentativePFact->SetFactory("CoarseMap", coarseMapFact);
1493 
1494  Tfact->SetFactory("Aggregates", UncoupledAggFact);
1495  Tfact->SetFactory("CoarseMap", coarseMapFact);
1496 
1497  if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa") {
1498  SaPFact->SetFactory("P", TentativePFact);
1499  coarseLevel.Request("P", SaPFact.get());
1500  } else
1501  coarseLevel.Request("P",TentativePFact.get());
1502  coarseLevel.Request("Coordinates",Tfact.get());
1503 
1504  RCP<AggregationExportFactory> aggExport;
1505  if (parameterList_.get("aggregation: export visualization data",false)) {
1506  aggExport = rcp(new AggregationExportFactory());
1507  ParameterList aggExportParams;
1508  aggExportParams.set("aggregation: output filename", "aggs.vtk");
1509  aggExportParams.set("aggregation: output file: agg style", "Jacks");
1510  aggExport->SetParameterList(aggExportParams);
1511 
1512  aggExport->SetFactory("Aggregates", UncoupledAggFact);
1513  aggExport->SetFactory("UnAmalgamationInfo", amalgFact);
1514  fineLevel.Request("Aggregates",UncoupledAggFact.get());
1515  fineLevel.Request("UnAmalgamationInfo",amalgFact.get());
1516  }
1517 
1518  if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1519  coarseLevel.Get("P",P_nodal,SaPFact.get());
1520  else
1521  coarseLevel.Get("P",P_nodal,TentativePFact.get());
1522  coarseLevel.Get("Coordinates",CoordsH_,Tfact.get());
1523 
1524  if (parameterList_.get("aggregation: export visualization data",false))
1525  aggExport->Build(fineLevel, coarseLevel);
1526  }
1527  dump(*P_nodal, "P_nodal.m");
1528 
1529  RCP<CrsMatrix> D0Crs = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix();
1530 
1531  // Import off-rank rows of P_nodal into P_nodal_imported
1532  RCP<CrsMatrix> P_nodal_imported;
1533  int numProcs = P_nodal->getDomainMap()->getComm()->getSize();
1534  if (numProcs > 1) {
1535  RCP<CrsMatrixWrap> P_nodal_temp;
1536  RCP<const Map> targetMap = D0Crs->getColMap();
1537  P_nodal_temp = rcp(new CrsMatrixWrap(targetMap));
1538  RCP<const Import> importer = D0Crs->getCrsGraph()->getImporter();
1539  P_nodal_temp->doImport(*P_nodal, *importer, Xpetra::INSERT);
1540  P_nodal_temp->fillComplete(rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getDomainMap(),
1541  rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getRangeMap());
1542  P_nodal_imported = P_nodal_temp->getCrsMatrix();
1543  dump(*P_nodal_temp, "P_nodal_imported.m");
1544  } else
1545  P_nodal_imported = rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix();
1546 
1547 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1548  if (useKokkos_) {
1549 
1550  using ATS = Kokkos::ArithTraits<SC>;
1551  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1553 
1554  typedef typename Matrix::local_matrix_type KCRS;
1555  typedef typename KCRS::device_type device_t;
1556  typedef typename KCRS::StaticCrsGraphType graph_t;
1557  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1558  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1559  typedef typename KCRS::values_type::non_const_type scalar_view_t;
1560 
1561  // Get data out of P_nodal_imported and D0.
1562  auto localP = P_nodal_imported->getLocalMatrix();
1563  auto localD0 = D0_Matrix_->getLocalMatrix();
1564 
1565  // Which algorithm should we use for the construction of the special prolongator?
1566  // Option "mat-mat":
1567  // Multiply D0 * P_nodal, take graph, blow up the domain space and compute the entries.
1568  std::string defaultAlgo = "mat-mat";
1569  std::string algo = parameterList_.get("refmaxwell: prolongator compute algorithm",defaultAlgo);
1570 
1571  if (algo == "mat-mat") {
1572  RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap());
1573  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,false,*P_nodal,false,*D0_P_nodal,true,true);
1574 
1575 #ifdef HAVE_MUELU_DEBUG
1576  TEUCHOS_ASSERT(D0_P_nodal->getColMap()->isSameAs(*P_nodal_imported->getColMap()));
1577 #endif
1578 
1579  // Get data out of D0*P.
1580  auto localD0P = D0_P_nodal->getLocalMatrix();
1581 
1582  // Create the matrix object
1583  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1584  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1585 
1586  size_t nnzEstimate = dim*localD0P.graph.entries.size();
1587  lno_view_t P11rowptr("P11_rowptr", numLocalRows+1);
1588  lno_nnz_view_t P11colind("P11_colind",nnzEstimate);
1589  scalar_view_t P11vals("P11_vals",nnzEstimate);
1590 
1591  // adjust rowpointer
1592  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1593  KOKKOS_LAMBDA(const size_t i) {
1594  P11rowptr(i) = dim*localD0P.graph.row_map(i);
1595  });
1596 
1597  // adjust column indices
1598  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0P.graph.entries.size()),
1599  KOKKOS_LAMBDA(const size_t jj) {
1600  for (size_t k = 0; k < dim; k++) {
1601  P11colind(dim*jj+k) = dim*localD0P.graph.entries(jj)+k;
1602  P11vals(dim*jj+k) = SC_ZERO;
1603  }
1604  });
1605 
1606  auto localNullspace = Nullspace_->template getLocalView<device_t>();
1607 
1608  // enter values
1609  if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1610  // The matrix D0 has too many entries per row.
1611  // Therefore we need to check whether its entries are actually non-zero.
1612  // This is the case for the matrices built by MiniEM.
1613  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1614 
1615  magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1616 
1617  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1618  KOKKOS_LAMBDA(const size_t i) {
1619  for (size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1620  LO l = localD0.graph.entries(ll);
1621  SC p = localD0.values(ll);
1622  if (impl_ATS::magnitude(p) < tol)
1623  continue;
1624  for (size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1625  LO j = localP.graph.entries(jj);
1626  SC v = localP.values(jj);
1627  for (size_t k = 0; k < dim; k++) {
1628  LO jNew = dim*j+k;
1629  SC n = localNullspace(i,k);
1630  size_t m;
1631  for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1632  if (P11colind(m) == jNew)
1633  break;
1634 #if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA)
1635  TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1636 #endif
1637  P11vals(m) += half * v * n;
1638  }
1639  }
1640  }
1641  });
1642 
1643  } else {
1644  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1645  KOKKOS_LAMBDA(const size_t i) {
1646  for (size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1647  LO l = localD0.graph.entries(ll);
1648  for (size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1649  LO j = localP.graph.entries(jj);
1650  SC v = localP.values(jj);
1651  for (size_t k = 0; k < dim; k++) {
1652  LO jNew = dim*j+k;
1653  SC n = localNullspace(i,k);
1654  size_t m;
1655  for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1656  if (P11colind(m) == jNew)
1657  break;
1658 #if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA)
1659  TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1660 #endif
1661  P11vals(m) += half * v * n;
1662  }
1663  }
1664  }
1665  });
1666  }
1667 
1668  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1669  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1670  P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1671  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1672 
1673  } else
1674  TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1675  } else {
1676 #endif // ifdef(HAVE_MUELU_KOKKOS_REFACTOR)
1677 
1678  // get nullspace vectors
1679  ArrayRCP<ArrayRCP<const SC> > nullspaceRCP(dim);
1680  ArrayRCP<ArrayView<const SC> > nullspace(dim);
1681  for(size_t i=0; i<dim; i++) {
1682  nullspaceRCP[i] = Nullspace_->getData(i);
1683  nullspace[i] = nullspaceRCP[i]();
1684  }
1685 
1686  // Get data out of P_nodal_imported and D0.
1687  ArrayRCP<const size_t> Prowptr_RCP, D0rowptr_RCP;
1688  ArrayRCP<const LO> Pcolind_RCP, D0colind_RCP;
1689  ArrayRCP<const SC> Pvals_RCP, D0vals_RCP;
1690  ArrayRCP<size_t> P11rowptr_RCP;
1691  ArrayRCP<LO> P11colind_RCP;
1692  ArrayRCP<SC> P11vals_RCP;
1693 
1694  P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1695  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1696 
1697  // For efficiency
1698  // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1699  // slower than Teuchos::ArrayView::operator[].
1700  ArrayView<const size_t> Prowptr, D0rowptr;
1701  ArrayView<const LO> Pcolind, D0colind;
1702  ArrayView<const SC> Pvals, D0vals;
1703  Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1704  D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1705 
1706  // Which algorithm should we use for the construction of the special prolongator?
1707  // Option "mat-mat":
1708  // Multiply D0 * P_nodal, take graph, blow up the domain space and compute the entries.
1709  // Option "gustavson":
1710  // Loop over D0, P and nullspace and allocate directly. (Gustavson-like)
1711  // More efficient, but only available for serial node.
1712  std::string defaultAlgo = "mat-mat";
1713  std::string algo = parameterList_.get("refmaxwell: prolongator compute algorithm",defaultAlgo);
1714 
1715  if (algo == "mat-mat") {
1716  RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap());
1717  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,false,*P_nodal,false,*D0_P_nodal,true,true);
1718 
1719  // Get data out of D0*P.
1720  ArrayRCP<const size_t> D0Prowptr_RCP;
1721  ArrayRCP<const LO> D0Pcolind_RCP;
1722  ArrayRCP<const SC> D0Pvals_RCP;
1723  rcp_dynamic_cast<CrsMatrixWrap>(D0_P_nodal)->getCrsMatrix()->getAllValues(D0Prowptr_RCP, D0Pcolind_RCP, D0Pvals_RCP);
1724 
1725  // For efficiency
1726  // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1727  // slower than Teuchos::ArrayView::operator[].
1728  ArrayView<const size_t> D0Prowptr;
1729  ArrayView<const LO> D0Pcolind;
1730  D0Prowptr = D0Prowptr_RCP(); D0Pcolind = D0Pcolind_RCP();
1731 
1732  // Create the matrix object
1733  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1734  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1735  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1736  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1737  size_t nnzEstimate = dim*D0Prowptr[numLocalRows];
1738  P11Crs->allocateAllValues(nnzEstimate, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1739 
1740  ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1741  ArrayView<LO> P11colind = P11colind_RCP();
1742  ArrayView<SC> P11vals = P11vals_RCP();
1743 
1744  // adjust rowpointer
1745  for (size_t i = 0; i < numLocalRows+1; i++) {
1746  P11rowptr[i] = dim*D0Prowptr[i];
1747  }
1748 
1749  // adjust column indices
1750  for (size_t jj = 0; jj < (size_t) D0Prowptr[numLocalRows]; jj++)
1751  for (size_t k = 0; k < dim; k++) {
1752  P11colind[dim*jj+k] = dim*D0Pcolind[jj]+k;
1753  P11vals[dim*jj+k] = SC_ZERO;
1754  }
1755 
1756  RCP<const Map> P_nodal_imported_colmap = P_nodal_imported->getColMap();
1757  RCP<const Map> D0_P_nodal_colmap = D0_P_nodal->getColMap();
1758  // enter values
1759  if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1760  // The matrix D0 has too many entries per row.
1761  // Therefore we need to check whether its entries are actually non-zero.
1762  // This is the case for the matrices built by MiniEM.
1763  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1764 
1765  magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1766  for (size_t i = 0; i < numLocalRows; i++) {
1767  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1768  LO l = D0colind[ll];
1769  SC p = D0vals[ll];
1770  if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1771  continue;
1772  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1773  LO j = Pcolind[jj];
1774  j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1775  SC v = Pvals[jj];
1776  for (size_t k = 0; k < dim; k++) {
1777  LO jNew = dim*j+k;
1778  SC n = nullspace[k][i];
1779  size_t m;
1780  for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1781  if (P11colind[m] == jNew)
1782  break;
1783 #ifdef HAVE_MUELU_DEBUG
1784  TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1785 #endif
1786  P11vals[m] += half * v * n;
1787  }
1788  }
1789  }
1790  }
1791  } else {
1792  // enter values
1793  for (size_t i = 0; i < numLocalRows; i++) {
1794  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1795  LO l = D0colind[ll];
1796  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1797  LO j = Pcolind[jj];
1798  j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1799  SC v = Pvals[jj];
1800  for (size_t k = 0; k < dim; k++) {
1801  LO jNew = dim*j+k;
1802  SC n = nullspace[k][i];
1803  size_t m;
1804  for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1805  if (P11colind[m] == jNew)
1806  break;
1807 #ifdef HAVE_MUELU_DEBUG
1808  TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1809 #endif
1810  P11vals[m] += half * v * n;
1811  }
1812  }
1813  }
1814  }
1815  }
1816 
1817  P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1818  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1819 
1820  } else if (algo == "gustavson") {
1821 
1822  LO maxP11col = dim * P_nodal_imported->getColMap()->getMaxLocalIndex();
1823  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1824  Array<size_t> P11_status(dim*maxP11col, ST_INVALID);
1825  // This is ad-hoc and should maybe be replaced with some better heuristics.
1826  size_t nnz_alloc = dim*D0vals_RCP.size();
1827 
1828  // Create the matrix object
1829  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1830  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1831  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1832  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1833  P11Crs->allocateAllValues(nnz_alloc, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1834 
1835  ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1836  ArrayView<LO> P11colind = P11colind_RCP();
1837  ArrayView<SC> P11vals = P11vals_RCP();
1838 
1839  size_t nnz;
1840  if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1841  // The matrix D0 has too many entries per row.
1842  // Therefore we need to check whether its entries are actually non-zero.
1843  // This is the case for the matrices built by MiniEM.
1844  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1845 
1846  magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1847  nnz = 0;
1848  size_t nnz_old = 0;
1849  for (size_t i = 0; i < numLocalRows; i++) {
1850  P11rowptr[i] = nnz;
1851  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1852  LO l = D0colind[ll];
1853  SC p = D0vals[ll];
1854  if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1855  continue;
1856  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1857  LO j = Pcolind[jj];
1858  SC v = Pvals[jj];
1859  for (size_t k = 0; k < dim; k++) {
1860  LO jNew = dim*j+k;
1861  SC n = nullspace[k][i];
1862  // do we already have an entry for (i, jNew)?
1863  if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1864  P11_status[jNew] = nnz;
1865  P11colind[nnz] = jNew;
1866  P11vals[nnz] = half * v * n;
1867  // or should it be
1868  // P11vals[nnz] = half * n;
1869  nnz++;
1870  } else {
1871  P11vals[P11_status[jNew]] += half * v * n;
1872  // or should it be
1873  // P11vals[P11_status[jNew]] += half * n;
1874  }
1875  }
1876  }
1877  }
1878  nnz_old = nnz;
1879  }
1880  P11rowptr[numLocalRows] = nnz;
1881  } else {
1882  nnz = 0;
1883  size_t nnz_old = 0;
1884  for (size_t i = 0; i < numLocalRows; i++) {
1885  P11rowptr[i] = nnz;
1886  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1887  LO l = D0colind[ll];
1888  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1889  LO j = Pcolind[jj];
1890  SC v = Pvals[jj];
1891  for (size_t k = 0; k < dim; k++) {
1892  LO jNew = dim*j+k;
1893  SC n = nullspace[k][i];
1894  // do we already have an entry for (i, jNew)?
1895  if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1896  P11_status[jNew] = nnz;
1897  P11colind[nnz] = jNew;
1898  P11vals[nnz] = half * v * n;
1899  // or should it be
1900  // P11vals[nnz] = half * n;
1901  nnz++;
1902  } else {
1903  P11vals[P11_status[jNew]] += half * v * n;
1904  // or should it be
1905  // P11vals[P11_status[jNew]] += half * n;
1906  }
1907  }
1908  }
1909  }
1910  nnz_old = nnz;
1911  }
1912  P11rowptr[numLocalRows] = nnz;
1913  }
1914 
1915  if (blockDomainMap->lib() == Xpetra::UseTpetra) {
1916  // Downward resize
1917  // - Cannot resize for Epetra, as it checks for same pointers
1918  // - Need to resize for Tpetra, as it checks ().size() == P11rowptr[numLocalRows]
1919  P11vals_RCP.resize(nnz);
1920  P11colind_RCP.resize(nnz);
1921  }
1922 
1923  P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1924  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1925  } else
1926  TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1927 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1928  }
1929 #endif
1930  }
1931 
1932  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1934  RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Build coarse (1,1) matrix");
1935 
1936  const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
1937 
1938  // coarse matrix for P11* (M1 + D1* M2 D1) P11
1939  RCP<Matrix> Matrix1;
1940  {
1941  Level fineLevel, coarseLevel;
1942  fineLevel.SetFactoryManager(null);
1943  coarseLevel.SetFactoryManager(null);
1944  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
1945  fineLevel.SetLevelID(0);
1946  coarseLevel.SetLevelID(1);
1947  fineLevel.Set("A",SM_Matrix_);
1948  coarseLevel.Set("P",P11_);
1949 
1950  coarseLevel.setlib(SM_Matrix_->getDomainMap()->lib());
1951  fineLevel.setlib(SM_Matrix_->getDomainMap()->lib());
1952  coarseLevel.setObjectLabel("RefMaxwell (1,1)");
1953  fineLevel.setObjectLabel("RefMaxwell (1,1)");
1954 
1955  RCP<RAPFactory> rapFact = rcp(new RAPFactory());
1956  ParameterList rapList = *(rapFact->GetValidParameterList());
1957  rapList.set("transpose: use implicit", true);
1958  rapList.set("rap: fix zero diagonals", parameterList_.get<bool>("rap: fix zero diagonals", true));
1959  rapList.set("rap: triple product", parameterList_.get<bool>("rap: triple product", false));
1960  rapFact->SetParameterList(rapList);
1961 
1962  if (precList11_.isType<std::string>("reuse: type") && precList11_.get<std::string>("reuse: type") != "none") {
1963  if (!parameterList_.get<bool>("rap: triple product", false))
1964  coarseLevel.Request("AP reuse data", rapFact.get());
1965  coarseLevel.Request("RAP reuse data", rapFact.get());
1966  }
1967 
1968  if (!AH_AP_reuse_data_.is_null()) {
1969  coarseLevel.AddKeepFlag("AP reuse data", rapFact.get());
1970  coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("AP reuse data", AH_AP_reuse_data_, rapFact.get());
1971  }
1972  if (!AH_RAP_reuse_data_.is_null()) {
1973  coarseLevel.AddKeepFlag("RAP reuse data", rapFact.get());
1974  coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("RAP reuse data", AH_RAP_reuse_data_, rapFact.get());
1975  }
1976 
1977  coarseLevel.Request("A", rapFact.get());
1978 
1979  Matrix1 = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
1980  if (precList11_.isType<std::string>("reuse: type") && precList11_.get<std::string>("reuse: type") != "none") {
1981  if (!parameterList_.get<bool>("rap: triple product", false))
1982  AH_AP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", rapFact.get());
1983  AH_RAP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("RAP reuse data", rapFact.get());
1984  }
1985  }
1986 
1987  if (!AH_.is_null())
1988  AH_ = Teuchos::null;
1989 
1990  if(disable_addon_==true) {
1991  // if add-on is not chosen
1992  AH_=Matrix1;
1993  }
1994  else {
1995  if (Addon_Matrix_.is_null()) {
1996  RCP<Teuchos::TimeMonitor> tmAddon = getTimer("MueLu RefMaxwell: Build coarse addon matrix");
1997  // catch a failure
1998  TEUCHOS_TEST_FOR_EXCEPTION(M0inv_Matrix_==Teuchos::null,std::invalid_argument,
1999  "MueLu::RefMaxwell::formCoarseMatrix(): Inverse of "
2000  "lumped mass matrix required for add-on (i.e. M0inv_Matrix is null)");
2001 
2002  // coarse matrix for add-on, i.e P11* (M1 D0 M0inv D0* M1) P11
2003  RCP<Matrix> Zaux = MatrixFactory::Build(M1_Matrix_->getRowMap());
2004  RCP<Matrix> Z = MatrixFactory::Build(D0_Matrix_->getDomainMap());
2005 
2006  // construct Zaux = M1 P11
2007  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*M1_Matrix_,false,*P11_,false,*Zaux,true,true);
2008  // construct Z = D0* M1 P11 = D0* Zaux
2009  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,true,*Zaux,false,*Z,true,true);
2010 
2011  // construct Z* M0inv Z
2012  RCP<Matrix> Matrix2 = MatrixFactory::Build(Z->getDomainMap());
2013  if (M0inv_Matrix_->getGlobalMaxNumRowEntries()<=1) {
2014  // We assume that if M0inv has at most one entry per row then
2015  // these are all diagonal entries.
2016  RCP<Vector> diag = VectorFactory::Build(M0inv_Matrix_->getRowMap());
2017  M0inv_Matrix_->getLocalDiagCopy(*diag);
2018  ArrayRCP<Scalar> diagVals = diag->getDataNonConst(0);
2019  for (size_t j=0; j < diag->getMap()->getNodeNumElements(); j++) {
2020  diagVals[j] = Teuchos::ScalarTraits<Scalar>::squareroot(diagVals[j]);
2021  }
2022  if (Z->getRowMap()->isSameAs(*(diag->getMap())))
2023  Z->leftScale(*diag);
2024  else {
2025  RCP<Import> importer = ImportFactory::Build(diag->getMap(),Z->getRowMap());
2026  RCP<Vector> diag2 = VectorFactory::Build(Z->getRowMap());
2027  diag2->doImport(*diag,*importer,Xpetra::INSERT);
2028  Z->leftScale(*diag2);
2029  }
2030  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*Z,true,*Z,false,*Matrix2,true,true);
2031  } else if (parameterList_.get<bool>("rap: triple product", false) == false) {
2032  RCP<Matrix> C2 = MatrixFactory::Build(M0inv_Matrix_->getRowMap());
2033  // construct C2 = M0inv Z
2034  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*M0inv_Matrix_,false,*Z,false,*C2,true,true);
2035  // construct Matrix2 = Z* M0inv Z = Z* C2
2036  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*Z,true,*C2,false,*Matrix2,true,true);
2037  } else {
2038  // construct Matrix2 = Z* M0inv Z
2039  Xpetra::TripleMatrixMultiply<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
2040  MultiplyRAP(*Z, true, *M0inv_Matrix_, false, *Z, false, *Matrix2, true, true);
2041  }
2042  // Should we keep the addon for next setup?
2043  if (precList11_.isType<std::string>("reuse: type") && precList11_.get<std::string>("reuse: type") != "none")
2044  Addon_Matrix_ = Matrix2;
2045 
2046  // add matrices together
2047  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::TwoMatrixAdd(*Matrix1,false,one,*Matrix2,false,one,AH_,GetOStream(Runtime0));
2048  AH_->fillComplete();
2049  } else {
2050  // add matrices together
2051  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::TwoMatrixAdd(*Matrix1,false,one,*Addon_Matrix_,false,one,AH_,GetOStream(Runtime0));
2052  AH_->fillComplete();
2053  }
2054  }
2055 
2056  // set fixed block size for vector nodal matrix
2057  size_t dim = Nullspace_->getNumVectors();
2058  AH_->SetFixedBlockSize(dim);
2059  AH_->setObjectLabel("RefMaxwell coarse (1,1)");
2060 
2061  }
2062 
2063 
2064  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2065  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::resetMatrix(RCP<Matrix> SM_Matrix_new, bool ComputePrec) {
2066  bool reuse = !SM_Matrix_.is_null();
2067  SM_Matrix_ = SM_Matrix_new;
2068  dump(*SM_Matrix_, "SM.m");
2069  if (ComputePrec)
2070  compute(reuse);
2071  }
2072 
2073 
2074  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2075  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverseAdditive(const MultiVector& RHS, MultiVector& X) const {
2076 
2077  Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2078 
2079  { // compute residual
2080 
2081  RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2082  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
2083  }
2084 
2085  { // restrict residual to sub-hierarchies
2086 
2087  if (implicitTranspose_) {
2088  {
2089  RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (implicit)");
2090  P11_->apply(*residual_,*P11res_,Teuchos::TRANS);
2091  }
2092  {
2093  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (implicit)");
2094  D0_Matrix_->apply(*residual_,*D0res_,Teuchos::TRANS);
2095  }
2096  } else {
2097  if (D0_T_R11_colMapsMatch_) {
2098  // Column maps of D0_T and R11 match, and we're running Tpetra
2099  {
2100  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restrictions import");
2101  D0TR11Tmp_->doImport(*residual_, *rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix()->getCrsGraph()->getImporter(), Xpetra::INSERT);
2102  }
2103  {
2104  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (explicit)");
2105  rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(D0_T_Matrix_)->getCrsMatrix())->getTpetra_CrsMatrix()->localApply(toTpetra(*D0TR11Tmp_),toTpetra(*D0res_),Teuchos::NO_TRANS);
2106  }
2107  {
2108  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (explicit)");
2109  rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix())->getTpetra_CrsMatrix()->localApply(toTpetra(*D0TR11Tmp_),toTpetra(*P11res_),Teuchos::NO_TRANS);
2110  }
2111  } else {
2112  {
2113  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (explicit)");
2114  R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
2115  }
2116  {
2117  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (explicit)");
2118  D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
2119  }
2120  }
2121  }
2122  }
2123 
2124  {
2125  RCP<Teuchos::TimeMonitor> tmSubSolves = getTimer("MueLu RefMaxwell: subsolves");
2126 
2127  // block diagonal preconditioner on 2x2 (V-cycle for diagonal blocks)
2128 
2129  if (!ImporterH_.is_null() && !implicitTranspose_) {
2130  RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: import coarse (1,1)");
2131  P11resTmp_->doImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2132  P11res_.swap(P11resTmp_);
2133  }
2134  if (!Importer22_.is_null() && !implicitTranspose_) {
2135  RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: import (2,2)");
2136  D0resTmp_->doImport(*D0res_, *Importer22_, Xpetra::INSERT);
2137  D0res_.swap(D0resTmp_);
2138  }
2139 
2140  // iterate on coarse (1, 1) block
2141  if (!AH_.is_null()) {
2142  RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: solve coarse (1,1)", AH_->getRowMap()->getComm());
2143 
2144  RCP<const Map> origXMap = P11x_->getMap();
2145  RCP<const Map> origRhsMap = P11res_->getMap();
2146 
2147  // Replace maps with maps with a subcommunicator
2148  P11res_->replaceMap(AH_->getRangeMap());
2149  P11x_ ->replaceMap(AH_->getDomainMap());
2150  HierarchyH_->Iterate(*P11res_, *P11x_, numItersH_, true);
2151  P11x_ ->replaceMap(origXMap);
2152  P11res_->replaceMap(origRhsMap);
2153  }
2154 
2155  // iterate on (2, 2) block
2156  if (!A22_.is_null()) {
2157  RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: solve (2,2)", A22_->getRowMap()->getComm());
2158 
2159  RCP<const Map> origXMap = D0x_->getMap();
2160  RCP<const Map> origRhsMap = D0res_->getMap();
2161 
2162  // Replace maps with maps with a subcommunicator
2163  D0res_->replaceMap(A22_->getRangeMap());
2164  D0x_ ->replaceMap(A22_->getDomainMap());
2165  Hierarchy22_->Iterate(*D0res_, *D0x_, numIters22_, true);
2166  D0x_ ->replaceMap(origXMap);
2167  D0res_->replaceMap(origRhsMap);
2168  }
2169 
2170  }
2171 
2172  if (fuseProlongationAndUpdate_) {
2173  { // prolongate (1,1) block
2174  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: prolongation coarse (1,1) (fused)");
2175  P11_->apply(*P11x_,X,Teuchos::NO_TRANS,one,one);
2176  }
2177 
2178  { // prolongate (2,2) block
2179  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: prolongation (2,2) (fused)");
2180  D0_Matrix_->apply(*D0x_,X,Teuchos::NO_TRANS,one,one);
2181  }
2182  } else {
2183  { // prolongate (1,1) block
2184  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: prolongation coarse (1,1) (unfused)");
2185  P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
2186  }
2187 
2188  { // prolongate (2,2) block
2189  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: prolongation (2,2) (unfused)");
2190  D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS,one,one);
2191  }
2192 
2193  { // update current solution
2194  RCP<Teuchos::TimeMonitor> tmUpdate = getTimer("MueLu RefMaxwell: update");
2195  X.update(one, *residual_, one);
2196  }
2197  }
2198 
2199  if (!ImporterH_.is_null()) {
2200  P11res_.swap(P11resTmp_);
2201  }
2202  if (!Importer22_.is_null()) {
2203  D0res_.swap(D0resTmp_);
2204  }
2205 
2206  }
2207 
2208 
2209  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2210  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverse121(const MultiVector& RHS, MultiVector& X) const {
2211 
2212  // precondition (1,1)-block
2213  solveH(RHS,X);
2214  // precondition (2,2)-block
2215  solve22(RHS,X);
2216  // precondition (1,1)-block
2217  solveH(RHS,X);
2218 
2219  }
2220 
2221 
2222  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2223  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverse212(const MultiVector& RHS, MultiVector& X) const {
2224 
2225  // precondition (2,2)-block
2226  solve22(RHS,X);
2227  // precondition (1,1)-block
2228  solveH(RHS,X);
2229  // precondition (2,2)-block
2230  solve22(RHS,X);
2231 
2232  }
2233 
2234  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2235  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::solveH(const MultiVector& RHS, MultiVector& X) const {
2236 
2237  Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2238 
2239  { // compute residual
2240  RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2241  Utilities::Residual(*SM_Matrix_, X, RHS,*residual_);
2242  if (implicitTranspose_)
2243  P11_->apply(*residual_,*P11res_,Teuchos::TRANS);
2244  else
2245  R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
2246  }
2247 
2248  { // solve coarse (1,1) block
2249  if (!ImporterH_.is_null() && !implicitTranspose_) {
2250  RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: import coarse (1,1)");
2251  P11resTmp_->doImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2252  P11res_.swap(P11resTmp_);
2253  }
2254  if (!AH_.is_null()) {
2255  RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: solve coarse (1,1)", AH_->getRowMap()->getComm());
2256 
2257  RCP<const Map> origXMap = P11x_->getMap();
2258  RCP<const Map> origRhsMap = P11res_->getMap();
2259 
2260  // Replace maps with maps with a subcommunicator
2261  P11res_->replaceMap(AH_->getRangeMap());
2262  P11x_ ->replaceMap(AH_->getDomainMap());
2263  HierarchyH_->Iterate(*P11res_, *P11x_, numItersH_, true);
2264  P11x_ ->replaceMap(origXMap);
2265  P11res_->replaceMap(origRhsMap);
2266  }
2267  }
2268 
2269  { // update current solution
2270  RCP<Teuchos::TimeMonitor> tmUp = getTimer("MueLu RefMaxwell: update");
2271  P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
2272  X.update(one, *residual_, one);
2273  }
2274  if (!ImporterH_.is_null()) {
2275  P11res_.swap(P11resTmp_);
2276  }
2277 
2278  }
2279 
2280 
2281  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2282  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::solve22(const MultiVector& RHS, MultiVector& X) const {
2283 
2284  Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2285 
2286  { // compute residual
2287  RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2288  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
2289  if (implicitTranspose_)
2290  D0_Matrix_->apply(*residual_,*D0res_,Teuchos::TRANS);
2291  else
2292  D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
2293  }
2294 
2295  { // solve (2,2) block
2296  if (!Importer22_.is_null() && !implicitTranspose_) {
2297  RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: import (2,2)");
2298  D0resTmp_->doImport(*D0res_, *Importer22_, Xpetra::INSERT);
2299  D0res_.swap(D0resTmp_);
2300  }
2301  if (!A22_.is_null()) {
2302  RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: solve (2,2)", A22_->getRowMap()->getComm());
2303 
2304  RCP<const Map> origXMap = D0x_->getMap();
2305  RCP<const Map> origRhsMap = D0res_->getMap();
2306 
2307  // Replace maps with maps with a subcommunicator
2308  D0res_->replaceMap(A22_->getRangeMap());
2309  D0x_ ->replaceMap(A22_->getDomainMap());
2310  Hierarchy22_->Iterate(*D0res_, *D0x_, numIters22_, true);
2311  D0x_ ->replaceMap(origXMap);
2312  D0res_->replaceMap(origRhsMap);
2313  }
2314  }
2315 
2316  { //update current solution
2317  RCP<Teuchos::TimeMonitor> tmUp = getTimer("MueLu RefMaxwell: update");
2318  D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS);
2319  X.update(one, *residual_, one);
2320  }
2321  if (!Importer22_.is_null()) {
2322  D0res_.swap(D0resTmp_);
2323  }
2324 
2325  }
2326 
2327 
2328  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2329  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::apply (const MultiVector& RHS, MultiVector& X,
2330  Teuchos::ETransp /* mode */,
2331  Scalar /* alpha */,
2332  Scalar /* beta */) const {
2333 
2334  RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: solve");
2335 
2336  // make sure that we have enough temporary memory
2337  if (X.getNumVectors() != P11res_->getNumVectors())
2338  allocateMemory(X.getNumVectors());
2339 
2340  { // apply pre-smoothing
2341 
2342  RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2343 
2344 #if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
2345  if (useHiptmairSmoothing_) {
2346  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tX = Utilities::MV2NonConstTpetraMV(X);
2347  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tRHS = Utilities::MV2TpetraMV(RHS);
2348  hiptmairPreSmoother_->apply(tRHS, tX);
2349  }
2350  else
2351 #endif
2352  PreSmoother_->Apply(X, RHS, use_as_preconditioner_);
2353  }
2354 
2355  // do solve for the 2x2 block system
2356  if(mode_=="additive")
2357  applyInverseAdditive(RHS,X);
2358  else if(mode_=="121")
2359  applyInverse121(RHS,X);
2360  else if(mode_=="212")
2361  applyInverse212(RHS,X);
2362  else if(mode_=="1")
2363  solveH(RHS,X);
2364  else if(mode_=="2")
2365  solve22(RHS,X);
2366  else if(mode_=="none") {
2367  // do nothing
2368  }
2369  else
2370  applyInverseAdditive(RHS,X);
2371 
2372  { // apply post-smoothing
2373 
2374  RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2375 
2376 #if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
2377  if (useHiptmairSmoothing_)
2378  {
2379  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tX = Utilities::MV2NonConstTpetraMV(X);
2380  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tRHS = Utilities::MV2TpetraMV(RHS);
2381  hiptmairPostSmoother_->apply(tRHS, tX);
2382  }
2383  else
2384 #endif
2385  PostSmoother_->Apply(X, RHS, false);
2386  }
2387 
2388  }
2389 
2390 
2391  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2393  return false;
2394  }
2395 
2396 
2397  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2399  initialize(const Teuchos::RCP<Matrix> & D0_Matrix,
2400  const Teuchos::RCP<Matrix> & Ms_Matrix,
2401  const Teuchos::RCP<Matrix> & M0inv_Matrix,
2402  const Teuchos::RCP<Matrix> & M1_Matrix,
2403  const Teuchos::RCP<MultiVector> & Nullspace,
2404  const Teuchos::RCP<RealValuedMultiVector> & Coords,
2405  Teuchos::ParameterList& List)
2406  {
2407  // some pre-conditions
2408  TEUCHOS_ASSERT(D0_Matrix!=Teuchos::null);
2409  TEUCHOS_ASSERT(Ms_Matrix!=Teuchos::null);
2410  TEUCHOS_ASSERT(M1_Matrix!=Teuchos::null);
2411 
2412 #ifdef HAVE_MUELU_DEBUG
2413  TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*M1_Matrix->getRangeMap()));
2414  TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*M1_Matrix->getRowMap()));
2415  TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getRangeMap()));
2416 
2417  TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*Ms_Matrix->getRangeMap()));
2418  TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*Ms_Matrix->getRowMap()));
2419  TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getRangeMap()));
2420 
2421  TEUCHOS_ASSERT(D0_Matrix->getRangeMap()->isSameAs(*D0_Matrix->getRowMap()));
2422 
2423  if (!M0inv_Matrix) {
2424  TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*M0inv_Matrix->getRowMap()));
2425  TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*M0inv_Matrix->getRangeMap()));
2426  TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getDomainMap()));
2427  }
2428 #endif
2429 
2430  HierarchyH_ = Teuchos::null;
2431  Hierarchy22_ = Teuchos::null;
2432  PreSmoother_ = Teuchos::null;
2433  PostSmoother_ = Teuchos::null;
2434  disable_addon_ = false;
2435  mode_ = "additive";
2436 
2437  // set parameters
2438  setParameters(List);
2439 
2440  if (D0_Matrix->getRowMap()->lib() == Xpetra::UseTpetra) {
2441  // We will remove boundary conditions from D0, and potentially change maps, so we copy the input.
2442  // Fortunately, D0 is quite sparse.
2443  // We cannot use the Tpetra copy constructor, since it does not copy the graph.
2444 
2445  RCP<Matrix> D0copy = MatrixFactory::Build(D0_Matrix->getRowMap(), D0_Matrix->getColMap(), 0);
2446  RCP<CrsMatrix> D0copyCrs = rcp_dynamic_cast<CrsMatrixWrap>(D0copy,true)->getCrsMatrix();
2447  ArrayRCP<const size_t> D0rowptr_RCP;
2448  ArrayRCP<const LO> D0colind_RCP;
2449  ArrayRCP<const SC> D0vals_RCP;
2450  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getAllValues(D0rowptr_RCP,
2451  D0colind_RCP,
2452  D0vals_RCP);
2453 
2454  ArrayRCP<size_t> D0copyrowptr_RCP;
2455  ArrayRCP<LO> D0copycolind_RCP;
2456  ArrayRCP<SC> D0copyvals_RCP;
2457  D0copyCrs->allocateAllValues(D0vals_RCP.size(),D0copyrowptr_RCP,D0copycolind_RCP,D0copyvals_RCP);
2458  D0copyrowptr_RCP.deepCopy(D0rowptr_RCP());
2459  D0copycolind_RCP.deepCopy(D0colind_RCP());
2460  D0copyvals_RCP.deepCopy(D0vals_RCP());
2461  D0copyCrs->setAllValues(D0copyrowptr_RCP,
2462  D0copycolind_RCP,
2463  D0copyvals_RCP);
2464  D0copyCrs->expertStaticFillComplete(D0_Matrix->getDomainMap(), D0_Matrix->getRangeMap(),
2465  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getCrsGraph()->getImporter(),
2466  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getCrsGraph()->getExporter());
2467  D0_Matrix_ = D0copy;
2468  } else
2469  D0_Matrix_ = MatrixFactory::BuildCopy(D0_Matrix);
2470 
2471 
2472  M0inv_Matrix_ = M0inv_Matrix;
2473  Ms_Matrix_ = Ms_Matrix;
2474  M1_Matrix_ = M1_Matrix;
2475  Coords_ = Coords;
2476  Nullspace_ = Nullspace;
2477 
2478  dump(*D0_Matrix_, "D0_clean.m");
2479  dump(*Ms_Matrix_, "Ms.m");
2480  dump(*M1_Matrix_, "M1.m");
2481  if (!M0inv_Matrix_.is_null()) dump(*M0inv_Matrix_, "M0inv.m");
2482  if (!Coords_.is_null()) dumpCoords(*Coords_, "coords.m");
2483 
2484  }
2485 
2486 
2487  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2489  describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel /* verbLevel */) const {
2490 
2491  std::ostringstream oss;
2492 
2493  RCP<const Teuchos::Comm<int> > comm = SM_Matrix_->getDomainMap()->getComm();
2494 
2495 #ifdef HAVE_MPI
2496  int root;
2497  if (!A22_.is_null())
2498  root = comm->getRank();
2499  else
2500  root = -1;
2501 
2502  int actualRoot;
2503  reduceAll(*comm, Teuchos::REDUCE_MAX, root, Teuchos::ptr(&actualRoot));
2504  root = actualRoot;
2505 #endif
2506 
2507 
2508  oss << "\n--------------------------------------------------------------------------------\n" <<
2509  "--- RefMaxwell Summary ---\n"
2510  "--------------------------------------------------------------------------------" << std::endl;
2511  oss << std::endl;
2512 
2513  GlobalOrdinal numRows;
2514  GlobalOrdinal nnz;
2515 
2516  SM_Matrix_->getRowMap()->getComm()->barrier();
2517 
2518  numRows = SM_Matrix_->getGlobalNumRows();
2519  nnz = SM_Matrix_->getGlobalNumEntries();
2520 
2521  Xpetra::global_size_t tt = numRows;
2522  int rowspacer = 3; while (tt != 0) { tt /= 10; rowspacer++; }
2523  tt = nnz;
2524  int nnzspacer = 2; while (tt != 0) { tt /= 10; nnzspacer++; }
2525 
2526  oss << "block " << std::setw(rowspacer) << " rows " << std::setw(nnzspacer) << " nnz " << std::setw(9) << " nnz/row" << std::endl;
2527  oss << "(1, 1)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
2528 
2529  if (!A22_.is_null()) {
2530  // ToDo: make sure that this is printed correctly
2531  numRows = A22_->getGlobalNumRows();
2532  nnz = A22_->getGlobalNumEntries();
2533 
2534  oss << "(2, 2)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
2535  }
2536 
2537  oss << std::endl;
2538 
2539 #if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
2540  if (useHiptmairSmoothing_) {
2541  if (hiptmairPreSmoother_ != null && hiptmairPreSmoother_ == hiptmairPostSmoother_)
2542  oss << "Smoother both : " << hiptmairPreSmoother_->description() << std::endl;
2543  else {
2544  oss << "Smoother pre : "
2545  << (hiptmairPreSmoother_ != null ? hiptmairPreSmoother_->description() : "no smoother") << std::endl;
2546  oss << "Smoother post : "
2547  << (hiptmairPostSmoother_ != null ? hiptmairPostSmoother_->description() : "no smoother") << std::endl;
2548  }
2549 
2550  } else
2551 #endif
2552  {
2553  if (PreSmoother_ != null && PreSmoother_ == PostSmoother_)
2554  oss << "Smoother both : " << PreSmoother_->description() << std::endl;
2555  else {
2556  oss << "Smoother pre : "
2557  << (PreSmoother_ != null ? PreSmoother_->description() : "no smoother") << std::endl;
2558  oss << "Smoother post : "
2559  << (PostSmoother_ != null ? PostSmoother_->description() : "no smoother") << std::endl;
2560  }
2561  }
2562  oss << std::endl;
2563 
2564  std::string outstr = oss.str();
2565 
2566 #ifdef HAVE_MPI
2567  RCP<const Teuchos::MpiComm<int> > mpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
2568  MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
2569 
2570  int strLength = outstr.size();
2571  MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
2572  if (comm->getRank() != root)
2573  outstr.resize(strLength);
2574  MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
2575 #endif
2576 
2577  out << outstr;
2578 
2579  if (!HierarchyH_.is_null())
2580  HierarchyH_->describe(out, GetVerbLevel());
2581 
2582  if (!Hierarchy22_.is_null())
2583  Hierarchy22_->describe(out, GetVerbLevel());
2584 
2585  if (IsPrint(Statistics2)) {
2586  // Print the grid of processors
2587  std::ostringstream oss2;
2588 
2589  oss2 << "Sub-solver distribution over ranks" << std::endl;
2590  oss2 << "( (1,1) block only is indicated by '1', (2,2) block only by '2', and both blocks by 'B' and none by '.')" << std::endl;
2591 
2592  int numProcs = comm->getSize();
2593 #ifdef HAVE_MPI
2594  RCP<const Teuchos::MpiComm<int> > tmpic = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
2595  TEUCHOS_TEST_FOR_EXCEPTION(tmpic == Teuchos::null, Exceptions::RuntimeError, "Cannot cast base Teuchos::Comm to Teuchos::MpiComm object.");
2596  RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();
2597 #endif
2598 
2599  char status = 0;
2600  if (!AH_.is_null())
2601  status += 1;
2602  if (!A22_.is_null())
2603  status += 2;
2604  std::vector<char> states(numProcs, 0);
2605 #ifdef HAVE_MPI
2606  MPI_Gather(&status, 1, MPI_CHAR, &states[0], 1, MPI_CHAR, 0, *rawMpiComm);
2607 #else
2608  states.push_back(status);
2609 #endif
2610 
2611  int rowWidth = std::min(Teuchos::as<int>(ceil(sqrt(numProcs))), 100);
2612  for (int proc = 0; proc < numProcs; proc += rowWidth) {
2613  for (int j = 0; j < rowWidth; j++)
2614  if (proc + j < numProcs)
2615  if (states[proc+j] == 0)
2616  oss2 << ".";
2617  else if (states[proc+j] == 1)
2618  oss2 << "1";
2619  else if (states[proc+j] == 2)
2620  oss2 << "2";
2621  else
2622  oss2 << "B";
2623  else
2624  oss2 << " ";
2625 
2626  oss2 << " " << proc << ":" << std::min(proc + rowWidth, numProcs) - 1 << std::endl;
2627  }
2628  oss2 << std::endl;
2629  GetOStream(Statistics2) << oss2.str();
2630  }
2631 
2632 
2633  }
2634 
2635 } // namespace
2636 
2637 #define MUELU_REFMAXWELL_SHORT
2638 #endif //ifdef MUELU_REFMAXWELL_DEF_HPP
Important warning messages (one line)
Generic Smoother Factory for generating the smoothers of the MG hierarchy.
This class specifies the default factory that should generate some data on a Level if the data does n...
#define MueLu_sumAll(rcpComm, in, out)
static void SetMueLuOFileStream(const std::string &filename)
Various adapters that will create a MueLu preconditioner that is an Xpetra::Matrix.
MueLu::DefaultLocalOrdinal LocalOrdinal
Factory for determing the number of partitions for rebalancing.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access). Usage: Level->Get< RCP<Matrix> >("A", factory) if factory == NULL => use default factory.
Factory for generating coarse level map. Used by TentativePFactory.
static RCP< const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > MV2TpetraMV(RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > const vec)
Helper utility to pull out the underlying Tpetra objects from an Xpetra object.
void solveH(const MultiVector &RHS, MultiVector &X) const
apply solve to 1-1 block only
static void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const std::vector< LocalOrdinal > &dirichletRows)
#define MueLu_maxAll(rcpComm, in, out)
static void SetDefaultVerbLevel(const VerbLevel defaultVerbLevel)
Set the default (global) verbosity level.
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< std::is_integral< iType >::value, size_t >::type extent(const iType &r) const noexcept
void FindNonZeros(const Teuchos::ArrayRCP< const Scalar > vals, Teuchos::ArrayRCP< bool > nonzeros)
Class that encapsulates external library smoothers.
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Scalar >::magnitude(0.), const bool count_twos_as_dirichlet=false)
Factory for building permutation matrix that can be be used to shuffle data (matrices, vectors) among processes.
void ZeroDirichletCols(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletCols, Scalar replaceWith)
void ZeroDirichletRows(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows, Scalar replaceWith)
One-liner description of what is happening.
std::string tolower(const std::string &str)
Namespace for MueLu classes and methods.
void SetPreviousLevel(const RCP< Level > &previousLevel)
Definition: MueLu_Level.cpp:85
Interface to Zoltan library.This interface provides access to partitioning methods in Zoltan...
void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows)
static VerbLevel GetDefaultVerbLevel()
Get the default (global) verbosity level.
void SetFactoryManager(const RCP< const FactoryManagerBase > &factoryManager)
Set default factories (used internally by Hierarchy::SetLevel()).
Definition: MueLu_Level.cpp:92
void setParameters(Teuchos::ParameterList &list)
Set parameters.
void applyInverseAdditive(const MultiVector &RHS, MultiVector &X) const
apply additive algorithm for 2x2 solve
void buildProlongator()
Setup the prolongator for the (1,1)-block.
MueLu::DefaultNode Node
static RCP< Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > MV2NonConstTpetraMV(RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > vec)
#define MueLu_minAll(rcpComm, in, out)
Teuchos::ScalarTraits< Scalar >::magnitudeType magnitudeType
Factory for building tentative prolongator.
MsgType toVerbLevel(const std::string &verbLevelStr)
void allocateMemory(int numVectors) const
allocate multivectors for solve
Print even more statistics.
void setlib(Xpetra::UnderlyingLib lib2)
Print statistics that do not involve significant additional computation.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::VERB_HIGH) const
static RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Residual(const Xpetra::Operator< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &RHS)
void applyInverse121(const MultiVector &RHS, MultiVector &X) const
apply 1-2-1 algorithm for 2x2 solve
Kokkos::View< bool *, typename NO::device_type > DetectDirichletRows(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol, const bool count_twos_as_dirichlet)
void dumpCoords(const RealValuedMultiVector &X, std::string name) const
dump out real-valued multivector
MueLu::DefaultScalar Scalar
void resetMatrix(Teuchos::RCP< Matrix > SM_Matrix_new, bool ComputePrec=true)
Reset system matrix.
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
static RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > RealValuedToScalarMultiVector(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::magnitudeType, LocalOrdinal, GlobalOrdinal, Node > > X)
Teuchos::RCP< const Map > getRangeMap() const
Returns the Xpetra::Map object associated with the range of this operator.
Factory to export aggregation info or visualize aggregates using VTK.
Interface to Zoltan2 library.This interface provides access to partitioning methods in Zoltan2...
static void SetMueLuOStream(const Teuchos::RCP< Teuchos::FancyOStream > &mueluOStream)
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Xpetra::MultiVector< coordinateType, LO, GO, NO > RealValuedMultiVector
AmalgamationFactory for subblocks of strided map based amalgamation data.
void AddKeepFlag(const std::string &ename, const FactoryBase *factory=NoFactory::get(), KeepType keep=MueLu::Keep)
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
static void ZeroDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const std::vector< LocalOrdinal > &dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
void compute(bool reuse=false)
Setup the preconditioner.
Applies permutation to grid transfer operators.
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
Print all timing information.
void initialize(const Teuchos::RCP< Matrix > &D0_Matrix, const Teuchos::RCP< Matrix > &Ms_Matrix, const Teuchos::RCP< Matrix > &M0inv_Matrix, const Teuchos::RCP< Matrix > &M1_Matrix, const Teuchos::RCP< MultiVector > &Nullspace, const Teuchos::RCP< RealValuedMultiVector > &Coords, Teuchos::ParameterList &List)
bool hasTransposeApply() const
Indicates whether this operator supports applying the adjoint operator.
void applyInverse212(const MultiVector &RHS, MultiVector &X) const
apply 2-1-2 algorithm for 2x2 solve
Factory for creating a graph base on a given matrix.
Teuchos::RCP< Teuchos::TimeMonitor > getTimer(std::string name, RCP< const Teuchos::Comm< int > > comm=Teuchos::null) const
get a (synced) timer
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)
void SetLevelID(int levelID)
Set level number.
Definition: MueLu_Level.cpp:78
Teuchos::RCP< MueLu::Hierarchy< Scalar, LocalOrdinal, GlobalOrdinal, Node > > CreateXpetraPreconditioner(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > op, const Teuchos::ParameterList &inParamList)
Helper function to create a MueLu preconditioner that can be used by Xpetra.Given an Xpetra::Matrix...
Class for transferring coordinates from a finer level to a coarser one.
void dump(const Matrix &A, std::string name) const
dump out matrix
Factory for building coarse matrices.
void formCoarseMatrix()
Compute P11^{T}*A*P11 efficiently.
Exception throws to report errors in the internal logical of the program.
void apply(const MultiVector &X, MultiVector &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
void deep_copy(const View< DT, DP... > &dst, typename ViewTraits< DT, DP... >::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP... >::specialize, void >::value >::type *=nullptr)
Description of what is happening (more verbose)
static void ZeroDirichletCols(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Teuchos::ArrayRCP< const bool > &dirichletCols, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
Factory for building coarse matrices.
Teuchos::ScalarTraits< Scalar >::coordinateType coordinateType
static RCP< Tpetra::RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Op2NonConstTpetraRow(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Op)
Factory for building Smoothed Aggregation prolongators.
Factory for building uncoupled aggregates.
Kokkos::View< bool *, typename NO::device_type > DetectDirichletCols(const Xpetra::Matrix< SC, LO, GO, NO > &A, const Kokkos::View< const bool *, typename NO::device_type > &dirichletRows)
static std::string translate(Teuchos::ParameterList &paramList, const std::string &defaultVals="")
: Translate ML parameters to MueLu parameter XML string
void solve22(const MultiVector &RHS, MultiVector &X) const
apply solve to 2-2 block only
Factory for building a thresholded operator.
Teuchos::RCP< const Map > getDomainMap() const
Returns the Xpetra::Map object associated with the domain of this operator.
void Request(const FactoryBase &factory)
Increment the storage counter for all the inputs of a factory.
static const RCP< const NoFactory > getRCP()
Static Get() functions.