MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_SemiCoarsenPFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
47#define MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
48
49#include <stdlib.h>
50
51#include <Kokkos_Core.hpp>
52
53#include <KokkosBatched_LU_Decl.hpp>
54#include <KokkosBatched_LU_Serial_Impl.hpp>
55#include <KokkosBatched_Trsm_Decl.hpp>
56#include <KokkosBatched_Trsm_Serial_Impl.hpp>
57#include <KokkosBatched_Util.hpp>
58#include <KokkosSparse_CrsMatrix.hpp>
59
60#include <Xpetra_CrsMatrixWrap.hpp>
61#include <Xpetra_ImportFactory.hpp>
62#include <Xpetra_Matrix.hpp>
63#include <Xpetra_MultiVectorFactory.hpp>
64#include <Xpetra_VectorFactory.hpp>
65
67#include "MueLu_MasterList.hpp"
68#include "MueLu_Monitor.hpp"
70
71namespace MueLu {
72
73template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
74 class DeviceType>
75RCP<const ParameterList>
76SemiCoarsenPFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal,
77 Kokkos::Compat::KokkosDeviceWrapperNode<
78 DeviceType>>::GetValidParameterList() const {
79 RCP<ParameterList> validParamList = rcp(new ParameterList());
80
81 std::string name = "semicoarsen: coarsen rate";
82 validParamList->setEntry(name, MasterList::getEntry(name));
83 validParamList->set<RCP<const FactoryBase>>(
84 "A", Teuchos::null, "Generating factory of the matrix A");
85 validParamList->set<RCP<const FactoryBase>>(
86 "Nullspace", Teuchos::null, "Generating factory of the nullspace");
87 validParamList->set<RCP<const FactoryBase>>(
88 "Coordinates", Teuchos::null, "Generating factory for coordinates");
89
90 validParamList->set<RCP<const FactoryBase>>(
91 "LineDetection_VertLineIds", Teuchos::null,
92 "Generating factory for LineDetection vertical line ids");
93 validParamList->set<RCP<const FactoryBase>>(
94 "LineDetection_Layers", Teuchos::null,
95 "Generating factory for LineDetection layer ids");
96 validParamList->set<RCP<const FactoryBase>>(
97 "CoarseNumZLayers", Teuchos::null,
98 "Generating factory for number of coarse z-layers");
99
100 return validParamList;
101}
102
103template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
104 class DeviceType>
107 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::
108 DeclareInput(Level &fineLevel, Level & /* coarseLevel */) const {
109 Input(fineLevel, "A");
110 Input(fineLevel, "Nullspace");
111
112 Input(fineLevel, "LineDetection_VertLineIds");
113 Input(fineLevel, "LineDetection_Layers");
114 Input(fineLevel, "CoarseNumZLayers");
115
116 // check whether fine level coordinate information is available.
117 // If yes, request the fine level coordinates and generate coarse coordinates
118 // during the Build call
119 if (fineLevel.GetLevelID() == 0) {
120 if (fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
121 fineLevel.DeclareInput("Coordinates", NoFactory::get(), this);
122 bTransferCoordinates_ = true;
123 }
124 } else if (bTransferCoordinates_ == true) {
125 // on coarser levels we check the default factory providing "Coordinates"
126 // or the factory declared to provide "Coordinates"
127 // first, check which factory is providing coordinate information
128 RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
129 if (myCoordsFact == Teuchos::null) {
130 myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
131 }
132 if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
133 fineLevel.DeclareInput("Coordinates", myCoordsFact.get(), this);
134 }
135 }
136}
137
138template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
139 class DeviceType>
141 Kokkos::Compat::KokkosDeviceWrapperNode<
142 DeviceType>>::Build(Level &fineLevel,
143 Level &coarseLevel)
144 const {
145 return BuildP(fineLevel, coarseLevel);
146}
147
148template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
149 class DeviceType>
151 Kokkos::Compat::KokkosDeviceWrapperNode<
152 DeviceType>>::BuildP(Level &fineLevel,
153 Level &coarseLevel)
154 const {
155 FactoryMonitor m(*this, "Build", coarseLevel);
156
157 // obtain general variables
158 RCP<Matrix> A = Get<RCP<Matrix>>(fineLevel, "A");
159 RCP<MultiVector> fineNullspace =
160 Get<RCP<MultiVector>>(fineLevel, "Nullspace");
161
162 // get user-provided coarsening rate parameter (constant over all levels)
163 const ParameterList &pL = GetParameterList();
164 LO CoarsenRate = as<LO>(pL.get<int>("semicoarsen: coarsen rate"));
165 TEUCHOS_TEST_FOR_EXCEPTION(
166 CoarsenRate < 2, Exceptions::RuntimeError,
167 "semicoarsen: coarsen rate must be greater than 1");
168
169 // collect general input data
170 LO BlkSize = A->GetFixedBlockSize();
171 RCP<const Map> rowMap = A->getRowMap();
172 LO Ndofs = rowMap->getLocalNumElements();
173 LO Nnodes = Ndofs / BlkSize;
174
175 // collect line detection information generated by the LineDetectionFactory
176 // instance
177 LO FineNumZLayers = Get<LO>(fineLevel, "CoarseNumZLayers");
178 Teuchos::ArrayRCP<LO> TVertLineId =
179 Get<Teuchos::ArrayRCP<LO>>(fineLevel, "LineDetection_VertLineIds");
180 Teuchos::ArrayRCP<LO> TLayerId =
181 Get<Teuchos::ArrayRCP<LO>>(fineLevel, "LineDetection_Layers");
182
183 // compute number of coarse layers
184 TEUCHOS_TEST_FOR_EXCEPTION(FineNumZLayers < 2, Exceptions::RuntimeError,
185 "Cannot coarsen further");
186 using coordT = typename Teuchos::ScalarTraits<Scalar>::coordinateType;
187 LO CoarseNumZLayers =
188 (LO)floor(((coordT)(FineNumZLayers + 1)) / ((coordT)CoarsenRate) - 1.0);
189 if (CoarseNumZLayers < 1)
190 CoarseNumZLayers = 1;
191
192 // generate transfer operator with semicoarsening
193 RCP<Matrix> P;
194 RCP<MultiVector> coarseNullspace;
195 BuildSemiCoarsenP(coarseLevel, Ndofs, Nnodes, BlkSize, FineNumZLayers,
196 CoarseNumZLayers, TLayerId, TVertLineId, A, fineNullspace, P,
197 coarseNullspace);
198
199 // Store number of coarse z-layers on the coarse level container
200 // This information is used by the LineDetectionAlgorithm
201 // TODO get rid of the NoFactory
202 coarseLevel.Set("NumZLayers", Teuchos::as<LO>(CoarseNumZLayers),
204
205 // store semicoarsening transfer on coarse level
206 Set(coarseLevel, "P", P);
207 Set(coarseLevel, "Nullspace", coarseNullspace);
208
209 // transfer coordinates
210 if (bTransferCoordinates_) {
211 SubFactoryMonitor m2(*this, "TransferCoordinates", coarseLevel);
212 typedef Xpetra::MultiVector<
213 typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO, NO>
214 xdMV;
215 RCP<xdMV> fineCoords = Teuchos::null;
216 if (fineLevel.GetLevelID() == 0 &&
217 fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
218 fineCoords = fineLevel.Get<RCP<xdMV>>("Coordinates", NoFactory::get());
219 } else {
220 RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
221 if (myCoordsFact == Teuchos::null) {
222 myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
223 }
224 if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
225 fineCoords =
226 fineLevel.Get<RCP<xdMV>>("Coordinates", myCoordsFact.get());
227 }
228 }
229
230 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords == Teuchos::null,
232 "No Coordinates found provided by the user.");
233
234 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3,
236 "Three coordinates arrays must be supplied if "
237 "line detection orientation not given.");
238 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> x =
239 fineCoords->getDataNonConst(0);
240 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> y =
241 fineCoords->getDataNonConst(1);
242 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> z =
243 fineCoords->getDataNonConst(2);
244
245 // determine the maximum and minimum z coordinate value on the current
246 // processor.
247 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_max =
248 -Teuchos::ScalarTraits<
249 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
250 Teuchos::ScalarTraits<
251 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
252 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_min =
253 Teuchos::ScalarTraits<
254 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
255 Teuchos::ScalarTraits<
256 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
257 for (auto it = z.begin(); it != z.end(); ++it) {
258 if (*it > zval_max)
259 zval_max = *it;
260 if (*it < zval_min)
261 zval_min = *it;
262 }
263
264 LO myCoarseZLayers = Teuchos::as<LO>(CoarseNumZLayers);
265
266 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType>
267 myZLayerCoords = Teuchos::arcp<
268 typename Teuchos::ScalarTraits<Scalar>::coordinateType>(
269 myCoarseZLayers);
270 if (myCoarseZLayers == 1) {
271 myZLayerCoords[0] = zval_min;
272 } else {
273 typename Teuchos::ScalarTraits<Scalar>::coordinateType dz =
274 (zval_max - zval_min) / (myCoarseZLayers - 1);
275 for (LO k = 0; k < myCoarseZLayers; ++k) {
276 myZLayerCoords[k] = k * dz;
277 }
278 }
279
280 // Note, that the coarse level node coordinates have to be in vertical
281 // ordering according to the numbering of the vertical lines
282
283 // number of vertical lines on current node:
284 LO numVertLines = Nnodes / FineNumZLayers;
285 LO numLocalCoarseNodes = numVertLines * myCoarseZLayers;
286
287 RCP<const Map> coarseCoordMap = MapFactory::Build(
288 fineCoords->getMap()->lib(),
289 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
290 Teuchos::as<size_t>(numLocalCoarseNodes),
291 fineCoords->getMap()->getIndexBase(), fineCoords->getMap()->getComm());
292 RCP<xdMV> coarseCoords = Xpetra::MultiVectorFactory<
293 typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO,
294 NO>::Build(coarseCoordMap, fineCoords->getNumVectors());
295 coarseCoords->putScalar(-1.0);
296 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cx =
297 coarseCoords->getDataNonConst(0);
298 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cy =
299 coarseCoords->getDataNonConst(1);
300 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cz =
301 coarseCoords->getDataNonConst(2);
302
303 // loop over all vert line indices (stop as soon as possible)
304 LO cntCoarseNodes = 0;
305 for (LO vt = 0; vt < TVertLineId.size(); ++vt) {
306 // vertical line id in *vt
307 LO curVertLineId = TVertLineId[vt];
308
309 if (cx[curVertLineId * myCoarseZLayers] == -1.0 &&
310 cy[curVertLineId * myCoarseZLayers] == -1.0) {
311 // loop over all local myCoarseZLayers
312 for (LO n = 0; n < myCoarseZLayers; ++n) {
313 cx[curVertLineId * myCoarseZLayers + n] = x[vt];
314 cy[curVertLineId * myCoarseZLayers + n] = y[vt];
315 cz[curVertLineId * myCoarseZLayers + n] = myZLayerCoords[n];
316 }
317 cntCoarseNodes += myCoarseZLayers;
318 }
319
320 TEUCHOS_TEST_FOR_EXCEPTION(cntCoarseNodes > numLocalCoarseNodes,
322 "number of coarse nodes is inconsistent.");
323 if (cntCoarseNodes == numLocalCoarseNodes)
324 break;
325 }
326
327 // set coarse level coordinates
328 Set(coarseLevel, "Coordinates", coarseCoords);
329 } /* end bool bTransferCoordinates */
330}
331
332template <class Scalar, class LocalOrdinal, class GlobalOrdinal,
333 class DeviceType>
336 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::
337 BuildSemiCoarsenP(Level &coarseLevel, const LO NFRows, const LO NFNodes,
338 const LO DofsPerNode, const LO NFLayers,
339 const LO NCLayers, const ArrayRCP<LO> LayerId,
340 const ArrayRCP<LO> VertLineId, const RCP<Matrix> &Amat,
341 const RCP<MultiVector> fineNullspace, RCP<Matrix> &P,
342 RCP<MultiVector> &coarseNullspace) const {
343 SubFactoryMonitor m2(*this, "BuildSemiCoarsenP", coarseLevel);
344 using impl_SC = typename Kokkos::ArithTraits<SC>::val_type;
345 using impl_ATS = Kokkos::ArithTraits<impl_SC>;
346 using LOView1D = Kokkos::View<LO *, DeviceType>;
347 using LOView2D = Kokkos::View<LO **, DeviceType>;
348
349 // Construct a map from fine level column to layer ids (including ghost nodes)
350 // Note: this is needed to sum all couplings within a layer
351 const auto FCol2LayerVector =
352 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getColMap());
353 const auto localTemp =
354 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getDomainMap());
355 RCP<const Import> importer = Amat->getCrsGraph()->getImporter();
356 if (importer == Teuchos::null)
357 importer = ImportFactory::Build(Amat->getDomainMap(), Amat->getColMap());
358 {
359 // Fill local temp with layer ids and fill ghost nodes
360 const auto localTempHost = localTemp->getHostLocalView(Xpetra::Access::ReadWrite);
361 for (int row = 0; row < NFRows; row++)
362 localTempHost(row, 0) = LayerId[row / DofsPerNode];
363 const auto localTempView = localTemp->getDeviceLocalView(Xpetra::Access::ReadWrite);
364 Kokkos::deep_copy(localTempView, localTempHost);
365 FCol2LayerVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
366 }
367 const auto FCol2LayerView = FCol2LayerVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
368 const auto FCol2Layer = Kokkos::subview(FCol2LayerView, Kokkos::ALL(), 0);
369
370 // Construct a map from fine level column to local dof per node id (including
371 // ghost nodes) Note: this is needed to sum all couplings within a layer
372 const auto FCol2DofVector =
373 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getColMap());
374 {
375 // Fill local temp with local dof per node ids and fill ghost nodes
376 const auto localTempHost = localTemp->getHostLocalView(Xpetra::Access::ReadWrite);
377 for (int row = 0; row < NFRows; row++)
378 localTempHost(row, 0) = row % DofsPerNode;
379 const auto localTempView = localTemp->getDeviceLocalView(Xpetra::Access::ReadWrite);
380 Kokkos::deep_copy(localTempView, localTempHost);
381 FCol2DofVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
382 }
383 const auto FCol2DofView = FCol2DofVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
384 const auto FCol2Dof = Kokkos::subview(FCol2DofView, Kokkos::ALL(), 0);
385
386 // Compute NVertLines
387 // TODO: Read this from line detection factory
388 int NVertLines = -1;
389 if (NFNodes != 0)
390 NVertLines = VertLineId[0];
391 for (int node = 1; node < NFNodes; ++node)
392 if (VertLineId[node] > NVertLines)
393 NVertLines = VertLineId[node];
394 NVertLines++;
395
396 // Construct a map from Line, Layer ids to fine level node
397 LOView2D LineLayer2Node("LineLayer2Node", NVertLines, NFLayers);
398 typename LOView2D::HostMirror LineLayer2NodeHost =
399 Kokkos::create_mirror_view(LineLayer2Node);
400 for (int node = 0; node < NFNodes; ++node)
401 LineLayer2NodeHost(VertLineId[node], LayerId[node]) = node;
402 Kokkos::deep_copy(LineLayer2Node, LineLayer2NodeHost);
403
404 // Construct a map from coarse layer id to fine layer id
405 LOView1D CLayer2FLayer("CLayer2FLayer", NCLayers);
406 typename LOView1D::HostMirror CLayer2FLayerHost =
407 Kokkos::create_mirror_view(CLayer2FLayer);
408 using coordT = typename Teuchos::ScalarTraits<Scalar>::coordinateType;
409 const LO FirstStride =
410 (LO)ceil(((coordT)(NFLayers + 1)) / ((coordT)(NCLayers + 1)));
411 const coordT RestStride =
412 ((coordT)(NFLayers - FirstStride + 1)) / ((coordT)NCLayers);
413 const LO NCpts =
414 (LO)floor((((coordT)(NFLayers - FirstStride + 1)) / RestStride) + .00001);
415 TEUCHOS_TEST_FOR_EXCEPTION(NCLayers != NCpts, Exceptions::RuntimeError,
416 "sizes do not match.");
417 coordT stride = (coordT)FirstStride;
418 for (int clayer = 0; clayer < NCLayers; ++clayer) {
419 CLayer2FLayerHost(clayer) = (LO)floor(stride) - 1;
420 stride += RestStride;
421 }
422 Kokkos::deep_copy(CLayer2FLayer, CLayer2FLayerHost);
423
424 // Compute start layer and stencil sizes for layer interpolation at each
425 // coarse layer
426 int MaxStencilSize = 1;
427 LOView1D CLayer2StartLayer("CLayer2StartLayer", NCLayers);
428 LOView1D CLayer2StencilSize("CLayer2StencilSize", NCLayers);
429 typename LOView1D::HostMirror CLayer2StartLayerHost =
430 Kokkos::create_mirror_view(CLayer2StartLayer);
431 typename LOView1D::HostMirror CLayer2StencilSizeHost =
432 Kokkos::create_mirror_view(CLayer2StencilSize);
433 for (int clayer = 0; clayer < NCLayers; ++clayer) {
434 const int startLayer = (clayer > 0) ? CLayer2FLayerHost(clayer - 1) + 1 : 0;
435 const int stencilSize = (clayer < NCLayers - 1)
436 ? CLayer2FLayerHost(clayer + 1) - startLayer
437 : NFLayers - startLayer;
438
439 if (MaxStencilSize < stencilSize)
440 MaxStencilSize = stencilSize;
441 CLayer2StartLayerHost(clayer) = startLayer;
442 CLayer2StencilSizeHost(clayer) = stencilSize;
443 }
444 Kokkos::deep_copy(CLayer2StartLayer, CLayer2StartLayerHost);
445 Kokkos::deep_copy(CLayer2StencilSize, CLayer2StencilSizeHost);
446
447 // Allocate storage for the coarse layer interpolation matrices on all
448 // vertical lines Note: Contributions to each matrix are collapsed to vertical
449 // lines. Thus, each vertical line gives rise to a block tridiagonal matrix.
450 // Here we store the full matrix to be compatible with kokkos kernels batch LU
451 // and tringular solve.
452 int Nmax = MaxStencilSize * DofsPerNode;
453 Kokkos::View<impl_SC ***, DeviceType> BandMat(
454 "BandMat", NVertLines, Nmax, Nmax);
455 Kokkos::View<impl_SC ***, DeviceType> BandSol(
456 "BandSol", NVertLines, Nmax, DofsPerNode);
457
458 // Precompute number of nonzeros in prolongation matrix and allocate P views
459 // Note: Each coarse dof (NVertLines*NCLayers*DofsPerNode) contributes an
460 // interpolation stencil (StencilSize*DofsPerNode)
461 int NnzP = 0;
462 for (int clayer = 0; clayer < NCLayers; ++clayer)
463 NnzP += CLayer2StencilSizeHost(clayer);
464 NnzP *= NVertLines * DofsPerNode * DofsPerNode;
465 Kokkos::View<impl_SC *, DeviceType> Pvals("Pvals", NnzP);
466 Kokkos::View<LO *, DeviceType> Pcols("Pcols", NnzP);
467
468 // Precompute Pptr
469 // Note: Each coarse layer stencil dof contributes DofsPerNode to the
470 // corresponding row in P
471 Kokkos::View<size_t *, DeviceType> Pptr("Pptr", NFRows + 1);
472 typename Kokkos::View<size_t *, DeviceType>::HostMirror PptrHost =
473 Kokkos::create_mirror_view(Pptr);
474 Kokkos::deep_copy(PptrHost, 0);
475 for (int line = 0; line < NVertLines; ++line) {
476 for (int clayer = 0; clayer < NCLayers; ++clayer) {
477 const int stencilSize = CLayer2StencilSizeHost(clayer);
478 const int startLayer = CLayer2StartLayerHost(clayer);
479 for (int snode = 0; snode < stencilSize; ++snode) {
480 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
481 const int layer = startLayer + snode;
482 const int AmatBlkRow = LineLayer2NodeHost(line, layer);
483 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
484 PptrHost(AmatRow + 1) += DofsPerNode;
485 }
486 }
487 }
488 }
489 for (int i = 2; i < NFRows + 1; ++i)
490 PptrHost(i) += PptrHost(i - 1);
491 TEUCHOS_TEST_FOR_EXCEPTION(NnzP != (int)PptrHost(NFRows),
493 "Number of nonzeros in P does not match");
494 Kokkos::deep_copy(Pptr, PptrHost);
495
496 // Precompute Pptr offsets
497 // Note: These are used to determine the nonzero index in Pvals and Pcols
498 Kokkos::View<LO *, Kokkos::DefaultHostExecutionSpace> layerBuckets(
499 "layerBuckets", NFLayers);
500 Kokkos::deep_copy(layerBuckets, 0);
501 LOView2D CLayerSNode2PptrOffset("CLayerSNode2PptrOffset", NCLayers,
502 MaxStencilSize);
503 typename LOView2D::HostMirror CLayerSNode2PptrOffsetHost =
504 Kokkos::create_mirror_view(CLayerSNode2PptrOffset);
505 for (int clayer = 0; clayer < NCLayers; ++clayer) {
506 const int stencilSize = CLayer2StencilSizeHost(clayer);
507 const int startLayer = CLayer2StartLayerHost(clayer);
508 for (int snode = 0; snode < stencilSize; ++snode) {
509 const int layer = startLayer + snode;
510 CLayerSNode2PptrOffsetHost(clayer, snode) = layerBuckets(layer);
511 layerBuckets(layer)++;
512 }
513 }
514 Kokkos::deep_copy(CLayerSNode2PptrOffset, CLayerSNode2PptrOffsetHost);
515
516 { // Fill P - fill and solve each block tridiagonal system and fill P views
517 SubFactoryMonitor m3(*this, "Fill P", coarseLevel);
518
519 const auto localAmat = Amat->getLocalMatrixDevice();
520 const auto zero = impl_ATS::zero();
521 const auto one = impl_ATS::one();
522
523 using range_policy = Kokkos::RangePolicy<execution_space>;
524 Kokkos::parallel_for(
525 "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Fill P",
526 range_policy(0, NVertLines), KOKKOS_LAMBDA(const int line) {
527 for (int clayer = 0; clayer < NCLayers; ++clayer) {
528
529 // Initialize BandSol
530 auto bandSol =
531 Kokkos::subview(BandSol, line, Kokkos::ALL(), Kokkos::ALL());
532 for (int row = 0; row < Nmax; ++row)
533 for (int dof = 0; dof < DofsPerNode; ++dof)
534 bandSol(row, dof) = zero;
535
536 // Initialize BandMat (set unused row diagonal to 1.0)
537 const int stencilSize = CLayer2StencilSize(clayer);
538 const int N = stencilSize * DofsPerNode;
539 auto bandMat =
540 Kokkos::subview(BandMat, line, Kokkos::ALL(), Kokkos::ALL());
541 for (int row = 0; row < Nmax; ++row)
542 for (int col = 0; col < Nmax; ++col)
543 bandMat(row, col) =
544 (row == col && row >= N) ? one : zero;
545
546 // Loop over layers in stencil and fill banded matrix and rhs
547 const int flayer = CLayer2FLayer(clayer);
548 const int startLayer = CLayer2StartLayer(clayer);
549 for (int snode = 0; snode < stencilSize; ++snode) {
550
551 const int layer = startLayer + snode;
552 if (layer == flayer) { // If layer in stencil is a coarse layer
553 for (int dof = 0; dof < DofsPerNode; ++dof) {
554 const int row = snode * DofsPerNode + dof;
555 bandMat(row, row) = one;
556 bandSol(row, dof) = one;
557 }
558 } else { // Not a coarse layer
559 const int AmatBlkRow = LineLayer2Node(line, layer);
560 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
561
562 // Get Amat row info
563 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
564 const auto localAmatRow = localAmat.rowConst(AmatRow);
565 const int AmatRowLeng = localAmatRow.length;
566
567 const int row = snode * DofsPerNode + dofi;
568 for (int dofj = 0; dofj < DofsPerNode; ++dofj) {
569 const int col = snode * DofsPerNode + dofj;
570
571 // Sum values along row which correspond to stencil
572 // layer/dof and fill bandMat
573 auto val = zero;
574 for (int i = 0; i < AmatRowLeng; ++i) {
575 const int colidx = localAmatRow.colidx(i);
576 if (FCol2Layer(colidx) == layer &&
577 FCol2Dof(colidx) == dofj)
578 val += localAmatRow.value(i);
579 }
580 bandMat(row, col) = val;
581
582 if (snode > 0) {
583 // Sum values along row which correspond to stencil
584 // layer/dof below and fill bandMat
585 val = zero;
586 for (int i = 0; i < AmatRowLeng; ++i) {
587 const int colidx = localAmatRow.colidx(i);
588 if (FCol2Layer(colidx) == layer - 1 &&
589 FCol2Dof(colidx) == dofj)
590 val += localAmatRow.value(i);
591 }
592 bandMat(row, col - DofsPerNode) = val;
593 }
594
595 if (snode < stencilSize - 1) {
596 // Sum values along row which correspond to stencil
597 // layer/dof above and fill bandMat
598 val = zero;
599 for (int i = 0; i < AmatRowLeng; ++i) {
600 const int colidx = localAmatRow.colidx(i);
601 if (FCol2Layer(colidx) == layer + 1 &&
602 FCol2Dof(colidx) == dofj)
603 val += localAmatRow.value(i);
604 }
605 bandMat(row, col + DofsPerNode) = val;
606 }
607 }
608 }
609 }
610 }
611
612 // Batch LU and triangular solves
613 namespace KB = KokkosBatched;
614 using lu_type = typename KB::SerialLU<KB::Algo::LU::Unblocked>;
615 lu_type::invoke(bandMat);
616 using trsv_l_type =
617 typename KB::SerialTrsm<KB::Side::Left, KB::Uplo::Lower,
618 KB::Trans::NoTranspose, KB::Diag::Unit,
619 KB::Algo::Trsm::Unblocked>;
620 trsv_l_type::invoke(one, bandMat, bandSol);
621 using trsv_u_type = typename KB::SerialTrsm<
622 KB::Side::Left, KB::Uplo::Upper, KB::Trans::NoTranspose,
623 KB::Diag::NonUnit, KB::Algo::Trsm::Unblocked>;
624 trsv_u_type::invoke(one, bandMat, bandSol);
625
626 // Fill prolongation views with solution
627 for (int snode = 0; snode < stencilSize; ++snode) {
628 for (int dofj = 0; dofj < DofsPerNode; ++dofj) {
629 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
630 const int layer = startLayer + snode;
631 const int AmatBlkRow = LineLayer2Node(line, layer);
632 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
633
634 const int pptrOffset = CLayerSNode2PptrOffset(clayer, snode);
635 const int pptr =
636 Pptr(AmatRow) + pptrOffset * DofsPerNode + dofj;
637
638 const int col =
639 line * NCLayers + clayer; // coarse node (block row) index
640 Pcols(pptr) = col * DofsPerNode + dofj;
641 Pvals(pptr) = bandSol(snode * DofsPerNode + dofi, dofj);
642 }
643 }
644 }
645 }
646 });
647 } // Fill P
648
649 // Build P
650 RCP<const Map> rowMap = Amat->getRowMap();
651 Xpetra::global_size_t GNdofs = rowMap->getGlobalNumElements();
652 Xpetra::global_size_t itemp = GNdofs / NFLayers;
653 std::vector<size_t> stridingInfo_{(size_t)DofsPerNode};
654 RCP<const Map> coarseMap = StridedMapFactory::Build(
655 rowMap->lib(), NCLayers * itemp, NCLayers * NVertLines * DofsPerNode, 0,
656 stridingInfo_, rowMap->getComm(), -1, 0);
657 P = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
658 RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
659 PCrs->setAllValues(Pptr, Pcols, Pvals);
660 PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
661
662 // set StridingInformation of P
663 if (Amat->IsView("stridedMaps") == true)
664 P->CreateView("stridedMaps", Amat->getRowMap("stridedMaps"), coarseMap);
665 else
666 P->CreateView("stridedMaps", P->getRangeMap(), coarseMap);
667
668 // Construct coarse nullspace and inject fine nullspace
669 coarseNullspace =
670 MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
671 const int numVectors = fineNullspace->getNumVectors();
672 const auto fineNullspaceView = fineNullspace->getDeviceLocalView(Xpetra::Access::ReadOnly);
673 const auto coarseNullspaceView = coarseNullspace->getDeviceLocalView(Xpetra::Access::ReadWrite);
674 using range_policy = Kokkos::RangePolicy<execution_space>;
675 Kokkos::parallel_for(
676 "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Inject Nullspace",
677 range_policy(0, NVertLines), KOKKOS_LAMBDA(const int line) {
678 for (int clayer = 0; clayer < NCLayers; ++clayer) {
679 const int layer = CLayer2FLayer(clayer);
680 const int AmatBlkRow =
681 LineLayer2Node(line, layer); // fine node (block row) index
682 const int col =
683 line * NCLayers + clayer; // coarse node (block row) index
684 for (int k = 0; k < numVectors; ++k) {
685 for (int dofi = 0; dofi < DofsPerNode; ++dofi) {
686 const int fRow = AmatBlkRow * DofsPerNode + dofi;
687 const int cRow = col * DofsPerNode + dofi;
688 coarseNullspaceView(cRow, k) = fineNullspaceView(fRow, k);
689 }
690 }
691 }
692 });
693}
694
695} // namespace MueLu
696
697#define MUELU_SEMICOARSENPFACTORY_KOKKOS_SHORT
698#endif // MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static const Teuchos::ParameterEntry & getEntry(const std::string &name)
Returns default entry from the "master" list corresponding to the specified name.
static const NoFactory * get()
Prolongator factory performing semi-coarsening.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
Namespace for MueLu classes and methods.