MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_RegionRFactory_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
47#define MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
48
49#include "Kokkos_UnorderedMap.hpp"
50
52
53#include <Xpetra_Matrix.hpp>
54#include <Xpetra_CrsGraphFactory.hpp>
55
56#include "MueLu_Types.hpp"
57
58#include "MueLu_Monitor.hpp"
59#include "MueLu_MasterList.hpp"
60#include "MueLu_Utilities_kokkos.hpp"
61
62
63namespace MueLu {
64
65 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
68 RCP<ParameterList> validParamList = rcp(new ParameterList());
69
70 validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null,
71 "Generating factory of the matrix A");
72 validParamList->set<RCP<const FactoryBase> >("numDimensions", Teuchos::null,
73 "Number of spatial dimensions in the problem.");
74 validParamList->set<RCP<const FactoryBase> >("lNodesPerDim", Teuchos::null,
75 "Number of local nodes per spatial dimension on the fine grid.");
76 validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null,
77 "Fine level nullspace used to construct the coarse level nullspace.");
78 validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null,
79 "Fine level coordinates used to construct piece-wise linear prolongator and coarse level coordinates.");
80 validParamList->set<bool> ("keep coarse coords", false, "Flag to keep coordinates for special coarse grid solve");
81
82 return validParamList;
83 } // GetValidParameterList()
84
85 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
87 DeclareInput(Level& fineLevel, Level& /* coarseLevel */) const {
88
89 Input(fineLevel, "A");
90 Input(fineLevel, "numDimensions");
91 Input(fineLevel, "lNodesPerDim");
92 Input(fineLevel, "Nullspace");
93 Input(fineLevel, "Coordinates");
94
95 } // DeclareInput()
96
97 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
99 Build(Level& fineLevel, Level& coarseLevel) const {
100
101 // Set debug outputs based on environment variable
102 RCP<Teuchos::FancyOStream> out;
103 if(const char* dbg = std::getenv("MUELU_REGIONRFACTORY_DEBUG")) {
104 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
105 out->setShowAllFrontMatter(false).setShowProcRank(true);
106 } else {
107 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
108 }
109
110 *out << "Starting RegionRFactory_kokkos::Build." << std::endl;
111
112 // First get the inputs from the fineLevel
113 const int numDimensions = Get<int>(fineLevel, "numDimensions");
114 Array<LO> lFineNodesPerDim(3, Teuchos::OrdinalTraits<LO>::one());
115 {
116 Array<LO> lNodesPerDim = Get<Array<LO> >(fineLevel, "lNodesPerDim");
117 for(int dim = 0; dim < numDimensions; ++dim) {
118 lFineNodesPerDim[dim] = lNodesPerDim[dim];
119 }
120 }
121 *out << "numDimensions " << numDimensions << " and lFineNodesPerDim: " << lFineNodesPerDim
122 << std::endl;
123
124 // Let us check that the inputs verify our assumptions
125 if(numDimensions < 1 || numDimensions > 3) {
126 throw std::runtime_error("numDimensions must be 1, 2 or 3!");
127 }
128 for(int dim = 0; dim < numDimensions; ++dim) {
129 if(lFineNodesPerDim[dim] % 3 != 1) {
130 throw std::runtime_error("The number of fine node in each direction need to be 3n+1");
131 }
132 }
133 Array<LO> lCoarseNodesPerDim(3, Teuchos::OrdinalTraits<LO>::one());
134
135 const RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
136
137 RCP<realvaluedmultivector_type> fineCoordinates, coarseCoordinates;
138 fineCoordinates = Get< RCP<realvaluedmultivector_type> >(fineLevel, "Coordinates");
139 if(static_cast<int>(fineCoordinates->getNumVectors()) != numDimensions) {
140 throw std::runtime_error("The number of vectors in the coordinates is not equal to numDimensions!");
141 }
142
143 // Let us create R and pass it down to the
144 // appropriate specialization and see what we
145 // get back!
146 RCP<Matrix> R;
147
148 if(numDimensions == 1) {
149 throw std::runtime_error("RegionRFactory_kokkos no implemented for 1D case yet.");
150 } else if(numDimensions == 2) {
151 throw std::runtime_error("RegionRFactory_kokkos no implemented for 2D case yet.");
152 } else if(numDimensions == 3) {
153 Build3D(numDimensions, lFineNodesPerDim, A, fineCoordinates,
154 R, coarseCoordinates, lCoarseNodesPerDim);
155 }
156
157 const Teuchos::ParameterList& pL = GetParameterList();
158
159 // Reuse pattern if available (multiple solve)
160 RCP<ParameterList> Tparams;
161 if(pL.isSublist("matrixmatrix: kernel params"))
162 Tparams=rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
163 else
164 Tparams= rcp(new ParameterList);
165
166 // R->describe(*out, Teuchos::VERB_EXTREME);
167 *out << "Compute P=R^t" << std::endl;
168 // By default, we don't need global constants for transpose
169 Tparams->set("compute global constants: temporaries",Tparams->get("compute global constants: temporaries", false));
170 Tparams->set("compute global constants", Tparams->get("compute global constants",false));
171 std::string label = "MueLu::RegionR-transR" + Teuchos::toString(coarseLevel.GetLevelID());
172 RCP<Matrix> P = Utilities::Transpose(*R, true, label, Tparams);
173
174 *out << "Compute coarse nullspace" << std::endl;
175 RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, "Nullspace");
176 RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(R->getRowMap(),
177 fineNullspace->getNumVectors());
178 R->apply(*fineNullspace, *coarseNullspace, Teuchos::NO_TRANS, Teuchos::ScalarTraits<SC>::one(),
179 Teuchos::ScalarTraits<SC>::zero());
180
181 *out << "Set data on coarse level" << std::endl;
182 Set(coarseLevel, "numDimensions", numDimensions);
183 Set(coarseLevel, "lNodesPerDim", lCoarseNodesPerDim);
184 Set(coarseLevel, "Nullspace", coarseNullspace);
185 Set(coarseLevel, "Coordinates", coarseCoordinates);
186 if(pL.get<bool>("keep coarse coords")) {
187 coarseLevel.Set<RCP<realvaluedmultivector_type> >("Coordinates2", coarseCoordinates, NoFactory::get());
188 }
189
190 R->SetFixedBlockSize(A->GetFixedBlockSize());
191 P->SetFixedBlockSize(A->GetFixedBlockSize());
192
193 Set(coarseLevel, "R", R);
194 Set(coarseLevel, "P", P);
195
196 } // Build()
197
198 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
200 Build3D(const int numDimensions,
201 Teuchos::Array<LocalOrdinal>& lFineNodesPerDim,
202 const RCP<Matrix>& A,
203 const RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::coordinateType, LocalOrdinal, GlobalOrdinal, Node> >& fineCoordinates,
204 RCP<Matrix>& R,
205 RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::coordinateType, LocalOrdinal, GlobalOrdinal, Node> >& coarseCoordinates,
206 Teuchos::Array<LocalOrdinal>& lCoarseNodesPerDim) const {
207 using local_matrix_type = typename CrsMatrix::local_matrix_type;
208 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
209 using row_map_type = typename local_matrix_type::row_map_type::non_const_type;
210 using entries_type = typename local_matrix_type::index_type::non_const_type;
211 using values_type = typename local_matrix_type::values_type::non_const_type;
212 using impl_scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;
213
214 // Set debug outputs based on environment variable
215 RCP<Teuchos::FancyOStream> out;
216 if(const char* dbg = std::getenv("MUELU_REGIONRFACTORY_DEBUG")) {
217 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
218 out->setShowAllFrontMatter(false).setShowProcRank(true);
219 } else {
220 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
221 }
222
223 // Now compute number of coarse grid points
224 for(int dim = 0; dim < numDimensions; ++dim) {
225 lCoarseNodesPerDim[dim] = lFineNodesPerDim[dim] / 3 + 1;
226 }
227 *out << "lCoarseNodesPerDim " << lCoarseNodesPerDim << std::endl;
228
229 // Grab the block size here and multiply all existing offsets by it
230 const LO blkSize = A->GetFixedBlockSize();
231 *out << "blkSize " << blkSize << std::endl;
232
233 // Based on lCoarseNodesPerDim and lFineNodesPerDim
234 // we can compute numRows, numCols and NNZ for R
235 const LO numRows = blkSize*lCoarseNodesPerDim[0]*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[2];
236 const LO numCols = blkSize*lFineNodesPerDim[0]*lFineNodesPerDim[1]*lFineNodesPerDim[2];
237
238 // Create the coarse coordinates multivector
239 // so we can fill it on the fly while computing
240 // the restriction operator
241 RCP<Map> rowMap = MapFactory::Build(A->getRowMap()->lib(),
242 Teuchos::OrdinalTraits<GO>::invalid(),
243 numRows,
244 A->getRowMap()->getIndexBase(),
245 A->getRowMap()->getComm());
246
247 RCP<Map> coordRowMap = MapFactory::Build(A->getRowMap()->lib(),
248 Teuchos::OrdinalTraits<GO>::invalid(),
249 lCoarseNodesPerDim[0]*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[2],
250 A->getRowMap()->getIndexBase(),
251 A->getRowMap()->getComm());
252
253 coarseCoordinates = Xpetra::MultiVectorFactory<real_type, LO, GO, NO>::Build(coordRowMap,
254 numDimensions);
255
256 // Get device views of coordinates
257 auto fineCoordsView = fineCoordinates ->getDeviceLocalView(Xpetra::Access::ReadOnly);
258 auto coarseCoordsView = coarseCoordinates->getDeviceLocalView(Xpetra::Access::OverwriteAll);
259
260
261 Array<ArrayRCP<const real_type> > fineCoordData(numDimensions);
262 Array<ArrayRCP<real_type> > coarseCoordData(numDimensions);
263 for(int dim = 0; dim < numDimensions; ++dim) {
264 fineCoordData[dim] = fineCoordinates->getData(dim);
265 coarseCoordData[dim] = coarseCoordinates->getDataNonConst(dim);
266 }
267
268 // Let us set some parameter that will be useful
269 // while constructing R
270
271 // Length of interpolation stencils based on geometry
272 const LO cornerStencilLength = 27;
273 const LO edgeStencilLength = 45;
274 const LO faceStencilLength = 75;
275 const LO interiorStencilLength = 125;
276
277 // Number of corner, edge, face and interior nodes
278 const LO numCorners = 8;
279 const LO numEdges = 4*(lCoarseNodesPerDim[0] - 2)
280 + 4*(lCoarseNodesPerDim[1] - 2)
281 + 4*(lCoarseNodesPerDim[2] - 2);
282 const LO numFaces = 2*(lCoarseNodesPerDim[0] - 2)*(lCoarseNodesPerDim[1] - 2)
283 + 2*(lCoarseNodesPerDim[0] - 2)*(lCoarseNodesPerDim[2] - 2)
284 + 2*(lCoarseNodesPerDim[1] - 2)*(lCoarseNodesPerDim[2] - 2);
285 const LO numInteriors = (lCoarseNodesPerDim[0] - 2)*(lCoarseNodesPerDim[1] - 2)
286 *(lCoarseNodesPerDim[2] - 2);
287
288 const LO nnz = (numCorners*cornerStencilLength + numEdges*edgeStencilLength
289 + numFaces*faceStencilLength + numInteriors*interiorStencilLength)*blkSize;
290
291 // Having the number of rows and columns we can genrate
292 // the appropriate maps for our operator.
293
294 *out << "R statistics:" << std::endl
295 << " -numRows= " << numRows << std::endl
296 << " -numCols= " << numCols << std::endl
297 << " -nnz= " << nnz << std::endl;
298
299 row_map_type row_map(Kokkos::ViewAllocateWithoutInitializing("row_map"), numRows + 1);
300 typename row_map_type::HostMirror row_map_h = Kokkos::create_mirror_view(row_map);
301
302 entries_type entries(Kokkos::ViewAllocateWithoutInitializing("entries"), nnz);
303 typename entries_type::HostMirror entries_h = Kokkos::create_mirror_view(entries);
304
305 values_type values(Kokkos::ViewAllocateWithoutInitializing("values"), nnz);
306 typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
307
308 // Compute the basic interpolation
309 // coefficients for 1D rate of 3
310 // coarsening.
311 Array<SC> coeffs({1.0/3.0, 2.0/3.0, 1.0, 2.0/3.0, 1.0/3.0});
312 row_map_h(0) = 0;
313
314 // Define some offsets that
315 // will be needed often later on
316 const LO edgeLineOffset = 2*cornerStencilLength + (lCoarseNodesPerDim[0] - 2)*edgeStencilLength;
317 const LO faceLineOffset = 2*edgeStencilLength + (lCoarseNodesPerDim[0] - 2)*faceStencilLength;
318 const LO interiorLineOffset = 2*faceStencilLength
319 + (lCoarseNodesPerDim[0] - 2)*interiorStencilLength;
320
321 const LO facePlaneOffset = 2*edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset;
322 const LO interiorPlaneOffset = 2*faceLineOffset + (lCoarseNodesPerDim[1] - 2)*interiorLineOffset;
323
324 // Let us take care of the corners
325 // first since we always have
326 // corners to deal with!
327 {
328 // Corner 1
329 LO coordRowIdx = 0, rowIdx = 0, coordColumnOffset = 0, columnOffset = 0, entryOffset = 0;
330 for(LO l = 0; l < blkSize; ++l) {
331 for(LO k = 0; k < 3; ++k) {
332 for(LO j = 0; j < 3; ++j) {
333 for(LO i = 0; i < 3; ++i) {
334 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
335 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i)*blkSize + l;
336 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k + 2]*coeffs[j + 2]*coeffs[i + 2];
337 }
338 }
339 }
340 }
341 for(LO l = 0; l < blkSize; ++l) {
342 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
343 }
344 for(int dim = 0; dim <numDimensions; ++dim) {
345 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
346 }
347
348 // Corner 5
349 coordRowIdx += (lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
350 rowIdx = coordRowIdx*blkSize;
351 coordColumnOffset += (lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0];
352 columnOffset = coordColumnOffset*blkSize;
353 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset)*blkSize;
354 for(LO l = 0; l < blkSize; ++l) {
355 for(LO k = 0; k < 3; ++k) {
356 for(LO j = 0; j < 3; ++j) {
357 for(LO i = 0; i < 3; ++i) {
358 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
359 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i)*blkSize + l;
360 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i + 2];
361 }
362 }
363 }
364 }
365 for(LO l = 0; l < blkSize; ++l) {
366 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
367 }
368 for(int dim = 0; dim <numDimensions; ++dim) {
369 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
370 }
371
372 // Corner 2
373 coordRowIdx = (lCoarseNodesPerDim[0] - 1);
374 rowIdx = coordRowIdx*blkSize;
375 coordColumnOffset = (lFineNodesPerDim[0] - 1);
376 columnOffset = coordColumnOffset*blkSize;
377 entryOffset = (cornerStencilLength + (lCoarseNodesPerDim[0] - 2)*edgeStencilLength)*blkSize;
378 for(LO l = 0; l < blkSize; ++l) {
379 for(LO k = 0; k < 3; ++k) {
380 for(LO j = 0; j < 3; ++j) {
381 for(LO i = 0; i < 3; ++i) {
382 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
383 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0]
384 + j*lFineNodesPerDim[0] + (i - 2))*blkSize + l;
385 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k + 2]*coeffs[j + 2]*coeffs[i];
386 }
387 }
388 }
389 }
390 for(LO l = 0; l < blkSize; ++l) {
391 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
392 }
393 for(int dim = 0; dim <numDimensions; ++dim) {
394 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
395 }
396
397 // Corner 6
398 coordRowIdx += (lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
399 rowIdx = coordRowIdx*blkSize;
400 coordColumnOffset += (lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0];
401 columnOffset = coordColumnOffset*blkSize;
402 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset)*blkSize;
403 for(LO l = 0; l < blkSize; ++l) {
404 for(LO k = 0; k < 3; ++k) {
405 for(LO j = 0; j < 3; ++j) {
406 for(LO i = 0; i < 3; ++i) {
407 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
408 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i - 2)*blkSize + l;
409 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i];
410 }
411 }
412 }
413 }
414 for(LO l = 0; l < blkSize; ++l) {
415 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
416 }
417 for(int dim = 0; dim <numDimensions; ++dim) {
418 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
419 }
420
421 // Corner 3
422 coordRowIdx = (lCoarseNodesPerDim[1] - 1)*lCoarseNodesPerDim[0];
423 rowIdx = coordRowIdx*blkSize;
424 coordColumnOffset = (lFineNodesPerDim[1] - 1)*lFineNodesPerDim[0];
425 columnOffset = coordColumnOffset*blkSize;
426 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset)*blkSize;
427 for(LO l = 0; l < blkSize; ++l) {
428 for(LO k = 0; k < 3; ++k) {
429 for(LO j = 0; j < 3; ++j) {
430 for(LO i = 0; i < 3; ++i) {
431 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
432 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0]
433 + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
434 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i + 2];
435 }
436 }
437 }
438 }
439 for(LO l = 0; l < blkSize; ++l) {
440 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
441 }
442 for(int dim = 0; dim <numDimensions; ++dim) {
443 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
444 }
445
446 // Corner 7
447 coordRowIdx += (lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
448 rowIdx = coordRowIdx*blkSize;
449 coordColumnOffset += (lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0];
450 columnOffset = coordColumnOffset*blkSize;
451 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset)*blkSize;
452 for(LO l = 0; l < blkSize; ++l) {
453 for(LO k = 0; k < 3; ++k) {
454 for(LO j = 0; j < 3; ++j) {
455 for(LO i = 0; i < 3; ++i) {
456 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
457 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
458 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i + 2];
459 }
460 }
461 }
462 }
463 for(LO l = 0; l < blkSize; ++l) {
464 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
465 }
466 for(int dim = 0; dim <numDimensions; ++dim) {
467 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
468 }
469
470 // Corner 4
471 coordRowIdx = (lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0] - 1);
472 rowIdx = coordRowIdx*blkSize;
473 coordColumnOffset = (lFineNodesPerDim[1]*lFineNodesPerDim[0] - 1);
474 columnOffset = coordColumnOffset*blkSize;
475 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset +
476 cornerStencilLength + (lCoarseNodesPerDim[0] - 2)*edgeStencilLength)*blkSize;
477 for(LO l = 0; l < blkSize; ++l) {
478 for(LO k = 0; k < 3; ++k) {
479 for(LO j = 0; j < 3; ++j) {
480 for(LO i = 0; i < 3; ++i) {
481 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
482 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0]
483 + (j - 2)*lFineNodesPerDim[0] + (i - 2))*blkSize + l;
484 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i];
485 }
486 }
487 }
488 }
489 for(LO l = 0; l < blkSize; ++l) {
490 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
491 }
492 for(int dim = 0; dim <numDimensions; ++dim) {
493 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
494 }
495
496 // Corner 8
497 coordRowIdx += (lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
498 rowIdx = coordRowIdx*blkSize;
499 coordColumnOffset += (lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0];
500 columnOffset = coordColumnOffset*blkSize;
501 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset)*blkSize;
502 for(LO l = 0; l < blkSize; ++l) {
503 for(LO k = 0; k < 3; ++k) {
504 for(LO j = 0; j < 3; ++j) {
505 for(LO i = 0; i < 3; ++i) {
506 entries_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = columnOffset
507 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + (i - 2))*blkSize + l;
508 values_h(entryOffset + k*9 + j*3 + i + cornerStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i];
509 }
510 }
511 }
512 }
513 for(LO l = 0; l < blkSize; ++l) {
514 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength*(l+1);
515 }
516 for(int dim = 0; dim <numDimensions; ++dim) {
517 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
518 }
519 } // Corners are done!
520
521 // Edges along 0 direction
522 if(lCoarseNodesPerDim[0] - 2 > 0) {
523
524 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
525 for(LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[0] - 2; ++edgeIdx) {
526
527 // Edge 0
528 coordRowIdx = (edgeIdx + 1);
529 rowIdx = coordRowIdx*blkSize;
530 coordColumnOffset = (edgeIdx + 1)*3;
531 columnOffset = coordColumnOffset*blkSize;
532 entryOffset = (cornerStencilLength + edgeIdx*edgeStencilLength)*blkSize;
533 for(LO l = 0; l < blkSize; ++l) {
534 for(LO k = 0; k < 3; ++k) {
535 for(LO j = 0; j < 3; ++j) {
536 for(LO i = 0; i < 5; ++i) {
537 entries_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = columnOffset
538 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i - 2)*blkSize + l;
539 values_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = coeffs[k + 2]*coeffs[j + 2]*coeffs[i];
540 }
541 }
542 }
543 }
544 for(LO l = 0; l < blkSize; ++l) {
545 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
546 }
547 for(int dim = 0; dim <numDimensions; ++dim) {
548 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
549 }
550
551 // Edge 1
552 coordRowIdx = ((lCoarseNodesPerDim[1] - 1)*lCoarseNodesPerDim[0] + edgeIdx + 1);
553 rowIdx = coordRowIdx*blkSize;
554 coordColumnOffset = ((lFineNodesPerDim[1] - 1)*lFineNodesPerDim[0] + (edgeIdx + 1)*3);
555 columnOffset = coordColumnOffset*blkSize;
556 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset
557 + cornerStencilLength + edgeIdx*edgeStencilLength)*blkSize;
558 for(LO l = 0; l < blkSize; ++l) {
559 for(LO k = 0; k < 3; ++k) {
560 for(LO j = 0; j < 3; ++j) {
561 for(LO i = 0; i < 5; ++i) {
562 entries_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = columnOffset
563 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
564 values_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i];
565 }
566 }
567 }
568 }
569 for(LO l = 0; l < blkSize; ++l) {
570 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
571 }
572 for(int dim = 0; dim <numDimensions; ++dim) {
573 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
574 }
575
576 // Edge 2
577 coordRowIdx = ((lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
578 + edgeIdx + 1);
579 rowIdx = coordRowIdx*blkSize;
580 coordColumnOffset = ((lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
581 + (edgeIdx + 1)*3);
582 columnOffset = coordColumnOffset*blkSize;
583 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset
584 + cornerStencilLength + edgeIdx*edgeStencilLength)*blkSize;
585 for(LO l = 0; l < blkSize; ++l) {
586 for(LO k = 0; k < 3; ++k) {
587 for(LO j = 0; j < 3; ++j) {
588 for(LO i = 0; i < 5; ++i) {
589 entries_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = columnOffset
590 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i - 2)*blkSize + l;
591 values_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i];
592 }
593 }
594 }
595 }
596 for(LO l = 0; l < blkSize; ++l) {
597 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
598 }
599 for(int dim = 0; dim <numDimensions; ++dim) {
600 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
601 }
602
603 // Edge 3
604 coordRowIdx = ((lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
605 + (lCoarseNodesPerDim[1] - 1)*lCoarseNodesPerDim[0] + edgeIdx + 1);
606 rowIdx = coordRowIdx*blkSize;
607 coordColumnOffset = ((lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
608 + (lFineNodesPerDim[1] - 1)*lFineNodesPerDim[0] + (edgeIdx + 1)*3);
609 columnOffset = coordColumnOffset*blkSize;
610 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset
611 + edgeLineOffset + (lCoarseNodesPerDim[1] - 2)*faceLineOffset
612 + cornerStencilLength + edgeIdx*edgeStencilLength)*blkSize;
613 for(LO l = 0; l < blkSize; ++l) {
614 for(LO k = 0; k < 3; ++k) {
615 for(LO j = 0; j < 3; ++j) {
616 for(LO i = 0; i < 5; ++i) {
617 entries_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = columnOffset
618 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
619 + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
620 values_h(entryOffset + k*15 + j*5 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i];
621 }
622 }
623 }
624 }
625 for(LO l = 0; l < blkSize; ++l) {
626 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
627 }
628 for(int dim = 0; dim <numDimensions; ++dim) {
629 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
630 }
631 }
632 }
633
634 // Edges along 1 direction
635 if(lCoarseNodesPerDim[1] - 2 > 0) {
636
637 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
638 for(LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[1] - 2; ++edgeIdx) {
639
640 // Edge 0
641 coordRowIdx = (edgeIdx + 1)*lCoarseNodesPerDim[0];
642 rowIdx = coordRowIdx*blkSize;
643 coordColumnOffset = (edgeIdx + 1)*3*lFineNodesPerDim[0];
644 columnOffset = coordColumnOffset*blkSize;
645 entryOffset = (edgeLineOffset + edgeIdx*faceLineOffset)*blkSize;
646 for(LO l = 0; l < blkSize; ++l) {
647 for(LO k = 0; k < 3; ++k) {
648 for(LO j = 0; j < 5; ++j) {
649 for(LO i = 0; i < 3; ++i) {
650 entries_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = columnOffset
651 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
652 values_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i + 2];
653 }
654 }
655 }
656 }
657 for(LO l = 0; l < blkSize; ++l) {
658 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
659 }
660 for(int dim = 0; dim <numDimensions; ++dim) {
661 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
662 }
663
664 // Edge 1
665 coordRowIdx = ((edgeIdx + 1)*lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
666 rowIdx = coordRowIdx*blkSize;
667 coordColumnOffset = ((edgeIdx + 1)*3*lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
668 columnOffset = coordColumnOffset*blkSize;
669 entryOffset = (edgeLineOffset + edgeIdx*faceLineOffset
670 + edgeStencilLength + (lCoarseNodesPerDim[0] - 2)*faceStencilLength)*blkSize;
671 for(LO l = 0; l < blkSize; ++l) {
672 for(LO k = 0; k < 3; ++k) {
673 for(LO j = 0; j < 5; ++j) {
674 for(LO i = 0; i < 3; ++i) {
675 entries_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = columnOffset
676 + (k*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
677 values_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = coeffs[k + 2]*coeffs[j]*coeffs[i];
678 }
679 }
680 }
681 }
682 for(LO l = 0; l < blkSize; ++l) {
683 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
684 }
685 for(int dim = 0; dim <numDimensions; ++dim) {
686 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
687 }
688
689 // Edge 2
690 coordRowIdx = ((lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
691 + (edgeIdx + 1)*lCoarseNodesPerDim[0]);
692 rowIdx = coordRowIdx*blkSize;
693 coordColumnOffset = ((lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
694 + (edgeIdx + 1)*3*lFineNodesPerDim[0]);
695 columnOffset = coordColumnOffset*blkSize;
696 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset
697 + edgeLineOffset + edgeIdx*faceLineOffset)*blkSize;
698 for(LO l = 0; l < blkSize; ++l) {
699 for(LO k = 0; k < 3; ++k) {
700 for(LO j = 0; j < 5; ++j) {
701 for(LO i = 0; i < 3; ++i) {
702 entries_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = columnOffset
703 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
704 values_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i + 2];
705 }
706 }
707 }
708 }
709 for(LO l = 0; l < blkSize; ++l) {
710 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
711 }
712 for(int dim = 0; dim <numDimensions; ++dim) {
713 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
714 }
715
716 // Edge 3
717 coordRowIdx = ((lCoarseNodesPerDim[2] - 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
718 + (edgeIdx + 1)*lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
719 rowIdx = coordRowIdx*blkSize;
720 coordColumnOffset = ((lFineNodesPerDim[2] - 1)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
721 + (edgeIdx + 1)*3*lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
722 columnOffset = coordColumnOffset*blkSize;
723 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2)*interiorPlaneOffset
724 + edgeLineOffset + edgeIdx*faceLineOffset
725 + edgeStencilLength + (lCoarseNodesPerDim[0] - 2)*faceStencilLength)*blkSize;
726 for(LO l = 0; l < blkSize; ++l) {
727 for(LO k = 0; k < 3; ++k) {
728 for(LO j = 0; j < 5; ++j) {
729 for(LO i = 0; i < 3; ++i) {
730 entries_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = columnOffset
731 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
732 + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
733 values_h(entryOffset + k*15 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i];
734 }
735 }
736 }
737 }
738 for(LO l = 0; l < blkSize; ++l) {
739 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
740 }
741 for(int dim = 0; dim <numDimensions; ++dim) {
742 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
743 }
744 }
745 }
746
747 // Edges along 2 direction
748 if(lCoarseNodesPerDim[2] - 2 > 0) {
749
750 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
751 for(LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[2] - 2; ++edgeIdx) {
752
753 // Edge 0
754 coordRowIdx = (edgeIdx + 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0];
755 rowIdx = coordRowIdx*blkSize;
756 coordColumnOffset = (edgeIdx + 1)*3*lFineNodesPerDim[1]*lFineNodesPerDim[0];
757 columnOffset = coordColumnOffset*blkSize;
758 entryOffset = (facePlaneOffset + edgeIdx*interiorPlaneOffset)*blkSize;
759 for(LO l = 0; l < blkSize; ++l) {
760 for(LO k = 0; k < 5; ++k) {
761 for(LO j = 0; j < 3; ++j) {
762 for(LO i = 0; i < 3; ++i) {
763 entries_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = columnOffset
764 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i)*blkSize + l;
765 values_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i + 2];
766 }
767 }
768 }
769 }
770 for(LO l = 0; l < blkSize; ++l) {
771 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
772 }
773 for(int dim = 0; dim <numDimensions; ++dim) {
774 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
775 }
776
777 // Edge 1
778 coordRowIdx = ((edgeIdx + 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
779 + lCoarseNodesPerDim[0] - 1);
780 rowIdx = coordRowIdx*blkSize;
781 coordColumnOffset = ((edgeIdx + 1)*3*lFineNodesPerDim[1]*lFineNodesPerDim[0]
782 + lFineNodesPerDim[0] - 1);
783 columnOffset = coordColumnOffset*blkSize;
784 entryOffset = (facePlaneOffset + faceLineOffset - edgeStencilLength
785 + edgeIdx*interiorPlaneOffset)*blkSize;
786 for(LO l = 0; l < blkSize; ++l) {
787 for(LO k = 0; k < 5; ++k) {
788 for(LO j = 0; j < 3; ++j) {
789 for(LO i = 0; i < 3; ++i) {
790 entries_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = columnOffset
791 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + j*lFineNodesPerDim[0] + i - 2)*blkSize + l;
792 values_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j + 2]*coeffs[i];
793 }
794 }
795 }
796 }
797 for(LO l = 0; l < blkSize; ++l) {
798 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
799 }
800 for(int dim = 0; dim <numDimensions; ++dim) {
801 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
802 }
803
804 // Edge 2
805 coordRowIdx = ((edgeIdx + 1)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0]
806 + (lCoarseNodesPerDim[1] - 1)*lCoarseNodesPerDim[0]);
807 rowIdx = coordRowIdx*blkSize;
808 coordColumnOffset = ((edgeIdx + 1)*3*lFineNodesPerDim[1]*lFineNodesPerDim[0]
809 + (lFineNodesPerDim[1] - 1)*lFineNodesPerDim[0]);
810 columnOffset = coordColumnOffset*blkSize;
811 entryOffset = (facePlaneOffset + edgeIdx*interiorPlaneOffset + faceLineOffset
812 + (lCoarseNodesPerDim[1] - 2)*interiorLineOffset)*blkSize;
813 for(LO l = 0; l < blkSize; ++l) {
814 for(LO k = 0; k < 5; ++k) {
815 for(LO j = 0; j < 3; ++j) {
816 for(LO i = 0; i < 3; ++i) {
817 entries_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = columnOffset
818 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0] + (j - 2)*lFineNodesPerDim[0] + i)*blkSize + l;
819 values_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i + 2];
820 }
821 }
822 }
823 }
824 for(LO l = 0; l < blkSize; ++l) {
825 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
826 }
827 for(int dim = 0; dim <numDimensions; ++dim) {
828 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
829 }
830
831 // Edge 3
832 coordRowIdx = ((edgeIdx + 2)*lCoarseNodesPerDim[1]*lCoarseNodesPerDim[0] - 1);
833 rowIdx = coordRowIdx*blkSize;
834 coordColumnOffset = ((edgeIdx + 1)*3*lFineNodesPerDim[1]*lFineNodesPerDim[0]
835 + lFineNodesPerDim[1]*lFineNodesPerDim[0] - 1);
836 columnOffset = coordColumnOffset*blkSize;
837 entryOffset = (facePlaneOffset + (edgeIdx + 1)*interiorPlaneOffset - edgeStencilLength)*blkSize;
838 for(LO l = 0; l < blkSize; ++l) {
839 for(LO k = 0; k < 5; ++k) {
840 for(LO j = 0; j < 3; ++j) {
841 for(LO i = 0; i < 3; ++i) {
842 entries_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = columnOffset
843 + ((k - 2)*lFineNodesPerDim[1]*lFineNodesPerDim[0]
844 + (j - 2)*lFineNodesPerDim[0] + i - 2)*blkSize + l;
845 values_h(entryOffset + k*9 + j*3 + i + edgeStencilLength*l) = coeffs[k]*coeffs[j]*coeffs[i];
846 }
847 }
848 }
849 }
850 for(LO l = 0; l < blkSize; ++l) {
851 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength*(l+1);
852 }
853 for(int dim = 0; dim <numDimensions; ++dim) {
854 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
855 }
856 }
857 }
858
859 //TODO: KOKKOS parallel_for used from here. Not sure if it should be used for edges.
860 Kokkos::deep_copy(row_map, row_map_h);
861 Kokkos::deep_copy(entries, entries_h);
862 Kokkos::deep_copy(values, values_h);
863
864 // Create views on device for nodes per dim
865 LOTupleView lFineNodesPerDim_d("lFineNodesPerDim");
866 LOTupleView lCoarseNodesPerDim_d("lCoarseNodesPerDim");
867
868 typename Kokkos::View<LO[3], device_type>::HostMirror lCoarseNodesPerDim_h = Kokkos::create_mirror_view( lCoarseNodesPerDim_d );
869 typename Kokkos::View<LO[3], device_type>::HostMirror lFineNodesPerDim_h = Kokkos::create_mirror_view( lFineNodesPerDim_d );
870
871 for(int dim = 0; dim < numDimensions; ++dim) {
872 lCoarseNodesPerDim_h(dim) = lCoarseNodesPerDim[dim];
873 lFineNodesPerDim_h(dim) = lFineNodesPerDim[dim];
874 }
875
876 Kokkos::deep_copy(lCoarseNodesPerDim_d, lCoarseNodesPerDim_h);
877 Kokkos::deep_copy(lFineNodesPerDim_d, lFineNodesPerDim_h);
878
879
880 // Faces in 0-1 plane
881 if((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[1] - 2 > 0)) {
882
883 Kokkos::parallel_for("Faces in 0-1 plane region R",
884 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[1]-2)*(lCoarseNodesPerDim[0]-2) ),
885 KOKKOS_LAMBDA(const LO faceIdx) {
886 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
887 LO gridIdx[3] = {0,0,0};
888 impl_scalar_type coeffs_d[5] = {1.0/3.0, 2.0/3.0, 1.0, 2.0/3.0, 1.0/3.0};
889 // Last step in the loop
890 // update the grid indices
891 // for next grid point
892 for(LO i = 0; i < faceIdx; i++){
893 ++gridIdx[0];
894 if(gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
895 gridIdx[0] = 0;
896 ++gridIdx[1];
897 }
898 }
899
900 // Face 0
901 coordRowIdx = ((gridIdx[1] + 1)*lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
902 rowIdx = coordRowIdx*blkSize;
903 coordColumnOffset = 3*((gridIdx[1] + 1)*lFineNodesPerDim_d(0) + gridIdx[0] + 1);
904 columnOffset = coordColumnOffset*blkSize;
905 entryOffset = (edgeLineOffset + edgeStencilLength
906 + gridIdx[1]*faceLineOffset + gridIdx[0]*faceStencilLength)*blkSize;
907 for(LO l = 0; l < blkSize; ++l) {
908 for(LO k = 0; k < 3; ++k) {
909 for(LO j = 0; j < 5; ++j) {
910 for(LO i = 0; i < 5; ++i) {
911 entries(entryOffset + k*25 + j*5 + i + faceStencilLength*l) = columnOffset
912 + (k*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0) + (j - 2)*lFineNodesPerDim_d(0) + i - 2)*blkSize + l;
913 values(entryOffset + k*25 + j*5 + i + faceStencilLength*l) = coeffs_d[k + 2]*coeffs_d[j]*coeffs_d[i];
914 }
915 }
916 }
917 }
918 for(LO l = 0; l < blkSize; ++l) {
919 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
920 }
921 for(int dim = 0; dim <numDimensions; ++dim) {
922 coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
923 }
924
925 // Face 1
926 coordRowIdx += (lCoarseNodesPerDim_d(2) - 1)*lCoarseNodesPerDim_d(1)*lCoarseNodesPerDim_d(0);
927 rowIdx = coordRowIdx*blkSize;
928 coordColumnOffset += (lFineNodesPerDim_d(2) - 1)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0);
929 columnOffset = coordColumnOffset*blkSize;
930 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim_d(2) - 2)*interiorPlaneOffset)*blkSize;
931 for(LO l = 0; l < blkSize; ++l) {
932 for(LO k = 0; k < 3; ++k) {
933 for(LO j = 0; j < 5; ++j) {
934 for(LO i = 0; i < 5; ++i) {
935 entries(entryOffset + k*25 + j*5 + i + faceStencilLength*l) = columnOffset
936 + ((k - 2)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0)
937 + (j - 2)*lFineNodesPerDim_d(0) + i - 2)*blkSize + l;
938 values(entryOffset + k*25 + j*5 + i + faceStencilLength*l) = coeffs_d[k]*coeffs_d[j]*coeffs_d[i];
939 }
940 }
941 }
942 }
943 for(LO l = 0; l < blkSize; ++l) {
944 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
945 }
946 for(int dim = 0; dim <numDimensions; ++dim) {
947 coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
948 }
949
950 });// parallel_for faces in 0-1 plane
951 }
952
953 // Faces in 0-2 plane
954 if((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
955
956 Kokkos::parallel_for("Faces in 0-2 plane region R",
957 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2]-2)*(lCoarseNodesPerDim[0]-2) ),
958 KOKKOS_LAMBDA(const LO faceIdx) {
959 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
960 LO gridIdx[3] = {0,0,0};
961 impl_scalar_type coeffs_d[5] = {1.0/3.0, 2.0/3.0, 1.0, 2.0/3.0, 1.0/3.0};
962 // Last step in the loop
963 // update the grid indices
964 // for next grid point
965 for(LO i = 0; i < faceIdx; i++){
966 ++gridIdx[0];
967 if(gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
968 gridIdx[0] = 0;
969 ++gridIdx[2];
970 }
971 }
972
973 // Face 0
974 coordRowIdx = ((gridIdx[2] + 1)*lCoarseNodesPerDim_d(1)*lCoarseNodesPerDim_d(0) + (gridIdx[0] + 1));
975 rowIdx = coordRowIdx*blkSize;
976 coordColumnOffset = ((gridIdx[2] + 1)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0)
977 + gridIdx[0] + 1)*3;
978 columnOffset = coordColumnOffset*blkSize;
979 entryOffset = (facePlaneOffset + gridIdx[2]*interiorPlaneOffset + edgeStencilLength
980 + gridIdx[0]*faceStencilLength)*blkSize;
981 for(LO l = 0; l < blkSize; ++l) {
982 for(LO k = 0; k < 5; ++k) {
983 for(LO j = 0; j < 3; ++j) {
984 for(LO i = 0; i < 5; ++i) {
985 entries(entryOffset + k*15 + j*5 + i + faceStencilLength*l) = columnOffset
986 + ((k - 2)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0) + j*lFineNodesPerDim_d(0) + i - 2)*blkSize + l;
987 values(entryOffset + k*15 + j*5 + i + faceStencilLength*l) = coeffs_d[k]*coeffs_d[j + 2]*coeffs_d[i];
988 }
989 }
990 }
991 }
992 for(LO l = 0; l < blkSize; ++l) {
993 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
994 }
995 for(int dim = 0; dim <numDimensions; ++dim) {
996 coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
997 }
998
999 // Face 1
1000 coordRowIdx += (lCoarseNodesPerDim_d(1) - 1)*lCoarseNodesPerDim_d(0);
1001 rowIdx = coordRowIdx*blkSize;
1002 coordColumnOffset += (lFineNodesPerDim_d(1) - 1)*lFineNodesPerDim_d(0);
1003 columnOffset = coordColumnOffset*blkSize;
1004 entryOffset += (faceLineOffset + (lCoarseNodesPerDim_d(1) - 2)*interiorLineOffset)*blkSize;
1005 for(LO l = 0; l < blkSize; ++l) {
1006 for(LO k = 0; k < 5; ++k) {
1007 for(LO j = 0; j < 3; ++j) {
1008 for(LO i = 0; i < 5; ++i) {
1009 entries(entryOffset + k*15 + j*5 + i + faceStencilLength*l) = columnOffset
1010 + ((k - 2)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0)
1011 + (j - 2)*lFineNodesPerDim_d(0) + i - 2)*blkSize + l;
1012 values(entryOffset + k*15 + j*5 + i + faceStencilLength*l) = coeffs_d[k]*coeffs_d[j]*coeffs_d[i];
1013 }
1014 }
1015 }
1016 }
1017 for(LO l = 0; l < blkSize; ++l) {
1018 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
1019 }
1020 for(int dim = 0; dim <numDimensions; ++dim) {
1021 coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
1022 }
1023
1024 });// parallel_for faces in 0-2 plane
1025 }
1026
1027 // Faces in 1-2 plane
1028 if((lCoarseNodesPerDim[1] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
1029
1030 Kokkos::parallel_for("Faces in 1-2 plane region R",
1031 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2]-2)*(lCoarseNodesPerDim[1]-2) ),
1032 KOKKOS_LAMBDA(const LO faceIdx) {
1033 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
1034 LO gridIdx[3] = {0,0,0};
1035 impl_scalar_type coeffs_d[5] = {1.0/3.0, 2.0/3.0, 1.0, 2.0/3.0, 1.0/3.0};
1036 // Last step in the loop
1037 // update the grid indices
1038 // for next grid point
1039 for(LO i = 0; i < faceIdx; i++){
1040 ++gridIdx[1];
1041 if(gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
1042 gridIdx[1] = 0;
1043 ++gridIdx[2];
1044 }
1045 }
1046
1047 // Face 0
1048 coordRowIdx = ((gridIdx[2] + 1)*lCoarseNodesPerDim_d(1)*lCoarseNodesPerDim_d(0)
1049 + (gridIdx[1] + 1)*lCoarseNodesPerDim_d(0));
1050 rowIdx = coordRowIdx*blkSize;
1051 coordColumnOffset = ((gridIdx[2] + 1)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0)
1052 + (gridIdx[1] + 1)*lFineNodesPerDim_d(0))*3;
1053 columnOffset = coordColumnOffset*blkSize;
1054 entryOffset = (facePlaneOffset + gridIdx[2]*interiorPlaneOffset + faceLineOffset
1055 + gridIdx[1]*interiorLineOffset)*blkSize;
1056 for(LO l = 0; l < blkSize; ++l) {
1057 for(LO k = 0; k < 5; ++k) {
1058 for(LO j = 0; j < 5; ++j) {
1059 for(LO i = 0; i < 3; ++i) {
1060 entries(entryOffset + k*15 + j*3 + i + faceStencilLength*l) = columnOffset
1061 + ((k - 2)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0) + (j - 2)*lFineNodesPerDim_d(0) + i)*blkSize + l;
1062 values(entryOffset + k*15 + j*3 + i + faceStencilLength*l) = coeffs_d[k]*coeffs_d[j]*coeffs_d[i + 2];
1063 }
1064 }
1065 }
1066 }
1067 for(LO l = 0; l < blkSize; ++l) {
1068 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
1069 }
1070 for(int dim = 0; dim <numDimensions; ++dim) {
1071 coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
1072 }
1073
1074 // Face 1
1075 coordRowIdx += (lCoarseNodesPerDim_d(0) - 1);
1076 rowIdx = coordRowIdx*blkSize;
1077 coordColumnOffset += (lFineNodesPerDim_d(0) - 1);
1078 columnOffset = coordColumnOffset*blkSize;
1079 entryOffset += (faceStencilLength + (lCoarseNodesPerDim_d(0) - 2)*interiorStencilLength)*blkSize;
1080 for(LO l = 0; l < blkSize; ++l) {
1081 for(LO k = 0; k < 5; ++k) {
1082 for(LO j = 0; j < 5; ++j) {
1083 for(LO i = 0; i < 3; ++i) {
1084 entries(entryOffset + k*15 + j*3 + i + faceStencilLength*l) = columnOffset
1085 + ((k - 2)*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0)
1086 + (j - 2)*lFineNodesPerDim_d(0) + i - 2)*blkSize + l;
1087 values(entryOffset + k*15 + j*3 + i + faceStencilLength*l) = coeffs_d[k]*coeffs_d[j]*coeffs_d[i];
1088 }
1089 }
1090 }
1091 }
1092 for(LO l = 0; l < blkSize; ++l) {
1093 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength*(l+1);
1094 }
1095 for(int dim = 0; dim <numDimensions; ++dim) {
1096 coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
1097 }
1098
1099 });// parallel_for faces in 1-2 plane
1100 }
1101
1102 if(numInteriors > 0) {
1103
1104 // Allocate and compute arrays
1105 // containing column offsets
1106 // and values associated with
1107 // interior points
1108 LO countRowEntries = 0;
1109 Kokkos::View<LO[125]> coordColumnOffsets_d("coordColOffset");
1110 auto coordColumnOffsets_h = Kokkos::create_mirror_view( coordColumnOffsets_d );
1111
1112 for(LO k = -2; k < 3; ++k) {
1113 for(LO j = -2; j < 3; ++j) {
1114 for(LO i = -2; i < 3; ++i) {
1115 coordColumnOffsets_h(countRowEntries) = k*lFineNodesPerDim[1]*lFineNodesPerDim[0]
1116 + j*lFineNodesPerDim[0] + i;
1117 ++countRowEntries;
1118 }
1119 }
1120 }
1121 Kokkos::deep_copy(coordColumnOffsets_d, coordColumnOffsets_h);
1122
1123 LO countValues = 0;
1124 Kokkos::View<impl_scalar_type*> interiorValues_d("interiorValues",125);
1125 auto interiorValues_h = Kokkos::create_mirror_view( interiorValues_d );
1126 for(LO k = 0; k < 5; ++k) {
1127 for(LO j = 0; j < 5; ++j) {
1128 for(LO i = 0; i < 5; ++i) {
1129 interiorValues_h(countValues) = coeffs[k]*coeffs[j]*coeffs[i];
1130 ++countValues;
1131 }
1132 }
1133 }
1134 Kokkos::deep_copy(interiorValues_d, interiorValues_h);
1135
1136 Kokkos::parallel_for("interior idx region R",Kokkos::RangePolicy<typename device_type::execution_space>(0, numInteriors),
1137 KOKKOS_LAMBDA(const LO interiorIdx) {
1138 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
1139 LO gridIdx[3];
1140 gridIdx[0] = 0; gridIdx[1] = 0; gridIdx[2] = 0;
1141 // First step in the loop
1142 // update the grid indices
1143 // for the grid point
1144 for(LO i=0; i<interiorIdx; i++){
1145 ++gridIdx[0];
1146 if(gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
1147 gridIdx[0] = 0;
1148 ++gridIdx[1];
1149 if(gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
1150 gridIdx[1] = 0;
1151 ++gridIdx[2];
1152 }
1153 }
1154 }
1155
1156 coordRowIdx = ((gridIdx[2] + 1)*lCoarseNodesPerDim_d(0)*lCoarseNodesPerDim_d(1)
1157 + (gridIdx[1] + 1)*lCoarseNodesPerDim_d(0)
1158 + gridIdx[0] + 1);
1159 rowIdx = coordRowIdx*blkSize;
1160 coordColumnOffset = ((gridIdx[2] + 1)*3*lFineNodesPerDim_d(1)*lFineNodesPerDim_d(0)
1161 + (gridIdx[1] + 1)*3*lFineNodesPerDim_d(0) + (gridIdx[0] + 1)*3);
1162 columnOffset = coordColumnOffset*blkSize;
1163 entryOffset = (facePlaneOffset + faceLineOffset + faceStencilLength
1164 + gridIdx[2]*interiorPlaneOffset + gridIdx[1]*interiorLineOffset
1165 + gridIdx[0]*interiorStencilLength)*blkSize;
1166 for(LO l = 0; l < blkSize; ++l) {
1167 row_map(rowIdx + 1 + l) = entryOffset + interiorStencilLength*(l+1);
1168 }
1169 // Fill the column indices
1170 // and values in the approproate
1171 // views.
1172 for(LO l = 0; l < blkSize; ++l) {
1173 for(LO entryIdx = 0; entryIdx < interiorStencilLength; ++entryIdx) {
1174 entries(entryOffset + entryIdx + interiorStencilLength*l) = columnOffset + coordColumnOffsets_d(entryIdx)*blkSize + l;
1175 values(entryOffset + entryIdx + interiorStencilLength*l) = interiorValues_d(entryIdx);
1176 }
1177 }
1178 for(int dim = 0; dim <numDimensions; ++dim) {
1179 coarseCoordsView(coordRowIdx,dim) = fineCoordsView(coordColumnOffset, dim);
1180 }
1181
1182 });// Kokkos::parallel_for interior idx
1183 //
1184 }
1185
1186 local_graph_type localGraph(entries, row_map);
1187 local_matrix_type localR("R", numCols, values, localGraph);
1188
1189 R = MatrixFactory::Build(localR, // the local data
1190 rowMap, // rowMap
1191 A->getRowMap(), // colMap
1192 A->getRowMap(), // domainMap == colMap
1193 rowMap, // rangeMap == rowMap
1194 Teuchos::null); // params for optimized construction
1195
1196 } // Build3D()
1197
1198} //namespace MueLu
1199
1200#define MUELU_REGIONRFACTORY_KOKKOS_SHORT
1201#endif // MUELU_REGIONRFACTORY_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultGlobalOrdinal GlobalOrdinal
MueLu::DefaultNode Node
Class that holds all level-specific information.
int GetLevelID() const
Return level number.
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static const NoFactory * get()
typename Kokkos::View< LO[3], device_type > LOTupleView
void Build3D(const int numDimensions, Array< LO > &lFineNodesPerDim, const RCP< Matrix > &A, const RCP< realvaluedmultivector_type > &fineCoordinates, RCP< Matrix > &R, RCP< realvaluedmultivector_type > &coarseCoordinates, Array< LO > &lCoarseNodesPerDim) const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
RCP< const ParameterList > GetValidParameterList() const
Input.
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Namespace for MueLu classes and methods.