MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_AlgebraicPermutationStrategy_def.hpp
Go to the documentation of this file.
1/*
2 * MueLu_AlgebraicPermutationStrategy_def.hpp
3 *
4 * Created on: Feb 20, 2013
5 * Author: tobias
6 */
7
8#ifndef MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
9#define MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
10
11#include <queue>
12
13#include <Teuchos_ScalarTraits.hpp>
14
15#include <Xpetra_MultiVector.hpp>
16#include <Xpetra_Matrix.hpp>
17#include <Xpetra_CrsGraph.hpp>
18#include <Xpetra_Vector.hpp>
19#include <Xpetra_MultiVectorFactory.hpp>
20#include <Xpetra_VectorFactory.hpp>
21#include <Xpetra_CrsMatrixWrap.hpp>
22#include <Xpetra_Export.hpp>
23#include <Xpetra_ExportFactory.hpp>
24#include <Xpetra_Import.hpp>
25#include <Xpetra_ImportFactory.hpp>
26#include <Xpetra_MatrixMatrix.hpp>
27
28#include "MueLu_Utilities.hpp"
30
31namespace MueLu {
32
33 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
35 BuildPermutation(const Teuchos::RCP<Matrix> & A, const Teuchos::RCP<const Map>& permRowMap,
36 Level & currentLevel, const FactoryBase* genFactory) const {
37
38 const Scalar SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
39 const Scalar SC_ONE = Teuchos::ScalarTraits<Scalar>::one();
40
41 typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
42 const MT MT_ZERO = Teuchos::ScalarTraits<MT>::zero();
43 const MT MT_ONE = Teuchos::ScalarTraits<MT>::one();
44
45 const Teuchos::RCP< const Teuchos::Comm< int > > comm = A->getRowMap()->getComm();
46 int numProcs = comm->getSize();
47 int myRank = comm->getRank();
48
49 size_t nDofsPerNode = 1;
50 if (A->IsView("stridedMaps")) {
51 Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps");
52 nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize();
53 }
54
55 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidates;
56 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > keepDiagonalEntries;
57 std::vector<MT> Weights;
58
59 // loop over all local rows in matrix A and keep diagonal entries if corresponding
60 // matrix rows are not contained in permRowMap
61 for (size_t row = 0; row < A->getRowMap()->getLocalNumElements(); row++) {
62 GlobalOrdinal grow = A->getRowMap()->getGlobalElement(row);
63
64 if(permRowMap->isNodeGlobalElement(grow) == true) continue;
65
66 size_t nnz = A->getNumEntriesInLocalRow(row);
67
68 // extract local row information from matrix
69 Teuchos::ArrayView<const LocalOrdinal> indices;
70 Teuchos::ArrayView<const Scalar> vals;
71 A->getLocalRowView(row, indices, vals);
72
73 TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError,
74 "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
75
76 // find column entry with max absolute value
77 GlobalOrdinal gMaxValIdx = 0;
78 MT norm1 = MT_ZERO;
79 MT maxVal = MT_ZERO;
80 for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
81 norm1 += Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
82 if(Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]) > maxVal) {
83 maxVal = Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
84 gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
85 }
86 }
87
88 if(grow == gMaxValIdx) // only keep row/col pair if it's diagonal dominant!!!
89 keepDiagonalEntries.push_back(std::make_pair(grow,grow));
90 }
91
93 // handle rows that are marked to be relevant for permutations
94 for (size_t row = 0; row < permRowMap->getLocalNumElements(); row++) {
95 GlobalOrdinal grow = permRowMap->getGlobalElement(row);
96 LocalOrdinal lArow = A->getRowMap()->getLocalElement(grow);
97 size_t nnz = A->getNumEntriesInLocalRow(lArow);
98
99 // extract local row information from matrix
100 Teuchos::ArrayView<const LocalOrdinal> indices;
101 Teuchos::ArrayView<const Scalar> vals;
102 A->getLocalRowView(lArow, indices, vals);
103
104 TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError,
105 "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
106
107 // find column entry with max absolute value
108 GlobalOrdinal gMaxValIdx = 0;
109 MT norm1 = MT_ZERO;
110 MT maxVal = MT_ZERO;
111 for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
112 norm1 += Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
113 if(Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]) > maxVal) {
114 maxVal = Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
115 gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
116 }
117 }
118
119 if(maxVal > Teuchos::ScalarTraits<MT>::zero ()) { // keep only max Entries \neq 0.0
120 permutedDiagCandidates.push_back(std::make_pair(grow,gMaxValIdx));
121 Weights.push_back(maxVal/(norm1*Teuchos::as<MT>(nnz)));
122 } else {
123 std::cout << "ATTENTION: row " << grow << " has only zero entries -> singular matrix!" << std::endl;
124 }
125
126 }
127
128 // sort Weights in descending order
129 std::vector<int> permutation;
130 sortingPermutation(Weights,permutation);
131
132 // create new vector with exactly one possible entry for each column
133
134 // each processor which requests the global column id gcid adds 1 to gColVec
135 // gColVec will be summed up over all processors and communicated to gDomVec
136 // which is based on the non-overlapping domain map of A.
137
138 Teuchos::RCP<Vector> gColVec = VectorFactory::Build(A->getColMap());
139 Teuchos::RCP<Vector> gDomVec = VectorFactory::Build(A->getDomainMap());
140 gColVec->putScalar(SC_ZERO);
141 gDomVec->putScalar(SC_ZERO);
142
143 // put in all keep diagonal entries
144 for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = keepDiagonalEntries.begin(); p != keepDiagonalEntries.end(); ++p) {
145 gColVec->sumIntoGlobalValue((*p).second,1.0);
146 }
147
148 Teuchos::RCP<Export> exporter = ExportFactory::Build(gColVec->getMap(), gDomVec->getMap());
149 gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD); // communicate blocked gcolids to all procs
150 gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT);
151
152 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidatesFiltered; // TODO reserve memory
153 std::map<GlobalOrdinal, MT> gColId2Weight;
154
155 {
156 Teuchos::ArrayRCP< Scalar > ddata = gColVec->getDataNonConst(0);
157 for(size_t i = 0; i < permutedDiagCandidates.size(); ++i) {
158 // loop over all candidates
159 std::pair<GlobalOrdinal, GlobalOrdinal> pp = permutedDiagCandidates[permutation[i]];
160 GlobalOrdinal grow = pp.first;
161 GlobalOrdinal gcol = pp.second;
162
163 LocalOrdinal lcol = A->getColMap()->getLocalElement(gcol);
164 if(Teuchos::ScalarTraits<Scalar>::real (ddata[lcol]) > MT_ZERO){
165 continue; // skip lcol: column already handled by another row
166 }
167
168 // mark column as already taken
169 ddata[lcol] += SC_ONE;
170
171 permutedDiagCandidatesFiltered.push_back(std::make_pair(grow,gcol));
172 gColId2Weight[gcol] = Weights[permutation[i]];
173 }
174 }
175
176 // communicate how often each column index is requested by the different procs
177 gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD);
178 gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT); // probably not needed // TODO check me
179
180 //*****************************************************************************************
181 // first communicate ALL global ids of column indices which are requested by more
182 // than one proc to all other procs
183 // detect which global col indices are requested by more than one proc
184 // and store them in the multipleColRequests vector
185 std::vector<GlobalOrdinal> multipleColRequests; // store all global column indices from current processor that are also
186 // requested by another processor. This is possible, since they are stored
187 // in gDomVec which is based on the nonoverlapping domain map. That is, each
188 // global col id is handled by exactly one proc.
189 std::queue<GlobalOrdinal> unusedColIdx; // unused column indices on current processor
190
191 for(size_t sz = 0; sz < gDomVec->getLocalLength(); ++sz) {
192 Teuchos::ArrayRCP< const Scalar > arrDomVec = gDomVec->getData(0);
193 //
194 // FIXME (mfh 30 Oct 2015) I _think_ it's OK to check just the
195 // real part, because this is a count. (Shouldn't MueLu use
196 // mag_type instead of Scalar here to save space?)
197 //
198 if(Teuchos::ScalarTraits<Scalar>::real(arrDomVec[sz]) > MT_ONE) {
199 multipleColRequests.push_back(gDomVec->getMap()->getGlobalElement(sz));
200 } else if(Teuchos::ScalarTraits<Scalar>::real (arrDomVec[sz]) == MT_ZERO) {
201 unusedColIdx.push(gDomVec->getMap()->getGlobalElement(sz));
202 }
203 }
204
205 // communicate the global number of column indices which are requested by more than one proc
206 LocalOrdinal localMultColRequests = Teuchos::as<LocalOrdinal>(multipleColRequests.size());
207 LocalOrdinal globalMultColRequests = 0;
208
209 // sum up all entries in multipleColRequests over all processors
210 //
211 // FIXME (lbv 11 Feb 2019) why cast localMultColRequests as LocalOrdinal
212 // when it was properly declared as such just a line above?
213 //
214 MueLu_sumAll(gDomVec->getMap()->getComm(), (LocalOrdinal)localMultColRequests, globalMultColRequests);
215
216 if(globalMultColRequests > 0) {
217 // special handling: two processors request the same global column id.
218 // decide which processor gets it
219
220 // distribute number of multipleColRequests to all processors
221 // each processor stores how many column ids for exchange are handled by the cur proc
222 std::vector<GlobalOrdinal> numMyMultColRequests(numProcs, 0);
223 std::vector<GlobalOrdinal> numGlobalMultColRequests(numProcs, 0);
224 numMyMultColRequests[myRank] = localMultColRequests;
225 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
226 numMyMultColRequests.data(),
227 numGlobalMultColRequests.data());
228
229 // communicate multipleColRequests entries to all processors
230 int nMyOffset = 0;
231 for (int i=0; i<myRank-1; i++)
232 nMyOffset += numGlobalMultColRequests[i]; // calculate offset to store the weights on the corresponding place in procOverlappingWeights
233
234 const GlobalOrdinal zero = 0;
235 std::vector<GlobalOrdinal> procMultRequestedColIds(globalMultColRequests, zero);
236 std::vector<GlobalOrdinal> global_procMultRequestedColIds(globalMultColRequests, zero);
237
238 // loop over all local column GIDs that are also requested by other procs
239 for(size_t i = 0; i < multipleColRequests.size(); i++) {
240 procMultRequestedColIds[nMyOffset + i] = multipleColRequests[i]; // all weights are > 0 ?
241 }
242
243 // template ordinal, package (double)
244 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(globalMultColRequests),
245 procMultRequestedColIds.data(),
246 global_procMultRequestedColIds.data());
247
248 // loop over global_procOverlappingWeights and eliminate wrong entries...
249 for (size_t k = 0; k<global_procMultRequestedColIds.size(); k++) {
250 GlobalOrdinal globColId = global_procMultRequestedColIds[k];
251
252 std::vector<MT> MyWeightForColId(numProcs, MT_ZERO);
253 std::vector<MT> GlobalWeightForColId(numProcs, MT_ZERO);
254
255 if(gColVec->getMap()->isNodeGlobalElement(globColId)) {
256 MyWeightForColId[myRank] = gColId2Weight[globColId];
257 } else {
258 MyWeightForColId[myRank] = MT_ZERO;
259 }
260
261 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
262 MyWeightForColId.data(),
263 GlobalWeightForColId.data());
264
265 if(gColVec->getMap()->isNodeGlobalElement(globColId)) {
266 // note: 2 procs could have the same weight for a column index.
267 // pick the first one.
268 MT winnerValue = MT_ZERO;
269 int winnerProcRank = 0;
270 for (int proc = 0; proc < numProcs; proc++) {
271 if(GlobalWeightForColId[proc] > winnerValue) {
272 winnerValue = GlobalWeightForColId[proc];
273 winnerProcRank = proc;
274 }
275 }
276
277 // winnerProcRank is the winner for handling globColId.
278 // winnerProcRank is unique (even if two procs have the same weight for a column index)
279
280 if(myRank != winnerProcRank) {
281 // remove corresponding entry from permutedDiagCandidatesFiltered
282 typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = permutedDiagCandidatesFiltered.begin();
283 while(p != permutedDiagCandidatesFiltered.end() )
284 {
285 if((*p).second == globColId)
286 p = permutedDiagCandidatesFiltered.erase(p);
287 else
288 p++;
289 }
290 }
291
292 } // end if isNodeGlobalElement
293 } // end loop over global_procOverlappingWeights and eliminate wrong entries...
294 } // end if globalMultColRequests > 0
295
296 // put together all pairs:
297 //size_t sizeRowColPairs = keepDiagonalEntries.size() + permutedDiagCandidatesFiltered.size();
298 std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > RowColPairs;
299 RowColPairs.insert( RowColPairs.end(), keepDiagonalEntries.begin(), keepDiagonalEntries.end());
300 RowColPairs.insert( RowColPairs.end(), permutedDiagCandidatesFiltered.begin(), permutedDiagCandidatesFiltered.end());
301
302#ifdef DEBUG_OUTPUT
303 //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
304 // plausibility check
305 gColVec->putScalar(SC_ZERO);
306 gDomVec->putScalar(SC_ZERO);
307 typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator pl = RowColPairs.begin();
308 while(pl != RowColPairs.end() )
309 {
310 //GlobalOrdinal ik = (*pl).first;
311 GlobalOrdinal jk = (*pl).second;
312
313 gColVec->sumIntoGlobalValue(jk,1.0);
314 pl++;
315 }
316 gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD);
317 for(size_t sz = 0; sz < gDomVec->getLocalLength(); ++sz) {
318 Teuchos::ArrayRCP< const Scalar > arrDomVec = gDomVec->getData(0);
319 if(arrDomVec[sz] > 1.0) {
320 GetOStream(Runtime0) << "RowColPairs has multiple column [" << sz << "]=" << arrDomVec[sz] << std::endl;
321 } else if(arrDomVec[sz] == SC_ZERO) {
322 GetOStream(Runtime0) << "RowColPairs has empty column [" << sz << "]=" << arrDomVec[sz] << std::endl;
323 }
324 }
325 //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
326#endif
327
329 // assumption: on each processor RowColPairs now contains
330 // a valid set of (row,column) pairs, where the row entries
331 // are a subset of the processor's rows and the column entries
332 // are unique throughout all processors.
333 // Note: the RowColPairs are only defined for a subset of all rows,
334 // so there might be rows without an entry in RowColPairs.
335 // It can be, that some rows seem to be missing in RowColPairs, since
336 // the entry in that row with maximum absolute value has been reserved
337 // by another row already (e.g. as already diagonal dominant row outside
338 // of perRowMap).
339 // In fact, the RowColPairs vector only defines the (row,column) pairs
340 // that will be definitely moved to the diagonal after permutation.
341
342#ifdef DEBUG_OUTPUT
343 // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p) {
344 // std::cout << "proc: " << myRank << " r/c: " << (*p).first << "/" << (*p).second << std::endl;
345 // }
346 // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p)
347 // {
349 // std::cout << (*p).first +1 << " " << (*p).second+1 << std::endl;
350 // }
351 // std::cout << "\n";
352#endif
353
354 // vectors to store permutation information
355 Teuchos::RCP<Vector> Pperm = VectorFactory::Build(A->getRowMap());
356 Teuchos::RCP<Vector> Qperm = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
357 Teuchos::RCP<Vector> lQperm = VectorFactory::Build(A->getColMap()); // local variant (based on column map)
358
359 Pperm->putScalar(SC_ZERO);
360 Qperm->putScalar(SC_ZERO);
361 lQperm->putScalar(SC_ZERO);
362
363 // setup exporter for Qperm
364 Teuchos::RCP<Export> QpermExporter = ExportFactory::Build(lQperm->getMap(), Qperm->getMap());
365
366 Teuchos::RCP<Vector> RowIdStatus = VectorFactory::Build(A->getRowMap());
367 Teuchos::RCP<Vector> ColIdStatus = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
368 Teuchos::RCP<Vector> lColIdStatus = VectorFactory::Build(A->getColMap()); // local variant (based on column map)
369 Teuchos::RCP<Vector> ColIdUsed = VectorFactory::Build(A->getDomainMap()); // mark column ids to be already in use
370 RowIdStatus->putScalar(SC_ZERO);
371 ColIdStatus->putScalar(SC_ZERO);
372 lColIdStatus->putScalar(SC_ZERO);
373 ColIdUsed->putScalar(SC_ZERO); // no column ids are used
374
375
376 // count wide-range permutations
377 // a wide-range permutation is defined as a permutation of rows/columns which do not
378 // belong to the same node
379 LocalOrdinal lWideRangeRowPermutations = 0;
380 GlobalOrdinal gWideRangeRowPermutations = 0;
381 LocalOrdinal lWideRangeColPermutations = 0;
382 GlobalOrdinal gWideRangeColPermutations = 0;
383
384 // run 1: mark all "identity" permutations
385 {
386 Teuchos::ArrayRCP< Scalar > RowIdStatusArray = RowIdStatus->getDataNonConst(0);
387 Teuchos::ArrayRCP< Scalar > lColIdStatusArray = lColIdStatus->getDataNonConst(0);
388
389 typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = RowColPairs.begin();
390 while(p != RowColPairs.end() )
391 {
392 GlobalOrdinal ik = (*p).first;
393 GlobalOrdinal jk = (*p).second;
394
395 LocalOrdinal lik = A->getRowMap()->getLocalElement(ik);
396 LocalOrdinal ljk = A->getColMap()->getLocalElement(jk);
397
398 if(RowIdStatusArray[lik] == SC_ZERO) {
399 RowIdStatusArray[lik] = SC_ONE; // use this row id
400 lColIdStatusArray[ljk] = SC_ONE; // use this column id
401 Pperm->replaceLocalValue(lik, ik);
402 lQperm->replaceLocalValue(ljk, ik); // use column map
403 ColIdUsed->replaceGlobalValue(ik,SC_ONE); // ik is now used
404 p = RowColPairs.erase(p);
405
406 // detect wide range permutations
407 if(floor(ik/nDofsPerNode) != floor(jk/nDofsPerNode)) {
408 lWideRangeColPermutations++;
409 }
410 }
411 else
412 p++;
413 }
414
415
416 // communicate column map -> domain map
417 Qperm->doExport(*lQperm, *QpermExporter, Xpetra::ABSMAX);
418 ColIdStatus->doExport(*lColIdStatus, *QpermExporter, Xpetra::ABSMAX);
419
420 // plausibility check
421 if(RowColPairs.size()>0) GetOStream(Warnings0) << "MueLu::PermutationFactory: There are Row/Col pairs left!!!" << std::endl; // TODO fix me
422
423 // close Pperm
424
425 // count, how many row permutations are missing on current proc
426 size_t cntFreeRowIdx = 0;
427 std::queue<GlobalOrdinal> qFreeGRowIdx; // store global row ids of "free" rows
428 for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
429 if(RowIdStatusArray[lik] == SC_ZERO) {
430 cntFreeRowIdx++;
431 qFreeGRowIdx.push(RowIdStatus->getMap()->getGlobalElement(lik));
432 }
433 }
434
435 // fix Pperm
436 for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
437 if(RowIdStatusArray[lik] == SC_ZERO) {
438 RowIdStatusArray[lik] = SC_ONE; // use this row id
439 Pperm->replaceLocalValue(lik, qFreeGRowIdx.front());
440 // detect wide range permutations
441 if(floor(qFreeGRowIdx.front()/nDofsPerNode) != floor(RowIdStatus->getMap()->getGlobalElement(lik)/nDofsPerNode)) {
442 lWideRangeRowPermutations++;
443 }
444 qFreeGRowIdx.pop();
445 }
446 }
447 }
448
449 size_t cntUnusedColIdx = 0;
450 std::queue<GlobalOrdinal> qUnusedGColIdx; // store global column ids of "free" available columns
451 size_t cntFreeColIdx = 0;
452 std::queue<GlobalOrdinal> qFreeGColIdx; // store global column ids of "free" available columns
453 {
454 Teuchos::ArrayRCP< Scalar > ColIdStatusArray = ColIdStatus->getDataNonConst(0);
455 Teuchos::ArrayRCP< Scalar > ColIdUsedArray = ColIdUsed->getDataNonConst(0); // not sure about this
456
457 // close Qperm (free permutation entries in Qperm)
458 for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
459 if(ColIdStatusArray[ljk] == SC_ZERO) {
460 cntFreeColIdx++;
461 qFreeGColIdx.push(ColIdStatus->getMap()->getGlobalElement(ljk));
462 }
463 }
464
465 for (size_t ljk = 0; ljk < ColIdUsed->getLocalLength(); ++ljk) {
466 if(ColIdUsedArray[ljk] == SC_ZERO) {
467 cntUnusedColIdx++;
468 qUnusedGColIdx.push(ColIdUsed->getMap()->getGlobalElement(ljk));
469 }
470 }
471
472
473 // fix Qperm with local entries
474 for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
475 // stop if no (local) unused column idx are left
476 if(cntUnusedColIdx == 0) break;
477
478 if(ColIdStatusArray[ljk] == SC_ZERO) {
479 ColIdStatusArray[ljk] = SC_ONE; // use this row id
480 Qperm->replaceLocalValue(ljk, qUnusedGColIdx.front()); // loop over ColIdStatus (lives on domain map)
481 ColIdUsed->replaceGlobalValue(qUnusedGColIdx.front(),SC_ONE); // ljk is now used, too
482 // detect wide range permutations
483 if(floor(qUnusedGColIdx.front()/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) {
484 lWideRangeColPermutations++;
485 }
486 qUnusedGColIdx.pop();
487 cntUnusedColIdx--;
488 cntFreeColIdx--;
489 }
490 }
491
492
493 //Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX); // no export necessary, since changes only locally
494 //ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX);
495
496 // count, how many unused column idx are needed on current processor
497 // to complete Qperm
498 cntFreeColIdx = 0;
499 for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { // TODO avoid this loop
500 if(ColIdStatusArray[ljk] == SC_ZERO) {
501 cntFreeColIdx++;
502 }
503 }
504 }
505
506 GlobalOrdinal global_cntFreeColIdx = 0;
507 LocalOrdinal local_cntFreeColIdx = cntFreeColIdx;
508 MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntFreeColIdx), global_cntFreeColIdx);
509#ifdef DEBUG_OUTPUT
510 std::cout << "global # of empty column idx entries in Qperm: " << global_cntFreeColIdx << std::endl;
511#endif
512
513 // avoid global communication if possible
514 if(global_cntFreeColIdx > 0) {
515
516 // 1) count how many unused column ids are left
517 GlobalOrdinal global_cntUnusedColIdx = 0;
518 LocalOrdinal local_cntUnusedColIdx = cntUnusedColIdx;
519 MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntUnusedColIdx), global_cntUnusedColIdx);
520#ifdef DEBUG_OUTPUT
521 std::cout << "global # of unused column idx: " << global_cntUnusedColIdx << std::endl;
522#endif
523
524 // 2) communicate how many unused column ids are available on procs
525 std::vector<LocalOrdinal> local_UnusedColIdxOnProc (numProcs);
526 std::vector<LocalOrdinal> global_UnusedColIdxOnProc(numProcs);
527 local_UnusedColIdxOnProc[myRank] = local_cntUnusedColIdx;
528 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
529 local_UnusedColIdxOnProc.data(),
530 global_UnusedColIdxOnProc.data());
531
532#ifdef DEBUG_OUTPUT
533 std::cout << "PROC " << myRank << " global num unused indices per proc: ";
534 for (size_t ljk = 0; ljk < global_UnusedColIdxOnProc.size(); ++ljk) {
535 std::cout << " " << global_UnusedColIdxOnProc[ljk];
536 }
537 std::cout << std::endl;
538#endif
539
540 // 3) build array of length global_cntUnusedColIdx to globally replicate unused column idx
541 std::vector<GlobalOrdinal> local_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
542 std::vector<GlobalOrdinal> global_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
543 GlobalOrdinal global_cntUnusedColIdxStartIter = 0;
544 for(int proc=0; proc<myRank; proc++) {
545 global_cntUnusedColIdxStartIter += global_UnusedColIdxOnProc[proc];
546 }
547 for(GlobalOrdinal k = global_cntUnusedColIdxStartIter; k < global_cntUnusedColIdxStartIter+local_cntUnusedColIdx; k++) {
548 local_UnusedColIdxVector[k] = qUnusedGColIdx.front();
549 qUnusedGColIdx.pop();
550 }
551 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(global_cntUnusedColIdx),
552 local_UnusedColIdxVector.data(),
553 global_UnusedColIdxVector.data());
554#ifdef DEBUG_OUTPUT
555 std::cout << "PROC " << myRank << " global UnusedGColIdx: ";
556 for (size_t ljk = 0; ljk < global_UnusedColIdxVector.size(); ++ljk) {
557 std::cout << " " << global_UnusedColIdxVector[ljk];
558 }
559 std::cout << std::endl;
560#endif
561
562
563
564 // 4) communicate, how many column idx are needed on each processor
565 // to complete Qperm
566 std::vector<LocalOrdinal> local_EmptyColIdxOnProc (numProcs);
567 std::vector<LocalOrdinal> global_EmptyColIdxOnProc(numProcs);
568 local_EmptyColIdxOnProc[myRank] = local_cntFreeColIdx;
569 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs,
570 local_EmptyColIdxOnProc.data(),
571 global_EmptyColIdxOnProc.data());
572
573#ifdef DEBUG_OUTPUT
574 std::cout << "PROC " << myRank << " global num of needed column indices: ";
575 for (size_t ljk = 0; ljk < global_EmptyColIdxOnProc.size(); ++ljk) {
576 std::cout << " " << global_EmptyColIdxOnProc[ljk];
577 }
578 std::cout << std::endl;
579#endif
580
581 // 5) determine first index in global_UnusedColIdxVector for unused column indices,
582 // that are marked to be used by this processor
583 GlobalOrdinal global_UnusedColStartIdx = 0;
584 for(int proc=0; proc<myRank; proc++) {
585 global_UnusedColStartIdx += global_EmptyColIdxOnProc[proc];
586 }
587
588#ifdef DEBUG_OUTPUT
589 GetOStream(Statistics0) << "PROC " << myRank << " is allowd to use the following column gids: ";
590 for(GlobalOrdinal k = global_UnusedColStartIdx; k < global_UnusedColStartIdx + Teuchos::as<GlobalOrdinal>(cntFreeColIdx); k++) {
591 GetOStream(Statistics0) << global_UnusedColIdxVector[k] << " ";
592 }
593 GetOStream(Statistics0) << std::endl;
594#endif
595
596 // 6.) fix Qperm with global entries
597 {
598 Teuchos::ArrayRCP< Scalar > ColIdStatusArray = ColIdStatus->getDataNonConst(0);
599 GlobalOrdinal array_iter = 0;
600 for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
601
602 if(ColIdStatusArray[ljk] == SC_ZERO) {
603 ColIdStatusArray[ljk] = SC_ONE; // use this row id
604 Qperm->replaceLocalValue(ljk, global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]);
605 ColIdUsed->replaceGlobalValue(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter],SC_ONE);
606 // detect wide range permutations
607 if(floor(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) {
608 lWideRangeColPermutations++;
609 }
610 array_iter++;
611 //cntUnusedColIdx--; // check me
612 }
613 }
614 }
615 } // end if global_cntFreeColIdx > 0
617
618
619 // create new empty Matrix
620 Teuchos::RCP<CrsMatrixWrap> permPTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1));
621 Teuchos::RCP<CrsMatrixWrap> permQTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1));
622
623 {
624 Teuchos::ArrayRCP< Scalar > PpermData = Pperm->getDataNonConst(0);
625 Teuchos::ArrayRCP< Scalar > QpermData = Qperm->getDataNonConst(0);
626
627 for(size_t row=0; row<A->getLocalNumRows(); row++) {
628 // FIXME (mfh 30 Oct 2015): Teuchos::as doesn't know how to
629 // convert from complex Scalar to GO, so we have to take the real
630 // part first. I think that's the right thing to do in this case.
631 Teuchos::ArrayRCP<GlobalOrdinal> indoutP(1,Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(PpermData[row]))); // column idx for Perm^T
632 Teuchos::ArrayRCP<GlobalOrdinal> indoutQ(1,Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(QpermData[row]))); // column idx for Qperm
633 Teuchos::ArrayRCP<Scalar> valout(1,SC_ONE);
634 permPTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutP.view(0,indoutP.size()), valout.view(0,valout.size()));
635 permQTmatrix->insertGlobalValues (A->getRowMap()->getGlobalElement(row), indoutQ.view(0,indoutQ.size()), valout.view(0,valout.size()));
636 }
637 }
638
639 permPTmatrix->fillComplete();
640 permQTmatrix->fillComplete();
641
642 Teuchos::RCP<Matrix> permPmatrix = Utilities::Transpose(*permPTmatrix, true);
643
644 for(size_t row=0; row<permPTmatrix->getLocalNumRows(); row++) {
645 if(permPTmatrix->getNumEntriesInLocalRow(row) != 1)
646 GetOStream(Warnings0) <<"#entries in row " << row << " of permPTmatrix is " << permPTmatrix->getNumEntriesInLocalRow(row) << std::endl;
647 if(permPmatrix->getNumEntriesInLocalRow(row) != 1)
648 GetOStream(Warnings0) <<"#entries in row " << row << " of permPmatrix is " << permPmatrix->getNumEntriesInLocalRow(row) << std::endl;
649 if(permQTmatrix->getNumEntriesInLocalRow(row) != 1)
650 GetOStream(Warnings0) <<"#entries in row " << row << " of permQmatrix is " << permQTmatrix->getNumEntriesInLocalRow(row) << std::endl;
651 }
652
653 // build permP * A * permQT
654 Teuchos::RCP<Matrix> ApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*A, false, *permQTmatrix, false, GetOStream(Statistics2));
655 Teuchos::RCP<Matrix> permPApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*permPmatrix, false, *ApermQt, false, GetOStream(Statistics2));
656
657 /*
658 MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("A.mat", *A);
659 MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permP.mat", *permPmatrix);
660 MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permQt.mat", *permQTmatrix);
661 MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permPApermQt.mat", *permPApermQt);
662 */
663 // build scaling matrix
664 Teuchos::RCP<Vector> diagVec = VectorFactory::Build(permPApermQt->getRowMap(),true);
665 Teuchos::RCP<Vector> invDiagVec = VectorFactory::Build(permPApermQt->getRowMap(),true);
666 Teuchos::RCP<CrsMatrixWrap> diagScalingOp = Teuchos::rcp(new CrsMatrixWrap(permPApermQt->getRowMap(),1));
667 {
668 permPApermQt->getLocalDiagCopy(*diagVec);
669 Teuchos::ArrayRCP< const Scalar > diagVecData = diagVec->getData(0);
670 Teuchos::ArrayRCP< Scalar > invDiagVecData = invDiagVec->getDataNonConst(0);
671 for(size_t i = 0; i<diagVec->getMap()->getLocalNumElements(); ++i) {
672 if(diagVecData[i] != SC_ZERO)
673 invDiagVecData[i] = SC_ONE / diagVecData[i];
674 else {
675 invDiagVecData[i] = SC_ONE;
676 GetOStream(Statistics0) << "MueLu::PermutationFactory: found zero on diagonal in row " << i << std::endl;
677 }
678 }
679
680 for(size_t row=0; row<A->getLocalNumRows(); row++) {
681 Teuchos::ArrayRCP<GlobalOrdinal> indout(1,permPApermQt->getRowMap()->getGlobalElement(row)); // column idx for Perm^T
682 Teuchos::ArrayRCP<Scalar> valout(1,invDiagVecData[row]);
683 diagScalingOp->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indout.view(0,indout.size()), valout.view(0,valout.size()));
684 }
685 }
686 diagScalingOp->fillComplete();
687
688 Teuchos::RCP<Matrix> scaledA = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*diagScalingOp, false, *permPApermQt, false, GetOStream(Statistics2));
689 currentLevel.Set("A", Teuchos::rcp_dynamic_cast<Matrix>(scaledA), genFactory/*this*/);
690
691 currentLevel.Set("permA", Teuchos::rcp_dynamic_cast<Matrix>(permPApermQt), genFactory/*this*/); // TODO careful with this!!!
692 currentLevel.Set("permP", Teuchos::rcp_dynamic_cast<Matrix>(permPmatrix), genFactory/*this*/);
693 currentLevel.Set("permQT", Teuchos::rcp_dynamic_cast<Matrix>(permQTmatrix), genFactory/*this*/);
694 currentLevel.Set("permScaling", Teuchos::rcp_dynamic_cast<Matrix>(diagScalingOp), genFactory/*this*/);
695
697 // count zeros on diagonal in P -> number of row permutations
698 Teuchos::RCP<Vector> diagPVec = VectorFactory::Build(permPmatrix->getRowMap(),true);
699 permPmatrix->getLocalDiagCopy(*diagPVec);
700 LocalOrdinal lNumRowPermutations = 0;
701 GlobalOrdinal gNumRowPermutations = 0;
702 {
703 Teuchos::ArrayRCP< const Scalar > diagPVecData = diagPVec->getData(0);
704 for(size_t i = 0; i<diagPVec->getMap()->getLocalNumElements(); ++i) {
705 if(diagPVecData[i] == SC_ZERO) {
706 lNumRowPermutations++;
707 }
708 }
709 }
710
711 // sum up all entries in multipleColRequests over all processors
712 MueLu_sumAll(diagPVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumRowPermutations), gNumRowPermutations);
713
715 // count zeros on diagonal in Q^T -> number of column permutations
716 Teuchos::RCP<Vector> diagQTVec = VectorFactory::Build(permQTmatrix->getRowMap(),true);
717 permQTmatrix->getLocalDiagCopy(*diagQTVec);
718 LocalOrdinal lNumColPermutations = 0;
719 GlobalOrdinal gNumColPermutations = 0;
720 {
721 Teuchos::ArrayRCP< const Scalar > diagQTVecData = diagQTVec->getData(0);
722 for(size_t i = 0; i<diagQTVec->getMap()->getLocalNumElements(); ++i) {
723 if(diagQTVecData[i] == SC_ZERO) {
724 lNumColPermutations++;
725 }
726 }
727 }
728
729 // sum up all entries in multipleColRequests over all processors
730 MueLu_sumAll(diagQTVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumColPermutations), gNumColPermutations);
731
732 currentLevel.Set("#RowPermutations", gNumRowPermutations, genFactory/*this*/);
733 currentLevel.Set("#ColPermutations", gNumColPermutations, genFactory/*this*/);
734 currentLevel.Set("#WideRangeRowPermutations", gWideRangeRowPermutations, genFactory/*this*/);
735 currentLevel.Set("#WideRangeColPermutations", gWideRangeColPermutations, genFactory/*this*/);
736
737 GetOStream(Statistics0) << "#Row permutations/max possible permutations: " << gNumRowPermutations << "/" << diagPVec->getMap()->getGlobalNumElements() << std::endl;
738 GetOStream(Statistics0) << "#Column permutations/max possible permutations: " << gNumColPermutations << "/" << diagQTVec->getMap()->getGlobalNumElements() << std::endl;
739 GetOStream(Runtime1) << "#wide range row permutations: " << gWideRangeRowPermutations << " #wide range column permutations: " << gWideRangeColPermutations << std::endl;
740 }
741
742} // namespace MueLu
743
744#endif /* MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_ */
#define MueLu_sumAll(rcpComm, in, out)
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
void BuildPermutation(const Teuchos::RCP< Matrix > &A, const Teuchos::RCP< const Map > &permRowMap, Level &currentLevel, const FactoryBase *genFactory) const
build permutation operators
Exception throws to report errors in the internal logical of the program.
Base class for factories (e.g., R, P, and A_coarse).
Class that holds all level-specific information.
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Namespace for MueLu classes and methods.
@ Warnings0
Important warning messages (one line)
@ Statistics2
Print even more statistics.
@ Runtime0
One-liner description of what is happening.
@ Runtime1
Description of what is happening (more verbose)
@ Statistics0
Print statistics that do not involve significant additional computation.
void sortingPermutation(const std::vector< Scalar > &values, std::vector< LocalOrdinal > &v)