Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_DirectoryImpl_def.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ************************************************************************
38// @HEADER
39
40#ifndef TPETRA_DIRECTORYIMPL_DEF_HPP
41#define TPETRA_DIRECTORYIMPL_DEF_HPP
42
45
46#include "Tpetra_Distributor.hpp"
47#include "Tpetra_Map.hpp"
48#include "Tpetra_TieBreak.hpp"
49#include "Tpetra_Util.hpp"
50#include "Tpetra_Details_FixedHashTable.hpp"
51#include "Teuchos_Comm.hpp"
52#include <memory>
53#include <sstream>
54
55// FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
56#ifdef HAVE_TPETRACORE_MPI
57# include <mpi.h>
58#endif // HAVE_TPETRACORE_MPI
59// FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
60
61namespace Tpetra {
62 namespace Details {
63 template<class LO, class GO, class NT>
67 const Teuchos::ArrayView<const GO> &globalIDs,
68 const Teuchos::ArrayView<int> &nodeIDs,
69 const Teuchos::ArrayView<LO> &localIDs,
70 const bool computeLIDs) const
71 {
72 // Ensure that globalIDs, nodeIDs, and localIDs (if applicable)
73 // all have the same size, before modifying any output arguments.
75 std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
76 "Output arrays do not have the right sizes. nodeIDs.size() = "
77 << nodeIDs.size() << " != globalIDs.size() = " << globalIDs.size()
78 << ".");
80 computeLIDs && localIDs.size() != globalIDs.size(),
81 std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
82 "Output array do not have the right sizes. localIDs.size() = "
83 << localIDs.size() << " != globalIDs.size() = " << globalIDs.size()
84 << ".");
85
86 // Initially, fill nodeIDs and localIDs (if applicable) with
87 // invalid values. The "invalid" process ID is -1 (this means
88 // the same thing as MPI_ANY_SOURCE to Teuchos, so it's an
89 // "invalid" process ID); the invalid local ID comes from
90 // OrdinalTraits.
91 std::fill (nodeIDs.begin(), nodeIDs.end(), -1);
92 if (computeLIDs) {
93 std::fill (localIDs.begin(), localIDs.end(),
94 Teuchos::OrdinalTraits<LO>::invalid ());
95 }
96 // Actually do the work.
97 return this->getEntriesImpl (map, globalIDs, nodeIDs, localIDs, computeLIDs);
98 }
99
100
101 template<class LO, class GO, class NT>
104 numProcs_ (map.getComm ()->getSize ())
105 {}
106
107
108 template<class LO, class GO, class NT>
109 bool
111 isOneToOne (const Teuchos::Comm<int>& /* comm */) const
112 {
113 // A locally replicated Map is one-to-one only if there is no
114 // replication, that is, only if the Map's communicator only has
115 // one process.
116 return (numProcs_ == 1);
117 }
118
119
120 template<class LO, class GO, class NT>
121 std::string
123 {
124 std::ostringstream os;
125 os << "ReplicatedDirectory"
126 << "<" << Teuchos::TypeNameTraits<LO>::name ()
127 << ", " << Teuchos::TypeNameTraits<GO>::name ()
128 << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
129 return os.str ();
130 }
131
132
133 template<class LO, class GO, class NT>
135 ContiguousUniformDirectory (const map_type& map)
136 {
137 TEUCHOS_TEST_FOR_EXCEPTION(! map.isContiguous (), std::invalid_argument,
138 Teuchos::typeName (*this) << " constructor: Map is not contiguous.");
139 TEUCHOS_TEST_FOR_EXCEPTION(! map.isUniform (), std::invalid_argument,
140 Teuchos::typeName (*this) << " constructor: Map is not uniform.");
141 }
142
143
144 template<class LO, class GO, class NT>
145 std::string
147 {
148 std::ostringstream os;
149 os << "ContiguousUniformDirectory"
150 << "<" << Teuchos::TypeNameTraits<LO>::name ()
151 << ", " << Teuchos::TypeNameTraits<GO>::name ()
152 << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
153 return os.str ();
154 }
155
156
157 template<class LO, class GO, class NT>
161 const Teuchos::ArrayView<const GO> &globalIDs,
162 const Teuchos::ArrayView<int> &nodeIDs,
163 const Teuchos::ArrayView<LO> &localIDs,
164 const bool computeLIDs) const
165 {
166 using Teuchos::Comm;
167 using Teuchos::RCP;
168 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
169 const LO invalidLid = Teuchos::OrdinalTraits<LO>::invalid ();
171
172 RCP<const Comm<int> > comm = map.getComm ();
173 const GO g_min = map.getMinAllGlobalIndex ();
174
175 // Let N_G be the global number of elements in the Map,
176 // and P be the number of processes in its communicator.
177 // Then, N_G = P * N_L + R = R*(N_L + 1) + (P - R)*N_L.
178 //
179 // The first R processes own N_L+1 elements.
180 // The remaining P-R processes own N_L elements.
181 //
182 // Let g be the current GID, g_min be the global minimum GID,
183 // and g_0 = g - g_min. If g is a valid GID in this Map, then
184 // g_0 is in [0, N_G - 1].
185 //
186 // If g is a valid GID in this Map and g_0 < R*(N_L + 1), then
187 // the rank of the process that owns g is floor(g_0 / (N_L +
188 // 1)), and its corresponding local index on that process is g_0
189 // mod (N_L + 1).
190 //
191 // Let g_R = g_0 - R*(N_L + 1). If g is a valid GID in this Map
192 // and g_0 >= R*(N_L + 1), then the rank of the process that
193 // owns g is then R + floor(g_R / N_L), and its corresponding
194 // local index on that process is g_R mod N_L.
195
196 const size_type N_G =
197 static_cast<size_type> (map.getGlobalNumElements ());
198 const size_type P = static_cast<size_type> (comm->getSize ());
199 const size_type N_L = N_G / P;
200 const size_type R = N_G - N_L * P; // N_G mod P
201 const size_type N_R = R * (N_L + static_cast<size_type> (1));
202
203#ifdef HAVE_TPETRA_DEBUG
205 N_G != P*N_L + R, std::logic_error,
206 "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
207 "N_G = " << N_G << " != P*N_L + R = " << P << "*" << N_L << " + " << R
208 << " = " << P*N_L + R << ". "
209 "Please report this bug to the Tpetra developers.");
210#endif // HAVE_TPETRA_DEBUG
211
212 const size_type numGids = globalIDs.size (); // for const loop bound
213 // Avoid signed/unsigned comparisons below, in case GO is
214 // unsigned. (Integer literals are generally signed.)
215 const GO ONE = static_cast<GO> (1);
216
217 if (computeLIDs) {
218 for (size_type k = 0; k < numGids; ++k) {
219 const GO g_0 = globalIDs[k] - g_min;
220
221 // The first test is a little strange just in case GO is
222 // unsigned. Compilers raise a warning on tests like "x <
223 // 0" if x is unsigned, but don't usually raise a warning if
224 // the expression is a bit more complicated than that.
225 if (g_0 + ONE < ONE || g_0 >= static_cast<GO> (N_G)) {
226 nodeIDs[k] = -1;
229 }
230 else if (g_0 < static_cast<GO> (N_R)) {
231 // The GID comes from the initial sequence of R processes.
232 nodeIDs[k] = static_cast<int> (g_0 / static_cast<GO> (N_L + 1));
233 localIDs[k] = static_cast<LO> (g_0 % static_cast<GO> (N_L + 1));
234 }
235 else if (g_0 >= static_cast<GO> (N_R)) {
236 // The GID comes from the remaining P-R processes.
237 const GO g_R = g_0 - static_cast<GO> (N_R);
238 nodeIDs[k] = static_cast<int> (R + g_R / N_L);
239 localIDs[k] = static_cast<int> (g_R % N_L);
240 }
241#ifdef HAVE_TPETRA_DEBUG
242 else {
243 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
244 "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
245 "should never get here. "
246 "Please report this bug to the Tpetra developers.");
247 }
248#endif // HAVE_TPETRA_DEBUG
249 }
250 }
251 else { // don't compute local indices
252 for (size_type k = 0; k < numGids; ++k) {
253 const GO g_0 = globalIDs[k] - g_min;
254 // The first test is a little strange just in case GO is
255 // unsigned. Compilers raise a warning on tests like "x <
256 // 0" if x is unsigned, but don't usually raise a warning if
257 // the expression is a bit more complicated than that.
258 if (g_0 + ONE < ONE || g_0 >= static_cast<GO> (N_G)) {
259 nodeIDs[k] = -1;
261 }
262 else if (g_0 < static_cast<GO> (N_R)) {
263 // The GID comes from the initial sequence of R processes.
264 nodeIDs[k] = static_cast<int> (g_0 / static_cast<GO> (N_L + 1));
265 }
266 else if (g_0 >= static_cast<GO> (N_R)) {
267 // The GID comes from the remaining P-R processes.
268 const GO g_R = g_0 - static_cast<GO> (N_R);
269 nodeIDs[k] = static_cast<int> (R + g_R / N_L);
270 }
271#ifdef HAVE_TPETRA_DEBUG
272 else {
273 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
274 "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
275 "should never get here. "
276 "Please report this bug to the Tpetra developers.");
277 }
278#endif // HAVE_TPETRA_DEBUG
279 }
280 }
281 return res;
282 }
283
284 template<class LO, class GO, class NT>
286 DistributedContiguousDirectory (const map_type& map)
287 {
288 using Teuchos::arcp;
289 using Teuchos::gatherAll;
290 using Teuchos::RCP;
291
292 RCP<const Teuchos::Comm<int> > comm = map.getComm ();
293
294 TEUCHOS_TEST_FOR_EXCEPTION(! map.isDistributed (), std::invalid_argument,
295 Teuchos::typeName (*this) << " constructor: Map is not distributed.");
296 TEUCHOS_TEST_FOR_EXCEPTION(! map.isContiguous (), std::invalid_argument,
297 Teuchos::typeName (*this) << " constructor: Map is not contiguous.");
298
299 const int numProcs = comm->getSize ();
300
301 // Make room for the min global ID on each process, plus one
302 // entry at the end for the "max cap."
303 allMinGIDs_ = arcp<GO> (numProcs + 1);
304 // Get my process' min global ID.
305 GO minMyGID = map.getMinGlobalIndex ();
306 // Gather all of the min global IDs into the first numProcs
307 // entries of allMinGIDs_.
308
309 // FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
310 //
311 // The purpose of this giant hack is that gatherAll appears to
312 // interpret the "receive count" argument differently than
313 // MPI_Allgather does. Matt Bettencourt reports Valgrind issues
314 // (memcpy with overlapping data) with MpiComm<int>::gatherAll,
315 // which could relate either to this, or to OpenMPI.
316#ifdef HAVE_TPETRACORE_MPI
318 bool useRawMpi = true;
319 if (typeid (GO) == typeid (int)) {
321 } else if (typeid (GO) == typeid (long)) {
323 } else {
324 useRawMpi = false;
325 }
326 if (useRawMpi) {
327 using Teuchos::rcp_dynamic_cast;
328 using Teuchos::MpiComm;
329 RCP<const MpiComm<int> > mpiComm =
330 rcp_dynamic_cast<const MpiComm<int> > (comm);
331 // It could be a SerialComm instead, even in an MPI build, so
332 // be sure to check.
333 if (! comm.is_null ()) {
334 MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
335 const int err =
336 MPI_Allgather (&minMyGID, 1, rawMpiType,
337 allMinGIDs_.getRawPtr (), 1, rawMpiType,
338 rawMpiComm);
339 TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
340 "Tpetra::DistributedContiguousDirectory: MPI_Allgather failed");
341 } else {
342 gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
343 }
344 } else {
345 gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
346 }
347#else // NOT HAVE_TPETRACORE_MPI
348 gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
349#endif // HAVE_TPETRACORE_MPI
350 // FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
351
352 //gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
353
354 // Put the max cap at the end. Adding one lets us write loops
355 // over the global IDs with the usual strict less-than bound.
356 allMinGIDs_[numProcs] = map.getMaxAllGlobalIndex ()
357 + Teuchos::OrdinalTraits<GO>::one ();
358 }
359
360 template<class LO, class GO, class NT>
361 std::string
363 {
364 std::ostringstream os;
365 os << "DistributedContiguousDirectory"
366 << "<" << Teuchos::TypeNameTraits<LO>::name ()
367 << ", " << Teuchos::TypeNameTraits<GO>::name ()
368 << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
369 return os.str ();
370 }
371
372 template<class LO, class GO, class NT>
376 const Teuchos::ArrayView<const GO> &globalIDs,
377 const Teuchos::ArrayView<int> &nodeIDs,
378 const Teuchos::ArrayView<LO> &localIDs,
379 const bool computeLIDs) const
380 {
381 using Teuchos::Array;
382 using Teuchos::ArrayRCP;
383 using Teuchos::ArrayView;
384 using Teuchos::as;
385 using Teuchos::Comm;
386 using Teuchos::RCP;
387
389 RCP<const Teuchos::Comm<int> > comm = map.getComm ();
390 const int myRank = comm->getRank ();
391
392 // Map is on one process or is locally replicated.
393 typename ArrayView<int>::iterator procIter = nodeIDs.begin();
394 typename ArrayView<LO>::iterator lidIter = localIDs.begin();
396 for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
397 if (map.isNodeGlobalElement (*gidIter)) {
398 *procIter++ = myRank;
399 if (computeLIDs) {
400 *lidIter++ = map.getLocalElement (*gidIter);
401 }
402 }
403 else {
404 // Advance the pointers, leaving these values set to invalid
405 procIter++;
406 if (computeLIDs) {
407 lidIter++;
408 }
410 }
411 }
412 return res;
413 }
414
415 template<class LO, class GO, class NT>
419 const Teuchos::ArrayView<const GO> &globalIDs,
420 const Teuchos::ArrayView<int> &nodeIDs,
421 const Teuchos::ArrayView<LO> &localIDs,
422 const bool computeLIDs) const
423 {
424 using Teuchos::Array;
425 using Teuchos::ArrayRCP;
426 using Teuchos::ArrayView;
427 using Teuchos::as;
428 using Teuchos::Comm;
429 using Teuchos::RCP;
430
431 RCP<const Teuchos::Comm<int> > comm = map.getComm ();
432 const int numProcs = comm->getSize ();
433 const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
434 const GO noGIDsOnProc = std::numeric_limits<GO>::max();
436
437 // Find the first initialized GID for search below
441 if (allMinGIDs_[firstProcWithGIDs] != noGIDsOnProc) break;
442 }
443
444 // If Map is empty, return invalid values for all requested lookups
445 // This case should not happen, as an empty Map is not considered
446 // Distributed.
448 // No entries in Map
449 res = (globalIDs.size() > 0) ? IDNotPresent : AllIDsPresent;
450 std::fill(nodeIDs.begin(), nodeIDs.end(), -1);
451 if (computeLIDs)
452 std::fill(localIDs.begin(), localIDs.end(), LINVALID);
453 return res;
454 }
455
456 const GO one = as<GO> (1);
457 const GO nOverP = as<GO> (map.getGlobalNumElements ()
459
460 // Map is distributed but contiguous.
461 typename ArrayView<int>::iterator procIter = nodeIDs.begin();
462 typename ArrayView<LO>::iterator lidIter = localIDs.begin();
464 for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
465 LO LID = LINVALID; // Assume not found until proven otherwise
466 int image = -1;
467 GO GID = *gidIter;
468 // Guess uniform distribution (TODO: maybe replace by a binary search)
469 // We go through all this trouble to avoid overflow and
470 // signed / unsigned casting mistakes (that were made in
471 // previous versions of this code).
472 int curRank;
473 const GO firstGuess = firstProcWithGIDs + GID / std::max(nOverP, one);
474 curRank = as<int>(std::min(firstGuess, as<GO>(numProcs - 1)));
475
476 // This while loop will stop because
477 // allMinGIDs_[np] == global num elements
478 while (allMinGIDs_[curRank] == noGIDsOnProc) curRank++;
479
480 bool badGID = false;
481 while (curRank >= firstProcWithGIDs && GID < allMinGIDs_[curRank]) {
482 curRank--;
483 while (curRank >= firstProcWithGIDs &&
484 allMinGIDs_[curRank] == noGIDsOnProc) curRank--;
485 }
487 // GID is lower than all GIDs in map
488 badGID = true;
489 }
490 else if (curRank == numProcs) {
491 // GID is higher than all GIDs in map
492 badGID = true;
493 }
494 else {
495 // we have the lower bound; now limit from above
496 int above = curRank + 1;
497 while (allMinGIDs_[above] == noGIDsOnProc) above++;
498
499 while (GID >= allMinGIDs_[above]) {
500 curRank = above;
501 if (curRank == numProcs) {
502 // GID is higher than all GIDs in map
503 badGID = true;
504 break;
505 }
506 above++;
507 while (allMinGIDs_[above] == noGIDsOnProc) above++;
508 }
509 }
510
511 if (!badGID) {
512 image = curRank;
513 LID = as<LO> (GID - allMinGIDs_[image]);
514 }
515 else {
517 }
518 *procIter++ = image;
519 if (computeLIDs) {
520 *lidIter++ = LID;
521 }
522 }
523 return res;
524 }
525
526 template<class LO, class GO, class NT>
528 DistributedNoncontiguousDirectory (const map_type& map) :
529 oneToOneResult_ (ONE_TO_ONE_NOT_CALLED_YET), // to be revised below
530 locallyOneToOne_ (true), // to be revised below
531 useHashTables_ (false) // to be revised below
532 {
533 initialize (map, Teuchos::null);
534 }
535
536 template<class LO, class GO, class NT>
537 DistributedNoncontiguousDirectory<LO, GO, NT>::
538 DistributedNoncontiguousDirectory (const map_type& map,
539 const tie_break_type& tie_break) :
540 oneToOneResult_ (ONE_TO_ONE_NOT_CALLED_YET), // to be revised below
541 locallyOneToOne_ (true), // to be revised below
542 useHashTables_ (false) // to be revised below
543 {
544 initialize (map, Teuchos::ptrFromRef (tie_break));
545 }
546
547 template<class LO, class GO, class NT>
548 void
549 DistributedNoncontiguousDirectory<LO, GO, NT>::
550 initialize (const map_type& map,
551 Teuchos::Ptr<const tie_break_type> tie_break)
552 {
553 using Teuchos::arcp;
554 using Teuchos::Array;
555 using Teuchos::ArrayRCP;
556 using Teuchos::ArrayView;
557 using Teuchos::as;
558 using Teuchos::RCP;
559 using Teuchos::rcp;
560 using Teuchos::typeName;
561 using Teuchos::TypeNameTraits;
562 using std::cerr;
563 using std::endl;
564 typedef Array<int>::size_type size_type;
565
566 // This class' implementation of getEntriesImpl() currently
567 // encodes the following assumptions:
568 //
569 // 1. global_size_t >= GO
570 // 2. global_size_t >= int
571 // 3. global_size_t >= LO
572 //
573 // We check these assumptions here.
574 TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(GO),
575 std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
576 "global_size_t) = " << sizeof(global_size_t) << " < sizeof(Global"
577 "Ordinal = " << TypeNameTraits<LO>::name () << ") = " << sizeof(GO)
578 << ".");
579 TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(int),
580 std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
581 "global_size_t) = " << sizeof(global_size_t) << " < sizeof(int) = "
582 << sizeof(int) << ".");
583 TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(LO),
584 std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
585 "global_size_t) = " << sizeof(global_size_t) << " < sizeof(Local"
586 "Ordinal = " << TypeNameTraits<LO>::name () << ") = " << sizeof(LO)
587 << ".");
588
589 RCP<const Teuchos::Comm<int> > comm = map.getComm ();
590 const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid ();
591 const GO minAllGID = map.getMinAllGlobalIndex ();
592 const GO maxAllGID = map.getMaxAllGlobalIndex ();
593
594 // The "Directory Map" (see below) will have a range of elements
595 // from the minimum to the maximum GID of the user Map, and a
596 // minimum GID of minAllGID from the user Map. It doesn't
597 // actually have to store all those entries, though do beware of
598 // calling getLocalElementList on it (see Bug 5822).
599 const global_size_t numGlobalEntries = maxAllGID - minAllGID + 1;
600
601 // We can't afford to replicate the whole directory on each
602 // process, so create the "Directory Map", a uniform contiguous
603 // Map that describes how we will distribute the directory over
604 // processes.
605 //
606 // FIXME (mfh 08 May 2012) Here we're setting minAllGID to be
607 // the index base. The index base should be separate from the
608 // minimum GID.
609 directoryMap_ = rcp (new map_type (numGlobalEntries, minAllGID, comm,
610 GloballyDistributed));
611 // The number of Directory elements that my process owns.
612 const size_t dir_numMyEntries = directoryMap_->getLocalNumElements ();
613
614 // Fix for Bug 5822: If the input Map is "sparse," that is if
615 // the difference between the global min and global max GID is
616 // much larger than the global number of elements in the input
617 // Map, then it's possible that the Directory Map might have
618 // many more entries than the input Map on this process. This
619 // can cause memory scalability issues. In that case, we switch
620 // from the array-based implementation of Directory storage to
621 // the hash table - based implementation. We don't use hash
622 // tables all the time, because they are slower in the common
623 // case of a nonsparse Map.
624 //
625 // NOTE: This is a per-process decision. Some processes may use
626 // array-based storage, whereas others may use hash table -
627 // based storage.
628
629 // A hash table takes a constant factor more space, more or
630 // less, than an array. Thus, it's not worthwhile, even in
631 // terms of memory usage, always to use a hash table.
632 // Furthermore, array lookups are faster than hash table
633 // lookups, so it may be worthwhile to use an array even if it
634 // takes more space. The "sparsity threshold" governs when to
635 // switch to a hash table - based implementation.
636 const size_t inverseSparsityThreshold = 10;
637 useHashTables_ =
638 (dir_numMyEntries >= inverseSparsityThreshold * map.getLocalNumElements());
639
640 // Get list of process IDs that own the directory entries for the
641 // Map GIDs. These will be the targets of the sends that the
642 // Distributor will do.
643 const int myRank = comm->getRank ();
644 const size_t numMyEntries = map.getLocalNumElements ();
645 Array<int> sendImageIDs (numMyEntries);
646 ArrayView<const GO> myGlobalEntries = map.getLocalElementList ();
647 // An ID not present in this lookup indicates that it lies outside
648 // of the range [minAllGID,maxAllGID] (from map_). this means
649 // something is wrong with map_, our fault.
650 const LookupStatus lookupStatus =
651 directoryMap_->getRemoteIndexList (myGlobalEntries, sendImageIDs);
652 TEUCHOS_TEST_FOR_EXCEPTION(
653 lookupStatus == IDNotPresent, std::logic_error, Teuchos::typeName(*this)
654 << " constructor: the Directory Map could not find out where one or "
655 "more of my Map's indices should go. The input to getRemoteIndexList "
656 "is " << Teuchos::toString (myGlobalEntries) << ", and the output is "
657 << Teuchos::toString (sendImageIDs ()) << ". The input Map itself has "
658 "the following entries on the calling process " <<
659 map.getComm ()->getRank () << ": " <<
660 Teuchos::toString (map.getLocalElementList ()) << ", and has "
661 << map.getGlobalNumElements () << " total global indices in ["
662 << map.getMinAllGlobalIndex () << "," << map.getMaxAllGlobalIndex ()
663 << "]. The Directory Map has "
664 << directoryMap_->getGlobalNumElements () << " total global indices in "
665 "[" << directoryMap_->getMinAllGlobalIndex () << "," <<
666 directoryMap_->getMaxAllGlobalIndex () << "], and the calling process "
667 "has GIDs [" << directoryMap_->getMinGlobalIndex () << "," <<
668 directoryMap_->getMaxGlobalIndex () << "]. "
669 "This probably means there is a bug in Map or Directory. "
670 "Please report this bug to the Tpetra developers.");
671
672 // Initialize the distributor using the list of process IDs to
673 // which to send. We'll use the distributor to send out triples
674 // of (GID, process ID, LID). We're sending the entries to the
675 // processes that the Directory Map says should own them, which is
676 // why we called directoryMap_->getRemoteIndexList() above.
677 Distributor distor (comm);
678 const size_t numReceives = distor.createFromSends (sendImageIDs);
679
680 // NOTE (mfh 21 Mar 2012) The following code assumes that
681 // sizeof(GO) >= sizeof(int) and sizeof(GO) >= sizeof(LO).
682 //
683 // Create and fill buffer of (GID, PID, LID) triples to send
684 // out. We pack the (GID, PID, LID) triples into a single Array
685 // of GO, casting the PID from int to GO and the LID from LO to
686 // GO as we do so.
687 //
688 // FIXME (mfh 23 Mar 2014) This assumes that sizeof(LO) <=
689 // sizeof(GO) and sizeof(int) <= sizeof(GO). The former is
690 // required, and the latter is generally the case, but we should
691 // still check for this.
692 const int packetSize = 3; // We're sending triples, so packet size is 3.
693 Kokkos::View<GO*, Kokkos::HostSpace> exportEntries("exportEntries", packetSize * numMyEntries);
694 {
695 size_t exportIndex = 0;
696 for (size_t i = 0; i < numMyEntries; ++i) {
697 exportEntries[exportIndex++] = myGlobalEntries[i];
698 exportEntries[exportIndex++] = as<GO> (myRank);
699 exportEntries[exportIndex++] = as<GO> (i);
700 }
701 }
702 // Buffer of data to receive. The Distributor figured out for
703 // us how many packets we're receiving, when we called its
704 // createFromSends() method to set up the distribution plan.
705 Kokkos::View<GO*, Kokkos::HostSpace> importElements("importElements", packetSize * distor.getTotalReceiveLength());
706
707 // Distribute the triples of (GID, process ID, LID).
708 distor.doPostsAndWaits(exportEntries, packetSize, importElements);
709
710 // Unpack the redistributed data. Both implementations of
711 // Directory storage map from an LID in the Directory Map (which
712 // is the LID of the GID to store) to either a PID or an LID in
713 // the input Map. Each "packet" (contiguous chunk of
714 // importElements) contains a triple: (GID, PID, LID).
715 if (useHashTables_) {
716 // Create the hash tables. We know exactly how many elements
717 // to expect in each hash table. FixedHashTable's constructor
718 // currently requires all the keys and values at once, so we
719 // have to extract them in temporary arrays. It may be
720 // possible to rewrite FixedHashTable to use a "start fill" /
721 // "end fill" approach that avoids the temporary arrays, but
722 // we won't try that for now.
723
724 // The constructors of Array and ArrayRCP that take a number
725 // of elements all initialize the arrays. Instead, allocate
726 // raw arrays, then hand them off to ArrayRCP, to avoid the
727 // initial unnecessary initialization without losing the
728 // benefit of exception safety (and bounds checking, in a
729 // debug build).
730 LO* tableKeysRaw = NULL;
731 LO* tableLidsRaw = NULL;
732 int* tablePidsRaw = NULL;
733 try {
734 tableKeysRaw = new LO [numReceives];
735 tableLidsRaw = new LO [numReceives];
736 tablePidsRaw = new int [numReceives];
737 } catch (...) {
738 if (tableKeysRaw != NULL) {
739 delete [] tableKeysRaw;
740 }
741 if (tableLidsRaw != NULL) {
742 delete [] tableLidsRaw;
743 }
744 if (tablePidsRaw != NULL) {
745 delete [] tablePidsRaw;
746 }
747 throw;
748 }
749 ArrayRCP<LO> tableKeys (tableKeysRaw, 0, numReceives, true);
750 ArrayRCP<LO> tableLids (tableLidsRaw, 0, numReceives, true);
751 ArrayRCP<int> tablePids (tablePidsRaw, 0, numReceives, true);
752
753 if (tie_break.is_null ()) {
754 // Fill the temporary arrays of keys and values.
755 size_type importIndex = 0;
756 for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
757 const GO curGID = importElements[importIndex++];
758 const LO curLID = directoryMap_->getLocalElement (curGID);
759 TEUCHOS_TEST_FOR_EXCEPTION(
760 curLID == LINVALID, std::logic_error,
761 Teuchos::typeName(*this) << " constructor: Incoming global index "
762 << curGID << " does not have a corresponding local index in the "
763 "Directory Map. Please report this bug to the Tpetra developers.");
764 tableKeys[i] = curLID;
765 tablePids[i] = importElements[importIndex++];
766 tableLids[i] = importElements[importIndex++];
767 }
768 // Set up the hash tables. The hash tables' constructor
769 // detects whether there are duplicates, so that we can set
770 // locallyOneToOne_.
771 lidToPidTable_ =
772 rcp (new lidToPidTable_type (tableKeys (), tablePids ()));
773 locallyOneToOne_ = ! (lidToPidTable_->hasDuplicateKeys ());
774 lidToLidTable_ =
775 rcp (new lidToLidTable_type (tableKeys (), tableLids ()));
776 }
777 else { // tie_break is NOT null
778
779 // For each directory Map LID received, collect all the
780 // corresponding (PID,LID) pairs. If the input Map is not
781 // one-to-one, corresponding directory Map LIDs will have
782 // more than one pair. In that case, we will use the
783 // TieBreak object to pick exactly one pair.
784 typedef std::map<LO, std::vector<std::pair<int, LO> > > pair_table_type;
785 pair_table_type ownedPidLidPairs;
786
787 // For each directory Map LID received, collect the zero or
788 // more input Map (PID,LID) pairs into ownedPidLidPairs.
789 size_type importIndex = 0;
790 for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
791 const GO curGID = importElements[importIndex++];
792 const LO dirMapLid = directoryMap_->getLocalElement (curGID);
793 TEUCHOS_TEST_FOR_EXCEPTION(
794 dirMapLid == LINVALID, std::logic_error,
795 Teuchos::typeName(*this) << " constructor: Incoming global index "
796 << curGID << " does not have a corresponding local index in the "
797 "Directory Map. Please report this bug to the Tpetra developers.");
798 tableKeys[i] = dirMapLid;
799 const int PID = importElements[importIndex++];
800 const int LID = importElements[importIndex++];
801
802 // These may change below. We fill them in just to ensure
803 // that they won't have invalid values.
804 tablePids[i] = PID;
805 tableLids[i] = LID;
806
807 // For every directory Map LID, we have to remember all
808 // (PID, LID) pairs. The TieBreak object will arbitrate
809 // between them in the loop below.
810 ownedPidLidPairs[dirMapLid].push_back (std::make_pair (PID, LID));
811 }
812
813 // Use TieBreak to arbitrate between (PID,LID) pairs
814 // corresponding to each directory Map LID.
815 //
816 // FIXME (mfh 23 Mar 2014) How do I know that i is the same
817 // as the directory Map LID?
818 // KDD 21 Mar 2018: It isn't, especially if the user's IDs are not
819 // contiguous, but the directory map is. Need to set tablePids[i]
820 // and tableLids[i], so need to loop over numReceives (as that is
821 // how those arrays are allocated). FIXED
822
823 for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
824 const LO dirMapLid = tableKeys[i];
825 const std::vector<std::pair<int, LO> >& pidLidList =
826 ownedPidLidPairs[dirMapLid];
827 const size_t listLen = pidLidList.size();
828 if (listLen == 0) continue; // KDD This will never happen
829 const GO dirMapGid = directoryMap_->getGlobalElement (dirMapLid);
830 if (listLen > 1) {
831 locallyOneToOne_ = false;
832 }
833 // If there is some (PID,LID) pair for the current input
834 // Map LID, then it makes sense to invoke the TieBreak
835 // object to arbitrate between the options. Even if
836 // there is only one (PID,LID) pair, we still want to
837 // give the TieBreak object a chance to do whatever it
838 // likes to do, in terms of side effects (e.g., track
839 // (PID,LID) pairs).
840 const size_type index =
841 static_cast<size_type> (tie_break->selectedIndex (dirMapGid,
842 pidLidList));
843 tablePids[i] = pidLidList[index].first;
844 tableLids[i] = pidLidList[index].second;
845 }
846
847 // Set up the hash tables.
848 lidToPidTable_ =
849 rcp (new lidToPidTable_type (tableKeys (), tablePids ()));
850 lidToLidTable_ =
851 rcp (new lidToLidTable_type (tableKeys (), tableLids ()));
852 }
853 }
854 else {
855 if (tie_break.is_null ()) {
856 // Use array-based implementation of Directory storage.
857 // Allocate these arrays and fill them with invalid values,
858 // in case the input Map's GID list is sparse (i.e., does
859 // not populate all GIDs from minAllGID to maxAllGID).
860 PIDs_ = arcp<int> (dir_numMyEntries);
861 std::fill (PIDs_.begin (), PIDs_.end (), -1);
862 LIDs_ = arcp<LO> (dir_numMyEntries);
863 std::fill (LIDs_.begin (), LIDs_.end (), LINVALID);
864 // Fill in the arrays with PIDs resp. LIDs.
865 size_type importIndex = 0;
866 for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
867 const GO curGID = importElements[importIndex++];
868 const LO curLID = directoryMap_->getLocalElement (curGID);
869 TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error,
870 Teuchos::typeName(*this) << " constructor: Incoming global index "
871 << curGID << " does not have a corresponding local index in the "
872 "Directory Map. Please report this bug to the Tpetra developers.");
873
874 // If PIDs_[curLID] is not -1, then curGID is a duplicate
875 // on the calling process, so the Directory is not locally
876 // one-to-one.
877 if (PIDs_[curLID] != -1) {
878 locallyOneToOne_ = false;
879 }
880 PIDs_[curLID] = importElements[importIndex++];
881 LIDs_[curLID] = importElements[importIndex++];
882 }
883 }
884 else {
885 PIDs_ = arcp<int> (dir_numMyEntries);
886 LIDs_ = arcp<LO> (dir_numMyEntries);
887 std::fill (PIDs_.begin (), PIDs_.end (), -1);
888
889 // All received (PID, LID) pairs go into ownedPidLidPairs.
890 // This is a map from the directory Map's LID to the (PID,
891 // LID) pair (where the latter LID comes from the input Map,
892 // not the directory Map). If the input Map is not
893 // one-to-one, corresponding LIDs will have
894 // ownedPidLidPairs[curLID].size() > 1. In that case, we
895 // will use the TieBreak object to pick exactly one pair.
896 Array<std::vector<std::pair<int, LO> > > ownedPidLidPairs (dir_numMyEntries);
897 size_t importIndex = 0;
898 for (size_t i = 0; i < numReceives; ++i) {
899 const GO GID = importElements[importIndex++];
900 const int PID = importElements[importIndex++];
901 const LO LID = importElements[importIndex++];
902
903 const LO dirMapLid = directoryMap_->getLocalElement (GID);
904 TEUCHOS_TEST_FOR_EXCEPTION(
905 dirMapLid == LINVALID, std::logic_error,
906 Teuchos::typeName(*this) << " constructor: Incoming global index "
907 << GID << " does not have a corresponding local index in the "
908 "Directory Map. Please report this bug to the Tpetra developers.");
909 ownedPidLidPairs[dirMapLid].push_back (std::make_pair (PID, LID));
910 }
911
912 // Use TieBreak to arbitrate between (PID,LID) pairs
913 // corresponding to each directory Map LID.
914 //
915 // FIXME (mfh 23 Mar 2014) How do I know that i is the same
916 // as the directory Map LID?
917 // KDD 21 Mar 2018: It isn't, especially if the user's IDs are not
918 // contiguous. Loop over all ownedPidLidPairs; skip those that have
919 // empty lists. FIXED
920
921 for (size_t i = 0; i < dir_numMyEntries; ++i) {
922 const std::vector<std::pair<int, LO> >& pidLidList =
923 ownedPidLidPairs[i];
924 const size_t listLen = pidLidList.size();
925 if (listLen == 0) continue; // KDD will happen for GIDs not in
926 // KDD the user's source map
927 const LO dirMapLid = static_cast<LO> (i);
928 const GO dirMapGid = directoryMap_->getGlobalElement (dirMapLid);
929 if (listLen > 1) {
930 locallyOneToOne_ = false;
931 }
932 // If there is some (PID,LID) pair for the current input
933 // Map LID, then it makes sense to invoke the TieBreak
934 // object to arbitrate between the options. Even if
935 // there is only one (PID,LID) pair, we still want to
936 // give the TieBreak object a chance to do whatever it
937 // likes to do, in terms of side effects (e.g., track
938 // (PID,LID) pairs).
939 const size_type index =
940 static_cast<size_type> (tie_break->selectedIndex (dirMapGid,
941 pidLidList));
942 PIDs_[i] = pidLidList[index].first;
943 LIDs_[i] = pidLidList[index].second;
944 }
945 }
946 }
947 }
948
949 template<class LO, class GO, class NT>
950 std::string
952 {
953 std::ostringstream os;
954 os << "DistributedNoncontiguousDirectory"
955 << "<" << Teuchos::TypeNameTraits<LO>::name ()
956 << ", " << Teuchos::TypeNameTraits<GO>::name ()
957 << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
958 return os.str ();
959 }
960
961 template<class LO, class GO, class NT>
964 getEntriesImpl (const map_type& map,
965 const Teuchos::ArrayView<const GO> &globalIDs,
966 const Teuchos::ArrayView<int> &nodeIDs,
967 const Teuchos::ArrayView<LO> &localIDs,
968 const bool computeLIDs) const
969 {
970 using Teuchos::Array;
971 using Teuchos::ArrayRCP;
972 using Teuchos::ArrayView;
973 using Teuchos::as;
974 using Teuchos::RCP;
975 using Teuchos::toString;
976 using Details::Behavior;
978 using std::cerr;
979 using std::endl;
980 using size_type = typename Array<GO>::size_type;
981 const char funcPrefix[] = "Tpetra::"
982 "DistributedNoncontiguousDirectory::getEntriesImpl: ";
983 const char errSuffix[] =
984 " Please report this bug to the Tpetra developers.";
985
986 RCP<const Teuchos::Comm<int> > comm = map.getComm ();
987 const bool verbose = Behavior::verbose ("Directory") ||
988 Behavior::verbose ("Tpetra::Directory");
989 const size_t maxNumToPrint = verbose ?
991
992 std::unique_ptr<std::string> procPrefix;
993 if (verbose) {
994 std::ostringstream os;
995 os << "Proc ";
996 if (map.getComm ().is_null ()) {
997 os << "?";
998 }
999 else {
1000 os << map.getComm ()->getRank ();
1001 }
1002 os << ": ";
1003 procPrefix = std::unique_ptr<std::string>(
1004 new std::string(os.str()));
1005 os << funcPrefix << "{";
1007 os << ", ";
1009 os << ", ";
1011 os << ", computeLIDs: "
1012 << (computeLIDs ? "true" : "false") << "}" << endl;
1013 cerr << os.str ();
1014 }
1015
1016 const size_t numEntries = globalIDs.size ();
1017 const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
1019
1020 //
1021 // Set up directory structure.
1022 //
1023
1024 // If we're computing LIDs, we also have to include them in each
1025 // packet, along with the GID and process ID.
1026 const int packetSize = computeLIDs ? 3 : 2;
1027
1028 // For data distribution, we use: Surprise! A Distributor!
1029 Distributor distor (comm);
1030
1031 // Get directory locations for the requested list of entries.
1032 Array<int> dirImages (numEntries);
1033 if (verbose) {
1034 std::ostringstream os;
1035 os << *procPrefix << "Call directoryMap_->getRemoteIndexList"
1036 << endl;
1037 cerr << os.str ();
1038 }
1039 res = directoryMap_->getRemoteIndexList (globalIDs, dirImages ());
1040 if (verbose) {
1041 std::ostringstream os;
1042 os << *procPrefix << "Director Map getRemoteIndexList out ";
1044 os << endl;
1045 cerr << os.str ();
1046 }
1047
1048 // Check for unfound globalIDs and set corresponding nodeIDs to -1
1049 size_t numMissing = 0;
1050 if (res == IDNotPresent) {
1051 for (size_t i=0; i < numEntries; ++i) {
1052 if (dirImages[i] == -1) {
1053 nodeIDs[i] = -1;
1054 if (computeLIDs) {
1055 localIDs[i] = LINVALID;
1056 }
1057 numMissing++;
1058 }
1059 }
1060 }
1061
1064 if (verbose) {
1065 std::ostringstream os;
1066 os << *procPrefix << "Call Distributor::createFromRecvs"
1067 << endl;
1068 cerr << os.str ();
1069 }
1070 distor.createFromRecvs (globalIDs, dirImages (), sendGIDs, sendImages);
1071 if (verbose) {
1072 std::ostringstream os;
1073 os << *procPrefix << "Distributor::createFromRecvs result: "
1074 << "{";
1076 os << ", ";
1078 os << "}" << endl;
1079 cerr << os.str ();
1080 }
1081 const size_type numSends = sendGIDs.size ();
1082
1083 //
1084 // mfh 13 Nov 2012:
1085 //
1086 // The code below temporarily stores LO, GO, and int values in
1087 // an array of global_size_t. If one of the signed types (LO
1088 // and GO should both be signed) happened to be -1 (or some
1089 // negative number, but -1 is the one that came up today), then
1090 // conversion to global_size_t will result in a huge
1091 // global_size_t value, and thus conversion back may overflow.
1092 // (Teuchos::as doesn't know that we meant it to be an LO or GO
1093 // all along.)
1094 //
1095 // The overflow normally would not be a problem, since it would
1096 // just go back to -1 again. However, Teuchos::as does range
1097 // checking on conversions in a debug build, so it throws an
1098 // exception (std::range_error) in this case. Range checking is
1099 // generally useful in debug mode, so we don't want to disable
1100 // this behavior globally.
1101 //
1102 // We solve this problem by forgoing use of Teuchos::as for the
1103 // conversions below from LO, GO, or int to global_size_t, and
1104 // the later conversions back from global_size_t to LO, GO, or
1105 // int.
1106 //
1107 // I've recorded this discussion as Bug 5760.
1108 //
1109
1110 // global_size_t >= GO
1111 // global_size_t >= size_t >= int
1112 // global_size_t >= size_t >= LO
1113 // Therefore, we can safely store all of these in a global_size_t
1114 Kokkos::View<global_size_t*, Kokkos::HostSpace> exports("exports", packetSize * numSends);
1115 {
1116 // Packet format:
1117 // - If computing LIDs: (GID, PID, LID)
1118 // - Otherwise: (GID, PID)
1119 //
1120 // "PID" means "process ID" (a.k.a. "node ID," a.k.a. "rank").
1121
1122 // Current position to which to write in exports array. If
1123 // sending pairs, we pack the (GID, PID) pair for gid =
1124 // sendGIDs[k] in exports[2*k], exports[2*k+1]. If sending
1125 // triples, we pack the (GID, PID, LID) pair for gid =
1126 // sendGIDs[k] in exports[3*k, 3*k+1, 3*k+2].
1127 size_t exportsIndex = 0;
1128
1129 if (useHashTables_) {
1130 if (verbose) {
1131 std::ostringstream os;
1132 os << *procPrefix << "Pack exports (useHashTables_ true)"
1133 << endl;
1134 cerr << os.str ();
1135 }
1136 for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1137 const GO curGID = sendGIDs[gidIndex];
1138 // Don't use as() here (see above note).
1139 exports[exportsIndex++] = static_cast<global_size_t> (curGID);
1140 const LO curLID = directoryMap_->getLocalElement (curGID);
1142 (curLID == LINVALID, std::logic_error, funcPrefix <<
1143 "Directory Map's global index " << curGID << " lacks "
1144 "a corresponding local index." << errSuffix);
1145 // Don't use as() here (see above note).
1146 exports[exportsIndex++] =
1147 static_cast<global_size_t> (lidToPidTable_->get (curLID));
1148 if (computeLIDs) {
1149 // Don't use as() here (see above note).
1150 exports[exportsIndex++] =
1151 static_cast<global_size_t> (lidToLidTable_->get (curLID));
1152 }
1153 }
1154 }
1155 else {
1156 if (verbose) {
1157 std::ostringstream os;
1158 os << *procPrefix << "Pack exports (useHashTables_ false)"
1159 << endl;
1160 cerr << os.str ();
1161 }
1162 for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1163 const GO curGID = sendGIDs[gidIndex];
1164 // Don't use as() here (see above note).
1165 exports[exportsIndex++] = static_cast<global_size_t> (curGID);
1166 const LO curLID = directoryMap_->getLocalElement (curGID);
1168 (curLID == LINVALID, std::logic_error, funcPrefix <<
1169 "Directory Map's global index " << curGID << " lacks "
1170 "a corresponding local index." << errSuffix);
1171 // Don't use as() here (see above note).
1172 exports[exportsIndex++] =
1173 static_cast<global_size_t> (PIDs_[curLID]);
1174 if (computeLIDs) {
1175 // Don't use as() here (see above note).
1176 exports[exportsIndex++] =
1177 static_cast<global_size_t> (LIDs_[curLID]);
1178 }
1179 }
1180 }
1181
1183 (exportsIndex > exports.size(), std::logic_error,
1184 funcPrefix << "On Process " << comm->getRank () << ", "
1185 "exportsIndex = " << exportsIndex << " > exports.size() = "
1186 << exports.size () << "." << errSuffix);
1187 }
1188
1190 (numEntries < numMissing, std::logic_error, funcPrefix <<
1191 "On Process " << comm->getRank () << ", numEntries = " <<
1192 numEntries << " < numMissing = " << numMissing << "." <<
1193 errSuffix);
1194
1195 //
1196 // mfh 13 Nov 2012: See note above on conversions between
1197 // global_size_t and LO, GO, or int.
1198 //
1199 const size_t numRecv = numEntries - numMissing;
1200
1201 {
1202 const size_t importLen = packetSize * distor.getTotalReceiveLength ();
1203 const size_t requiredImportLen = numRecv * packetSize;
1204 const int myRank = comm->getRank ();
1206 importLen < requiredImportLen, std::logic_error,
1207 "Tpetra::Details::DistributedNoncontiguousDirectory::getEntriesImpl: "
1208 "On Process " << myRank << ": The 'imports' array must have length "
1209 "at least " << requiredImportLen << ", but its actual length is " <<
1210 importLen << ". numRecv: " << numRecv << ", packetSize: " <<
1211 packetSize << ", numEntries (# GIDs): " << numEntries <<
1212 ", numMissing: " << numMissing << ": distor.getTotalReceiveLength(): "
1213 << distor.getTotalReceiveLength () << ". " << std::endl <<
1214 "Distributor description: " << distor.description () << ". "
1215 << std::endl <<
1216 "Please report this bug to the Tpetra developers.");
1217 }
1218
1219 Kokkos::View<global_size_t*, Kokkos::HostSpace> imports("imports", packetSize * distor.getTotalReceiveLength());
1220 // FIXME (mfh 20 Mar 2014) One could overlap the sort2() below
1221 // with communication, by splitting this call into doPosts and
1222 // doWaits. The code is still correct in this form, however.
1223 if (verbose) {
1224 std::ostringstream os;
1225 os << *procPrefix << "Call doPostsAndWaits: {"
1226 << "packetSize: " << packetSize << ", ";
1227 verbosePrintArray(os, exports, "exports", maxNumToPrint);
1228 os << "}" << endl;
1229 cerr << os.str ();
1230 }
1231 distor.doPostsAndWaits(exports, packetSize, imports);
1232 if (verbose) {
1233 std::ostringstream os;
1234 os << *procPrefix << "doPostsAndWaits result: ";
1235 verbosePrintArray(os, imports, "imports", maxNumToPrint);
1236 os << endl;
1237 cerr << os.str ();
1238 }
1239
1240 Array<GO> sortedIDs (globalIDs); // deep copy (for later sorting)
1241 Array<GO> offset (numEntries); // permutation array (sort2 output)
1242 for (GO ii = 0; ii < static_cast<GO> (numEntries); ++ii) {
1243 offset[ii] = ii;
1244 }
1245 sort2 (sortedIDs.begin(), sortedIDs.begin() + numEntries, offset.begin());
1246 if (verbose) {
1247 std::ostringstream os;
1248 os << *procPrefix;
1250 os << ", ";
1252 os << endl;
1253 cerr << os.str ();
1254 }
1255
1256 size_t importsIndex = 0;
1257
1258 // we know these conversions are in range, because we loaded this data
1259 //
1260 // Don't use as() for conversions here; we know they are in range.
1261 for (size_t i = 0; i < numRecv; ++i) {
1262 const GO curGID = static_cast<GO> (imports[importsIndex++]);
1263 auto p1 = std::equal_range (sortedIDs.begin(),
1264 sortedIDs.end(), curGID);
1265 if (p1.first != p1.second) {
1266 const size_t j = p1.first - sortedIDs.begin();
1267 nodeIDs[offset[j]] =
1268 static_cast<int> (imports[importsIndex++]);
1269 if (computeLIDs) {
1270 localIDs[offset[j]] =
1271 static_cast<LO> (imports[importsIndex++]);
1272 }
1273 if (nodeIDs[offset[j]] == -1) {
1274 res = IDNotPresent;
1275 }
1276 }
1277 }
1278
1280 (size_t (importsIndex) > size_t (imports.size ()),
1281 std::logic_error, funcPrefix << "On Process " <<
1282 comm->getRank () << ": importsIndex = " << importsIndex
1283 << " > imports.size() = " << imports.size () << ". "
1284 "numRecv: " << numRecv
1285 << ", packetSize: " << packetSize << ", "
1286 "numEntries (# GIDs): " << numEntries
1287 << ", numMissing: " << numMissing
1288 << ": distor.getTotalReceiveLength(): "
1289 << distor.getTotalReceiveLength () << "." << errSuffix);
1290 if (verbose) {
1291 std::ostringstream os;
1292 os << *procPrefix << funcPrefix << "Done!" << endl;
1293 cerr << os.str ();
1294 }
1295 return res;
1296 }
1297
1298
1299 template<class LO, class GO, class NT>
1300 bool
1302 isOneToOne (const Teuchos::Comm<int>& comm) const
1303 {
1304 if (oneToOneResult_ == ONE_TO_ONE_NOT_CALLED_YET) {
1305 const int lcl121 = isLocallyOneToOne () ? 1 : 0;
1306 int gbl121 = 0;
1307 Teuchos::reduceAll<int, int> (comm, Teuchos::REDUCE_MIN, lcl121,
1308 Teuchos::outArg (gbl121));
1309 oneToOneResult_ = (gbl121 == 1) ? ONE_TO_ONE_TRUE : ONE_TO_ONE_FALSE;
1310 }
1311 return (oneToOneResult_ == ONE_TO_ONE_TRUE);
1312 }
1313 } // namespace Details
1314} // namespace Tpetra
1315
1316//
1317// Explicit instantiation macro
1318//
1319// Must be expanded from within the Tpetra namespace!
1320//
1321#define TPETRA_DIRECTORYIMPL_INSTANT(LO,GO,NODE) \
1322 namespace Details { \
1323 template class Directory< LO , GO , NODE >; \
1324 template class ReplicatedDirectory< LO , GO , NODE >; \
1325 template class ContiguousUniformDirectory< LO, GO, NODE >; \
1326 template class DistributedContiguousDirectory< LO , GO , NODE >; \
1327 template class DistributedNoncontiguousDirectory< LO , GO , NODE >; \
1328 }
1329
1330#endif // TPETRA_DIRECTORYIMPL_DEF_HPP
Interface for breaking ties in ownership.
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.
Description of Tpetra's behavior.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Implementation of Directory for a contiguous, uniformly distributed Map.
std::string description() const override
A one-line human-readable description of this object.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
LookupStatus getEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
Implementation of Directory for a distributed contiguous Map.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
std::string description() const override
A one-line human-readable description of this object.
Implementation of Directory for a distributed noncontiguous Map.
std::string description() const override
A one-line human-readable description of this object.
bool isOneToOne(const Teuchos::Comm< int > &comm) const override
Whether the Directory's input Map is (globally) one to one.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const override
Find process IDs and (optionally) local IDs for the given global IDs.
bool isOneToOne(const Teuchos::Comm< int > &comm) const override
Whether the Directory's input Map is (globally) one to one.
std::string description() const override
A one-line human-readable description of this object.
ReplicatedDirectory()=default
Constructor (that takes no arguments).
Sets up and executes a communication plan for a Tpetra DistObject.
A parallel distribution of indices over processes.
Implementation details of Tpetra.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void initialize(int *argc, char ***argv)
Initialize Tpetra.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
size_t global_size_t
Global size_t object.