Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_DistObject_def.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ************************************************************************
38// @HEADER
39
40#ifndef TPETRA_DISTOBJECT_DEF_HPP
41#define TPETRA_DISTOBJECT_DEF_HPP
42
50
51#include "Tpetra_Distributor.hpp"
54#include "Tpetra_Details_checkGlobalError.hpp"
56#include "Tpetra_Util.hpp" // Details::createPrefix
57#include "Teuchos_CommHelpers.hpp"
58#include "Teuchos_TypeNameTraits.hpp"
59#include <typeinfo>
60#include <memory>
61#include <sstream>
62
63namespace Tpetra {
64
65 namespace { // (anonymous)
66 template<class DeviceType, class IndexType = size_t>
67 struct SumFunctor {
68 SumFunctor (const Kokkos::View<const size_t*, DeviceType>& viewToSum) :
69 viewToSum_ (viewToSum) {}
70 KOKKOS_INLINE_FUNCTION void operator() (const IndexType i, size_t& lclSum) const {
71 lclSum += viewToSum_(i);
72 }
73 Kokkos::View<const size_t*, DeviceType> viewToSum_;
74 };
75
76 template<class DeviceType, class IndexType = size_t>
77 size_t
78 countTotalImportPackets (const Kokkos::View<const size_t*, DeviceType>& numImportPacketsPerLID)
79 {
80 using Kokkos::parallel_reduce;
81 typedef DeviceType DT;
82 typedef typename DT::execution_space DES;
83 typedef Kokkos::RangePolicy<DES, IndexType> range_type;
84
85 const IndexType numOut = numImportPacketsPerLID.extent (0);
86 size_t totalImportPackets = 0;
87 parallel_reduce ("Count import packets",
88 range_type (0, numOut),
89 SumFunctor<DeviceType, IndexType> (numImportPacketsPerLID),
90 totalImportPackets);
91 return totalImportPackets;
92 }
93 } // namespace (anonymous)
94
95
96 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
98 DistObject (const Teuchos::RCP<const map_type>& map) :
99 map_ (map)
100 {
101#ifdef HAVE_TPETRA_TRANSFER_TIMERS
102 using Teuchos::RCP;
103 using Teuchos::Time;
104 using Teuchos::TimeMonitor;
105
107 TimeMonitor::lookupCounter ("Tpetra::DistObject::doTransfer");
108 if (doXferTimer.is_null ()) {
110 TimeMonitor::getNewCounter ("Tpetra::DistObject::doTransfer");
111 }
113
115 TimeMonitor::lookupCounter ("Tpetra::DistObject::copyAndPermute");
116 if (copyAndPermuteTimer.is_null ()) {
118 TimeMonitor::getNewCounter ("Tpetra::DistObject::copyAndPermute");
119 }
121
123 TimeMonitor::lookupCounter ("Tpetra::DistObject::packAndPrepare");
124 if (packAndPrepareTimer.is_null ()) {
126 TimeMonitor::getNewCounter ("Tpetra::DistObject::packAndPrepare");
127 }
129
131 TimeMonitor::lookupCounter ("Tpetra::DistObject::doPostsAndWaits");
132 if (doPostsAndWaitsTimer.is_null ()) {
134 TimeMonitor::getNewCounter ("Tpetra::DistObject::doPostsAndWaits");
135 }
137
139 TimeMonitor::lookupCounter ("Tpetra::DistObject::unpackAndCombine");
140 if (unpackAndCombineTimer.is_null ()) {
142 TimeMonitor::getNewCounter ("Tpetra::DistObject::unpackAndCombine");
143 }
145#endif // HAVE_TPETRA_TRANSFER_TIMERS
146 }
147
148 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
149 std::string
151 description () const
152 {
153 using Teuchos::TypeNameTraits;
154
155 std::ostringstream os;
156 os << "\"Tpetra::DistObject\": {"
157 << "Packet: " << TypeNameTraits<packet_type>::name ()
158 << ", LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name ()
159 << ", GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name ()
160 << ", Node: " << TypeNameTraits<Node>::name ();
161 if (this->getObjectLabel () != "") {
162 os << "Label: \"" << this->getObjectLabel () << "\"";
163 }
164 os << "}";
165 return os.str ();
166 }
167
168 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
169 void
171 describe (Teuchos::FancyOStream &out,
172 const Teuchos::EVerbosityLevel verbLevel) const
173 {
174 using Teuchos::rcpFromRef;
175 using Teuchos::TypeNameTraits;
176 using std::endl;
177 const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ?
178 Teuchos::VERB_LOW : verbLevel;
179 Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getMap ()->getComm ();
180 const int myRank = comm.is_null () ? 0 : comm->getRank ();
181 const int numProcs = comm.is_null () ? 1 : comm->getSize ();
182
183 if (vl != Teuchos::VERB_NONE) {
184 Teuchos::OSTab tab0 (out);
185 if (myRank == 0) {
186 out << "\"Tpetra::DistObject\":" << endl;
187 }
188 Teuchos::OSTab tab1 (out);
189 if (myRank == 0) {
190 out << "Template parameters:" << endl;
191 {
192 Teuchos::OSTab tab2 (out);
193 out << "Packet: " << TypeNameTraits<packet_type>::name () << endl
194 << "LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name () << endl
195 << "GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name () << endl
196 << "Node: " << TypeNameTraits<node_type>::name () << endl;
197 }
198 if (this->getObjectLabel () != "") {
199 out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
200 }
201 } // if myRank == 0
202
203 // Describe the Map.
204 {
205 if (myRank == 0) {
206 out << "Map:" << endl;
207 }
208 Teuchos::OSTab tab2 (out);
209 map_->describe (out, vl);
210 }
211
212 // At verbosity > VERB_LOW, each process prints something.
213 if (vl > Teuchos::VERB_LOW) {
214 for (int p = 0; p < numProcs; ++p) {
215 if (myRank == p) {
216 out << "Process " << myRank << ":" << endl;
217 Teuchos::OSTab tab2 (out);
218 out << "Export buffer size (in packets): "
219 << exports_.extent (0)
220 << endl
221 << "Import buffer size (in packets): "
222 << imports_.extent (0)
223 << endl;
224 }
225 if (! comm.is_null ()) {
226 comm->barrier (); // give output time to finish
227 comm->barrier ();
228 comm->barrier ();
229 }
230 } // for each process rank p
231 } // if vl > VERB_LOW
232 } // if vl != VERB_NONE
233 }
234
235 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
236 void
238 removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& /* newMap */)
239 {
240 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
241 "Tpetra::DistObject::removeEmptyProcessesInPlace: Not implemented");
242 }
243
244 /* These are provided in base DistObject template
245 template<class DistObjectType>
246 void
247 removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input,
248 const Teuchos::RCP<const Map<typename DistObjectType::local_ordinal_type,
249 typename DistObjectType::global_ordinal_type,
250 typename DistObjectType::node_type> >& newMap)
251 {
252 input->removeEmptyProcessesInPlace (newMap);
253 if (newMap.is_null ()) { // my process is excluded
254 input = Teuchos::null;
255 }
256 }
257
258 template<class DistObjectType>
259 void
260 removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
261 {
262 using Teuchos::RCP;
263 typedef typename DistObjectType::local_ordinal_type LO;
264 typedef typename DistObjectType::global_ordinal_type GO;
265 typedef typename DistObjectType::node_type NT;
266 typedef Map<LO, GO, NT> map_type;
267
268 RCP<const map_type> newMap = input->getMap ()->removeEmptyProcesses ();
269 removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
270 }
271 */
272
273 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
274 void
278 const CombineMode CM,
279 const bool restrictedMode)
280 {
281 using Details::Behavior;
282 using std::endl;
283 const char modeString[] = "doImport (forward mode)";
284
285 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
286 // output to std::cerr on every MPI process. This is unwise for
287 // runs with large numbers of MPI processes.
288 const bool verbose = Behavior::verbose("DistObject");
289 std::unique_ptr<std::string> prefix;
290 if (verbose) {
291 prefix = this->createPrefix("DistObject", modeString);
292 std::ostringstream os;
293 os << *prefix << "Start" << endl;
294 std::cerr << os.str ();
295 }
296 this->beginImport(source, importer, CM, restrictedMode);
297 this->endImport(source, importer, CM, restrictedMode);
298 if (verbose) {
299 std::ostringstream os;
300 os << *prefix << "Done" << endl;
301 std::cerr << os.str ();
302 }
303 }
304
305 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
306 void
310 const CombineMode CM,
311 const bool restrictedMode)
312 {
313 using Details::Behavior;
314 using std::endl;
315 const char modeString[] = "doExport (forward mode)";
316
317 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
318 // output to std::cerr on every MPI process. This is unwise for
319 // runs with large numbers of MPI processes.
320 const bool verbose = Behavior::verbose("DistObject");
321 std::unique_ptr<std::string> prefix;
322 if (verbose) {
323 prefix = this->createPrefix("DistObject", modeString);
324 std::ostringstream os;
325 os << *prefix << "Start" << endl;
326 std::cerr << os.str ();
327 }
328 this->beginExport(source, exporter, CM, restrictedMode);
329 this->endExport(source, exporter, CM, restrictedMode);
330 if (verbose) {
331 std::ostringstream os;
332 os << *prefix << "Done" << endl;
333 std::cerr << os.str ();
334 }
335 }
336
337 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
338 void
342 const CombineMode CM,
343 const bool restrictedMode)
344 {
345 using Details::Behavior;
346 using std::endl;
347 const char modeString[] = "doImport (reverse mode)";
348
349 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
350 // output to std::cerr on every MPI process. This is unwise for
351 // runs with large numbers of MPI processes.
352 const bool verbose = Behavior::verbose("DistObject");
353 std::unique_ptr<std::string> prefix;
354 if (verbose) {
355 prefix = this->createPrefix("DistObject", modeString);
356 std::ostringstream os;
357 os << *prefix << "Start" << endl;
358 std::cerr << os.str ();
359 }
360 this->beginImport(source, exporter, CM, restrictedMode);
361 this->endImport(source, exporter, CM, restrictedMode);
362 if (verbose) {
363 std::ostringstream os;
364 os << *prefix << "Done" << endl;
365 std::cerr << os.str ();
366 }
367 }
368
369 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
370 void
374 const CombineMode CM,
375 const bool restrictedMode)
376 {
377 using Details::Behavior;
378 using std::endl;
379 const char modeString[] = "doExport (reverse mode)";
380
381 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
382 // output to std::cerr on every MPI process. This is unwise for
383 // runs with large numbers of MPI processes.
384 const bool verbose = Behavior::verbose("DistObject");
385 std::unique_ptr<std::string> prefix;
386 if (verbose) {
387 prefix = this->createPrefix("DistObject", modeString);
388 std::ostringstream os;
389 os << *prefix << "Start" << endl;
390 std::cerr << os.str ();
391 }
392 this->beginExport(source, importer, CM, restrictedMode);
393 this->endExport(source, importer, CM, restrictedMode);
394 if (verbose) {
395 std::ostringstream os;
396 os << *prefix << "Done" << endl;
397 std::cerr << os.str ();
398 }
399 }
400
401 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
402 void
406 const CombineMode CM,
407 const bool restrictedMode)
408 {
409 using Details::Behavior;
410 using std::endl;
411 const char modeString[] = "beginImport (forward mode)";
412
413 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
414 // output to std::cerr on every MPI process. This is unwise for
415 // runs with large numbers of MPI processes.
416 const bool verbose = Behavior::verbose("DistObject");
417 std::unique_ptr<std::string> prefix;
418 if (verbose) {
419 prefix = this->createPrefix("DistObject", modeString);
420 std::ostringstream os;
421 os << *prefix << "Start" << endl;
422 std::cerr << os.str ();
423 }
424 this->beginTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
425 if (verbose) {
426 std::ostringstream os;
427 os << *prefix << "Done" << endl;
428 std::cerr << os.str ();
429 }
430 }
431
432 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
433 void
434 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
435 beginExport(const SrcDistObject& source,
436 const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
437 const CombineMode CM,
438 const bool restrictedMode)
439 {
440 using Details::Behavior;
441 using std::endl;
442 const char modeString[] = "beginExport (forward mode)";
443
444 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
445 // output to std::cerr on every MPI process. This is unwise for
446 // runs with large numbers of MPI processes.
447 const bool verbose = Behavior::verbose("DistObject");
448 std::unique_ptr<std::string> prefix;
449 if (verbose) {
450 prefix = this->createPrefix("DistObject", modeString);
451 std::ostringstream os;
452 os << *prefix << "Start" << endl;
453 std::cerr << os.str ();
454 }
455 this->beginTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
456 if (verbose) {
457 std::ostringstream os;
458 os << *prefix << "Done" << endl;
459 std::cerr << os.str ();
460 }
461 }
462
463 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
464 void
465 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
466 beginImport(const SrcDistObject& source,
467 const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
468 const CombineMode CM,
469 const bool restrictedMode)
470 {
471 using Details::Behavior;
472 using std::endl;
473 const char modeString[] = "beginImport (reverse mode)";
474
475 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
476 // output to std::cerr on every MPI process. This is unwise for
477 // runs with large numbers of MPI processes.
478 const bool verbose = Behavior::verbose("DistObject");
479 std::unique_ptr<std::string> prefix;
480 if (verbose) {
481 prefix = this->createPrefix("DistObject", modeString);
482 std::ostringstream os;
483 os << *prefix << "Start" << endl;
484 std::cerr << os.str ();
485 }
486 this->beginTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
487 if (verbose) {
488 std::ostringstream os;
489 os << *prefix << "Done" << endl;
490 std::cerr << os.str ();
491 }
492 }
493
494 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
495 void
496 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
497 beginExport(const SrcDistObject& source,
498 const Import<LocalOrdinal, GlobalOrdinal, Node> & importer,
499 const CombineMode CM,
500 const bool restrictedMode)
501 {
502 using Details::Behavior;
503 using std::endl;
504 const char modeString[] = "beginExport (reverse mode)";
505
506 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
507 // output to std::cerr on every MPI process. This is unwise for
508 // runs with large numbers of MPI processes.
509 const bool verbose = Behavior::verbose("DistObject");
510 std::unique_ptr<std::string> prefix;
511 if (verbose) {
512 prefix = this->createPrefix("DistObject", modeString);
513 std::ostringstream os;
514 os << *prefix << "Start" << endl;
515 std::cerr << os.str ();
516 }
517 this->beginTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
518 if (verbose) {
519 std::ostringstream os;
520 os << *prefix << "Done" << endl;
521 std::cerr << os.str ();
522 }
523 }
524
525 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
526 void
527 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
528 endImport(const SrcDistObject& source,
529 const Import<LocalOrdinal, GlobalOrdinal, Node>& importer,
530 const CombineMode CM,
531 const bool restrictedMode)
532 {
533 using Details::Behavior;
534 using std::endl;
535 const char modeString[] = "endImport (forward mode)";
536
537 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
538 // output to std::cerr on every MPI process. This is unwise for
539 // runs with large numbers of MPI processes.
540 const bool verbose = Behavior::verbose("DistObject");
541 std::unique_ptr<std::string> prefix;
542 if (verbose) {
543 prefix = this->createPrefix("DistObject", modeString);
544 std::ostringstream os;
545 os << *prefix << "Start" << endl;
546 std::cerr << os.str ();
547 }
548 this->endTransfer(source, importer, modeString, DoForward, CM, restrictedMode);
549 if (verbose) {
550 std::ostringstream os;
551 os << *prefix << "Done" << endl;
552 std::cerr << os.str ();
553 }
554 }
555
556 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
557 void
558 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
559 endExport(const SrcDistObject& source,
560 const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
561 const CombineMode CM,
562 const bool restrictedMode)
563 {
564 using Details::Behavior;
565 using std::endl;
566 const char modeString[] = "endExport (forward mode)";
567
568 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
569 // output to std::cerr on every MPI process. This is unwise for
570 // runs with large numbers of MPI processes.
571 const bool verbose = Behavior::verbose("DistObject");
572 std::unique_ptr<std::string> prefix;
573 if (verbose) {
574 prefix = this->createPrefix("DistObject", modeString);
575 std::ostringstream os;
576 os << *prefix << "Start" << endl;
577 std::cerr << os.str ();
578 }
579 this->endTransfer(source, exporter, modeString, DoForward, CM, restrictedMode);
580 if (verbose) {
581 std::ostringstream os;
582 os << *prefix << "Done" << endl;
583 std::cerr << os.str ();
584 }
585 }
586
587 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
588 void
589 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
590 endImport(const SrcDistObject& source,
591 const Export<LocalOrdinal, GlobalOrdinal, Node>& exporter,
592 const CombineMode CM,
593 const bool restrictedMode)
594 {
595 using Details::Behavior;
596 using std::endl;
597 const char modeString[] = "endImport (reverse mode)";
599 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
600 // output to std::cerr on every MPI process. This is unwise for
601 // runs with large numbers of MPI processes.
602 const bool verbose = Behavior::verbose("DistObject");
603 std::unique_ptr<std::string> prefix;
604 if (verbose) {
605 prefix = this->createPrefix("DistObject", modeString);
606 std::ostringstream os;
607 os << *prefix << "Start" << endl;
608 std::cerr << os.str ();
609 }
610 this->endTransfer(source, exporter, modeString, DoReverse, CM, restrictedMode);
611 if (verbose) {
612 std::ostringstream os;
613 os << *prefix << "Done" << endl;
614 std::cerr << os.str ();
616 }
617
618 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
619 void
620 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
621 endExport(const SrcDistObject& source,
622 const Import<LocalOrdinal, GlobalOrdinal, Node> & importer,
623 const CombineMode CM,
624 const bool restrictedMode)
625 {
626 using Details::Behavior;
627 using std::endl;
628 const char modeString[] = "endExport (reverse mode)";
629
630 // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
631 // output to std::cerr on every MPI process. This is unwise for
632 // runs with large numbers of MPI processes.
633 const bool verbose = Behavior::verbose("DistObject");
634 std::unique_ptr<std::string> prefix;
635 if (verbose) {
636 prefix = this->createPrefix("DistObject", modeString);
637 std::ostringstream os;
638 os << *prefix << "Start" << endl;
639 std::cerr << os.str ();
640 }
641 this->endTransfer(source, importer, modeString, DoReverse, CM, restrictedMode);
642 if (verbose) {
643 std::ostringstream os;
644 os << *prefix << "Done" << endl;
645 std::cerr << os.str ();
646 }
647 }
648
649 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
650 bool
652 transferArrived() const {
653 return distributorActor_.isReady();
654 }
655
656 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
657 bool
659 isDistributed () const {
660 return map_->isDistributed ();
661 }
662
// Definition of a function template returning size_t whose visible
// body unconditionally returns 0 (the default; see the trailing
// comment on the return statement).
// NOTE(review): the class-qualifier and function-name lines of this
// definition are not visible in this listing -- restore them from the
// original file before compiling.
663 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
664 size_t
667 return 0; // default implementation; subclasses may override
668 }
669
670 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
671 void
673 doTransfer (const SrcDistObject& src,
674 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
675 const char modeString[],
676 const ReverseOption revOp,
677 const CombineMode CM,
678 bool restrictedMode)
679 {
680 beginTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
681 endTransfer(src, transfer, modeString, revOp, CM, restrictedMode);
682 }
683
684 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
685 bool
687 reallocImportsIfNeeded (const size_t newSize,
688 const bool verbose,
689 const std::string* prefix,
690 const bool /*remoteLIDsContiguous*/,
691 const CombineMode /*CM*/)
692 {
693 if (verbose) {
694 std::ostringstream os;
695 os << *prefix << "Realloc (if needed) imports_ from "
696 << imports_.extent (0) << " to " << newSize << std::endl;
697 std::cerr << os.str ();
698 }
699 using ::Tpetra::Details::reallocDualViewIfNeeded;
700 const bool reallocated =
701 reallocDualViewIfNeeded (this->imports_, newSize, "imports");
702 if (verbose) {
703 std::ostringstream os;
704 os << *prefix << "Finished realloc'ing imports_" << std::endl;
705 std::cerr << os.str ();
706 }
707 return reallocated;
708 }
709
710 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
711 bool
714 const size_t numImportLIDs)
715 {
716 using Details::Behavior;
717 using ::Tpetra::Details::dualViewStatusToString;
718 using ::Tpetra::Details::reallocDualViewIfNeeded;
719 using std::endl;
720 // If an array is already allocated, and if is at least
721 // tooBigFactor times bigger than it needs to be, free it and
722 // reallocate to the size we need, in order to save space.
723 // Otherwise, take subviews to reduce allocation size.
724 constexpr size_t tooBigFactor = 10;
725
726 const bool verbose = Behavior::verbose("DistObject");
727 std::unique_ptr<std::string> prefix;
728 if (verbose) {
729 prefix = this->createPrefix("DistObject",
730 "reallocArraysForNumPacketsPerLid");
731 std::ostringstream os;
732 os << *prefix
733 << "numExportLIDs: " << numExportLIDs
734 << ", numImportLIDs: " << numImportLIDs
735 << endl;
736 os << *prefix << "DualView status before:" << endl
737 << *prefix
738 << dualViewStatusToString (this->numExportPacketsPerLID_,
739 "numExportPacketsPerLID_")
740 << endl
741 << *prefix
742 << dualViewStatusToString (this->numImportPacketsPerLID_,
743 "numImportPacketsPerLID_")
744 << endl;
745 std::cerr << os.str ();
746 }
747
748 // Reallocate numExportPacketsPerLID_ if needed.
749 const bool firstReallocated =
750 reallocDualViewIfNeeded (this->numExportPacketsPerLID_,
752 "numExportPacketsPerLID",
754 true); // need fence before, if realloc'ing
755
756 // If we reallocated above, then we fenced after that
757 // reallocation. This means that we don't need to fence again,
758 // before the next reallocation.
760 const bool secondReallocated =
761 reallocDualViewIfNeeded (this->numImportPacketsPerLID_,
763 "numImportPacketsPerLID",
766
767 if (verbose) {
768 std::ostringstream os;
769 os << *prefix << "DualView status after:" << endl
770 << *prefix << dualViewStatusToString (this->numExportPacketsPerLID_,
771 "numExportPacketsPerLID_")
772 << endl
773 << *prefix << dualViewStatusToString (this->numImportPacketsPerLID_,
774 "numImportPacketsPerLID_")
775 << endl;
776 std::cerr << os.str ();
778
780 }
781
782 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
783 void
786 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
787 const char modeString[],
788 const ReverseOption revOp,
789 const CombineMode CM,
790 bool restrictedMode)
791 {
792 using Details::Behavior;
793 using ::Tpetra::Details::dualViewStatusToString;
794 using ::Tpetra::Details::getArrayViewFromDualView;
796 using Kokkos::Compat::getArrayView;
797 using Kokkos::Compat::getConstArrayView;
798 using Kokkos::Compat::getKokkosViewDeepCopy;
799 using Kokkos::Compat::create_const_view;
800 using std::endl;
803 const char funcName[] = "Tpetra::DistObject::doTransfer";
804
805 ProfilingRegion region_doTransfer(funcName);
806 const bool verbose = Behavior::verbose("DistObject");
807 std::shared_ptr<std::string> prefix;
808 if (verbose) {
809 std::ostringstream os;
810 prefix = this->createPrefix("DistObject", "doTransfer");
811 os << *prefix << "Source type: " << Teuchos::typeName(src)
812 << ", Target type: " << Teuchos::typeName(*this) << endl;
813 std::cerr << os.str();
814 }
815
816 // "Restricted Mode" does two things:
817 // 1) Skips copyAndPermute
818 // 2) Allows the "target" Map of the transfer to be a subset of
819 // the Map of *this, in a "locallyFitted" sense.
820 //
821 // This cannot be used if #2 is not true, OR there are permutes.
822 // Source Maps still need to match
823
824 // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
825 // checks. These may communicate more.
826 const bool debug = Behavior::debug("DistObject");
827 if (debug) {
828 if (! restrictedMode && revOp == DoForward) {
829 const bool myMapSameAsTransferTgtMap =
830 this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
832 (! myMapSameAsTransferTgtMap, std::invalid_argument,
833 "Tpetra::DistObject::" << modeString << ": For forward-mode "
834 "communication, the target DistObject's Map must be the same "
835 "(in the sense of Tpetra::Map::isSameAs) as the input "
836 "Export/Import object's target Map.");
837 }
838 else if (! restrictedMode && revOp == DoReverse) {
839 const bool myMapSameAsTransferSrcMap =
840 this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
842 (! myMapSameAsTransferSrcMap, std::invalid_argument,
843 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
844 "communication, the target DistObject's Map must be the same "
845 "(in the sense of Tpetra::Map::isSameAs) as the input "
846 "Export/Import object's source Map.");
847 }
848 else if (restrictedMode && revOp == DoForward) {
850 this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
852 (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
853 "Tpetra::DistObject::" << modeString << ": For forward-mode "
854 "communication using restricted mode, Export/Import object's "
855 "target Map must be locally fitted (in the sense of "
856 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
857 }
858 else { // if (restrictedMode && revOp == DoReverse)
860 this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
862 (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
863 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
864 "communication using restricted mode, Export/Import object's "
865 "source Map must be locally fitted (in the sense of "
866 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
867 }
868
869 // SrcDistObject need not even _have_ Maps. However, if the
870 // source object is a DistObject, it has a Map, and we may
871 // compare that Map with the Transfer's Maps.
872 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
873 if (srcDistObj != nullptr) {
874 if (revOp == DoForward) {
875 const bool srcMapSameAsImportSrcMap =
876 srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
877 TEUCHOS_TEST_FOR_EXCEPTION
878 (! srcMapSameAsImportSrcMap, std::invalid_argument,
879 "Tpetra::DistObject::" << modeString << ": For forward-mode "
880 "communication, the source DistObject's Map must be the same "
881 "as the input Export/Import object's source Map.");
882 }
883 else { // revOp == DoReverse
884 const bool srcMapSameAsImportTgtMap =
885 srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
886 TEUCHOS_TEST_FOR_EXCEPTION
887 (! srcMapSameAsImportTgtMap, std::invalid_argument,
888 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
889 "communication, the source DistObject's Map must be the same "
890 "as the input Export/Import object's target Map.");
891 }
892 }
893 }
894
895 const size_t numSameIDs = transfer.getNumSameIDs ();
896 Distributor& distor = transfer.getDistributor ();
897 const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
898
899 TEUCHOS_TEST_FOR_EXCEPTION
900 (debug && restrictedMode &&
901 (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
902 transfer.getPermuteFromLIDs_dv().extent(0) != 0),
903 std::invalid_argument,
904 "Tpetra::DistObject::" << modeString << ": Transfer object "
905 "cannot have permutes in restricted mode.");
906
907 // Do we need all communication buffers to live on host?
908 const bool commOnHost = ! Behavior::assumeMpiIsGPUAware ();
909 if (verbose) {
910 std::ostringstream os;
911 os << *prefix << "doTransfer: Use new interface; "
912 "commOnHost=" << (commOnHost ? "true" : "false") << endl;
913 std::cerr << os.str ();
914 }
915
916 using const_lo_dv_type =
917 Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
918 const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
919 transfer.getPermuteToLIDs_dv () :
920 transfer.getPermuteFromLIDs_dv ();
921 const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
922 transfer.getPermuteFromLIDs_dv () :
923 transfer.getPermuteToLIDs_dv ();
924 const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
925 transfer.getRemoteLIDs_dv () :
926 transfer.getExportLIDs_dv ();
927 const_lo_dv_type exportLIDs = (revOp == DoForward) ?
928 transfer.getExportLIDs_dv () :
929 transfer.getRemoteLIDs_dv ();
930 const bool canTryAliasing = (revOp == DoForward) ?
931 transfer.areRemoteLIDsContiguous() :
932 transfer.areExportLIDsContiguous();
933 // const bool canTryAliasing = false;
934
935 ProfilingRegion region_dTN(funcName);
936#ifdef HAVE_TPETRA_TRANSFER_TIMERS
937 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
938 // of Kokkos profiling.
939 Teuchos::TimeMonitor doXferMon (*doXferTimer_);
940#endif // HAVE_TPETRA_TRANSFER_TIMERS
941
942 if (verbose) {
943 std::ostringstream os;
944 os << *prefix << "Input arguments:" << endl
945 << *prefix << " combineMode: " << combineModeToString (CM) << endl
946 << *prefix << " numSameIDs: " << numSameIDs << endl
947 << *prefix << " "
948 << dualViewStatusToString (permuteToLIDs, "permuteToLIDs") << endl
949 << *prefix << " "
950 << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs") << endl
951 << *prefix << " "
952 << dualViewStatusToString (remoteLIDs, "remoteLIDs") << endl
953 << *prefix << " "
954 << dualViewStatusToString (exportLIDs, "exportLIDs") << endl
955 << *prefix << " revOp: Do" << (revOp == DoReverse ? "Reverse" : "Forward") << endl
956 << *prefix << " commOnHost: " << (commOnHost ? "true" : "false") << endl;
957 std::cerr << os.str ();
958 }
959
960 {
961 ProfilingRegion region_cs ("Tpetra::DistObject::doTransferNew::checkSizes");
962 if (verbose) {
963 std::ostringstream os;
964 os << *prefix << "1. checkSizes" << endl;
965 std::cerr << os.str ();
966 }
967 const bool checkSizesResult = this->checkSizes (src);
968 TEUCHOS_TEST_FOR_EXCEPTION
969 (! checkSizesResult, std::invalid_argument,
970 "Tpetra::DistObject::doTransfer: checkSizes() indicates that the "
971 "destination object is not a legal target for redistribution from the "
972 "source object. This probably means that they do not have the same "
973 "dimensions. For example, MultiVectors must have the same number of "
974 "rows and columns.");
975 }
976
977 // NOTE (mfh 26 Apr 2016) Chris Baker's implementation understood
978 // that if CM == INSERT || CM == REPLACE, the target object could
979 // be write only. We don't optimize for that here.
980
981 if (!restrictedMode && numSameIDs + permuteToLIDs.extent (0) != 0) {
982 // There is at least one GID to copy or permute.
983 if (verbose) {
984 std::ostringstream os;
985 os << *prefix << "2. copyAndPermute" << endl;
986 std::cerr << os.str ();
987 }
988 ProfilingRegion region_cp
989 ("Tpetra::DistObject::doTransferNew::copyAndPermute");
990#ifdef HAVE_TPETRA_TRANSFER_TIMERS
991 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in favor
992 // of Kokkos profiling.
993 Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_);
994#endif // HAVE_TPETRA_TRANSFER_TIMERS
995
996 if (numSameIDs + permuteToLIDs.extent (0) != 0) {
997 // There is at least one GID to copy or permute.
998 if (verbose) {
999 std::ostringstream os;
1000 os << *prefix << "2. copyAndPermute" << endl;
1001 std::cerr << os.str ();
1002 }
1003 this->copyAndPermute (src, numSameIDs, permuteToLIDs,
1005 if (verbose) {
1006 std::ostringstream os;
1007 os << *prefix << "After copyAndPermute:" << endl
1008 << *prefix << " "
1009 << dualViewStatusToString (permuteToLIDs, "permuteToLIDs")
1010 << endl
1011 << *prefix << " "
1012 << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs")
1013 << endl;
1014 std::cerr << os.str ();
1015 }
1016 }
1017 }
1018
1019 // The method may return zero even if the implementation actually
1020 // does have a constant number of packets per LID. However, if it
1021 // returns nonzero, we may use this information to avoid
1022 // (re)allocating num{Ex,Im}portPacketsPerLID_. packAndPrepare()
1023 // will set this to its final value.
1024 //
1025 // We only need this if CM != ZERO, but it has to be lifted out of
1026 // that scope because there are multiple tests for CM != ZERO.
1027 size_t constantNumPackets = this->constantNumberOfPackets ();
1028 if (verbose) {
1029 std::ostringstream os;
1030 os << *prefix << "constantNumPackets=" << constantNumPackets << endl;
1031 std::cerr << os.str ();
1032 }
1033
1034 // We only need to pack communication buffers if the combine mode
1035 // is not ZERO. A "ZERO combine mode" means that the results are
1036 // the same as if we had received all zeros, and added them to the
1037 // existing values. That means we don't need to communicate.
1038 if (CM != ZERO) {
1039 if (constantNumPackets == 0) {
1040 if (verbose) {
1041 std::ostringstream os;
1042 os << *prefix << "3. (Re)allocate num{Ex,Im}portPacketsPerLID"
1043 << endl;
1044 std::cerr << os.str ();
1045 }
1046 // This only reallocates if necessary, that is, if the sizes
1047 // don't match.
1048 this->reallocArraysForNumPacketsPerLid (exportLIDs.extent (0),
1049 remoteLIDs.extent (0));
1050 }
1051
1052 if (verbose) {
1053 std::ostringstream os;
1054 os << *prefix << "4. packAndPrepare: before, "
1055 << dualViewStatusToString (this->exports_, "exports_")
1056 << endl;
1057 std::cerr << os.str ();
1058 }
1059
1060 doPackAndPrepare(src, exportLIDs, constantNumPackets);
1061 if (commOnHost) {
1062 this->exports_.sync_host();
1063 }
1064 else {
1065 this->exports_.sync_device();
1066 }
1067
1068 if (verbose) {
1069 std::ostringstream os;
1070 os << *prefix << "5.1. After packAndPrepare, "
1071 << dualViewStatusToString (this->exports_, "exports_")
1072 << endl;
1073 std::cerr << os.str ();
1074 }
1075 } // if (CM != ZERO)
1076
1077 // We only need to send data if the combine mode is not ZERO.
1078 if (CM != ZERO) {
1079 if (constantNumPackets != 0) {
1080 // There are a constant number of packets per element. We
1081 // already know (from the number of "remote" (incoming)
1082 // elements) how many incoming elements we expect, so we can
1083 // resize the buffer accordingly.
1084 const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1085 reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1086 }
1087
1088 // Do we need to do communication (via doPostsAndWaits)?
1089 bool needCommunication = true;
1090
1091 // This may be NULL. It will be used below.
1092 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1093
1094 if (revOp == DoReverse && ! this->isDistributed ()) {
1095 needCommunication = false;
1096 }
1097 // FIXME (mfh 30 Jun 2013): Checking whether the source object
1098 // is distributed requires a cast to DistObject. If it's not a
1099 // DistObject, then I'm not quite sure what to do. Perhaps it
1100 // would be more appropriate for SrcDistObject to have an
1101 // isDistributed() method. For now, I'll just assume that we
1102 // need to do communication unless the cast succeeds and the
1103 // source is not distributed.
1104 else if (revOp == DoForward && srcDistObj != NULL &&
1105 ! srcDistObj->isDistributed ()) {
1106 needCommunication = false;
1107 }
1108
1109 if (! needCommunication) {
1110 if (verbose) {
1111 std::ostringstream os;
1112 os << *prefix << "Comm not needed; skipping" << endl;
1113 std::cerr << os.str ();
1114 }
1115 }
1116 else {
1117 ProfilingRegion region_dpw
1118 ("Tpetra::DistObject::doTransferNew::doPostsAndWaits");
1119#ifdef HAVE_TPETRA_TRANSFER_TIMERS
1120 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1121 // favor of Kokkos profiling.
1122 Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
1123#endif // HAVE_TPETRA_TRANSFER_TIMERS
1124
1125 if (verbose) {
1126 std::ostringstream os;
1127 os << *prefix << "7.0. "
1128 << (revOp == DoReverse ? "Reverse" : "Forward")
1129 << " mode" << endl;
1130 std::cerr << os.str ();
1131 }
1132
1133 doPosts(distributorPlan, constantNumPackets, commOnHost, prefix, canTryAliasing, CM);
1134 } // if (needCommunication)
1135 } // if (CM != ZERO)
1136 }
1137
1138 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1139 void
1140 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1141 endTransfer(const SrcDistObject& src,
1142 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
1143 const char modeString[],
1144 const ReverseOption revOp,
1145 const CombineMode CM,
1146 bool restrictedMode)
1147 {
1148 using Details::Behavior;
1149 using ::Tpetra::Details::dualViewStatusToString;
1150 using ::Tpetra::Details::getArrayViewFromDualView;
1151 using Details::ProfilingRegion;
1152 using Kokkos::Compat::getArrayView;
1153 using Kokkos::Compat::getConstArrayView;
1154 using Kokkos::Compat::getKokkosViewDeepCopy;
1155 using Kokkos::Compat::create_const_view;
1156 using std::endl;
1158 using Details::ProfilingRegion;
1159 const char funcName[] = "Tpetra::DistObject::doTransfer";
1160
1161 ProfilingRegion region_doTransfer(funcName);
1162 const bool verbose = Behavior::verbose("DistObject");
1163 std::shared_ptr<std::string> prefix;
1164 if (verbose) {
1165 std::ostringstream os;
1166 prefix = this->createPrefix("DistObject", "doTransfer");
1167 os << *prefix << "Source type: " << Teuchos::typeName(src)
1168 << ", Target type: " << Teuchos::typeName(*this) << endl;
1169 std::cerr << os.str();
1170 }
1171
1172 // "Restricted Mode" does two things:
1173 // 1) Skips copyAndPermute
1174 // 2) Allows the "target" Map of the transfer to be a subset of
1175 // the Map of *this, in a "locallyFitted" sense.
1176 //
1177 // This cannot be used if #2 is not true, OR there are permutes.
1178 // Source Maps still need to match
1179
1180 // mfh 18 Oct 2017: Set TPETRA_DEBUG to true to enable extra debug
1181 // checks. These may communicate more.
1182 const bool debug = Behavior::debug("DistObject");
1183 if (debug) {
1184 if (! restrictedMode && revOp == DoForward) {
1185 const bool myMapSameAsTransferTgtMap =
1186 this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1187 TEUCHOS_TEST_FOR_EXCEPTION
1188 (! myMapSameAsTransferTgtMap, std::invalid_argument,
1189 "Tpetra::DistObject::" << modeString << ": For forward-mode "
1190 "communication, the target DistObject's Map must be the same "
1191 "(in the sense of Tpetra::Map::isSameAs) as the input "
1192 "Export/Import object's target Map.");
1193 }
1194 else if (! restrictedMode && revOp == DoReverse) {
1195 const bool myMapSameAsTransferSrcMap =
1196 this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1197 TEUCHOS_TEST_FOR_EXCEPTION
1198 (! myMapSameAsTransferSrcMap, std::invalid_argument,
1199 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1200 "communication, the target DistObject's Map must be the same "
1201 "(in the sense of Tpetra::Map::isSameAs) as the input "
1202 "Export/Import object's source Map.");
1203 }
1204 else if (restrictedMode && revOp == DoForward) {
1205 const bool myMapLocallyFittedTransferTgtMap =
1206 this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
1207 TEUCHOS_TEST_FOR_EXCEPTION
1208 (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
1209 "Tpetra::DistObject::" << modeString << ": For forward-mode "
1210 "communication using restricted mode, Export/Import object's "
1211 "target Map must be locally fitted (in the sense of "
1212 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1213 }
1214 else { // if (restrictedMode && revOp == DoReverse)
1215 const bool myMapLocallyFittedTransferSrcMap =
1216 this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
1217 TEUCHOS_TEST_FOR_EXCEPTION
1218 (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
1219 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1220 "communication using restricted mode, Export/Import object's "
1221 "source Map must be locally fitted (in the sense of "
1222 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
1223 }
1224
1225 // SrcDistObject need not even _have_ Maps. However, if the
1226 // source object is a DistObject, it has a Map, and we may
1227 // compare that Map with the Transfer's Maps.
1228 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1229 if (srcDistObj != nullptr) {
1230 if (revOp == DoForward) {
1231 const bool srcMapSameAsImportSrcMap =
1232 srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1233 TEUCHOS_TEST_FOR_EXCEPTION
1234 (! srcMapSameAsImportSrcMap, std::invalid_argument,
1235 "Tpetra::DistObject::" << modeString << ": For forward-mode "
1236 "communication, the source DistObject's Map must be the same "
1237 "as the input Export/Import object's source Map.");
1238 }
1239 else { // revOp == DoReverse
1240 const bool srcMapSameAsImportTgtMap =
1241 srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1242 TEUCHOS_TEST_FOR_EXCEPTION
1243 (! srcMapSameAsImportTgtMap, std::invalid_argument,
1244 "Tpetra::DistObject::" << modeString << ": For reverse-mode "
1245 "communication, the source DistObject's Map must be the same "
1246 "as the input Export/Import object's target Map.");
1247 }
1248 }
1249 }
1250
1251 Distributor& distor = transfer.getDistributor ();
1252 const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
1253
1254 TEUCHOS_TEST_FOR_EXCEPTION
1255 (debug && restrictedMode &&
1256 (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
1257 transfer.getPermuteFromLIDs_dv().extent(0) != 0),
1258 std::invalid_argument,
1259 "Tpetra::DistObject::" << modeString << ": Transfer object "
1260 "cannot have permutes in restricted mode.");
1261
1262 // Do we need all communication buffers to live on host?
1263 const bool commOnHost = ! Behavior::assumeMpiIsGPUAware ();
1264 if (verbose) {
1265 std::ostringstream os;
1266 os << *prefix << "doTransfer: Use new interface; "
1267 "commOnHost=" << (commOnHost ? "true" : "false") << endl;
1268 std::cerr << os.str ();
1269 }
1270
1271 using const_lo_dv_type =
1272 Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
1273 const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
1274 transfer.getPermuteToLIDs_dv () :
1275 transfer.getPermuteFromLIDs_dv ();
1276 const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
1277 transfer.getPermuteFromLIDs_dv () :
1278 transfer.getPermuteToLIDs_dv ();
1279 const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
1280 transfer.getRemoteLIDs_dv () :
1281 transfer.getExportLIDs_dv ();
1282 const_lo_dv_type exportLIDs = (revOp == DoForward) ?
1283 transfer.getExportLIDs_dv () :
1284 transfer.getRemoteLIDs_dv ();
1285 const bool canTryAliasing = (revOp == DoForward) ?
1286 transfer.areRemoteLIDsContiguous() :
1287 transfer.areExportLIDsContiguous();
1288
1289 size_t constantNumPackets = this->constantNumberOfPackets ();
1290
1291 // We only need to send data if the combine mode is not ZERO.
1292 if (CM != ZERO) {
1293 if (constantNumPackets != 0) {
1294 // There are a constant number of packets per element. We
1295 // already know (from the number of "remote" (incoming)
1296 // elements) how many incoming elements we expect, so we can
1297 // resize the buffer accordingly.
1298 const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1299 reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1300 }
1301
1302 // Do we need to do communication (via doPostsAndWaits)?
1303 bool needCommunication = true;
1304
1305 // This may be NULL. It will be used below.
1306 const this_type* srcDistObj = dynamic_cast<const this_type*> (&src);
1307
1308 if (revOp == DoReverse && ! this->isDistributed ()) {
1309 needCommunication = false;
1310 }
1311 // FIXME (mfh 30 Jun 2013): Checking whether the source object
1312 // is distributed requires a cast to DistObject. If it's not a
1313 // DistObject, then I'm not quite sure what to do. Perhaps it
1314 // would be more appropriate for SrcDistObject to have an
1315 // isDistributed() method. For now, I'll just assume that we
1316 // need to do communication unless the cast succeeds and the
1317 // source is not distributed.
1318 else if (revOp == DoForward && srcDistObj != NULL &&
1319 ! srcDistObj->isDistributed ()) {
1320 needCommunication = false;
1321 }
1322
1323 if (! needCommunication) {
1324 if (verbose) {
1325 std::ostringstream os;
1326 os << *prefix << "Comm not needed; skipping" << endl;
1327 std::cerr << os.str ();
1328 }
1329 }
1330 else {
1331 distributorActor_.doWaits(distributorPlan);
1332
1333 if (verbose) {
1334 std::ostringstream os;
1335 os << *prefix << "8. unpackAndCombine - remoteLIDs " << remoteLIDs.extent(0) << ", constantNumPackets " << constantNumPackets << endl;
1336 std::cerr << os.str ();
1337 }
1338 doUnpackAndCombine(remoteLIDs, constantNumPackets, CM);
1339 } // if (needCommunication)
1340 } // if (CM != ZERO)
1341
1342 if (verbose) {
1343 std::ostringstream os;
1344 os << *prefix << "9. Done!" << endl;
1345 std::cerr << os.str ();
1346 }
1347
1348 if (verbose) {
1349 std::ostringstream os;
1350 os << *prefix << "Tpetra::DistObject::doTransfer: Done!" << endl;
1351 std::cerr << os.str ();
1352 }
1353 }
1354
1355 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1356 void
1357 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1358 doPosts(const Details::DistributorPlan& distributorPlan,
1359 size_t constantNumPackets,
1360 bool commOnHost,
1361 std::shared_ptr<std::string> prefix,
1362 const bool canTryAliasing,
1363 const CombineMode CM)
1364 {
1365 using ::Tpetra::Details::dualViewStatusToString;
1366 using ::Tpetra::Details::getArrayViewFromDualView;
1367 using Kokkos::Compat::create_const_view;
1368 using std::endl;
1369
1370 const bool verbose = Details::Behavior::verbose("DistObject");
1371
1372 if (constantNumPackets == 0) { // variable num packets per LID
1373 if (verbose) {
1374 std::ostringstream os;
1375 os << *prefix << "7.1. Variable # packets / LID: first comm "
1376 << "(commOnHost = " << (commOnHost ? "true" : "false") << ")"
1377 << endl;
1378 std::cerr << os.str ();
1379 }
1380 size_t totalImportPackets = 0;
1381 if (commOnHost) {
1382 if (this->numExportPacketsPerLID_.need_sync_host ()) {
1383 this->numExportPacketsPerLID_.sync_host ();
1384 }
1385 if (this->numImportPacketsPerLID_.need_sync_host ()) {
1386 this->numImportPacketsPerLID_.sync_host ();
1387 }
1388 this->numImportPacketsPerLID_.modify_host (); // out arg
1389 auto numExp_h =
1390 create_const_view (this->numExportPacketsPerLID_.view_host ());
1391 auto numImp_h = this->numImportPacketsPerLID_.view_host ();
1392
1393 // MPI communication happens here.
1394 if (verbose) {
1395 std::ostringstream os;
1396 os << *prefix << "Call doPostsAndWaits"
1397 << endl;
1398 std::cerr << os.str ();
1399 }
1400 distributorActor_.doPostsAndWaits(distributorPlan, numExp_h, 1, numImp_h);
1401
1402 if (verbose) {
1403 std::ostringstream os;
1404 os << *prefix << "Count totalImportPackets" << std::endl;
1405 std::cerr << os.str ();
1406 }
1407 using the_dev_type = typename decltype (numImp_h)::device_type;
1408 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
1409 }
1410 else { // ! commOnHost
1411 this->numExportPacketsPerLID_.sync_device ();
1412 this->numImportPacketsPerLID_.sync_device ();
1413 this->numImportPacketsPerLID_.modify_device (); // out arg
1414 auto numExp_d = create_const_view
1415 (this->numExportPacketsPerLID_.view_device ());
1416 auto numImp_d = this->numImportPacketsPerLID_.view_device ();
1417
1418 // MPI communication happens here.
1419 if (verbose) {
1420 std::ostringstream os;
1421 os << *prefix << "Call doPostsAndWaits"
1422 << endl;
1423 std::cerr << os.str ();
1424 }
1425
1426 distributorActor_.doPostsAndWaits(distributorPlan, numExp_d, 1, numImp_d);
1427
1428 if (verbose) {
1429 std::ostringstream os;
1430 os << *prefix << "Count totalImportPackets" << std::endl;
1431 std::cerr << os.str ();
1432 }
1433 using the_dev_type = typename decltype (numImp_d)::device_type;
1434 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
1435 }
1436
1437 if (verbose) {
1438 std::ostringstream os;
1439 os << *prefix << "totalImportPackets=" << totalImportPackets << endl;
1440 std::cerr << os.str ();
1441 }
1442 this->reallocImportsIfNeeded (totalImportPackets, verbose,
1443 prefix.get (), canTryAliasing, CM);
1444 if (verbose) {
1445 std::ostringstream os;
1446 os << *prefix << "7.3. Second comm" << std::endl;
1447 std::cerr << os.str ();
1448 }
1449
1450 // mfh 04 Feb 2019: Distributor expects the "num packets per
1451 // LID" arrays on host, so that it can issue MPI sends and
1452 // receives correctly.
1453 this->numExportPacketsPerLID_.sync_host ();
1454 this->numImportPacketsPerLID_.sync_host ();
1455
1456 // NOTE (mfh 25 Apr 2016, 01 Aug 2017) doPostsAndWaits and
1457 // doReversePostsAndWaits currently want
1458 // numExportPacketsPerLID and numImportPacketsPerLID as
1459 // Teuchos::ArrayView, rather than as Kokkos::View.
1460 //
1461 // NOTE (mfh 04 Feb 2019) This does NOT copy from host to
1462 // device. The above syncs might.
1463 auto numExportPacketsPerLID_av =
1464 getArrayViewFromDualView (this->numExportPacketsPerLID_);
1465 auto numImportPacketsPerLID_av =
1466 getArrayViewFromDualView (this->numImportPacketsPerLID_);
1467
1468 // imports_ is for output only, so we don't need to sync it
1469 // before marking it as modified. However, in order to
1470 // prevent spurious debug-mode errors (e.g., "modified on
1471 // both device and host"), we first need to clear its
1472 // "modified" flags.
1473 this->imports_.clear_sync_state ();
1474
1475 if (verbose) {
1476 std::ostringstream os;
1477 os << *prefix << "Comm on "
1478 << (commOnHost ? "host" : "device")
1479 << "; call doPosts" << endl;
1480 std::cerr << os.str ();
1481 }
1482
1483 if (commOnHost) {
1484 this->imports_.modify_host ();
1485 distributorActor_.doPosts
1486 (distributorPlan,
1487 create_const_view (this->exports_.view_host ()),
1488 numExportPacketsPerLID_av,
1489 this->imports_.view_host (),
1490 numImportPacketsPerLID_av);
1491 }
1492 else { // pack on device
1493 Kokkos::fence(); // for UVM
1494 this->imports_.modify_device ();
1495 distributorActor_.doPosts
1496 (distributorPlan,
1497 create_const_view (this->exports_.view_device ()),
1498 numExportPacketsPerLID_av,
1499 this->imports_.view_device (),
1500 numImportPacketsPerLID_av);
1501 }
1502 }
1503 else { // constant number of packets per LID
1504 if (verbose) {
1505 std::ostringstream os;
1506 os << *prefix << "7.1. Const # packets per LID: " << endl
1507 << *prefix << " "
1508 << dualViewStatusToString (this->exports_, "exports_")
1509 << endl
1510 << *prefix << " "
1511 << dualViewStatusToString (this->exports_, "imports_")
1512 << endl;
1513 std::cerr << os.str ();
1514 }
1515 // imports_ is for output only, so we don't need to sync it
1516 // before marking it as modified. However, in order to
1517 // prevent spurious debug-mode errors (e.g., "modified on
1518 // both device and host"), we first need to clear its
1519 // "modified" flags.
1520 this->imports_.clear_sync_state ();
1521
1522 if (verbose) {
1523 std::ostringstream os;
1524 os << *prefix << "7.2. Comm on "
1525 << (commOnHost ? "host" : "device")
1526 << "; call doPosts" << endl;
1527 std::cerr << os.str ();
1528 }
1529 if (commOnHost) {
1530 this->imports_.modify_host ();
1531 distributorActor_.doPosts
1532 (distributorPlan,
1533 create_const_view (this->exports_.view_host ()),
1534 constantNumPackets,
1535 this->imports_.view_host ());
1536 }
1537 else { // pack on device
1538 Kokkos::fence(); // for UVM
1539 this->imports_.modify_device ();
1540 distributorActor_.doPosts
1541 (distributorPlan,
1542 create_const_view (this->exports_.view_device ()),
1543 constantNumPackets,
1544 this->imports_.view_device ());
1545 } // commOnHost
1546 } // constant or variable num packets per LID
1547 }
1548
1549 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1550 void
1551 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1552 doPackAndPrepare(const SrcDistObject& src,
1553 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1554 size_t& constantNumPackets)
1555 {
1556 using Details::ProfilingRegion;
1557 using std::endl;
1558 const bool debug = Details::Behavior::debug("DistObject");
1559
1560 ProfilingRegion region_pp
1561 ("Tpetra::DistObject::doTransferNew::packAndPrepare");
1562#ifdef HAVE_TPETRA_TRANSFER_TIMERS
1563 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1564 // favor of Kokkos profiling.
1565 Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
1566#endif // HAVE_TPETRA_TRANSFER_TIMERS
1567
1568 // Ask the source to pack data. Also ask it whether there are
1569 // a constant number of packets per element
1570 // (constantNumPackets is an output argument). If there are,
1571 // constantNumPackets will come back nonzero. Otherwise, the
1572 // source will fill the numExportPacketsPerLID_ array.
1573
1574 // FIXME (mfh 18 Oct 2017) if (! commOnHost), sync to device?
1575 // Alternately, make packAndPrepare take a "commOnHost"
1576 // argument to tell it where to leave the data?
1577 //
1578 // NOTE (mfh 04 Feb 2019) Subclasses of DistObject should have
1579 // the freedom to pack and unpack either on host or device.
1580 // We should prefer sync'ing only on demand. Thus, we can
1581 // answer the above question: packAndPrepare should not
1582 // take a commOnHost argument, and doTransferNew should sync
1583 // where needed, if needed.
1584 if (debug) {
1585 std::ostringstream lclErrStrm;
1586 bool lclSuccess = false;
1587 try {
1588 this->packAndPrepare (src, exportLIDs, this->exports_,
1589 this->numExportPacketsPerLID_,
1590 constantNumPackets);
1591 lclSuccess = true;
1592 }
1593 catch (std::exception& e) {
1594 lclErrStrm << "packAndPrepare threw an exception: "
1595 << endl << e.what();
1596 }
1597 catch (...) {
1598 lclErrStrm << "packAndPrepare threw an exception "
1599 "not a subclass of std::exception.";
1600 }
1601 const char gblErrMsgHeader[] = "Tpetra::DistObject "
1602 "threw an exception in packAndPrepare on "
1603 "one or more processes in the DistObject's communicator.";
1604 auto comm = getMap()->getComm();
1605 Details::checkGlobalError(std::cerr, lclSuccess,
1606 lclErrStrm.str().c_str(),
1607 gblErrMsgHeader, *comm);
1608 }
1609 else {
1610 this->packAndPrepare (src, exportLIDs, this->exports_,
1611 this->numExportPacketsPerLID_,
1612 constantNumPackets);
1613 }
1614 }
1615
1616 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1617 void
1618 DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::
1619 doUnpackAndCombine(const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& remoteLIDs,
1620 size_t constantNumPackets,
1621 CombineMode CM)
1622 {
1623 using Details::ProfilingRegion;
1624 using std::endl;
1625 const bool debug = Details::Behavior::debug("DistObject");
1626
1627 ProfilingRegion region_uc
1628 ("Tpetra::DistObject::doTransferNew::unpackAndCombine");
1629#ifdef HAVE_TPETRA_TRANSFER_TIMERS
1630 // FIXME (mfh 04 Feb 2019) Deprecate Teuchos::TimeMonitor in
1631 // favor of Kokkos profiling.
1632 Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
1633#endif // HAVE_TPETRA_TRANSFER_TIMERS
1634
1635 if (debug) {
1636 std::ostringstream lclErrStrm;
1637 bool lclSuccess = false;
1638 try {
1639 this->unpackAndCombine (remoteLIDs, this->imports_,
1640 this->numImportPacketsPerLID_,
1641 constantNumPackets, CM);
1642 lclSuccess = true;
1643 }
1644 catch (std::exception& e) {
1645 lclErrStrm << "unpackAndCombine threw an exception: "
1646 << endl << e.what();
1647 }
1648 catch (...) {
1649 lclErrStrm << "unpackAndCombine threw an exception "
1650 "not a subclass of std::exception.";
1651 }
1652 const char gblErrMsgHeader[] = "Tpetra::DistObject "
1653 "threw an exception in unpackAndCombine on "
1654 "one or more processes in the DistObject's communicator.";
1655 auto comm = getMap()->getComm();
1656 Details::checkGlobalError(std::cerr, lclSuccess,
1657 lclErrStrm.str().c_str(),
1658 gblErrMsgHeader, *comm);
1659 }
1660 else {
1661 this->unpackAndCombine (remoteLIDs, this->imports_,
1662 this->numImportPacketsPerLID_,
1663 constantNumPackets, CM);
1664 }
1665 }
1666
1667 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1668 void
1671 (const SrcDistObject&,
1672 const size_t,
1673 const Kokkos::DualView<
1674 const local_ordinal_type*,
1676 const Kokkos::DualView<
1677 const local_ordinal_type*,
1679 const CombineMode CM)
1680 {}
1681
1682 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1683 void
1686 (const SrcDistObject&,
1687 const Kokkos::DualView<
1688 const local_ordinal_type*,
1690 Kokkos::DualView<
1691 packet_type*,
1693 Kokkos::DualView<
1694 size_t*,
1696 size_t&)
1697 {}
1698
1699 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1700 void
1703 (const Kokkos::DualView<
1704 const local_ordinal_type*,
1705 buffer_device_type>& /* importLIDs */,
1706 Kokkos::DualView<
1707 packet_type*,
1708 buffer_device_type> /* imports */,
1709 Kokkos::DualView<
1710 size_t*,
1711 buffer_device_type> /* numPacketsPerLID */,
1712 const size_t /* constantNumPackets */,
1713 const CombineMode /* combineMode */)
1714 {}
1715
1716
1717 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1718 void
1720 print (std::ostream& os) const
1721 {
1722 using Teuchos::FancyOStream;
1723 using Teuchos::getFancyOStream;
1724 using Teuchos::RCP;
1725 using Teuchos::rcpFromRef;
1726 using std::endl;
1727
1729 this->describe (*out, Teuchos::VERB_DEFAULT);
1730 }
1731
1732 template <class Packet, class LocalOrdinal, class GlobalOrdinal, class Node>
1733 std::unique_ptr<std::string>
1735 createPrefix(const char className[],
1736 const char methodName[]) const
1737 {
1738 auto map = this->getMap();
1739 auto comm = map.is_null() ? Teuchos::null : map->getComm();
1740 return Details::createPrefix(
1741 comm.getRawPtr(), className, methodName);
1742 }
1743
1744 template<class DistObjectType>
1745 void
1747 Teuchos::RCP<DistObjectType>& input,
1748 const Teuchos::RCP<const Map<
1749 typename DistObjectType::local_ordinal_type,
1750 typename DistObjectType::global_ordinal_type,
1751 typename DistObjectType::node_type>>& newMap)
1752 {
1753 input->removeEmptyProcessesInPlace (newMap);
1754 if (newMap.is_null ()) { // my process is excluded
1755 input = Teuchos::null;
1756 }
1757 }
1758
1759 template<class DistObjectType>
1760 void
1761 removeEmptyProcessesInPlace (Teuchos::RCP<DistObjectType>& input)
1762 {
1763 auto newMap = input->getMap ()->removeEmptyProcesses ();
1765 }
1766
1767// Explicit instantiation macro for general DistObject: expands to one explicit instantiation of the class template DistObject< SCALAR , LO , GO , NODE >.
1768#define TPETRA_DISTOBJECT_INSTANT(SCALAR, LO, GO, NODE) \
1769 template class DistObject< SCALAR , LO , GO , NODE >;
1770
1771// Explicit instantiation macro for DistObject<char, ...>: the Packet type is fixed to char, so the macro takes only LO, GO and NODE.
1772// The "SLGN" stuff above doesn't work for Packet=char.
1773#define TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) \
1774 template class DistObject< char , LO , GO , NODE >;
1775
1776} // namespace Tpetra
1777
1778#endif // TPETRA_DISTOBJECT_DEF_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of Tpetra.
void unpackAndCombine(const RowView &row_ptrs_beg, const RowView &row_ptrs_end, IndicesView &indices, const Kokkos::View< const GlobalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &imports, const Kokkos::View< const size_t *, BufferDevice, Kokkos::MemoryUnmanaged > &num_packets_per_lid, const Kokkos::View< const LocalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &import_lids, const typename CrsGraph< LocalOrdinal, GlobalOrdinal, Node >::padding_type &padding, const bool unpack_pids, const int myRank, const bool verbose)
Perform the unpack operation for the graph.
Stand-alone utility functions and macros.
Struct that holds views of the contents of a CrsMatrix.
Description of Tpetra's behavior.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
Base class for distributed Tpetra objects that support data redistribution.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print a description of this object to the given output stream.
virtual bool reallocImportsIfNeeded(const size_t newSize, const bool verbose, const std::string *prefix, const bool remoteLIDsContiguous=false, const CombineMode CM=INSERT)
Reallocate imports_ if needed.
virtual bool reallocArraysForNumPacketsPerLid(const size_t numExportLIDs, const size_t numImportLIDs)
Reallocate numExportPacketsPerLID_ and/or numImportPacketsPerLID_, if necessary.
void doImport(const SrcDistObject &source, const Import< LocalOrdinal, GlobalOrdinal, Node > &importer, const CombineMode CM, const bool restrictedMode=false)
Import data into this object using an Import object ("forward mode").
void beginTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Implementation detail of doTransfer.
DistObject(const Teuchos::RCP< const map_type > &map)
Constructor.
bool transferArrived() const
Whether the data from an import/export operation has arrived, and is ready for the unpack and combine...
virtual void packAndPrepare(const SrcDistObject &source, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets)
Pack data and metadata for communication (sends).
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
LocalOrdinal local_ordinal_type
The type of local indices.
virtual void doTransfer(const SrcDistObject &src, const ::Tpetra::Details::Transfer< local_ordinal_type, global_ordinal_type, node_type > &transfer, const char modeString[], const ReverseOption revOp, const CombineMode CM, const bool restrictedMode)
Redistribute data across (MPI) processes.
void print(std::ostream &os) const
Print this object to the given output stream.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode)
Perform any unpacking and combining after communication.
typename ::Kokkos::Details::ArithTraits< Packet >::val_type packet_type
The type of each datum being sent or received in an Import or Export.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM)
Perform copies and permutations that are local to the calling (MPI) process.
ReverseOption
Whether the data transfer should be performed in forward or reverse mode.
virtual size_t constantNumberOfPackets() const
Whether the implementation's instance promises always to have a constant number of packets per LID (l...
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
virtual std::string description() const
One-line description of this object.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes which contain no entries in this object's Map.
bool isDistributed() const
Whether this is a globally distributed object.
A parallel distribution of indices over processes.
Abstract base class for objects that can be the source of an Import or Export operation.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
CombineMode
Rule for combining data in an Import or Export.
@ ZERO
Replace old values with zero.