Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_packCrsGraph_def.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ************************************************************************
38// @HEADER
39
40#ifndef TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
41#define TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
42
43#include "TpetraCore_config.h"
44#include "Teuchos_Array.hpp"
45#include "Teuchos_ArrayView.hpp"
53#include <memory>
54#include <string>
55
77
78namespace Tpetra {
79
80//
81// Users must never rely on anything in the Details namespace.
82//
83namespace Details {
84
85namespace PackCrsGraphImpl {
93template<class OutputOffsetsViewType,
94 class CountsViewType,
95 class InputOffsetsViewType,
96 class InputLocalRowIndicesViewType,
97 class InputLocalRowPidsViewType,
98 const bool debug =
99#ifdef HAVE_TPETRA_DEBUG
100 true
101#else
102 false
103#endif // HAVE_TPETRA_DEBUG
104 >
106public:
107 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
108 typedef typename CountsViewType::non_const_value_type count_type;
109 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
110 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
111 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
112 // output Views drive where execution happens.
113 typedef typename OutputOffsetsViewType::device_type device_type;
114 static_assert (std::is_same<typename CountsViewType::device_type::execution_space,
115 typename device_type::execution_space>::value,
116 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
117 static_assert (Kokkos::is_view<OutputOffsetsViewType>::value,
118 "OutputOffsetsViewType must be a Kokkos::View.");
119 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
120 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
121 static_assert (std::is_integral<output_offset_type>::value,
122 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
123 static_assert (Kokkos::is_view<CountsViewType>::value,
124 "CountsViewType must be a Kokkos::View.");
125 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
126 "CountsViewType must be a nonconst Kokkos::View.");
127 static_assert (std::is_integral<count_type>::value,
128 "The type of each entry of CountsViewType must be a built-in integer type.");
129 static_assert (Kokkos::is_view<InputOffsetsViewType>::value,
130 "InputOffsetsViewType must be a Kokkos::View.");
131 static_assert (std::is_integral<input_offset_type>::value,
132 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
133 static_assert (Kokkos::is_view<InputLocalRowIndicesViewType>::value,
134 "InputLocalRowIndicesViewType must be a Kokkos::View.");
135 static_assert (std::is_integral<local_row_index_type>::value,
136 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
137
139 const CountsViewType& counts,
143 outputOffsets_ (outputOffsets),
144 counts_ (counts),
145 rowOffsets_ (rowOffsets),
146 lclRowInds_ (lclRowInds),
147 lclRowPids_ (lclRowPids),
148 error_ ("error") // don't forget this, or you'll get segfaults!
149 {
150 if (debug) {
151 const size_t numRowsToPack = static_cast<size_t> (lclRowInds_.extent (0));
152
153 if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
154 std::ostringstream os;
155 os << "lclRowInds.extent(0) = " << numRowsToPack
156 << " != counts.extent(0) = " << counts_.extent (0)
157 << ".";
158 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
159 }
160 if (static_cast<size_t> (numRowsToPack + 1) !=
161 static_cast<size_t> (outputOffsets_.extent (0))) {
162 std::ostringstream os;
163 os << "lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
164 << " != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
165 << ".";
166 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
167 }
168 }
169 }
170
172 operator() (const local_row_index_type& curInd,
173 output_offset_type& update,
174 const bool final) const
175 {
176 if (debug) {
177 if (curInd < static_cast<local_row_index_type> (0)) {
178 error_ () = 1;
179 return;
180 }
181 }
182
183 if (final) {
184 if (debug) {
185 if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
186 error_ () = 2;
187 return;
188 }
189 }
190 outputOffsets_(curInd) = update;
191 }
192
193 if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
194 const auto lclRow = lclRowInds_(curInd);
195 if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
196 static_cast<local_row_index_type> (lclRow) < static_cast<local_row_index_type> (0)) {
197 error_ () = 3;
198 return;
199 }
200 // count_type could differ from the type of each row offset.
201 // For example, row offsets might each be 64 bits, but if their
202 // difference always fits in 32 bits, we may then safely use a
203 // 32-bit count_type.
204 const count_type count =
205 static_cast<count_type> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
206
207 // We pack first the global column indices and then pids (if any),
208 // However, if the number of entries in the row is zero, we pack nothing.
209 const count_type numEntToPack = (count == 0)
210 ? static_cast<count_type>(0)
211 : count * (1 + (lclRowPids_.size() > 0 ? 1 : 0));
212
213 if (final) {
214 counts_(curInd) = numEntToPack;
215 }
216 update += numEntToPack;
217 }
218 }
219
220 // mfh 31 May 2017: Don't need init or join. If you have join, MUST
221 // have join both with and without volatile! Otherwise intrawarp
222 // joins are really slow on GPUs.
223
225 int getError () const {
226 auto error_h = Kokkos::create_mirror_view (error_);
227 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
228 using execution_space = typename device_type::execution_space;
229 Kokkos::deep_copy (execution_space(), error_h, error_);
230 return error_h ();
231 }
232
233private:
234 OutputOffsetsViewType outputOffsets_;
235 CountsViewType counts_;
236 typename InputOffsetsViewType::const_type rowOffsets_;
237 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
238 typename InputLocalRowPidsViewType::const_type lclRowPids_;
239 Kokkos::View<int, device_type> error_;
240};
241
251template<class OutputOffsetsViewType,
252 class CountsViewType,
256typename CountsViewType::non_const_value_type
258 const CountsViewType& counts,
262{
264 CountsViewType, typename InputOffsetsViewType::const_type,
265 typename InputLocalRowIndicesViewType::const_type,
266 typename InputLocalRowPidsViewType::const_type> functor_type;
267 typedef typename CountsViewType::non_const_value_type count_type;
268 typedef typename OutputOffsetsViewType::size_type size_type;
269 typedef typename OutputOffsetsViewType::execution_space execution_space;
270 typedef typename functor_type::local_row_index_type LO;
271 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
272 const char prefix[] = "computeNumPacketsAndOffsets: ";
273
274 count_type count = 0;
275 const count_type numRowsToPack = lclRowInds.extent (0);
276
277 if (numRowsToPack == 0) {
278 return count;
279 }
280 else {
282 (rowOffsets.extent (0) <= static_cast<size_type> (1),
283 std::invalid_argument, prefix << "There is at least one row to pack, "
284 "but the graph has no rows. lclRowInds.extent(0) = " <<
285 numRowsToPack << ", but rowOffsets.extent(0) = " <<
286 rowOffsets.extent (0) << " <= 1.");
288 (outputOffsets.extent (0) !=
289 static_cast<size_type> (numRowsToPack + 1), std::invalid_argument,
290 prefix << "Output dimension does not match number of rows to pack. "
291 << "outputOffsets.extent(0) = " << outputOffsets.extent (0)
292 << " != lclRowInds.extent(0) + 1 = "
293 << static_cast<size_type> (numRowsToPack + 1) << ".");
295 (counts.extent (0) != numRowsToPack, std::invalid_argument,
296 prefix << "counts.extent(0) = " << counts.extent (0)
297 << " != numRowsToPack = " << numRowsToPack << ".");
298
300 Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
301
302 // At least in debug mode, this functor checks for errors.
303 const int errCode = f.getError ();
305 (errCode != 0, std::runtime_error, prefix << "parallel_scan error code "
306 << errCode << " != 0.");
307
308#if 0
309 size_t total = 0;
310 for (LO k = 0; k < numRowsToPack; ++k) {
311 total += counts[k];
312 }
314 if (errStr.get () == NULL) {
315 errStr = std::unique_ptr<std::ostringstream> (new std::ostringstream ());
316 }
317 std::ostringstream& os = *errStr;
318 os << prefix
319 << "outputOffsets(numRowsToPack=" << numRowsToPack << ") "
320 << outputOffsets(numRowsToPack) << " != sum of counts = "
321 << total << "." << std::endl;
322 if (numRowsToPack != 0) {
323 // Only print the array if it's not too long.
324 if (numRowsToPack < static_cast<LO> (10)) {
325 os << "outputOffsets: [";
326 for (LO i = 0; i <= numRowsToPack; ++i) {
327 os << outputOffsets(i);
328 if (static_cast<LO> (i + 1) <= numRowsToPack) {
329 os << ",";
330 }
331 }
332 os << "]" << std::endl;
333 os << "counts: [";
334 for (LO i = 0; i < numRowsToPack; ++i) {
335 os << counts(i);
336 if (static_cast<LO> (i + 1) < numRowsToPack) {
337 os << ",";
338 }
339 }
340 os << "]" << std::endl;
341 }
342 else {
343 os << "outputOffsets(" << (numRowsToPack-1) << ") = "
344 << outputOffsets(numRowsToPack-1) << "." << std::endl;
345 }
346 }
348 return {false, errStr};
349 }
350#endif // HAVE_TPETRA_DEBUG
351
352 // Get last entry of outputOffsets, which is the sum of the entries
353 // of counts. Don't assume UVM.
354 using Tpetra::Details::getEntryOnHost;
355 return static_cast<count_type> (getEntryOnHost (outputOffsets,
357 }
358}
359
370template<class Packet,
371 class LocalMapType,
372 class BufferDeviceType,
373 class InputLidsType,
374 class InputPidsType>
376size_t
378 const Kokkos::View<Packet*, BufferDeviceType>& exports,
379 const InputLidsType& lids_in,
380 const InputPidsType& pids_in,
381 const size_t offset,
382 const size_t num_ent,
383 const bool pack_pids)
384{
385 using LO = typename LocalMapType::local_ordinal_type;
386 using GO = typename LocalMapType::global_ordinal_type;
387
388 if (num_ent == 0) {
389 // Empty rows always take zero bytes, to ensure sparsity.
390 return static_cast<size_t>(0);
391 }
392
393 size_t num_ent_packed = num_ent;
394 if (pack_pids) {
396 }
397
398 // Copy column indices one at a time, so that we don't need
399 // temporary storage.
400 for (size_t k = 0; k < num_ent; ++k) {
401 const LO lid = lids_in[k];
402 const GO gid = col_map.getGlobalElement (lid);
403 exports(offset+k) = gid;
404 }
405 // Copy PIDs one at a time, so that we don't need temporary storage.
406 if (pack_pids) {
407 for (size_t k = 0; k < num_ent; ++k) {
408 const LO lid = lids_in[k];
409 const int pid = pids_in[lid];
410 exports(offset+num_ent+k) = static_cast<GO>(pid);
411 }
412 }
413
414 return num_ent_packed;
415}
416
417template<class Packet,
418 class LocalGraph,
419 class LocalMap,
420 class BufferDeviceType>
421struct PackCrsGraphFunctor {
422 using local_graph_type = LocalGraph;
423 using local_map_type = LocalMap;
424 using LO = typename local_map_type::local_ordinal_type;
425 using GO = typename local_map_type::global_ordinal_type;
426
427 using num_packets_per_lid_view_type =
428 Kokkos::View<const size_t*, BufferDeviceType>;
429 using offsets_view_type = Kokkos::View<const size_t*, BufferDeviceType>;
430 using exports_view_type = Kokkos::View<Packet*, BufferDeviceType>;
431 using export_lids_view_type =
433 using source_pids_view_type =
435
436 using count_type =
437 typename num_packets_per_lid_view_type::non_const_value_type;
438 using offset_type = typename offsets_view_type::non_const_value_type;
439 using value_type = Kokkos::pair<int, LO>;
440
441 static_assert (std::is_same<LO, typename local_graph_type::data_type>::value,
442 "local_map_type::local_ordinal_type and "
443 "local_graph_type::data_type must be the same.");
444
445 local_graph_type local_graph;
446 local_map_type local_col_map;
447 exports_view_type exports;
448 num_packets_per_lid_view_type num_packets_per_lid;
449 export_lids_view_type export_lids;
450 source_pids_view_type source_pids;
451 offsets_view_type offsets;
452 bool pack_pids;
453
454 PackCrsGraphFunctor(const local_graph_type& local_graph_in,
455 const local_map_type& local_col_map_in,
456 const exports_view_type& exports_in,
457 const num_packets_per_lid_view_type& num_packets_per_lid_in,
458 const export_lids_view_type& export_lids_in,
459 const source_pids_view_type& source_pids_in,
460 const offsets_view_type& offsets_in,
461 const bool pack_pids_in) :
462 local_graph (local_graph_in),
463 local_col_map (local_col_map_in),
464 exports (exports_in),
465 num_packets_per_lid (num_packets_per_lid_in),
466 export_lids (export_lids_in),
467 source_pids (source_pids_in),
468 offsets (offsets_in),
469 pack_pids (pack_pids_in)
470 {
471 const LO numRows = local_graph_in.numRows ();
472 const LO rowMapDim =
473 static_cast<LO> (local_graph.row_map.extent (0));
475 (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
476 std::logic_error, "local_graph.row_map.extent(0) = "
477 << rowMapDim << " != numRows (= " << numRows << " ) + 1.");
478 }
479
480 KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
481 {
482 using ::Tpetra::Details::OrdinalTraits;
483 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
484 }
485
486 KOKKOS_INLINE_FUNCTION void
487 join (value_type& dst, const value_type& src) const
488 {
489 // `dst` should reflect the first (least) bad index and all other
490 // associated error codes and data, so prefer keeping it.
491 if (src.first != 0 && dst.first == 0) {
492 dst = src;
493 }
494 }
495
496 KOKKOS_INLINE_FUNCTION
497 void operator() (const LO i, value_type& dst) const
498 {
499 const size_t offset = offsets[i];
500 const LO export_lid = export_lids[i];
501 const size_t buf_size = exports.size();
502 const size_t num_packets_this_lid = num_packets_per_lid(i);
503 const size_t num_ent =
504 static_cast<size_t> (local_graph.row_map[export_lid+1]
505 - local_graph.row_map[export_lid]);
506
507 // Only pack this row's data if it has a nonzero number of
508 // entries. We can do this because receiving processes get the
509 // number of packets, and will know that zero packets means zero
510 // entries.
511 if (num_ent == 0) {
512 return;
513 }
514
515 if (export_lid >= static_cast<LO>(local_graph.numRows())) {
516 if (dst.first != 0) { // keep only the first error
517 dst = Kokkos::make_pair (1, i); // invalid row
518 }
519 return;
520 }
521 else if ((offset > buf_size || offset + num_packets_this_lid > buf_size)) {
522 if (dst.first != 0) { // keep only the first error
523 dst = Kokkos::make_pair (2, i); // out of bounds
524 }
525 return;
526 }
527
528 // We can now pack this row
529
530 // Since the graph is locally indexed on the calling process, we
531 // have to use its column Map (which it _must_ have in this case)
532 // to convert to global indices.
533 const auto row_beg = local_graph.row_map[export_lid];
534 const auto row_end = local_graph.row_map[export_lid + 1];
535 auto lids_in = Kokkos::subview (local_graph.entries,
536 Kokkos::make_pair (row_beg, row_end));
537 size_t num_ent_packed_this_row =
538 packRow (local_col_map, exports, lids_in,
539 source_pids, offset, num_ent, pack_pids);
540 if (num_ent_packed_this_row != num_packets_this_lid) {
541 if (dst.first != 0) { // keep only the first error
542 dst = Kokkos::make_pair (3, i);
543 }
544 }
545 }
546};
547
555template<class Packet,
556 class LocalGraph,
557 class LocalMap,
558 class BufferDeviceType>
559void
560do_pack(const LocalGraph& local_graph,
561 const LocalMap& local_map,
562 const Kokkos::View<Packet*, BufferDeviceType>& exports,
563 const typename PackTraits<
564 size_t
565 >::input_array_type& num_packets_per_lid,
566 const typename PackTraits<
568 >::input_array_type& export_lids,
569 const typename PackTraits<
570 int
571 >::input_array_type& source_pids,
572 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
573 const bool pack_pids)
574{
575 using LO = typename LocalMap::local_ordinal_type;
576 using execution_space = typename LocalGraph::device_type::execution_space;
577 using range_type = Kokkos::RangePolicy<execution_space, LO>;
578 const char prefix[] = "Tpetra::Details::PackCrsGraphImpl::do_pack: ";
579
580 if (export_lids.extent (0) != 0) {
582 (static_cast<size_t> (offsets.extent (0)) !=
583 static_cast<size_t> (export_lids.extent (0) + 1),
584 std::invalid_argument, prefix << "offsets.extent(0) = "
585 << offsets.extent (0) << " != export_lids.extent(0) (= "
586 << export_lids.extent (0) << ") + 1.");
588 (export_lids.extent (0) != num_packets_per_lid.extent (0),
589 std::invalid_argument, prefix << "export_lids.extent(0) = " <<
590 export_lids.extent (0) << " != num_packets_per_lid.extent(0) = "
591 << num_packets_per_lid.extent (0) << ".");
592 // If exports has nonzero length at this point, then the graph
593 // has at least one entry to pack. Thus, if packing process
594 // ranks, we had better have at least one process rank to pack.
596 (pack_pids && exports.extent (0) != 0 &&
597 source_pids.extent (0) == 0, std::invalid_argument, prefix <<
598 "pack_pids is true, and exports.extent(0) = " <<
599 exports.extent (0) << " != 0, meaning that we need to pack at "
600 "least one graph entry, but source_pids.extent(0) = 0.");
601 }
602
603 using pack_functor_type =
604 PackCrsGraphFunctor<Packet, LocalGraph, LocalMap,
606 pack_functor_type f (local_graph, local_map, exports,
607 num_packets_per_lid, export_lids,
608 source_pids, offsets, pack_pids);
609
610 typename pack_functor_type::value_type result;
611 range_type range (0, num_packets_per_lid.extent (0));
612 Kokkos::parallel_reduce (range, f, result);
613
614 if (result.first != 0) {
615 // We can't deep_copy from AnonymousSpace Views, so we can't
616 // print out any information from them in case of error.
617 std::ostringstream os;
618 if (result.first == 1) { // invalid local row index
619 os << "invalid local row index";
620 }
621 else if (result.first == 2) { // invalid offset
622 os << "invalid offset";
623 }
625 (true, std::runtime_error, prefix << "PackCrsGraphFunctor "
626 "reported error code " << result.first << " (" << os.str ()
627 << ") for the first bad row " << result.second << ".");
628 }
629}
630
657template<typename LO, typename GO, typename NT>
658void
661 Kokkos::DualView<
664 >& exports,
665 const Kokkos::View<
666 size_t*,
668 >& num_packets_per_lid,
669 const Kokkos::View<
670 const LO*,
672 >& export_lids,
673 const Kokkos::View<
674 const int*,
676 >& export_pids,
677 size_t& constant_num_packets,
678 const bool pack_pids)
679{
680 using Kokkos::View;
681 using crs_graph_type = CrsGraph<LO, GO, NT>;
682 using packet_type = typename crs_graph_type::packet_type;
683 using buffer_device_type = typename crs_graph_type::buffer_device_type;
684 using exports_view_type = Kokkos::DualView<packet_type*, buffer_device_type>;
685 using local_graph_device_type = typename crs_graph_type::local_graph_device_type;
686 using local_map_type = typename Tpetra::Map<LO, GO, NT>::local_map_type;
687 const char prefix[] = "Tpetra::Details::packCrsGraph: ";
688 constexpr bool debug = false;
689
690 local_graph_device_type local_graph = sourceGraph.getLocalGraphDevice ();
691 local_map_type local_col_map = sourceGraph.getColMap ()->getLocalMap ();
692
693 // Setting this to zero tells the caller to expect a possibly
694 // different ("nonconstant") number of packets per local index
695 // (i.e., a possibly different number of entries per row).
697
698 const size_t num_export_lids (export_lids.extent (0));
700 (num_export_lids != size_t (num_packets_per_lid.extent (0)),
701 std::invalid_argument, prefix << "num_export_lids.extent(0) = "
702 << num_export_lids << " != num_packets_per_lid.extent(0) = "
703 << num_packets_per_lid.extent (0) << ".");
704 if (num_export_lids != 0) {
706 (num_packets_per_lid.data () == nullptr, std::invalid_argument,
707 prefix << "num_export_lids = "<< num_export_lids << " != 0, but "
708 "num_packets_per_lid.data() = "
709 << num_packets_per_lid.data () << " == NULL.");
710 }
711
712 if (num_export_lids == 0) {
713 exports = exports_view_type ("exports", 0);
714 return;
715 }
716
717 // Array of offsets into the pack buffer.
718 View<size_t*, buffer_device_type> offsets ("offsets", num_export_lids + 1);
719
720 // Compute number of packets per LID (row to send), as well as
721 // corresponding offsets (the prefix sum of the packet counts).
722 const size_t count =
723 computeNumPacketsAndOffsets(offsets, num_packets_per_lid,
724 local_graph.row_map, export_lids, export_pids);
725
726 // Resize the output pack buffer if needed.
727 if (count > size_t (exports.extent (0))) {
728 exports = exports_view_type ("exports", count);
729 if (debug) {
730 std::ostringstream os;
731 os << "*** exports resized to " << count << std::endl;
732 std::cerr << os.str ();
733 }
734 }
735 if (debug) {
736 std::ostringstream os;
737 os << "*** count: " << count << ", exports.extent(0): "
738 << exports.extent (0) << std::endl;
739 std::cerr << os.str ();
740 }
741
742 // If exports has nonzero length at this point, then the graph has
743 // at least one entry to pack. Thus, if packing process ranks, we
744 // had better have at least one process rank to pack.
746 (pack_pids && exports.extent (0) != 0 &&
747 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
748 "pack_pids is true, and exports.extent(0) = " <<
749 exports.extent (0) << " != 0, meaning that we need to pack at least "
750 "one graph entry, but export_pids.extent(0) = 0.");
751
752 exports.modify_device ();
753 auto exports_d = exports.view_device ();
755 (local_graph, local_col_map, exports_d, num_packets_per_lid,
756 export_lids, export_pids, offsets, pack_pids);
757 // If we got this far, we succeeded.
758}
759
760} // namespace PackCrsGraphImpl
761
762template<typename LO, typename GO, typename NT>
763void
765 Teuchos::Array<typename CrsGraph<LO,GO,NT>::packet_type>& exports,
766 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
767 const Teuchos::ArrayView<const LO>& exportLIDs,
768 size_t& constantNumPackets)
769{
770 using Kokkos::HostSpace;
771 using Kokkos::MemoryUnmanaged;
772 using Kokkos::View;
773 using crs_graph_type = CrsGraph<LO, GO, NT>;
774 using packet_type = typename crs_graph_type::packet_type;
775 using BDT = typename crs_graph_type::buffer_device_type;
776
777 // Convert all Teuchos::Array to Kokkos::View
778
779 // This is an output array, so we don't have to copy to device here.
780 // However, we'll have to remember to copy back to host when done.
784 numPacketsPerLID.getRawPtr (),
785 numPacketsPerLID.size (), false,
786 "num_packets_per_lid");
787 // This is an input array, so we have to copy to device here.
788 // However, we never need to copy it back to host.
791 exportLIDs.getRawPtr (),
792 exportLIDs.size (), true,
793 "export_lids");
795 Kokkos::DualView<packet_type*, BDT> exports_dv;
796 constexpr bool pack_pids = false;
797
798 static_assert
799 (std::is_same<
800 typename decltype (num_packets_per_lid_d)::non_const_value_type,
801 size_t>::value,
802 "num_packets_per_lid_d's non_const_value_type should be size_t.");
803 static_assert
804 (std::is_same<
805 typename decltype (num_packets_per_lid_d)::device_type,
806 BDT>::value,
807 "num_packets_per_lid_d's BDT should be size_t.");
808 static_assert
809 (std::is_same<
810 typename decltype (export_lids_d)::device_type,
811 BDT>::value,
812 "export_lids_d's device_type should be BDT.");
813 static_assert
814 (std::is_same<
815 typename decltype (export_pids_d)::non_const_value_type,
816 int>::value,
817 "export_pids_d's non_const_value_type should be int.");
818 static_assert
819 (std::is_same<
820 typename decltype (export_pids_d)::device_type,
821 BDT>::value,
822 "export_pids_d's device_type should be BDT.");
823
824 PackCrsGraphImpl::packCrsGraph
827
828 // The counts are an output of packCrsGraph, so we have to copy
829 // them back to host.
832 numPacketsPerLID.size ());
833
834 // DEEP_COPY REVIEW - DEVICE-TO-HOST
835 using execution_space = typename BDT::execution_space;
836 Kokkos::deep_copy (execution_space(), num_packets_per_lid_h, num_packets_per_lid_d);
837
838 // FIXME (mfh 23 Aug 2017) If we're forced to use a DualView for
839 // exports_dv above, then we have two host copies for exports_h.
840
841 // The exports are an output of packCrsGraph, so we have to
842 // copy them back to host.
843 if (static_cast<size_t> (exports.size ()) !=
844 static_cast<size_t> (exports_dv.extent (0))) {
845 exports.resize (exports_dv.extent (0));
846 }
848 exports_h (exports.getRawPtr (), exports.size ());
849 // DEEP_COPY REVIEW - DEVICE-TO-HOST
850 Kokkos::deep_copy (execution_space(), exports_h, exports_dv.d_view);
851}
852
855template<typename LO, typename GO, typename NT>
856void
858 const Kokkos::DualView<
859 const LO*,
861 >& export_lids,
862 const Kokkos::DualView<
863 const int*,
865 >& export_pids,
866 Kokkos::DualView<
869 Kokkos::DualView<
870 size_t*,
872 > num_packets_per_lid,
873 size_t& constant_num_packets,
874 const bool pack_pids)
875{
876 using Kokkos::View;
877 using crs_graph_type = CrsGraph<LO,GO,NT>;
878 using BDT = typename crs_graph_type::buffer_device_type;
879 using PT = typename crs_graph_type::packet_type;
880 using exports_dual_view_type = Kokkos::DualView<PT*, BDT>;
881 using LGT = typename crs_graph_type::local_graph_device_type;
882 using LMT = typename crs_graph_type::map_type::local_map_type;
883 const char prefix[] = "Tpetra::Details::packCrsGraphNew: ";
884
885 const LGT local_graph = sourceGraph.getLocalGraphDevice ();
886 const LMT local_col_map = sourceGraph.getColMap ()->getLocalMap ();
887
888 // Setting this to zero tells the caller to expect a possibly
889 // different ("nonconstant") number of packets per local index
890 // (i.e., a possibly different number of entries per row).
892
893 const size_t num_export_lids =
894 static_cast<size_t> (export_lids.extent (0));
897 static_cast<size_t> (num_packets_per_lid.extent (0)),
898 std::invalid_argument, prefix << "num_export_lids.extent(0) = "
899 << num_export_lids << " != num_packets_per_lid.extent(0) = "
900 << num_packets_per_lid.extent (0) << ".");
902 (num_export_lids != 0 &&
903 num_packets_per_lid.view_device ().data () == nullptr,
904 std::invalid_argument, prefix << "num_export_lids = "<< num_export_lids
905 << " != 0, but num_packets_per_lid.view_device().data() = nullptr.");
906
907 if (num_export_lids == 0) {
908 exports = exports_dual_view_type ();
909 return;
910 }
911
912 // Array of offsets into the pack buffer.
913 using offsets_type = Kokkos::View<size_t*, BDT>;
914 offsets_type offsets ("offsets", num_export_lids + 1);
915
916 // Compute number of packets per LID (row to send), as well as
917 // corresponding offsets (the prefix sum of the packet counts).
918 num_packets_per_lid.clear_sync_state ();
919 num_packets_per_lid.modify_device ();
920 using PackCrsGraphImpl::computeNumPacketsAndOffsets;
921 const size_t count =
922 computeNumPacketsAndOffsets (offsets, num_packets_per_lid.view_device (),
923 local_graph.row_map,
924 export_lids.view_device (),
925 export_pids.view_device ());
926
927 // Resize the output pack buffer if needed.
928 if (count > static_cast<size_t> (exports.extent (0))) {
929 exports = exports_dual_view_type ("exports", count);
930 }
931
932 // If exports has nonzero length at this point, then the graph has
933 // at least one entry to pack. Thus, if packing process ranks, we
934 // had better have at least one process rank to pack.
936 (pack_pids && exports.extent (0) != 0 &&
937 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
938 "pack_pids is true, and exports.extent(0) = " <<
939 exports.extent (0) << " != 0, meaning that we need to pack at least "
940 "one graph entry, but export_pids.extent(0) = 0.");
941
942 exports.modify_device ();
943 using PackCrsGraphImpl::do_pack;
944 do_pack<PT, LGT, LMT, BDT> (local_graph, local_col_map,
945 exports.view_device (),
946 num_packets_per_lid.view_device (),
947 export_lids.view_device (),
948 export_pids.view_device (),
949 offsets, pack_pids);
950}
951
952template<typename LO, typename GO, typename NT>
953void
956 Kokkos::DualView<
959 >& exports_dv,
960 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
961 const Teuchos::ArrayView<const LO>& exportLIDs,
962 const Teuchos::ArrayView<const int>& sourcePIDs,
963 size_t& constantNumPackets)
964{
965 using Kokkos::HostSpace;
966 using Kokkos::MemoryUnmanaged;
967 using Kokkos::View;
968 using crs_graph_type = CrsGraph<LO, GO, NT>;
969 using buffer_device_type = typename crs_graph_type::buffer_device_type;
970
971 // Convert all Teuchos::Array to Kokkos::View
972
973 // This is an output array, so we don't have to copy to device here.
974 // However, we'll have to remember to copy back to host when done.
976 create_mirror_view_from_raw_host_array (buffer_device_type (),
977 numPacketsPerLID.getRawPtr (),
978 numPacketsPerLID.size (), false,
979 "num_packets_per_lid");
980
981 // This is an input array, so we have to copy to device here.
982 // However, we never need to copy it back to host.
984 create_mirror_view_from_raw_host_array (buffer_device_type (),
985 exportLIDs.getRawPtr (),
986 exportLIDs.size (), true,
987 "export_lids");
988 // This is an input array, so we have to copy to device here.
989 // However, we never need to copy it back to host.
991 create_mirror_view_from_raw_host_array (buffer_device_type (),
992 sourcePIDs.getRawPtr (),
993 sourcePIDs.size (), true,
994 "export_pids");
995 constexpr bool pack_pids = true;
996 PackCrsGraphImpl::packCrsGraph
999
1000 // The counts are an output of packCrsGraph, so we
1001 // have to copy them back to host.
1003 (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
1004 // DEEP_COPY REVIEW - DEVICE-TO-HOST
1005 using execution_space = typename buffer_device_type::execution_space;
1006 Kokkos::deep_copy (execution_space(),
1008}
1009
1010} // namespace Details
1011} // namespace Tpetra
1012
// Explicit-instantiation macro for the three public packing entry
// points (packCrsGraph, packCrsGraphNew, packCrsGraphWithOwningPIDs)
// for one (LO, GO, NT) combination.  The names are written relative to
// the Tpetra namespace (Details::..., CrsGraph), so the macro is
// presumably meant to be expanded inside namespace Tpetra.
#define TPETRA_DETAILS_PACKCRSGRAPH_INSTANT( LO, GO, NT ) \
  template void \
  Details::packCrsGraph<LO, GO, NT> ( \
    const CrsGraph<LO, GO, NT>&, \
    Teuchos::Array<CrsGraph<LO,GO,NT>::packet_type>&, \
    const Teuchos::ArrayView<size_t>&, \
    const Teuchos::ArrayView<const LO>&, \
    size_t&); \
  template void \
  Details::packCrsGraphNew<LO, GO, NT> ( \
    const CrsGraph<LO, GO, NT>&, \
    const Kokkos::DualView<const LO*, CrsGraph<LO,GO,NT>::buffer_device_type>&, \
    const Kokkos::DualView<const int*, CrsGraph<LO,GO,NT>::buffer_device_type>&, \
    Kokkos::DualView<CrsGraph<LO,GO,NT>::packet_type*, CrsGraph<LO,GO,NT>::buffer_device_type>&, \
    Kokkos::DualView<size_t*, CrsGraph<LO,GO,NT>::buffer_device_type>, \
    size_t&, \
    const bool); \
  template void \
  Details::packCrsGraphWithOwningPIDs<LO, GO, NT> ( \
    const CrsGraph<LO, GO, NT>&, \
    Kokkos::DualView<CrsGraph<LO,GO,NT>::packet_type*, CrsGraph<LO,GO,NT>::buffer_device_type>&, \
    const Teuchos::ArrayView<size_t>&, \
    const Teuchos::ArrayView<const LO>&, \
    const Teuchos::ArrayView<const int>&, \
    size_t&);
1046
1047#endif // TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP
Declaration of the Tpetra::CrsGraph class.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Declaration and definition of Tpetra::Details::getEntryOnHost.
CountsViewType::non_const_value_type computeNumPacketsAndOffsets(const OutputOffsetsViewType &outputOffsets, const CountsViewType &counts, const InputOffsetsViewType &rowOffsets, const InputLocalRowIndicesViewType &lclRowInds, const InputLocalRowPidsViewType &lclRowPids)
Compute the number of packets and offsets for the pack procedure.
void do_pack(const LocalGraph &local_graph, const LocalMap &local_map, const Kokkos::View< Packet *, BufferDeviceType > &exports, const typename PackTraits< size_t >::input_array_type &num_packets_per_lid, const typename PackTraits< typename LocalMap::local_ordinal_type >::input_array_type &export_lids, const typename PackTraits< int >::input_array_type &source_pids, const Kokkos::View< const size_t *, BufferDeviceType > &offsets, const bool pack_pids)
Perform the pack operation for the graph.
KOKKOS_FUNCTION size_t packRow(const LocalMapType &col_map, const Kokkos::View< Packet *, BufferDeviceType > &exports, const InputLidsType &lids_in, const InputPidsType &pids_in, const size_t offset, const size_t num_ent, const bool pack_pids)
Packs a single row of the CrsGraph.
typename dist_object_type::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
Struct that holds views of the contents of a CrsMatrix.
"Local" part of Map suitable for Kokkos kernels.
LocalOrdinal local_ordinal_type
The type of local indices.
GlobalOrdinal global_ordinal_type
The type of global indices.
Compute the number of packets and offsets for the pack procedure.
Implementation details of Tpetra.
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interface.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Traits class for packing / unpacking data of type T.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.