Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_packCrsMatrix_def.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ************************************************************************
38// @HEADER
39
40#ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
41#define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
42
43#include "TpetraCore_config.h"
44#include "Teuchos_Array.hpp"
45#include "Teuchos_ArrayView.hpp"
54#include <memory>
55#include <sstream>
56#include <stdexcept>
57#include <string>
58
81
82namespace Tpetra {
83
84//
85// Users must never rely on anything in the Details namespace.
86//
87namespace Details {
88
89namespace PackCrsMatrixImpl {
97template<class OutputOffsetsViewType,
98 class CountsViewType,
99 class InputOffsetsViewType,
100 class InputLocalRowIndicesViewType,
101 class InputLocalRowPidsViewType,
102 const bool debug =
103#ifdef HAVE_TPETRA_DEBUG
104 true
105#else
106 false
107#endif // HAVE_TPETRA_DEBUG
108 >
110public:
111 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
112 typedef typename CountsViewType::non_const_value_type count_type;
113 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
114 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
115 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
116 // output Views drive where execution happens.
117 typedef typename OutputOffsetsViewType::device_type device_type;
118 static_assert (std::is_same<typename CountsViewType::device_type::execution_space,
119 typename device_type::execution_space>::value,
120 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
121 static_assert (Kokkos::is_view<OutputOffsetsViewType>::value,
122 "OutputOffsetsViewType must be a Kokkos::View.");
123 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
124 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
125 static_assert (std::is_integral<output_offset_type>::value,
126 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
127 static_assert (Kokkos::is_view<CountsViewType>::value,
128 "CountsViewType must be a Kokkos::View.");
129 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
130 "CountsViewType must be a nonconst Kokkos::View.");
131 static_assert (std::is_integral<count_type>::value,
132 "The type of each entry of CountsViewType must be a built-in integer type.");
133 static_assert (Kokkos::is_view<InputOffsetsViewType>::value,
134 "InputOffsetsViewType must be a Kokkos::View.");
135 static_assert (std::is_integral<input_offset_type>::value,
136 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
137 static_assert (Kokkos::is_view<InputLocalRowIndicesViewType>::value,
138 "InputLocalRowIndicesViewType must be a Kokkos::View.");
139 static_assert (std::is_integral<local_row_index_type>::value,
140 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
141
143 const CountsViewType& counts,
147 const count_type sizeOfLclCount,
148 const count_type sizeOfGblColInd,
149 const count_type sizeOfPid,
150 const count_type sizeOfValue) :
151 outputOffsets_ (outputOffsets),
152 counts_ (counts),
153 rowOffsets_ (rowOffsets),
154 lclRowInds_ (lclRowInds),
155 lclRowPids_ (lclRowPids),
156 sizeOfLclCount_ (sizeOfLclCount),
157 sizeOfGblColInd_ (sizeOfGblColInd),
158 sizeOfPid_ (sizeOfPid),
159 sizeOfValue_ (sizeOfValue),
160 error_ ("error") // don't forget this, or you'll get segfaults!
161 {
162 if (debug) {
163 const size_t numRowsToPack = static_cast<size_t> (lclRowInds_.extent (0));
164
165 if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
166 std::ostringstream os;
167 os << "lclRowInds.extent(0) = " << numRowsToPack
168 << " != counts.extent(0) = " << counts_.extent (0)
169 << ".";
170 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
171 }
172 if (static_cast<size_t> (numRowsToPack + 1) !=
173 static_cast<size_t> (outputOffsets_.extent (0))) {
174 std::ostringstream os;
175 os << "lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
176 << " != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
177 << ".";
178 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
179 }
180 }
181 }
182
184 operator() (const local_row_index_type& curInd,
185 output_offset_type& update,
186 const bool final) const
187 {
188 if (debug) {
189 if (curInd < static_cast<local_row_index_type> (0)) {
190 error_ () = 1;
191 return;
192 }
193 }
194
195 if (final) {
196 if (debug) {
197 if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
198 error_ () = 2;
199 return;
200 }
201 }
202 outputOffsets_(curInd) = update;
203 }
204
205 if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
206 const auto lclRow = lclRowInds_(curInd);
207 if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
208 static_cast<local_row_index_type> (lclRow) < static_cast<local_row_index_type> (0)) {
209 error_ () = 3;
210 return;
211 }
212 // count_type could differ from the type of each row offset.
213 // For example, row offsets might each be 64 bits, but if their
214 // difference always fits in 32 bits, we may then safely use a
215 // 32-bit count_type.
216 const count_type count =
217 static_cast<count_type> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
218
219 // We pack first the number of entries in the row, then that
220 // many global column indices, then that many pids (if any),
221 // then that many values. However, if the number of entries in
222 // the row is zero, we pack nothing.
223 const count_type numBytes = (count == 0) ?
224 static_cast<count_type> (0) :
225 sizeOfLclCount_ + count * (sizeOfGblColInd_ +
226 (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
227 sizeOfValue_);
228
229 if (final) {
230 counts_(curInd) = numBytes;
231 }
232 update += numBytes;
233 }
234 }
235
236 // mfh 31 May 2017: Don't need init or join. If you have join, MUST
237 // have join both with and without volatile! Otherwise intrawarp
238 // joins are really slow on GPUs.
239
241 int getError () const {
242 typedef typename device_type::execution_space execution_space;
243 auto error_h = Kokkos::create_mirror_view (error_);
244 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
245 Kokkos::deep_copy (execution_space(), error_h, error_);
246 return error_h ();
247 }
248
249private:
250 OutputOffsetsViewType outputOffsets_;
251 CountsViewType counts_;
252 typename InputOffsetsViewType::const_type rowOffsets_;
253 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
254 typename InputLocalRowPidsViewType::const_type lclRowPids_;
255 count_type sizeOfLclCount_;
256 count_type sizeOfGblColInd_;
257 count_type sizeOfPid_;
258 count_type sizeOfValue_;
259 Kokkos::View<int, device_type> error_;
260};
261
271template<class OutputOffsetsViewType,
272 class CountsViewType,
276typename CountsViewType::non_const_value_type
277computeNumPacketsAndOffsets (const OutputOffsetsViewType& outputOffsets,
278 const CountsViewType& counts,
282 const typename CountsViewType::non_const_value_type sizeOfLclCount,
283 const typename CountsViewType::non_const_value_type sizeOfGblColInd,
284 const typename CountsViewType::non_const_value_type sizeOfPid,
285 const typename CountsViewType::non_const_value_type sizeOfValue)
286{
288 CountsViewType, typename InputOffsetsViewType::const_type,
289 typename InputLocalRowIndicesViewType::const_type,
290 typename InputLocalRowPidsViewType::const_type> functor_type;
291 typedef typename CountsViewType::non_const_value_type count_type;
292 typedef typename OutputOffsetsViewType::size_type size_type;
293 typedef typename OutputOffsetsViewType::execution_space execution_space;
294 typedef typename functor_type::local_row_index_type LO;
295 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
296 const char prefix[] = "computeNumPacketsAndOffsets: ";
297
298 count_type count = 0;
299 const count_type numRowsToPack = lclRowInds.extent (0);
300
301 if (numRowsToPack == 0) {
302 return count;
303 }
304 else {
306 (rowOffsets.extent (0) <= static_cast<size_type> (1),
307 std::invalid_argument, prefix << "There is at least one row to pack, "
308 "but the matrix has no rows. lclRowInds.extent(0) = " <<
309 numRowsToPack << ", but rowOffsets.extent(0) = " <<
310 rowOffsets.extent (0) << " <= 1.");
312 (outputOffsets.extent (0) !=
313 static_cast<size_type> (numRowsToPack + 1), std::invalid_argument,
314 prefix << "Output dimension does not match number of rows to pack. "
315 << "outputOffsets.extent(0) = " << outputOffsets.extent (0)
316 << " != lclRowInds.extent(0) + 1 = "
317 << static_cast<size_type> (numRowsToPack + 1) << ".");
319 (counts.extent (0) != numRowsToPack, std::invalid_argument,
320 prefix << "counts.extent(0) = " << counts.extent (0)
321 << " != numRowsToPack = " << numRowsToPack << ".");
322
326 Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
327
328 // At least in debug mode, this functor checks for errors.
329 const int errCode = f.getError ();
331 (errCode != 0, std::runtime_error, prefix << "parallel_scan error code "
332 << errCode << " != 0.");
333
334#if 0
335 size_t total = 0;
336 for (LO k = 0; k < numRowsToPack; ++k) {
337 total += counts[k];
338 }
340 if (errStr.get () == NULL) {
341 errStr = std::unique_ptr<std::ostringstream> (new std::ostringstream ());
342 }
343 std::ostringstream& os = *errStr;
344 os << prefix
345 << "outputOffsets(numRowsToPack=" << numRowsToPack << ") "
346 << outputOffsets(numRowsToPack) << " != sum of counts = "
347 << total << "." << std::endl;
348 if (numRowsToPack != 0) {
349 // Only print the array if it's not too long.
350 if (numRowsToPack < static_cast<LO> (10)) {
351 os << "outputOffsets: [";
352 for (LO i = 0; i <= numRowsToPack; ++i) {
353 os << outputOffsets(i);
354 if (static_cast<LO> (i + 1) <= numRowsToPack) {
355 os << ",";
356 }
357 }
358 os << "]" << std::endl;
359 os << "counts: [";
360 for (LO i = 0; i < numRowsToPack; ++i) {
361 os << counts(i);
362 if (static_cast<LO> (i + 1) < numRowsToPack) {
363 os << ",";
364 }
365 }
366 os << "]" << std::endl;
367 }
368 else {
369 os << "outputOffsets(" << (numRowsToPack-1) << ") = "
370 << outputOffsets(numRowsToPack-1) << "." << std::endl;
371 }
372 }
374 return {false, errStr};
375 }
376#endif // HAVE_TPETRA_DEBUG
377
378 // Get last entry of outputOffsets, which is the sum of the entries
379 // of counts. Don't assume UVM.
380 using Tpetra::Details::getEntryOnHost;
381 return static_cast<count_type> (getEntryOnHost (outputOffsets,
383 }
384}
385
401template<class ST, class ColumnMap, class BufferDeviceType>
403Kokkos::pair<int, size_t>
405 const Kokkos::View<char*, BufferDeviceType>& exports,
409 const size_t offset,
410 const size_t num_ent,
411 const size_t num_bytes_per_value,
412 const bool pack_pids)
413{
414 using Kokkos::subview;
415 using LO = typename ColumnMap::local_ordinal_type;
416 using GO = typename ColumnMap::global_ordinal_type;
417 using return_type = Kokkos::pair<int, size_t>;
418
419 if (num_ent == 0) {
420 // Empty rows always take zero bytes, to ensure sparsity.
421 return return_type (0, 0);
422 }
423
424 const LO num_ent_LO = static_cast<LO> (num_ent); // packValueCount wants this
425 const size_t num_ent_beg = offset;
427
428 const size_t gids_beg = num_ent_beg + num_ent_len;
429 const size_t gids_len = num_ent * PackTraits<GO>::packValueCount (GO (0));
430
431 const size_t pids_beg = gids_beg + gids_len;
432 const size_t pids_len = pack_pids ?
434 static_cast<size_t> (0);
435
436 const size_t vals_beg = gids_beg + gids_len + pids_len;
437 const size_t vals_len = num_ent * num_bytes_per_value;
438
439 char* const num_ent_out = exports.data () + num_ent_beg;
440 char* const gids_out = exports.data () + gids_beg;
441 char* const pids_out = pack_pids ? exports.data () + pids_beg : NULL;
442 char* const vals_out = exports.data () + vals_beg;
443
444 size_t num_bytes_out = 0;
445 int error_code = 0;
447
448 {
449 // Copy column indices one at a time, so that we don't need
450 // temporary storage.
451 for (size_t k = 0; k < num_ent; ++k) {
452 const LO lid = lids_in[k];
453 const GO gid = col_map.getGlobalElement (lid);
455 }
456 // Copy PIDs one at a time, so that we don't need temporary storage.
457 if (pack_pids) {
458 for (size_t k = 0; k < num_ent; ++k) {
459 const LO lid = lids_in[k];
460 const int pid = pids_in[lid];
462 }
463 }
464 const auto p =
466 error_code += p.first;
467 num_bytes_out += p.second;
468 }
469
470 if (error_code != 0) {
471 return return_type (10, num_bytes_out);
472 }
473
474 const size_t expected_num_bytes =
477 return return_type (11, num_bytes_out);
478 }
479 return return_type (0, num_bytes_out);
480}
481
482template<class LocalMatrix, class LocalMap, class BufferDeviceType>
483struct PackCrsMatrixFunctor {
484 typedef LocalMatrix local_matrix_device_type;
485 typedef LocalMap local_map_type;
486 typedef typename local_matrix_device_type::value_type ST;
487 typedef typename local_map_type::local_ordinal_type LO;
488 typedef typename local_map_type::global_ordinal_type GO;
489 typedef typename local_matrix_device_type::device_type DT;
490
491 typedef Kokkos::View<const size_t*, BufferDeviceType>
492 num_packets_per_lid_view_type;
493 typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
494 typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
495 using export_lids_view_type = typename PackTraits<LO>::input_array_type;
496 using source_pids_view_type = typename PackTraits<int>::input_array_type;
497
498 typedef typename num_packets_per_lid_view_type::non_const_value_type
499 count_type;
500 typedef typename offsets_view_type::non_const_value_type
501 offset_type;
502 typedef Kokkos::pair<int, LO> value_type;
503
504 static_assert (std::is_same<LO, typename local_matrix_device_type::ordinal_type>::value,
505 "local_map_type::local_ordinal_type and "
506 "local_matrix_device_type::ordinal_type must be the same.");
507
508 local_matrix_device_type local_matrix;
509 local_map_type local_col_map;
510 exports_view_type exports;
511 num_packets_per_lid_view_type num_packets_per_lid;
512 export_lids_view_type export_lids;
513 source_pids_view_type source_pids;
514 offsets_view_type offsets;
515 size_t num_bytes_per_value;
516 bool pack_pids;
517
518 PackCrsMatrixFunctor (const local_matrix_device_type& local_matrix_in,
519 const local_map_type& local_col_map_in,
520 const exports_view_type& exports_in,
521 const num_packets_per_lid_view_type& num_packets_per_lid_in,
522 const export_lids_view_type& export_lids_in,
523 const source_pids_view_type& source_pids_in,
524 const offsets_view_type& offsets_in,
525 const size_t num_bytes_per_value_in,
526 const bool pack_pids_in) :
527 local_matrix (local_matrix_in),
528 local_col_map (local_col_map_in),
529 exports (exports_in),
530 num_packets_per_lid (num_packets_per_lid_in),
531 export_lids (export_lids_in),
532 source_pids (source_pids_in),
533 offsets (offsets_in),
534 num_bytes_per_value (num_bytes_per_value_in),
535 pack_pids (pack_pids_in)
536 {
537 const LO numRows = local_matrix_in.numRows ();
538 const LO rowMapDim =
539 static_cast<LO> (local_matrix.graph.row_map.extent (0));
541 (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
542 std::logic_error, "local_matrix.graph.row_map.extent(0) = "
543 << rowMapDim << " != numRows (= " << numRows << " ) + 1.");
544 }
545
546 KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
547 {
548 using ::Tpetra::Details::OrdinalTraits;
549 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
550 }
551
552 KOKKOS_INLINE_FUNCTION void
553 join (value_type& dst, const value_type& src) const
554 {
555 // `dst` should reflect the first (least) bad index and all other
556 // associated error codes and data, so prefer keeping it.
557 if (src.first != 0 && dst.first == 0) {
558 dst = src;
559 }
560 }
561
562 KOKKOS_INLINE_FUNCTION
563 void operator() (const LO i, value_type& dst) const
564 {
565 const size_t offset = offsets[i];
566 const LO export_lid = export_lids[i];
567 const size_t buf_size = exports.size();
568 const size_t num_bytes = num_packets_per_lid(i);
569 const size_t num_ent =
570 static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
571 - local_matrix.graph.row_map[export_lid]);
572
573 // Only pack this row's data if it has a nonzero number of
574 // entries. We can do this because receiving processes get the
575 // number of packets, and will know that zero packets means zero
576 // entries.
577 if (num_ent == 0) {
578 return;
579 }
580
581 if (export_lid >= local_matrix.numRows ()) {
582 if (dst.first != 0) { // keep only the first error
583 dst = Kokkos::make_pair (1, i); // invalid row
584 }
585 return;
586 }
587 else if ((offset > buf_size || offset + num_bytes > buf_size)) {
588 if (dst.first != 0) { // keep only the first error
589 dst = Kokkos::make_pair (2, i); // out of bounds
590 }
591 return;
592 }
593
594 // We can now pack this row
595
596 // Since the matrix is locally indexed on the calling process, we
597 // have to use its column Map (which it _must_ have in this case)
598 // to convert to global indices.
599 const auto row_beg = local_matrix.graph.row_map[export_lid];
600 const auto row_end = local_matrix.graph.row_map[export_lid + 1];
601 auto vals_in = subview (local_matrix.values,
602 Kokkos::make_pair (row_beg, row_end));
603 auto lids_in = subview (local_matrix.graph.entries,
604 Kokkos::make_pair (row_beg, row_end));
605 typedef local_map_type LMT;
606 typedef BufferDeviceType BDT;
607 auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
608 source_pids, vals_in, offset,
609 num_ent, num_bytes_per_value,
610 pack_pids);
611 int error_code_this_row = p.first;
612 size_t num_bytes_packed_this_row = p.second;
613 if (error_code_this_row != 0) {
614 if (dst.first != 0) { // keep only the first error
615 dst = Kokkos::make_pair (error_code_this_row, i); // bad pack
616 }
617 }
618 else if (num_bytes_packed_this_row != num_bytes) {
619 if (dst.first != 0) { // keep only the first error
620 dst = Kokkos::make_pair (3, i);
621 }
622 }
623 }
624};
625
633template<class LocalMatrix, class LocalMap, class BufferDeviceType>
634void
635do_pack (const LocalMatrix& local_matrix,
636 const LocalMap& local_map,
637 const Kokkos::View<char*, BufferDeviceType>& exports,
638 const typename PackTraits<size_t>::input_array_type& num_packets_per_lid,
640 const typename PackTraits<int>::input_array_type& source_pids,
641 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
642 const size_t num_bytes_per_value,
643 const bool pack_pids)
644{
645 using LO = typename LocalMap::local_ordinal_type;
646 using DT = typename LocalMatrix::device_type;
647 using range_type = Kokkos::RangePolicy<typename DT::execution_space, LO>;
648 const char prefix[] = "Tpetra::Details::do_pack: ";
649
650 if (export_lids.extent (0) != 0) {
652 (static_cast<size_t> (offsets.extent (0)) !=
653 static_cast<size_t> (export_lids.extent (0) + 1),
654 std::invalid_argument, prefix << "offsets.extent(0) = "
655 << offsets.extent (0) << " != export_lids.extent(0) (= "
656 << export_lids.extent (0) << ") + 1.");
658 (export_lids.extent (0) != num_packets_per_lid.extent (0),
659 std::invalid_argument, prefix << "export_lids.extent(0) = " <<
660 export_lids.extent (0) << " != num_packets_per_lid.extent(0) = "
661 << num_packets_per_lid.extent (0) << ".");
662 // If exports has nonzero length at this point, then the matrix
663 // has at least one entry to pack. Thus, if packing process
664 // ranks, we had better have at least one process rank to pack.
666 (pack_pids && exports.extent (0) != 0 &&
667 source_pids.extent (0) == 0, std::invalid_argument, prefix <<
668 "pack_pids is true, and exports.extent(0) = " <<
669 exports.extent (0) << " != 0, meaning that we need to pack at "
670 "least one matrix entry, but source_pids.extent(0) = 0.");
671 }
672
673 using pack_functor_type =
675 pack_functor_type f (local_matrix, local_map, exports,
676 num_packets_per_lid, export_lids,
677 source_pids, offsets, num_bytes_per_value,
678 pack_pids);
679
680 typename pack_functor_type::value_type result;
681 range_type range (0, num_packets_per_lid.extent (0));
682 Kokkos::parallel_reduce (range, f, result);
683
684 if (result.first != 0) {
685 // We can't deep_copy from AnonymousSpace Views, so we can't print
686 // out any information from them in case of error.
688 (true, std::runtime_error, prefix << "PackCrsMatrixFunctor "
689 "reported error code " << result.first << " for the first "
690 "bad row " << result.second << ".");
691 }
692}
693
723template<typename ST, typename LO, typename GO, typename NT, typename BufferDeviceType>
724void
726 Kokkos::DualView<char*, BufferDeviceType>& exports,
727 const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
728 const Kokkos::View<const LO*, BufferDeviceType>& export_lids,
729 const Kokkos::View<const int*, typename NT::device_type>& export_pids,
730 size_t& constant_num_packets,
731 const bool pack_pids)
732{
734 "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix",
735 "Import/Export"
736 );
737 using Kokkos::View;
738 typedef BufferDeviceType DT;
739 typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
740 const char prefix[] = "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
741 constexpr bool debug = false;
742
743 auto local_matrix = sourceMatrix.getLocalMatrixDevice ();
744 auto local_col_map = sourceMatrix.getColMap ()->getLocalMap ();
745
746 // Setting this to zero tells the caller to expect a possibly
747 // different ("nonconstant") number of packets per local index
748 // (i.e., a possibly different number of entries per row).
750
751 const size_t num_export_lids =
752 static_cast<size_t> (export_lids.extent (0));
755 static_cast<size_t> (num_packets_per_lid.extent (0)),
756 std::invalid_argument, prefix << "num_export_lids.extent(0) = "
757 << num_export_lids << " != num_packets_per_lid.extent(0) = "
758 << num_packets_per_lid.extent (0) << ".");
759 if (num_export_lids != 0) {
761 (num_packets_per_lid.data () == NULL, std::invalid_argument,
762 prefix << "num_export_lids = "<< num_export_lids << " != 0, but "
763 "num_packets_per_lid.data() = "
764 << num_packets_per_lid.data () << " == NULL.");
765 }
766
769 const size_t num_bytes_per_pid = PackTraits<int>::packValueCount (int (0));
770
771 size_t num_bytes_per_value = 0;
773 // Assume ST is default constructible; packValueCount wants an instance.
774 num_bytes_per_value = PackTraits<ST>::packValueCount (ST ());
775 }
776 else {
777 // Since the packed data come from the source matrix, we can use
778 // the source matrix to get the number of bytes per Scalar value
779 // stored in the matrix. This assumes that all Scalar values in
780 // the source matrix require the same number of bytes. If the
781 // source matrix has no entries on the calling process, then we
782 // hope that some process does have some idea how big a Scalar
783 // value is. Of course, if no processes have any entries, then no
784 // values should be packed (though this does assume that in our
785 // packing scheme, rows with zero entries take zero bytes).
786 size_t num_bytes_per_value_l = 0;
787 if (local_matrix.values.extent(0) > 0) {
788 const ST& val = local_matrix.values(0);
790 }
791 using Teuchos::reduceAll;
792 reduceAll<int, size_t> (* (sourceMatrix.getComm ()),
793 Teuchos::REDUCE_MAX,
795 Teuchos::outArg (num_bytes_per_value));
796 }
797
798 if (num_export_lids == 0) {
799 exports = exports_view_type ("exports", 0);
800 return;
801 }
802
803 // Array of offsets into the pack buffer.
804 Kokkos::View<size_t*, DT> offsets ("offsets", num_export_lids + 1);
805
806 // Compute number of packets per LID (row to send), as well as
807 // corresponding offsets (the prefix sum of the packet counts).
808 const size_t count =
809 computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
810 local_matrix.graph.row_map, export_lids,
813 num_bytes_per_pid, num_bytes_per_value);
814
815 // Resize the output pack buffer if needed.
816 if (count > static_cast<size_t> (exports.extent (0))) {
817 exports = exports_view_type ("exports", count);
818 if (debug) {
819 std::ostringstream os;
820 os << "*** exports resized to " << count << std::endl;
821 std::cerr << os.str ();
822 }
823 }
824 if (debug) {
825 std::ostringstream os;
826 os << "*** count: " << count << ", exports.extent(0): "
827 << exports.extent (0) << std::endl;
828 std::cerr << os.str ();
829 }
830
831 // If exports has nonzero length at this point, then the matrix has
832 // at least one entry to pack. Thus, if packing process ranks, we
833 // had better have at least one process rank to pack.
835 (pack_pids && exports.extent (0) != 0 &&
836 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
837 "pack_pids is true, and exports.extent(0) = " <<
838 exports.extent (0) << " != 0, meaning that we need to pack at least "
839 "one matrix entry, but export_pids.extent(0) = 0.");
840
841 typedef typename std::decay<decltype (local_matrix)>::type
842 local_matrix_device_type;
843 typedef typename std::decay<decltype (local_col_map)>::type
844 local_map_type;
845
846 exports.modify_device ();
847 auto exports_d = exports.view_device ();
849 (local_matrix, local_col_map, exports_d, num_packets_per_lid,
850 export_lids, export_pids, offsets, num_bytes_per_value,
851 pack_pids);
852 // If we got this far, we succeeded.
853}
854
855} // namespace PackCrsMatrixImpl
856
857template<typename ST, typename LO, typename GO, typename NT>
858void
860 Teuchos::Array<char>& exports,
861 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
862 const Teuchos::ArrayView<const LO>& exportLIDs,
863 size_t& constantNumPackets)
864{
865
867 using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;
868 using host_exec_space = typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space;
869 using device_exec_space = typename device_type::execution_space;
870 using host_dev_type = Kokkos::Device<host_exec_space, Kokkos::HostSpace>;
871
872 // Convert all Teuchos::Array to Kokkos::View
873
874 // This is an output array, so we don't have to copy to device here.
875 // However, we'll have to remember to copy back to host when done.
876 Kokkos::View<size_t*, buffer_device_type> num_packets_per_lid_d =
877 create_mirror_view_from_raw_host_array (buffer_device_type (),
878 numPacketsPerLID.getRawPtr (),
879 numPacketsPerLID.size (), false,
880 "num_packets_per_lid");
881 // FIXME (mfh 05 Feb 2019) We should just pass the exportLIDs
882 // DualView through here, instead of recreating a device View from a
883 // host ArrayView that itself came from a DualView.
884 //
885 // This is an input array, so we have to copy to device here.
886 // However, we never need to copy it back to host.
887 Kokkos::View<const LO*, buffer_device_type> export_lids_d =
888 create_mirror_view_from_raw_host_array (buffer_device_type (),
889 exportLIDs.getRawPtr (),
890 exportLIDs.size (), true,
891 "export_lids");
892
893 Kokkos::View<int*, device_type> export_pids_d; // output arg
894 Kokkos::DualView<char*, buffer_device_type> exports_dv; // output arg
895 constexpr bool pack_pids = false;
896 PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
899
900 // The counts are an output of PackCrsMatrixImpl::packCrsMatrix, so we have to
901 // copy them back to host.
902 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
903 (numPacketsPerLID.getRawPtr (),
904 numPacketsPerLID.size ());
905 // DEEP_COPY REVIEW - DEVICE-TO-HOST
907
908 // FIXME (mfh 23 Aug 2017) If we're forced to use a DualView for
909 // exports_dv above, then we have two host copies for exports_h.
910
911 // The exports are an output of PackCrsMatrixImpl::packCrsMatrix, so we have
912 // to copy them back to host.
913 if (static_cast<size_t> (exports.size ()) !=
914 static_cast<size_t> (exports_dv.extent (0))) {
915 exports.resize (exports_dv.extent (0));
916 }
917 Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
918 exports.size ());
919 // DEEP_COPY REVIEW - DEVICE-TO-HOST
920 Kokkos::deep_copy (device_exec_space(), exports_h, exports_dv.d_view);
921}
922
923template<typename ST, typename LO, typename GO, typename NT>
924void
927 Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports,
928 const Kokkos::DualView<size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& numPacketsPerLID,
929 const Kokkos::DualView<const LO*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exportLIDs,
930 size_t& constantNumPackets)
931{
932 using device_type = typename CrsMatrix<ST, LO, GO, NT>::device_type;
933 using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;
934
935 // Create an empty array of PIDs, since the interface needs it.
936 Kokkos::View<int*, device_type> exportPIDs_d ("exportPIDs", 0);
937 constexpr bool pack_pids = false;
938
939 // Write-only device access
940 auto numPacketsPerLID_nc = numPacketsPerLID; // const DV& -> DV
941 numPacketsPerLID_nc.clear_sync_state ();
942 numPacketsPerLID_nc.modify_device ();
943 auto numPacketsPerLID_d = numPacketsPerLID.view_device ();
944
945 // Read-only device access
946 TEUCHOS_ASSERT( ! exportLIDs.need_sync_device () );
947 auto exportLIDs_d = exportLIDs.view_device ();
948
950 "Tpetra::Details::packCrsMatrixNew",
951 "Import/Export"
952 );
953 PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
955 exportPIDs_d, constantNumPackets, pack_pids);
956}
957
958template<typename ST, typename LO, typename GO, typename NT>
959void
961 Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports_dv,
962 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
963 const Teuchos::ArrayView<const LO>& exportLIDs,
964 const Teuchos::ArrayView<const int>& sourcePIDs,
965 size_t& constantNumPackets)
966{
967 typedef typename CrsMatrix<ST,LO,GO,NT>::local_matrix_device_type local_matrix_device_type;
968 typedef typename DistObject<char, LO, GO, NT>::buffer_device_type buffer_device_type;
969 typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
970 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
971
972 typename local_matrix_device_type::device_type outputDevice;
973 typedef typename NT::execution_space execution_space;
974
975
976 const bool verbose = ::Tpetra::Details::Behavior::verbose ();
977 std::unique_ptr<std::string> prefix;
978 if (verbose) {
979 const int myRank = [&] () {
980 auto map = sourceMatrix.getMap ();
981 if (map.get () == nullptr) {
982 return -1;
983 }
984 auto comm = map->getComm ();
985 if (comm.get () == nullptr) {
986 return -2;
987 }
988 return comm->getRank ();
989 } ();
990 std::ostringstream os;
991 os << "Proc " << myRank << ": packCrsMatrixWithOwningPIDs: ";
992 prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
993
994 std::ostringstream os2;
995 os2 << *prefix << "start" << std::endl;
996 std::cerr << os2.str ();
997 }
998
999 // Convert all Teuchos::Array to Kokkos::View
1000
1001 // This is an output array, so we don't have to copy to device here.
1002 // However, we'll have to remember to copy back to host when done.
1004 create_mirror_view_from_raw_host_array (buffer_device_type (),
1005 numPacketsPerLID.getRawPtr (),
1006 numPacketsPerLID.size (), false,
1007 "num_packets_per_lid");
1008
1009 // This is an input array, so we have to copy to device here.
1010 // However, we never need to copy it back to host.
1011 auto export_lids_d =
1012 create_mirror_view_from_raw_host_array (buffer_device_type (),
1013 exportLIDs.getRawPtr (),
1014 exportLIDs.size (), true,
1015 "export_lids");
1016 // This is an input array, so we have to copy to device here.
1017 // However, we never need to copy it back to host.
1018 auto export_pids_d =
1020 sourcePIDs.getRawPtr (),
1021 sourcePIDs.size (), true,
1022 "export_pids");
1023 constexpr bool pack_pids = true;
1024 try {
1025 PackCrsMatrixImpl::packCrsMatrix
1027 export_pids_d, constantNumPackets, pack_pids);
1028 }
1029 catch (std::exception& e) {
1030 if (verbose) {
1031 std::ostringstream os;
1032 os << *prefix << "PackCrsMatrixImpl::packCrsMatrix threw: "
1033 << e.what () << std::endl;
1034 std::cerr << os.str ();
1035 }
1036 throw;
1037 }
1038 catch (...) {
1039 if (verbose) {
1040 std::ostringstream os;
1041 os << *prefix << "PackCrsMatrixImpl::packCrsMatrix threw an exception "
1042 "not a subclass of std::exception" << std::endl;
1043 std::cerr << os.str ();
1044 }
1045 throw;
1046 }
1047
1048 if (numPacketsPerLID.size () != 0) {
1049 try {
1050 // The counts are an output of PackCrsMatrixImpl::packCrsMatrix,
1051 // so we have to copy them back to host.
1052 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
1053 (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
1054 // DEEP_COPY REVIEW - DEVICE-TO-HOST
1055 Kokkos::deep_copy (execution_space(), num_packets_per_lid_h, num_packets_per_lid_d);
1056 }
1057 catch (std::exception& e) {
1058 if (verbose) {
1059 std::ostringstream os;
1060 os << *prefix << "Kokkos::deep_copy threw: " << e.what () << std::endl;
1061 std::cerr << os.str ();
1062 }
1063 throw;
1064 }
1065 catch (...) {
1066 if (verbose) {
1067 std::ostringstream os;
1068 os << *prefix << "Kokkos::deep_copy threw an exception not a subclass "
1069 "of std::exception" << std::endl;
1070 std::cerr << os.str ();
1071 }
1072 throw;
1073 }
1074 }
1075
1076 if (verbose) {
1077 std::ostringstream os;
1078 os << *prefix << "done" << std::endl;
1079 std::cerr << os.str ();
1080 }
1081}
1082
1083} // namespace Details
1084} // namespace Tpetra
1085
1086#define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
1087 template void \
1088 Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1089 Teuchos::Array<char>&, \
1090 const Teuchos::ArrayView<size_t>&, \
1091 const Teuchos::ArrayView<const LO>&, \
1092 size_t&); \
1093 template void \
1094 Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1095 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1096 const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1097 const Kokkos::DualView<const LO*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1098 size_t&); \
1099 template void \
1100 Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1101 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1102 const Teuchos::ArrayView<size_t>&, \
1103 const Teuchos::ArrayView<const LO>&, \
1104 const Teuchos::ArrayView<const int>&, \
1105 size_t&);
1106
1107#endif // TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
Declaration of the Tpetra::CrsMatrix class.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Declaration and definition of Tpetra::Details::getEntryOnHost.
KOKKOS_FUNCTION Kokkos::pair< int, size_t > packCrsMatrixRow(const ColumnMap &col_map, const Kokkos::View< char *, BufferDeviceType > &exports, const typename PackTraits< typename ColumnMap::local_ordinal_type >::input_array_type &lids_in, const typename PackTraits< int >::input_array_type &pids_in, const typename PackTraits< ST >::input_array_type &vals_in, const size_t offset, const size_t num_ent, const size_t num_bytes_per_value, const bool pack_pids)
Packs a single row of the CrsMatrix.
Struct that holds views of the contents of a CrsMatrix.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
typename Node::device_type device_type
The Kokkos device type.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool verbose()
Whether Tpetra is in verbose mode.
"Local" part of Map suitable for Kokkos kernels.
LocalOrdinal local_ordinal_type
The type of local indices.
GlobalOrdinal global_ordinal_type
The type of global indices.
Compute the number of packets and offsets for the pack procedure.
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
Implementation details of Tpetra.
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Traits class for packing / unpacking data of type T.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of byte...
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.