Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_FixedHashTable_def.hpp
1/*
2// @HEADER
3// ***********************************************************************
4//
5// Tpetra: Templated Linear Algebra Services Package
6// Copyright (2008) Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ************************************************************************
41// @HEADER
42*/
43
44#ifndef TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
45#define TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
46
47#include "Tpetra_Details_FixedHashTable_decl.hpp"
49#ifdef TPETRA_USE_MURMUR_HASH
50# include "Kokkos_Functional.hpp" // hash function used by Kokkos::UnorderedMap
51#endif // TPETRA_USE_MURMUR_HASH
52#include "Kokkos_ArithTraits.hpp"
53#include "Teuchos_TypeNameTraits.hpp"
55#include <type_traits>
56
57namespace Tpetra {
58namespace Details {
59//
60// This namespace stores utility functions and Kokkos
61// functors for use in FixedHashTable construction.
62//
63namespace FHT {
64
65
66// Is it worth actually using building the FixedHashTable using
67// parallel threads, instead of just counting in a sequential loop?
68//
69// The parallel version of FixedHashTable construction isn't just a
70// parallelization of the sequential loops. It incurs additional
71// overheads. For example, the CountBuckets kernel uses atomic update
72// instructions to count the number of "buckets" per offsets array
73// (ptr) entry. Atomic updates have overhead, even if only one thread
74// issues them. The Kokkos kernels are still correct in that case,
75// but I would rather not incur overhead then. It might make sense to
76// set the minimum number of threads to something greater than 1, but
77// we would need experiments to find out.
78template<class ExecSpace>
79bool worthBuildingFixedHashTableInParallel () {
80 return ExecSpace().concurrency() > 1;
81}
82
83//
84// Functors for FixedHashTable initialization
85//
86// NOTE (mfh 23 May 2015): Once we can use lambdas with CUDA, we
87// should consider replacing all of these functors with in-line
88// lambdas. The only issue is that we would need to bake the
89// execution space into the policy, since the default execution space
90// might differ from the one Tpetra wants to use.
91
101template<class CountsViewType,
102 class KeysViewType,
103 class SizeType = typename KeysViewType::size_type>
105public:
108 typedef typename CountsViewType::execution_space execution_space;
109 typedef typename CountsViewType::memory_space memory_space;
110 typedef SizeType size_type;
111 typedef typename keys_view_type::non_const_value_type key_type;
112 // mfh 21 May 2015: Having a device_type typedef in the functor
113 // along with an execution_space typedef causes compilation issues.
114 // This is because one of Kokkos' partial specializations picks up
115 // on the device_type typedef, and another picks up on the
116 // execution_space typedef. The former is a legacy of a previous
117 // design iteration of Kokkos, which did not separate memory and
118 // execution spaces.
120
127 const keys_view_type& keys,
128 const size_type size) :
129 counts_ (counts),
130 keys_ (keys),
131 size_ (size)
132 {}
133
140 {
143 const hash_value_type hashVal = hash_type::hashFunc (keys_[i], size_);
144 Kokkos::atomic_increment (&counts_[hashVal]);
145 }
146
147 using value_type = Kokkos::pair<int, key_type>;
148
153 operator () (const size_type& i, value_type& dst) const
154 {
156
157 const key_type keyVal = keys_[i];
159 if (hashVal < hash_value_type (0) ||
160 hashVal >= hash_value_type (counts_.extent (0))) {
161 dst.first = 1;
162 dst.second = keyVal;
163 }
164 else {
165 Kokkos::atomic_increment (&counts_[hashVal]);
167 }
168
170 KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
171 {
172 dst.first = 0;
173 dst.second = key_type (0);
174 }
175
177 join (value_type& dst,
178 const value_type& src) const
179 {
180 const bool good = dst.first == 0 || src.first == 0;
181 dst.first = good ? 0 : dst.first;
182 // leave dst.second as it is, to get the "first" key
183 }
184
185private:
187 counts_view_type counts_;
189 keys_view_type keys_;
191 size_type size_;
192};
193
204template<class KeyType>
207 FillPairsResult () :
208 minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
209 // min() for a floating-point type returns the minimum _positive_
210 // normalized value. This is different than for integer types.
211 // lowest() is new in C++11 and returns the least value, always
212 // negative for signed finite types.
213 //
214 // mfh 23 May 2015: I have heard reports that
215 // std::numeric_limits<int>::lowest() does not exist with the
216 // Intel compiler. I'm not sure if the users in question actually
217 // enabled C++11. However, it's easy enough to work around this
218 // issue. The standard floating-point types are signed and have a
219 // sign bit, so lowest() is just -max(). For integer types, we
220 // can use min() instead.
221 maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
222 ::Kokkos::Details::ArithTraits<KeyType>::min () :
224 success_ (true)
225 {
226 static_assert (std::is_arithmetic<KeyType>::value, "FillPairsResult: "
227 "KeyType must be some kind of number type.");
228 }
229
231 FillPairsResult (const KeyType& initMinKey,
232 const KeyType& initMaxKey) :
235 success_ (true)
236 {
237 static_assert (std::is_arithmetic<KeyType>::value, "FillPairsResult: "
238 "KeyType must be some kind of number type.");
239 }
240
243 bool success_;
244};
245
275template<class PairsViewType,
276 class KeysViewType,
277 class CountsViewType,
278 class SizeType = typename KeysViewType::size_type>
280public:
281 typedef typename CountsViewType::non_const_type counts_view_type;
282 typedef typename counts_view_type::const_type offsets_view_type;
283
285 typedef typename KeysViewType::const_type keys_view_type;
286 typedef typename offsets_view_type::execution_space execution_space;
287 typedef typename offsets_view_type::memory_space memory_space;
288 typedef SizeType size_type;
289
290 typedef typename keys_view_type::non_const_value_type key_type;
291 typedef typename pairs_view_type::non_const_value_type pair_type;
292
294
295 // mfh 23 May 2015: Having a device_type typedef in the functor
296 // along with an execution_space typedef causes compilation issues.
297 // This is because one of Kokkos' partial specializations picks up
298 // on the device_type typedef, and another picks up on the
299 // execution_space typedef. The former is a legacy of a previous
300 // design iteration of Kokkos, which did not separate memory and
301 // execution spaces.
303
315 const counts_view_type& counts,
316 const offsets_view_type& ptr,
317 const keys_view_type& keys,
318 const typename pair_type::second_type startingValue) :
319 pairs_ (pairs),
320 counts_ (counts),
321 ptr_ (ptr),
322 keys_ (keys),
323 size_ (counts.extent (0)),
324 startingValue_ (startingValue),
325 initMinKey_ (::Kokkos::Details::ArithTraits<key_type>::max ()),
326 initMaxKey_ (::Kokkos::Details::ArithTraits<key_type>::is_integer ?
327 ::Kokkos::Details::ArithTraits<key_type>::min () :
328 -::Kokkos::Details::ArithTraits<key_type>::max ())
329 {}
330
350 const counts_view_type& counts,
351 const offsets_view_type& ptr,
352 const keys_view_type& keys,
353 const typename pair_type::second_type startingValue,
354 const key_type initMinKey,
355 const key_type initMaxKey) :
356 pairs_ (pairs),
357 counts_ (counts),
358 ptr_ (ptr),
359 keys_ (keys),
360 size_ (counts.extent (0)),
361 startingValue_ (startingValue),
362 initMinKey_ (initMinKey),
363 initMaxKey_ (initMaxKey)
364 {}
365
368 {
369 dst.minKey_ = initMinKey_;
370 dst.maxKey_ = initMaxKey_;
371 dst.success_ = true;
372 }
373
375 join (value_type& dst,
376 const value_type& src) const
377 {
378 if (src.maxKey_ > dst.maxKey_) {
379 dst.maxKey_ = src.maxKey_;
380 }
381 if (src.minKey_ < dst.minKey_) {
382 dst.minKey_ = src.minKey_;
383 }
384 dst.success_ = dst.success_ && src.success_;
385 }
386
392 operator () (const size_type& i, value_type& dst) const
393 {
395 typedef typename offsets_view_type::non_const_value_type offset_type;
396 typedef typename pair_type::second_type val_type;
397 typedef typename std::remove_reference< decltype( counts_[0] ) >::type atomic_incr_type;
398
399 const key_type key = keys_[i];
400 if (key > dst.maxKey_) {
401 dst.maxKey_ = key;
402 }
403 if (key < dst.minKey_) {
404 dst.minKey_ = key;
405 }
406 const val_type theVal = startingValue_ + static_cast<val_type> (i);
408
409 // Return the old count; decrement afterwards.
410 const offset_type count = Kokkos::atomic_fetch_add (&counts_[hashVal], atomic_incr_type(-1));
411 if (count == 0) {
412 dst.success_ = false; // FAILURE!
413 }
414 else {
415 const offset_type curPos = ptr_[hashVal+1] - count;
416
417 pairs_[curPos].first = key;
418 pairs_[curPos].second = theVal;
419 }
420 }
421
422private:
423 pairs_view_type pairs_;
424 counts_view_type counts_;
425 offsets_view_type ptr_;
426 keys_view_type keys_;
427 size_type size_;
428 typename pair_type::second_type startingValue_;
430 key_type initMinKey_;
432 key_type initMaxKey_;
433};
434
458template<class OffsetsViewType,
459 class PairsViewType,
460 class SizeType = typename OffsetsViewType::size_type>
462public:
463 typedef typename OffsetsViewType::const_type offsets_view_type;
464 typedef typename PairsViewType::const_type pairs_view_type;
465 typedef typename offsets_view_type::execution_space execution_space;
466 typedef typename offsets_view_type::memory_space memory_space;
467 typedef SizeType size_type;
468
469 // The result of the check is whether the table has one or more duplicates.
470 typedef int value_type;
471
476 CheckForDuplicateKeys (const pairs_view_type& pairs,
477 const offsets_view_type& ptr) :
478 pairs_ (pairs),
479 ptr_ (ptr),
480 size_ (ptr_.extent (0) == 0 ?
481 size_type (0) :
482 ptr_.extent (0) - 1)
483 {}
484
487 {
488 dst = 0;
489 }
490
494 const value_type& src) const
495 {
496 dst = dst + src > 0?1:0;
497 }
498
501 operator () (const size_type& i, value_type& dst) const
502 {
503 typedef typename offsets_view_type::non_const_value_type offset_type;
504 typedef typename pairs_view_type::non_const_value_type pair_type;
505 typedef typename pair_type::first_type key_type;
506
507 if (dst>0) {
508 return; // we've already found duplicate keys elsewhere
509 }
510 else {
511 const offset_type beg = ptr_[i];
512 const offset_type end = ptr_[i+1];
513 bool foundDuplicateKey = false;
514 // This is an ~ n^2 algorithm in the worst case, where n is the
515 // max number of keys that hash to the same bucket. However, if
516 // the hash function is reasonable, n should be much less than
517 // the total number of keys.
518 for (offset_type j = beg + 1; j < end; ++j) {
519 const key_type curKey = pairs_[j].first;
520 for (offset_type k = beg; k < j; ++k) {
521 if (pairs_[k].first == curKey) {
522 foundDuplicateKey = true;
523 break;
524 }
525 }
526 }
527 dst = (dst>0) || foundDuplicateKey?1:0;
528 }
529 }
530
531private:
532 pairs_view_type pairs_;
533 offsets_view_type ptr_;
534 size_type size_;
535};
536
537} // namespace FHT
538
539//
540// Here begins the actual implementation of FixedHashTable.
541//
542
543template<class KeyType, class ValueType, class DeviceType>
546 minVal_ (0),
547 maxVal_ (keys.size () == 0 ?
549 static_cast<ValueType> (keys.size () - 1)),
550 checkedForDuplicateKeys_ (false)
551{
552 const ValueType startingValue = static_cast<ValueType> (0);
553 const KeyType initMinKey = this->minKey_;
554 const KeyType initMaxKey = this->maxKey_;
556 initMinKey, initMinKey, false);
557}
558
559template<class KeyType, class ValueType, class DeviceType>
561FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys) :
562 minVal_ (0),
563 maxVal_ (keys.size () == 0 ?
565 static_cast<ValueType> (keys.size () - 1)),
566 checkedForDuplicateKeys_ (false)
567{
568 typedef typename keys_type::non_const_type nonconst_keys_type;
569
570 // mfh 01 May 2015: I don't trust that
571 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
572 // so I ensure this manually.
573 const ValueType startingValue = static_cast<ValueType> (0);
574 host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
575 keys.size ());
576 using Kokkos::ViewAllocateWithoutInitializing;
578 keys_k.extent (0));
579 // DEEP_COPY REVIEW - NOT TESTED
580 Kokkos::deep_copy (keys_d, keys_k);
581 const KeyType initMinKey = this->minKey_;
582 const KeyType initMaxKey = this->maxKey_;
584 initMinKey, initMinKey, false);
585}
586
587template<class KeyType, class ValueType, class DeviceType>
589FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
590 const ValueType startingValue) :
591 minVal_ (startingValue),
592 maxVal_ (keys.size () == 0 ?
595 checkedForDuplicateKeys_ (false)
596{
597 typedef typename keys_type::non_const_type nonconst_keys_type;
598
599 // mfh 01 May 2015: I don't trust that
600 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
601 // so I ensure this manually.
602 host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
603 keys.size ());
604 using Kokkos::ViewAllocateWithoutInitializing;
606 keys_k.extent (0));
607 // DEEP_COPY REVIEW - HOST-TO_DEVICE
608 Kokkos::deep_copy (execution_space(), keys_d, keys_k);
609
610 const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
611 // min() for a floating-point type returns the minimum _positive_
612 // normalized value. This is different than for integer types.
613 // lowest() is new in C++11 and returns the least value, always
614 // negative for signed finite types.
615 //
616 // mfh 23 May 2015: I have heard reports that
617 // std::numeric_limits<int>::lowest() does not exist with the Intel
618 // compiler. I'm not sure if the users in question actually enabled
619 // C++11. However, it's easy enough to work around this issue. The
620 // standard floating-point types are signed and have a sign bit, so
621 // lowest() is just -max(). For integer types, we can use min()
622 // instead.
623 const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
624 ::Kokkos::Details::ArithTraits<KeyType>::min () :
625 -::Kokkos::Details::ArithTraits<KeyType>::max ();
627 initMinKey, initMinKey, false);
628
629}
630
631template<class KeyType, class ValueType, class DeviceType>
636 const ValueType startingValue) :
637 minVal_ (startingValue),
638 maxVal_ (keys.size () == 0 ?
641 firstContigKey_ (firstContigKey),
642 lastContigKey_ (lastContigKey),
643 checkedForDuplicateKeys_ (false)
644{
645 const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
646 // min() for a floating-point type returns the minimum _positive_
647 // normalized value. This is different than for integer types.
648 // lowest() is new in C++11 and returns the least value, always
649 // negative for signed finite types.
650 //
651 // mfh 23 May 2015: I have heard reports that
652 // std::numeric_limits<int>::lowest() does not exist with the Intel
653 // compiler. I'm not sure if the users in question actually enabled
654 // C++11. However, it's easy enough to work around this issue. The
655 // standard floating-point types are signed and have a sign bit, so
656 // lowest() is just -max(). For integer types, we can use min()
657 // instead.
658 const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
659 ::Kokkos::Details::ArithTraits<KeyType>::min () :
660 -::Kokkos::Details::ArithTraits<KeyType>::max ();
663}
664
665template<class KeyType, class ValueType, class DeviceType>
667FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
670 const ValueType startingValue) :
671 minVal_ (startingValue),
672 maxVal_ (keys.size () == 0 ?
675 firstContigKey_ (firstContigKey),
676 lastContigKey_ (lastContigKey),
677 checkedForDuplicateKeys_ (false)
678{
679 typedef typename keys_type::non_const_type nonconst_keys_type;
680
681 // mfh 01 May 2015: I don't trust that
682 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
683 // so I ensure this manually.
684 host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
685 keys.size ());
686 using Kokkos::ViewAllocateWithoutInitializing;
688 keys_k.extent (0));
689 // DEEP_COPY REVIEW - NOT TESTED
690 Kokkos::deep_copy (keys_d, keys_k);
691
692 const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
693 // min() for a floating-point type returns the minimum _positive_
694 // normalized value. This is different than for integer types.
695 // lowest() is new in C++11 and returns the least value, always
696 // negative for signed finite types.
697 //
698 // mfh 23 May 2015: I have heard reports that
699 // std::numeric_limits<int>::lowest() does not exist with the Intel
700 // compiler. I'm not sure if the users in question actually enabled
701 // C++11. However, it's easy enough to work around this issue. The
702 // standard floating-point types are signed and have a sign bit, so
703 // lowest() is just -max(). For integer types, we can use min()
704 // instead.
705 const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
706 ::Kokkos::Details::ArithTraits<KeyType>::min () :
707 -::Kokkos::Details::ArithTraits<KeyType>::max ();
710}
711
712template<class KeyType, class ValueType, class DeviceType>
715 const ValueType startingValue) :
716 minVal_ (startingValue),
717 maxVal_ (keys.size () == 0 ?
720 checkedForDuplicateKeys_ (false)
721{
722 const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
723 // min() for a floating-point type returns the minimum _positive_
724 // normalized value. This is different than for integer types.
725 // lowest() is new in C++11 and returns the least value, always
726 // negative for signed finite types.
727 //
728 // mfh 23 May 2015: I have heard reports that
729 // std::numeric_limits<int>::lowest() does not exist with the Intel
730 // compiler. I'm not sure if the users in question actually enabled
731 // C++11. However, it's easy enough to work around this issue. The
732 // standard floating-point types are signed and have a sign bit, so
733 // lowest() is just -max(). For integer types, we can use min()
734 // instead.
735 const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
736 ::Kokkos::Details::ArithTraits<KeyType>::min () :
737 -::Kokkos::Details::ArithTraits<KeyType>::max ();
739 initMinKey, initMinKey, false);
740}
741
742template<class KeyType, class ValueType, class DeviceType>
744FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
745 const Teuchos::ArrayView<const ValueType>& vals) :
746 contiguousValues_ (false),
747 checkedForDuplicateKeys_ (false)
748{
749 // mfh 01 May 2015: I don't trust that
750 // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
751 // so I ensure this manually.
752 host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
753 keys.size ());
754 host_input_vals_type vals_k (vals.size () == 0 ? NULL : vals.getRawPtr (),
755 vals.size ());
756 const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
757 // min() for a floating-point type returns the minimum _positive_
758 // normalized value. This is different than for integer types.
759 // lowest() is new in C++11 and returns the least value, always
760 // negative for signed finite types.
761 //
762 // mfh 23 May 2015: I have heard reports that
763 // std::numeric_limits<int>::lowest() does not exist with the Intel
764 // compiler. I'm not sure if the users in question actually enabled
765 // C++11. However, it's easy enough to work around this issue. The
766 // standard floating-point types are signed and have a sign bit, so
767 // lowest() is just -max(). For integer types, we can use min()
768 // instead.
769 const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
770 ::Kokkos::Details::ArithTraits<KeyType>::min () :
771 -::Kokkos::Details::ArithTraits<KeyType>::max ();
772 this->init (keys_k, vals_k, initMinKey, initMaxKey);
773}
774
775template<class KeyType, class ValueType, class DeviceType>
776void
778init (const keys_type& keys,
784 const bool computeInitContigKeys)
785{
786 using Kokkos::subview;
787 using Kokkos::ViewAllocateWithoutInitializing;
788 using Teuchos::TypeNameTraits;
789 typedef typename std::decay<decltype (keys.extent (0)) >::type size_type;
790 const char prefix[] = "Tpetra::Details::FixedHashTable: ";
791
792 const offset_type numKeys = static_cast<offset_type> (keys.extent (0));
793 {
794 const offset_type theMaxVal = ::Kokkos::Details::ArithTraits<offset_type>::max ();
795 const size_type maxValST = static_cast<size_type> (theMaxVal);
797 (keys.extent (0) > maxValST, std::invalid_argument, prefix << "The "
798 "number of keys " << keys.extent (0) << " does not fit in "
799 "offset_type = " << TypeNameTraits<offset_type>::name () << ", whose "
800 "max value is " << theMaxVal << ". This means that it is not possible to "
801 "use this constructor.");
802 }
804 (static_cast<unsigned long long> (numKeys) >
805 static_cast<unsigned long long> (::Kokkos::Details::ArithTraits<ValueType>::max ()),
806 std::invalid_argument, "Tpetra::Details::FixedHashTable: The number of "
807 "keys " << numKeys << " is greater than the maximum representable "
808 "ValueType value " << ::Kokkos::Details::ArithTraits<ValueType>::max () << ". "
809 "This means that it is not possible to use this constructor.");
811 (numKeys > static_cast<offset_type> (INT_MAX), std::logic_error, prefix <<
812 "This class currently only works when the number of keys is <= INT_MAX = "
813 << INT_MAX << ". If this is a problem for you, please talk to the Tpetra "
814 "developers.");
815
816 const bool buildInParallel =
817 FHT::worthBuildingFixedHashTableInParallel<execution_space> ();
818 const bool debug = ::Tpetra::Details::Behavior::debug ();
819
820 // NOTE (mfh 14 May 2015) This method currently assumes UVM. We
821 // could change that by setting up ptr and val as Kokkos::DualView
822 // instances. If we do that, since we are filling on host for now,
823 // we want to make sure that we only zero-fill ptr on host
824 // initially, and that we don't fill val at all. Once we finish
825 // Kokkos-izing all the set-up kernels, we won't need DualView for
826 // either ptr or val.
827
829 // Find the first and last initial contiguous keys. If we find a
830 // long sequence of initial contiguous keys, we can save space by
831 // not storing them explicitly as pairs in the hash table.
832 //
833 // NOTE (mfh 01 Jun 2015) Doing this in parallel requires a scan
834 // ("min index such that the difference between the current key and
835 // the next != 1"), which takes multiple passes over the data. We
836 // could fuse it with CountBuckets (only update counts on 'final'
837 // pass). However, we're really just moving this sequential search
838 // out of Map's constructor here, so there is no loss in doing it
839 // sequentially for now. Later, we can work on parallelization.
840 if (numKeys > 0) {
841 // FIXME: make it a parallel kernel with no host copy
842 auto keys_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
843 keys);
844 firstContigKey_ = keys_h[0];
845 // Start with one plus, then decrement at the end. That lets us do
846 // only one addition per loop iteration, rather than two (if we test
847 // against lastContigKey + 1 and then increment lastContigKey).
848 lastContigKey_ = firstContigKey_ + 1;
849
850 // We will only store keys in the table that are not part of the
851 // initial contiguous sequence. It's possible for the initial
852 // contiguous sequence to be trivial, which for a nonzero number of
853 // keys means that the "sequence" has length 1.
854 for (offset_type k = 1; k < numKeys; ++k) {
855 if (lastContigKey_ != keys_h[k]) {
856 break;
857 }
858 ++lastContigKey_;
859 }
860 --lastContigKey_;
861 }
862 }
863 else {
864 firstContigKey_ = firstContigKey;
865 lastContigKey_ = lastContigKey;
866 }
867
868 offset_type startIndex;
869 if (numKeys > 0) {
870 initMinKey = std::min (initMinKey, firstContigKey_);
871 initMaxKey = std::max (initMaxKey, lastContigKey_);
872 startIndex = static_cast<offset_type> (lastContigKey_ - firstContigKey_);
873 } else {
874 startIndex = 0;
875 }
876
877 const offset_type theNumKeys = numKeys - startIndex;
878 const offset_type size = hash_type::getRecommendedSize (theNumKeys);
879#ifdef HAVE_TPETRA_DEBUG
880 TEUCHOS_TEST_FOR_EXCEPTION(
881 size == 0 && numKeys != 0, std::logic_error,
882 "Tpetra::Details::FixedHashTable constructor: "
883 "getRecommendedSize(" << numKeys << ") returned zero, "
884 "even though the number of keys " << numKeys << " is nonzero. "
885 "Please report this bug to the Tpetra developers.");
886#endif // HAVE_TPETRA_DEBUG
887 keys_type theKeys =
888 subview (keys, std::pair<offset_type, offset_type> (startIndex, numKeys));
889
890 // FIXME (mfh 28 Mar 2016) For some reason, we don't seem to need a
891 // fence here, but we do before other allocations.
892
893 // The array of counts must be separate from the array of offsets,
894 // in order for parallel_scan to work correctly.
895 typedef typename ptr_type::non_const_type counts_type;
896 counts_type counts ("Tpetra::FixedHashTable::counts", size);
897
898 //
899 // Count the number of "buckets" per offsets array (ptr) entry.
900 //
901
902 // Will only create the mirror for buildInParallel false - but then use it in two places
903 typename keys_type::HostMirror theKeysHost;
904
905 // The Kokkos kernel uses atomic update instructions to count the
906 // number of "buckets" per offsets array (ptr) entry. Atomic
907 // updates incur overhead, even in the sequential case. The Kokkos
908 // kernel is still correct in that case, but I would rather not
909 // incur overhead then.
910 if (buildInParallel) {
911 FHT::CountBuckets<counts_type, keys_type> functor (counts, theKeys, size);
912 using range_type = Kokkos::RangePolicy<execution_space, offset_type>;
913 const char kernelLabel[] = "Tpetra::Details::FixedHashTable CountBuckets";
914 if (debug) {
915 using key_type = typename keys_type::non_const_value_type;
916 Kokkos::pair<int, key_type> err;
917 Kokkos::parallel_reduce (kernelLabel, range_type (0, theNumKeys),
918 functor, err);
919 TEUCHOS_TEST_FOR_EXCEPTION
920 (err.first != 0, std::logic_error, "Tpetra::Details::FixedHashTable "
921 "constructor: CountBuckets found a key " << err.second << " that "
922 "results in an out-of-bounds hash value.");
923 }
924 else {
925 Kokkos::parallel_for (kernelLabel, range_type (0, theNumKeys), functor);
926 }
927 }
928 else {
929 Kokkos::HostSpace hostMemSpace;
930 theKeysHost = Kokkos::create_mirror_view(theKeys);
931 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
932 Kokkos::deep_copy(execution_space(), theKeysHost, theKeys);
933 auto countsHost = Kokkos::create_mirror_view (hostMemSpace, counts);
934
935 for (offset_type k = 0; k < theNumKeys; ++k) {
936 using key_type = typename keys_type::non_const_value_type;
937 const key_type key = theKeysHost[k];
938
939 using hash_value_type = typename hash_type::result_type;
940 const hash_value_type hashVal = hash_type::hashFunc (key, size);
941 TEUCHOS_TEST_FOR_EXCEPTION
942 (hashVal < hash_value_type (0) ||
943 hashVal >= hash_value_type (countsHost.extent (0)),
944 std::logic_error, "Tpetra::Details::FixedHashTable "
945 "constructor: Sequential CountBuckets found a key " << key
946 << " that results in an out-of-bounds hash value.");
947
948 ++countsHost[hashVal];
949 }
950 // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
951 Kokkos::deep_copy (execution_space(), counts, countsHost);
952 }
953
954 // KJ: This fence is not required for the 2-argument deep_copy which calls
955 // fence, but will be required if switched to the 3-argumemt deep_copy which
956 // passes a space. The 3-argument form does not fence.
957 execution_space().fence ();
958
959 // Kokkos::View fills with zeros by default.
960 typename ptr_type::non_const_type ptr ("Tpetra::FixedHashTable::ptr", size+1);
961
962 // Compute row offsets via prefix sum:
963 //
964 // ptr[i+1] = \sum_{j=0}^{i} counts[j].
965 //
966 // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
967 // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
968 // formula is ptr[i+1] += ptr[i].
969 //
970 // parallel_scan does not incur overhead with Kokkos::Serial, but
971 // with actual parallel execution spaces, it does require multiple
972 // passes over the data. Thus, it still makes sense to have a
973 // sequential fall-back.
974
975 using ::Tpetra::Details::computeOffsetsFromCounts;
976 if (buildInParallel) {
977 computeOffsetsFromCounts (ptr, counts);
978 }
979
980 if (! buildInParallel || debug) {
981 Kokkos::HostSpace hostMemSpace;
982 auto counts_h = Kokkos::create_mirror_view_and_copy (hostMemSpace, counts);
983 auto ptr_h = Kokkos::create_mirror_view (hostMemSpace, ptr);
984
985#ifdef KOKKOS_ENABLE_SERIAL
986 Kokkos::Serial hostExecSpace;
987#else
988 Kokkos::DefaultHostExecutionSpace hostExecSpace;
989#endif // KOKKOS_ENABLE_SERIAL
990
991 computeOffsetsFromCounts (hostExecSpace, ptr_h, counts_h);
992 // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
993 Kokkos::deep_copy (execution_space(), ptr, ptr_h);
994
995 if (debug) {
996 bool bad = false;
997 for (offset_type i = 0; i < size; ++i) {
998 if (ptr_h[i+1] != ptr_h[i] + counts_h[i]) {
999 bad = true;
1000 }
1001 }
1002 TEUCHOS_TEST_FOR_EXCEPTION
1003 (bad, std::logic_error, "Tpetra::Details::FixedHashTable "
1004 "constructor: computeOffsetsFromCounts gave an incorrect "
1005 "result.");
1006 }
1007 }
1008
1009 // KJ: computeOffsetsFromCounts calls parallel_scan which does not fence.
1010 // This fence is necessary as we need to make sure that the offset view
1011 // completes before the view is used in the next functor.
1012 execution_space().fence ();
1013
1014 // Allocate the array of (key,value) pairs. Don't fill it with
1015 // zeros, because we will fill it with actual data below.
1016 typedef typename val_type::non_const_type nonconst_val_type;
1017 nonconst_val_type val (ViewAllocateWithoutInitializing ("Tpetra::FixedHashTable::pairs"),
1018 theNumKeys);
1019
1020 // Fill in the hash table's "values" (the (key,value) pairs).
1021 typedef FHT::FillPairs<typename val_type::non_const_type, keys_type,
1022 typename ptr_type::non_const_type> functor_type;
1023 typename functor_type::value_type result (initMinKey, initMaxKey);
1024
1025 const ValueType newStartingValue = startingValue + static_cast<ValueType> (startIndex);
1026 if (buildInParallel) {
1027 functor_type functor (val, counts, ptr, theKeys, newStartingValue,
1028 initMinKey, initMaxKey);
1029 typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1030 Kokkos::parallel_reduce (range_type (0, theNumKeys), functor, result);
1031 }
1032 else {
1033 Kokkos::HostSpace hostMemSpace;
1034 auto counts_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, counts);
1035 auto ptr_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, ptr);
1036 auto val_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, val);
1037 for (offset_type k = 0; k < theNumKeys; ++k) {
1038 typedef typename hash_type::result_type hash_value_type;
1039 const KeyType key = theKeysHost[k];
1040 if (key > result.maxKey_) {
1041 result.maxKey_ = key;
1042 }
1043 if (key < result.minKey_) {
1044 result.minKey_ = key;
1045 }
1046 const ValueType theVal = newStartingValue + static_cast<ValueType> (k);
1047 const hash_value_type hashVal = hash_type::hashFunc (key, size);
1048
1049 // Return the old count; decrement afterwards.
1050 const offset_type count = counts_h[hashVal];
1051 --counts_h[hashVal];
1052 if (count == 0) {
1053 result.success_ = false; // FAILURE!
1054 break;
1055 }
1056 else {
1057 const offset_type curPos = ptr_h[hashVal+1] - count;
1058 val_h[curPos].first = key;
1059 val_h[curPos].second = theVal;
1060 }
1061 }
1062 Kokkos::deep_copy(counts, counts_h); // restore
1063 Kokkos::deep_copy(val, val_h); // restore
1064 }
1065
1066 // FIXME (mfh 01 Jun 2015) Temporarily commented out because of
1067 // reports of exceptions being thrown in Albany.
1068 //
1069 // TEUCHOS_TEST_FOR_EXCEPTION
1070 // (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1071 // "init: Filling the hash table failed! Please report this bug to the "
1072 // "Tpetra developers.");
1073 (void) result; // prevent build warning (set but unused variable)
1074
1075 // "Commit" the computed arrays and other computed quantities.
1076 ptr_ = ptr;
1077 val_ = val;
1078 minKey_ = result.minKey_;
1079 maxKey_ = result.maxKey_;
1080 // We've already set firstContigKey_ and lastContigKey_ above.
1081}
1082
1083
1084template<class KeyType, class ValueType, class DeviceType>
1085void
1086FixedHashTable<KeyType, ValueType, DeviceType>::
1087init (const host_input_keys_type& keys,
1088 const host_input_vals_type& vals,
1089 KeyType initMinKey,
1090 KeyType initMaxKey)
1091{
1092 const offset_type numKeys = static_cast<offset_type> (keys.extent (0));
1093 TEUCHOS_TEST_FOR_EXCEPTION
1094 (static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::Details::ArithTraits<ValueType>::max ()),
1095 std::invalid_argument, "Tpetra::Details::FixedHashTable: The number of "
1096 "keys " << numKeys << " is greater than the maximum representable "
1097 "ValueType value " << ::Kokkos::Details::ArithTraits<ValueType>::max () << ".");
1098 TEUCHOS_TEST_FOR_EXCEPTION
1099 (numKeys > static_cast<offset_type> (INT_MAX), std::logic_error, "Tpetra::"
1100 "Details::FixedHashTable: This class currently only works when the number "
1101 "of keys is <= INT_MAX = " << INT_MAX << ". If this is a problem for you"
1102 ", please talk to the Tpetra developers.");
1103
1104 // There's no need to find the first and last initial contiguous
1105 // keys in this case, because if we reach this init() function, then
1106 // hasContiguousValues() is false and so get() doesn't use the
1107 // initial contiguous sequence of keys.
1108
1109 const offset_type size = hash_type::getRecommendedSize (numKeys);
1110#ifdef HAVE_TPETRA_DEBUG
1111 TEUCHOS_TEST_FOR_EXCEPTION(
1112 size == 0 && numKeys != 0, std::logic_error,
1113 "Tpetra::Details::FixedHashTable constructor: "
1114 "getRecommendedSize(" << numKeys << ") returned zero, "
1115 "even though the number of keys " << numKeys << " is nonzero. "
1116 "Please report this bug to the Tpetra developers.");
1117#endif // HAVE_TPETRA_DEBUG
1118
1119 // FIXME: Investigate a couple options:
1120 // 1. Allocate ptr_h, val_h directly on host and only deep_copy to ptr_ and val_ once at the end
1121 // 2. Do all this work as a parallel kernel with the same execution/memory spaces as ptr_ and val_
1122 // An old comment from MFH noted ptr_h should be zero-initialized, while val_h should not be initialized.
1123 // It further noted that we shouldn't need a DualView type arrangement when all setup kernels have
1124 // been "Kokkos-ized".
1125 Kokkos::HostSpace hostMemSpace;
1126 typename ptr_type::non_const_type ptr ("Tpetra::FixedHashTable::ptr", size + 1);
1127 auto ptr_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, ptr);
1128
1129 // Allocate the array of key,value pairs. Don't waste time filling
1130 // it with zeros, because we will fill it with actual data below.
1131 using Kokkos::ViewAllocateWithoutInitializing;
1132 typedef typename val_type::non_const_type nonconst_val_type;
1133 nonconst_val_type val (ViewAllocateWithoutInitializing ("Tpetra::FixedHashTable::pairs"),
1134 numKeys);
1135 auto val_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, val);
1136
1137 // Compute number of entries in each hash table position.
1138 for (offset_type k = 0; k < numKeys; ++k) {
1139 const typename hash_type::result_type hashVal =
1140 hash_type::hashFunc (keys[k], size);
1141 // Shift over one, so that counts[j] = ptr[j+1]. See below.
1142 ++ptr_h[hashVal+1];
1143 }
1144
1145 // Compute row offsets via prefix sum:
1146 //
1147 // ptr[i+1] = \sum_{j=0}^{i} counts[j].
1148 //
1149 // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
1150 // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
1151 // formula is ptr[i+1] += ptr[i].
1152 for (offset_type i = 0; i < size; ++i) {
1153 ptr_h[i+1] += ptr_h[i];
1154 }
1155 //ptr[0] = 0; // We've already done this when initializing ptr above.
1156
1157 // curRowStart[i] is the offset of the next element in row i.
1158 typename ptr_type::non_const_type::HostMirror curRowStart ("Tpetra::FixedHashTable::curRowStart", size);
1159
1160 // Fill in the hash table.
1161 FHT::FillPairsResult<KeyType> result (initMinKey, initMaxKey);
1162 for (offset_type k = 0; k < numKeys; ++k) {
1163 typedef typename hash_type::result_type hash_value_type;
1164 const KeyType key = keys[k];
1165 if (key > result.maxKey_) {
1166 result.maxKey_ = key;
1167 }
1168 if (key < result.minKey_) {
1169 result.minKey_ = key;
1170 }
1171 const ValueType theVal = vals[k];
1172 if (theVal > maxVal_) {
1173 maxVal_ = theVal;
1174 }
1175 if (theVal < minVal_) {
1176 minVal_ = theVal;
1177 }
1178 const hash_value_type hashVal = hash_type::hashFunc (key, size);
1179
1180 const offset_type offset = curRowStart[hashVal];
1181 const offset_type curPos = ptr_h[hashVal] + offset;
1182 if (curPos >= ptr_h[hashVal+1]) {
1183 result.success_ = false; // FAILURE!
1184 }
1185 else {
1186 val_h[curPos].first = key;
1187 val_h[curPos].second = theVal;
1188 ++curRowStart[hashVal];
1189 }
1190 }
1191
1192 TEUCHOS_TEST_FOR_EXCEPTION
1193 (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1194 "init: Filling the hash table failed! Please report this bug to the "
1195 "Tpetra developers.");
1196
1197 // "Commit" the computed arrays and other computed quantities.
1198 Kokkos::deep_copy(ptr, ptr_h);
1199 Kokkos::deep_copy(val, val_h);
1200
1201 ptr_ = ptr;
1202 val_ = val;
1203 minKey_ = result.minKey_;
1204 maxKey_ = result.maxKey_;
1205 // We've already assigned to minVal_ and maxVal_ above.
1206}
1207
1208template <class KeyType, class ValueType, class DeviceType>
1209bool
1212{
1213 if (! checkedForDuplicateKeys_) {
1214 hasDuplicateKeys_ = checkForDuplicateKeys ();
1215 checkedForDuplicateKeys_ = true;
1216 }
1217 return hasDuplicateKeys_;
1218}
1219
1220template <class KeyType, class ValueType, class DeviceType>
1221bool
1223checkForDuplicateKeys () const
1224{
1225 const offset_type size = this->getSize ();
1226 // It's allowed for the hash table to have a positive number of
1227 // buckets (getSize()), but still store no entries (numPairs()).
1228 // Both cases trivially entail no duplicates.
1229 if (size == 0 || this->numPairs () == 0) {
1230 return false;
1231 }
1232 else {
1233 typedef FHT::CheckForDuplicateKeys<ptr_type, val_type> functor_type;
1234 functor_type functor (val_, ptr_);
1235 int hasDupKeys = 0;
1236 typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1237 Kokkos::parallel_reduce (range_type (0, size), functor, hasDupKeys);
1238 return hasDupKeys > 0;
1239 }
1240}
1241
1242template <class KeyType, class ValueType, class DeviceType>
1243std::string
1245description () const
1246{
1247 std::ostringstream oss;
1248 oss << "FixedHashTable<"
1249 << Teuchos::TypeNameTraits<KeyType>::name () << ","
1250 << Teuchos::TypeNameTraits<ValueType>::name () << ">: "
1251 << "{ numKeys: " << val_.extent (0)
1252 << ", tableSize: " << this->getSize () << " }";
1253 return oss.str();
1254}
1255
1256template <class KeyType, class ValueType, class DeviceType>
1257void
1259describe (Teuchos::FancyOStream& out,
1260 const Teuchos::EVerbosityLevel verbLevel) const
1261{
1262 using std::endl;
1263 using std::setw;
1264 using Teuchos::OSTab;
1265 using Teuchos::rcpFromRef;
1266 using Teuchos::TypeNameTraits;
1267 using Teuchos::VERB_DEFAULT;
1268 using Teuchos::VERB_NONE;
1269 using Teuchos::VERB_LOW;
1270 using Teuchos::VERB_EXTREME;
1271
1272 // NOTE (mfh 14 May 2015) This method currently assumes UVM for
1273 // access to ptr_ and val_ from the host.
1274
1275 Teuchos::EVerbosityLevel vl = verbLevel;
1276 if (vl == VERB_DEFAULT) vl = VERB_LOW;
1277
1278 if (vl == VERB_NONE) {
1279 // do nothing
1280 }
1281 else if (vl == VERB_LOW) {
1282 out << this->description() << endl;
1283 }
1284 else { // MEDIUM, HIGH or EXTREME
1285 out << "FixedHashTable:" << endl;
1286 {
1288
1289 // const std::string label = this->getObjectLabel ();
1290 // if (label != "") {
1291 // out << "label: " << label << endl;
1292 // }
1293 out << "Template parameters:" << endl;
1294 {
1296 out << "KeyType: " << TypeNameTraits<KeyType>::name () << endl
1297 << "ValueType: " << TypeNameTraits<ValueType>::name () << endl;
1298 }
1299
1300 const offset_type tableSize = this->getSize ();
1301 const offset_type numKeys = val_.extent (0);
1302
1303 out << "Table parameters:" << endl;
1304 {
1306 out << "numKeys: " << numKeys << endl
1307 << "tableSize: " << tableSize << endl;
1308 }
1309
1310 if (vl >= VERB_EXTREME) {
1311 out << "Contents: ";
1312 if (tableSize == 0 || numKeys == 0) {
1313 out << "[]" << endl;
1314 } else {
1315 out << "[ " << endl;
1316 {
1318 for (offset_type i = 0; i < tableSize; ++i) {
1320 out << "[";
1321 for (offset_type k = ptr_[i]; k < ptr_[i+1]; ++k) {
1322 out << "(" << val_[k].first << "," << val_[k].second << ")";
1323 if (k + 1 < ptr_[i+1]) {
1324 out << ", ";
1325 }
1326 }
1327 out << "]" << endl;
1328 } // for each table position i
1329 }
1330 out << "]" << endl;
1331 } // The table contains entries
1332 } // vl >= VERB_EXTREME
1333 }
1334 out << endl;
1335 } // if vl > VERB_LOW
1336}
1337
1338} // namespace Details
1339} // namespace Tpetra
1340
1341// Macro that explicitly instantiates FixedHashTable for the given local
1342// ordinal (LO) and global ordinal (GO) types. Note that FixedHashTable's
1343// template parameters occur in the opposite order of most Tpetra
1344// classes. This is because FixedHashTable performs global-to-local
1345// lookup, and the convention in templated C++ lookup tables (such as
1346// std::map) is <KeyType, ValueType>.
1347//
1348// This macro must be explanded within the Tpetra::Details namespace.
1349#define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT_DEFAULTNODE(LO,GO) \
1350 template class FixedHashTable< GO , LO >;
1351
1352// Macro that explicitly instantiates FixedHashTable for the given
1353// local ordinal (LO), global ordinal (GO), and Kokkos device (DEVICE)
1354// types. Note that FixedHashTable's first two template parameters
1355// occur in the opposite order of most Tpetra classes. This is
1356// because FixedHashTable performs global-to-local lookup, and the
1357// convention in templated C++ lookup tables (such as std::map) is
1358// <KeyType, ValueType>.
1359//
1360// This macro must be explanded within the Tpetra::Details namespace.
1361#define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, DEVICE) \
1362 template class FixedHashTable< GO , LO , DEVICE >;
1363
1364#endif // TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Struct that holds views of the contents of a CrsMatrix.
static bool debug()
Whether Tpetra is in debug mode.
Functor for checking whether a FixedHashTable has one or more duplicate entries.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body.
KOKKOS_INLINE_FUNCTION void join(value_type &dst, const value_type &src) const
Combine two intermediate reduction results.
CheckForDuplicateKeys(const pairs_view_type &pairs, const offsets_view_type &ptr)
Constructor.
Parallel for functor for counting "buckets" in the FixedHashTable.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i) const
Do this for every entry of keys_.
CountBuckets(const counts_view_type &counts, const keys_view_type &keys, const size_type size)
Constructor.
Parallel reduce functor for filling the FixedHashTable, and computing the min and max keys.
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue, const key_type initMinKey, const key_type initMaxKey)
Constructor that takes initial min and max key values.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body; do this for every entry of keys_.
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue)
Constructor.
std::string description() const
Implementation of Teuchos::Describable interface.
bool hasDuplicateKeys()
Whether the table has any duplicate keys.
Kokkos::View< const KeyType *, Kokkos::LayoutLeft, device_type > keys_type
Type of a 1-D Kokkos::View (array) used to store keys.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with the given verbosity to the output stream.
Implementation details of Tpetra.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Reduction result for FillPairs functor below.
bool success_
Whether fill succeeded (it can only fail on a bug)
The hash function for FixedHashTable.
ResultType result_type
Type of the return value of the hash function.
static KOKKOS_INLINE_FUNCTION result_type hashFunc(const argument_type &key, const offset_type &size)
The hash function.