Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_copyConvert.hpp
Go to the documentation of this file.
1/*
2// @HEADER
3// ***********************************************************************
4//
5// Tpetra: Templated Linear Algebra Services Package
6// Copyright (2008) Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ************************************************************************
41// @HEADER
42*/
43
44#ifndef TPETRA_DETAILS_COPYCONVERT_HPP
45#define TPETRA_DETAILS_COPYCONVERT_HPP
46
51
52#include "TpetraCore_config.h"
53#include "Kokkos_Core.hpp"
54#include "Kokkos_ArithTraits.hpp"
55#include <sstream>
56#include <stdexcept>
57#include <type_traits>
58
59namespace Tpetra {
60namespace Details {
61
62//
63// Implementation details for copyConvert (see below).
64// Users should skip over this anonymous namespace.
65//
66namespace { // (anonymous)
67
68 // We need separate implementations for both (T,complex) and
69 // (complex,T), but we can't just overload for both cases, because
70 // that would be ambiguous (e.g., (complex,complex)).
71 template<class OutputValueType,
72 class InputValueType,
73 const bool outputIsComplex =
74 Kokkos::ArithTraits<OutputValueType>::is_complex,
75 const bool inputIsComplex =
76 Kokkos::ArithTraits<InputValueType>::is_complex>
77 struct ConvertValue
78 {
79 static KOKKOS_INLINE_FUNCTION void
80 convert (OutputValueType& dst, const InputValueType& src)
81 {
82 // This looks trivial, but it actually invokes OutputValueType's
83 // constructor, so that needs to be marked as a __host__
84 // __device__ function (e.g., via the KOKKOS_FUNCTION or
85 // KOKKOS_INLINE_FUNCTION macros).
86 dst = OutputValueType (src);
87 }
88 };
89
90 template<class OutputRealType, class InputComplexType>
91 struct ConvertValue<OutputRealType, InputComplexType, false, true>
92 {
93 static KOKKOS_INLINE_FUNCTION void
94 convert (OutputRealType& dst,
95 const InputComplexType& src)
96 {
97 // OutputRealType's constructor needs to be marked with either
98 // KOKKOS_FUNCTION or KOKKOS_INLINE_FUNCTION.
99 using KAI = Kokkos::ArithTraits<InputComplexType>;
100 dst = OutputRealType (KAI::real (src));
101 }
102 };
103
104 template<class OutputComplexType, class InputRealType>
105 struct ConvertValue<OutputComplexType, InputRealType, true, false>
106 {
107 static KOKKOS_INLINE_FUNCTION void
108 convert (OutputComplexType& dst,
109 const InputRealType& src)
110 {
111 // OutputComplexType's constructor needs to be marked with
112 // either KOKKOS_FUNCTION or KOKKOS_INLINE_FUNCTION.
113 using output_mag_type =
114 typename Kokkos::ArithTraits<OutputComplexType>::mag_type;
115 using KAM = Kokkos::ArithTraits<output_mag_type>;
116 dst = OutputComplexType (src, KAM::zero ());
117 }
118 };
119
120 template<class OutputValueType,
121 class InputValueType>
122 KOKKOS_INLINE_FUNCTION void
123 convertValue (OutputValueType& dst, const InputValueType& src) {
124 ConvertValue<OutputValueType, InputValueType>::convert (dst, src);
125 }
126
/// \brief Functor that implements the element-wise part of
///   copyConvert.
///
/// This primary template is intentionally empty.  The usable
/// implementations are the partial specializations on \c rank.
///
/// \tparam OutputViewType Type of the destination View.
/// \tparam InputViewType Type of the source View.
/// \tparam rank Rank of the Views; defaults to OutputViewType::Rank.
template<class OutputViewType,
         class InputViewType,
         const int rank = static_cast<int> (OutputViewType::Rank)>
class CopyConvertFunctor
{
};
135
136 template<class OutputViewType,
137 class InputViewType>
138 class CopyConvertFunctor<OutputViewType, InputViewType, 1> {
139 private:
140 static_assert
141 (static_cast<int> (OutputViewType::Rank) == 1 &&
142 static_cast<int> (InputViewType::Rank) == 1,
143 "CopyConvertFunctor (implements Tpetra::Details::copyConvert): "
144 "OutputViewType and InputViewType must both have rank 1.");
145 OutputViewType dst_;
146 InputViewType src_;
147
148 public:
149 using index_type = typename OutputViewType::size_type;
150
151 CopyConvertFunctor (const OutputViewType& dst,
152 const InputViewType& src) :
153 dst_ (dst),
154 src_ (src)
155 {}
156
157 KOKKOS_INLINE_FUNCTION void
158 operator () (const index_type i) const {
159 convertValue (dst_(i), src_(i));
160 }
161 };
162
163 template<class OutputViewType,
164 class InputViewType>
165 class CopyConvertFunctor<OutputViewType, InputViewType, 2> {
166 public:
167 using index_type = typename OutputViewType::size_type;
168
169 private:
170 static_assert
171 (static_cast<int> (OutputViewType::Rank) == 2 &&
172 static_cast<int> (InputViewType::Rank) == 2,
173 "CopyConvertFunctor (implements Tpetra::Details::copyConvert): "
174 "OutputViewType and InputViewType must both have rank 2.");
175 OutputViewType dst_;
176 InputViewType src_;
177 index_type numCols_;
178
179 public:
180 CopyConvertFunctor (const OutputViewType& dst,
181 const InputViewType& src) :
182 dst_ (dst),
183 src_ (src),
184 numCols_ (dst.extent (1))
185 {}
186
187 KOKKOS_INLINE_FUNCTION void
188 operator () (const index_type i) const {
189 const index_type numCols = numCols_;
190 for (index_type j = 0; j < numCols; ++j) {
191 convertValue (dst_(i,j), src_(i,j));
192 }
193 }
194 };
195
/// \brief Compile-time test for whether copyConvert may hand the
///   copy to Kokkos::deep_copy instead of running its own functor.
///
/// \c value is true when the two View types have the same non-const
/// value type and, in addition, share either their memory space or
/// their array layout.
template<class OutputViewType, class InputViewType>
class CanUseKokkosDeepCopy {
private:
  using output_value_type = typename OutputViewType::non_const_value_type;
  using input_value_type = typename InputViewType::non_const_value_type;

  //! Whether both Views store the same (non-const) value type.
  static constexpr bool valueTypesMatch =
    std::is_same<output_value_type, input_value_type>::value;
  //! Whether both Views live in the same memory space.
  static constexpr bool memorySpacesMatch =
    std::is_same<typename OutputViewType::memory_space,
                 typename InputViewType::memory_space>::value;
  //! Whether both Views use the same array layout.
  static constexpr bool layoutsMatch =
    std::is_same<typename OutputViewType::array_layout,
                 typename InputViewType::array_layout>::value;

public:
  //! Result of the test.
  static constexpr bool value =
    valueTypesMatch && (memorySpacesMatch || layoutsMatch);
};
214
233 template<class OutputViewType,
234 class InputViewType,
235 const bool canUseKokkosDeepCopy =
236 CanUseKokkosDeepCopy<OutputViewType, InputViewType>::value,
237 const bool outputExecSpaceCanAccessInputMemSpace =
238 Kokkos::SpaceAccessibility<
239 typename OutputViewType::memory_space,
240 typename InputViewType::memory_space>::accessible>
241 struct CopyConvertImpl {
242 static void
243 run (const OutputViewType& dst,
244 const InputViewType& src);
245 };
246
248 template<class OutputViewType,
249 class InputViewType,
250 const bool outputExecSpaceCanAccessInputMemSpace>
251 struct CopyConvertImpl<OutputViewType, InputViewType,
252 true, outputExecSpaceCanAccessInputMemSpace>
253 {
254 static void
255 run (const OutputViewType& dst,
256 const InputViewType& src)
257 {
258 // NOTE: It's important to do the addition _inside_ the
259 // reinterpret-cast. If you reinterpret_cast the separate
260 // results, you may get the wrong answer (e.g., because
261 // ptrdiff_t is signed, and pointers may have arbitrary 64-bit
262 // virtual addresses). I'm speaking from experience here.
263 const ptrdiff_t dst_beg =reinterpret_cast<ptrdiff_t> (dst.data ());
264 const ptrdiff_t dst_end =
265 reinterpret_cast<ptrdiff_t> (dst.data () + dst.span ());
266 const ptrdiff_t src_beg = reinterpret_cast<ptrdiff_t> (src.data ());
267 const ptrdiff_t src_end =
268 reinterpret_cast<ptrdiff_t> (src.data () + src.span ());
269
270 if (dst_end > src_beg && src_end > dst_beg) {
271 // dst and src alias each other, so we can't call
272 // Kokkos::deep_copy(dst,src) directly (Kokkos detects this
273 // and throws, at least in debug mode). Instead, we make
274 // temporary host storage (create_mirror always makes a new
275 // allocation, unlike create_mirror_view). Use host because
276 // it's cheaper to allocate. Hopefully users aren't doing
277 // aliased copies in a tight loop.
278 auto src_copy = Kokkos::create_mirror (Kokkos::HostSpace (), src);
279 // DEEP_COPY REVIEW - NOT TESTED
280 Kokkos::deep_copy (src_copy, src);
281 // DEEP_COPY REVIEW - NOT TESTED
282 Kokkos::deep_copy (dst, src_copy);
283 }
284 else { // no aliasing
285 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
286 using execution_space = typename OutputViewType::execution_space;
287 Kokkos::deep_copy (execution_space(), dst, src);
288 }
289 }
290 };
291
294 template<class OutputViewType,
295 class InputViewType>
296 struct CopyConvertImpl<OutputViewType,
297 InputViewType,
298 false,
299 true>
300 {
301 static void
302 run (const OutputViewType& dst,
303 const InputViewType& src)
304 {
305 using functor_type = CopyConvertFunctor<OutputViewType, InputViewType>;
306 using execution_space = typename OutputViewType::execution_space;
307 using index_type = typename OutputViewType::size_type;
308 using range_type = Kokkos::RangePolicy<execution_space, index_type>;
309 Kokkos::parallel_for ("Tpetra::Details::copyConvert",
310 range_type (0, dst.extent (0)),
311 functor_type (dst, src));
312 }
313 };
314
321 template<class OutputViewType,
322 class InputViewType>
323 struct CopyConvertImpl<OutputViewType, InputViewType, false, false>
324 {
325 static void
326 run (const OutputViewType& dst,
327 const InputViewType& src)
328 {
329 using output_memory_space = typename OutputViewType::memory_space;
330 using output_execution_space = typename OutputViewType::execution_space;
331 auto src_outputSpaceCopy =
332 Kokkos::create_mirror_view (output_memory_space (), src);
333 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
334 Kokkos::deep_copy (output_execution_space(), src_outputSpaceCopy, src);
335
336 // The output View's execution space can access
337 // outputSpaceCopy's data, so we can run the functor now.
338 using output_space_copy_type = decltype (src_outputSpaceCopy);
339 using functor_type =
340 CopyConvertFunctor<OutputViewType, output_space_copy_type>;
341 using execution_space = typename OutputViewType::execution_space;
342 using index_type = typename OutputViewType::size_type;
343 using range_type = Kokkos::RangePolicy<execution_space, index_type>;
344 Kokkos::parallel_for ("Tpetra::Details::copyConvert",
345 range_type (0, dst.extent (0)),
346 functor_type (dst, src_outputSpaceCopy));
347 }
348 };
349} // namespace (anonymous)
350
359template<class OutputViewType,
360 class InputViewType>
361void
363 const InputViewType& src)
364{
365 static_assert (Kokkos::is_view<OutputViewType>::value,
366 "OutputViewType must be a Kokkos::View.");
367 static_assert (Kokkos::is_view<InputViewType>::value,
368 "InputViewType must be a Kokkos::View.");
369 static_assert (std::is_same<typename OutputViewType::value_type,
370 typename OutputViewType::non_const_value_type>::value,
371 "OutputViewType must be a nonconst Kokkos::View.");
372 static_assert (static_cast<int> (OutputViewType::Rank) ==
373 static_cast<int> (InputViewType::Rank),
374 "src and dst must have the same rank.");
375
376 if (dst.extent (0) != src.extent (0)) {
377 std::ostringstream os;
378 os << "Tpetra::Details::copyConvert: "
379 << "dst.extent(0) = " << dst.extent (0)
380 << " != src.extent(0) = " << src.extent (0)
381 << ".";
382 throw std::invalid_argument (os.str ());
383 }
384 if (static_cast<int> (OutputViewType::Rank) > 1 &&
385 dst.extent (1) != src.extent (1)) {
386 std::ostringstream os;
387 os << "Tpetra::Details::copyConvert: "
388 << "dst.extent(1) = " << dst.extent (1)
389 << " != src.extent(1) = " << src.extent (1)
390 << ".";
391 throw std::invalid_argument (os.str ());
392 }
393
394 // Canonicalize the View types in order to avoid redundant instantiations.
395 using output_view_type =
396 Kokkos::View<typename OutputViewType::non_const_data_type,
397 typename OutputViewType::array_layout,
398 typename OutputViewType::device_type>;
399 using input_view_type =
400 Kokkos::View<typename InputViewType::const_data_type,
401 typename InputViewType::array_layout,
402 typename InputViewType::device_type>;
403 CopyConvertImpl<output_view_type, input_view_type>::run (dst, src);
404}
405
406} // namespace Details
407} // namespace Tpetra
408
409#endif // TPETRA_DETAILS_COPYCONVERT_HPP
Struct that holds views of the contents of a CrsMatrix.
Implementation details of Tpetra.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the Kokkos::View src to the Kokkos::View dst of the same rank and extents, converting each entry to dst's value type.
Namespace Tpetra contains the class and methods constituting the Tpetra library.