44 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_MP_VECTOR_HPP 45 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_MP_VECTOR_HPP 51 #include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp" 56 namespace KokkosRefactor {
59 #if defined( KOKKOS_ENABLE_CUDA ) 61 struct device_is_cuda :
public std::is_same<D,Kokkos::Cuda> {};
70 template <
typename DS,
typename ... DP,
71 typename SS,
typename ... SP,
73 struct PackArraySingleColumn<
75 Kokkos::View<const Sacado::MP::Vector<SS>**,SP...>,
78 typedef Kokkos::View<Sacado::MP::Vector<DS>*,DP...>
DstView;
79 typedef Kokkos::View<const Sacado::MP::Vector<SS>**,SP...>
SrcView;
81 typedef typename execution_space::size_type
size_type;
92 dst(dst_), src(src_), idx(idx_), col(col_) {}
94 KOKKOS_INLINE_FUNCTION
96 dst(k) = src(idx(k), col);
99 KOKKOS_INLINE_FUNCTION
101 dst(k).fastAccessCoeff(tidx) = src(idx(k), col).fastAccessCoeff(tidx);
109 Kokkos::parallel_for(
111 PackArraySingleColumn(dst,src,idx,col) );
113 Kokkos::parallel_for( idx.size(),
114 PackArraySingleColumn(dst,src,idx,col) );
118 template <
typename DS,
typename ... DP,
119 typename SS,
typename ... SP,
121 struct PackArrayMultiColumn<
123 Kokkos::View<const Sacado::MP::Vector<SS>**,SP...>,
126 typedef Kokkos::View<Sacado::MP::Vector<DS>*,DP...>
DstView;
127 typedef Kokkos::View<const Sacado::MP::Vector<SS>**,SP...>
SrcView;
140 dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
142 KOKKOS_INLINE_FUNCTION
145 const size_t offset = k*numCols;
146 for (
size_t j = 0;
j < numCols; ++
j)
147 dst(offset +
j) = src(localRow,
j);
150 KOKKOS_INLINE_FUNCTION
153 const size_t offset = k*numCols;
154 for (
size_t j = 0;
j < numCols; ++
j)
155 dst(offset +
j).fastAccessCoeff(tidx) =
156 src(localRow,
j).fastAccessCoeff(tidx);
164 Kokkos::parallel_for(
166 PackArrayMultiColumn(dst,src,idx,numCols) );
168 Kokkos::parallel_for( idx.size(),
169 PackArrayMultiColumn(dst,src,idx,numCols) );
173 template <
typename DS,
typename ... DP,
174 typename SS,
typename ... SP,
177 struct PackArrayMultiColumnVariableStride<
179 Kokkos::View<const Sacado::MP::Vector<SS>**,SP...>,
183 typedef Kokkos::View<Sacado::MP::Vector<DS>*,DP...>
DstView;
184 typedef Kokkos::View<const Sacado::MP::Vector<SS>**,SP...>
SrcView;
199 dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
201 KOKKOS_INLINE_FUNCTION
204 const size_t offset = k*numCols;
205 for (
size_t j = 0;
j < numCols; ++
j)
206 dst(offset +
j) = src(localRow, col(
j));
209 KOKKOS_INLINE_FUNCTION
212 const size_t offset = k*numCols;
213 for (
size_t j = 0;
j < numCols; ++
j)
214 dst(offset +
j).fastAccessCoeff(tidx) =
215 src(localRow, col(
j)).fastAccessCoeff(tidx);
224 Kokkos::parallel_for(
226 PackArrayMultiColumnVariableStride(dst,src,idx,col,numCols) );
228 Kokkos::parallel_for( idx.size(),
229 PackArrayMultiColumnVariableStride(
230 dst,src,idx,col,numCols) );
234 template <
typename ExecutionSpace,
241 struct UnpackArrayMultiColumn<
244 Kokkos::View<const Sacado::MP::Vector<SS>*,SP...>,
248 typedef Kokkos::View<Sacado::MP::Vector<DS>**,DP...>
DstView;
249 typedef Kokkos::View<const Sacado::MP::Vector<SS>*,SP...>
SrcView;
264 const size_t numCols_) :
272 KOKKOS_INLINE_FUNCTION
void 276 const size_t offset = k*numCols;
277 for (
size_t j = 0;
j < numCols; ++
j) {
278 op (dst(localRow,
j), src(offset+
j));
282 KOKKOS_INLINE_FUNCTION
void 286 const size_t offset = k*numCols;
287 for (
size_t j = 0;
j < numCols; ++
j) {
299 const size_t numCols)
303 (
"Tpetra::MultiVector (Sacado::MP::Vector) unpack (constant stride)",
305 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
309 (
"Tpetra::MultiVector (Sacado::MP::Vector) unpack (constant stride)",
310 Kokkos::RangePolicy<execution_space, size_type> (0, idx.size ()),
311 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
316 template <
typename ExecutionSpace,
324 struct UnpackArrayMultiColumnVariableStride<
327 Kokkos::View<const Sacado::MP::Vector<SS>*,SP...>,
332 typedef Kokkos::View<Sacado::MP::Vector<DS>**,DP...>
DstView;
333 typedef Kokkos::View<const Sacado::MP::Vector<SS>*,SP...>
SrcView;
350 const size_t numCols_) :
359 KOKKOS_INLINE_FUNCTION
void 363 const size_t offset = k*numCols;
364 for (
size_t j = 0;
j < numCols; ++
j) {
365 op (dst(localRow, col(
j)), src(offset+
j));
369 KOKKOS_INLINE_FUNCTION
void 373 const size_t offset = k*numCols;
374 for (
size_t j = 0;
j < numCols; ++
j) {
387 const size_t numCols)
391 (
"Tpetra::MultiVector unpack (Sacado::MP::Vector) (nonconstant stride)",
393 UnpackArrayMultiColumnVariableStride (execSpace, dst, src, idx, col, op, numCols));
397 (
"Tpetra::MultiVector unpack (Sacado::MP::Vector) (nonconstant stride)",
398 Kokkos::RangePolicy<execution_space, size_type> (0, idx.size ()),
399 UnpackArrayMultiColumnVariableStride (execSpace, dst, src, idx, col, op, numCols));
404 template <
typename DS,
typename ... DP,
405 typename SS,
typename ... SP,
406 typename DstIdxView,
typename SrcIdxView>
407 struct PermuteArrayMultiColumn<
409 Kokkos::View<const Sacado::MP::Vector<SS>**,SP...>,
413 typedef Kokkos::View<Sacado::MP::Vector<DS>**,DP...>
DstView;
414 typedef Kokkos::View<const Sacado::MP::Vector<SS>**,SP...>
SrcView;
426 const DstIdxView& dst_idx_,
427 const SrcIdxView& src_idx_,
429 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
432 KOKKOS_INLINE_FUNCTION
436 for (
size_t j = 0;
j < numCols; ++
j)
437 dst(toRow,
j) = src(fromRow,
j);
440 KOKKOS_INLINE_FUNCTION
444 for (
size_t j = 0;
j < numCols; ++
j)
445 dst(toRow,
j).fastAccessCoeff(tidx) =
446 src(fromRow,
j).fastAccessCoeff(tidx);
451 const DstIdxView& dst_idx,
452 const SrcIdxView& src_idx,
456 Kokkos::parallel_for(
458 PermuteArrayMultiColumn(dst,src,dst_idx,src_idx,numCols) );
460 Kokkos::parallel_for(
461 n, PermuteArrayMultiColumn(dst,src,dst_idx,src_idx,numCols) );
465 template <
typename DS,
typename ... DP,
466 typename SS,
typename ... SP,
467 typename DstIdxView,
typename SrcIdxView,
468 typename DstColView,
typename SrcColView>
469 struct PermuteArrayMultiColumnVariableStride<
471 Kokkos::View<const Sacado::MP::Vector<SS>**,SP...>,
472 DstIdxView, SrcIdxView,
473 DstColView, SrcColView >
475 typedef Kokkos::View<Sacado::MP::Vector<DS>**,DP...>
DstView;
476 typedef Kokkos::View<const Sacado::MP::Vector<SS>**,SP...>
SrcView;
490 const DstIdxView& dst_idx_,
491 const SrcIdxView& src_idx_,
492 const DstColView& dst_col_,
493 const SrcColView& src_col_,
495 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
496 dst_col(dst_col_), src_col(src_col_),
499 KOKKOS_INLINE_FUNCTION
503 for (
size_t j = 0;
j < numCols; ++
j)
504 dst(toRow, dst_col(
j)) = src(fromRow, src_col(
j));
507 KOKKOS_INLINE_FUNCTION
511 for (
size_t j = 0;
j < numCols; ++
j)
512 dst(toRow, dst_col(
j)).fastAccessCoeff(tidx) =
513 src(fromRow, src_col(
j)).fastAccessCoeff(tidx);
518 const DstIdxView& dst_idx,
519 const SrcIdxView& src_idx,
520 const DstColView& dst_col,
521 const SrcColView& src_col,
525 Kokkos::parallel_for(
527 PermuteArrayMultiColumnVariableStride(
528 dst,src,dst_idx,src_idx,dst_col,src_col,numCols) );
530 Kokkos::parallel_for(
531 n, PermuteArrayMultiColumnVariableStride(
532 dst,src,dst_idx,src_idx,dst_col,src_col,numCols) );
540 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_MP_VECTOR_HPP
execution_space::size_type size_type
execution_space::size_type size_type
Kokkos::View< Sacado::MP::Vector< DS > **, DP... > DstView
DstView::execution_space execution_space
Kokkos::View< const Sacado::MP::Vector< SS > **, SP... > SrcView
execution_space::size_type size_type
PackArrayMultiColumn(const DstView &dst_, const SrcView &src_, const IdxView &idx_, size_t numCols_)
KOKKOS_INLINE_FUNCTION void operator()(const size_type k) const
DstView::execution_space execution_space
Kokkos::View< const Sacado::MP::Vector< SS > **, SP... > SrcView
KOKKOS_INLINE_FUNCTION void operator()(const size_type k) const
Kokkos::DefaultExecutionSpace execution_space
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c fastAccessCoeff(j) - expr2.val(j)
Kokkos::View< Sacado::MP::Vector< DS > **, DP... > DstView
ExecutionSpace::execution_space execution_space
static void permute(const DstView &dst, const SrcView &src, const DstIdxView &dst_idx, const SrcIdxView &src_idx, size_t numCols)
execution_space::size_type size_type
execution_space::size_type size_type
KOKKOS_INLINE_FUNCTION void operator()(const size_type k, const size_type tidx) const
Kokkos::View< const Sacado::MP::Vector< SS > **, SP... > SrcView
DstView::execution_space execution_space
PackArrayMultiColumnVariableStride(const DstView &dst_, const SrcView &src_, const IdxView &idx_, const ColView &col_, size_t numCols_)
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
Kokkos::View< Sacado::MP::Vector< DS > *, DP... > DstView
KOKKOS_INLINE_FUNCTION void operator()(const size_type k, const size_type tidx) const
Kokkos::View< const Sacado::MP::Vector< SS > **, SP... > SrcView
KOKKOS_INLINE_FUNCTION void operator()(const size_type k, const size_type tidx) const
static void unpack(const ExecutionSpace &execSpace, const DstView &dst, const SrcView &src, const IdxView &idx, const ColView &col, const Op &op, const size_t numCols)
execution_space::size_type size_type
Kokkos::View< Sacado::MP::Vector< DS > *, DP... > DstView
UnpackArrayMultiColumnVariableStride(const ExecutionSpace &, const DstView &dst_, const SrcView &src_, const IdxView &idx_, const ColView &col_, const Op &op_, const size_t numCols_)
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P... > >::value, unsigned >::type dimension_scalar(const View< T, P... > &view)
ExecutionSpace::execution_space execution_space
static void pack(const DstView &dst, const SrcView &src, const IdxView &idx, const ColView &col, size_t numCols)
KOKKOS_INLINE_FUNCTION void operator()(const size_type k, const size_type tidx) const
Kokkos::View< const Sacado::MP::Vector< SS > *, SP... > SrcView
static void permute(const DstView &dst, const SrcView &src, const DstIdxView &dst_idx, const SrcIdxView &src_idx, const DstColView &dst_col, const SrcColView &src_col, size_t numCols)
Team-based parallel work configuration for Sacado::MP::Vector.
Kokkos::View< Sacado::MP::Vector< DS > **, DP... > DstView
PermuteArrayMultiColumnVariableStride(const DstView &dst_, const SrcView &src_, const DstIdxView &dst_idx_, const SrcIdxView &src_idx_, const DstColView &dst_col_, const SrcColView &src_col_, size_t numCols_)
UnpackArrayMultiColumn(const ExecutionSpace &, const DstView &dst_, const SrcView &src_, const IdxView &idx_, const Op &op_, const size_t numCols_)
Kokkos::View< const Sacado::MP::Vector< SS > *, SP... > SrcView
Kokkos::View< const Sacado::MP::Vector< SS > **, SP... > SrcView
static void pack(const DstView &dst, const SrcView &src, const IdxView &idx, size_t col)
DstView::execution_space execution_space
PermuteArrayMultiColumn(const DstView &dst_, const SrcView &src_, const DstIdxView &dst_idx_, const SrcIdxView &src_idx_, size_t numCols_)
KOKKOS_INLINE_FUNCTION void operator()(const size_type k, const size_type tidx) const
static void unpack(const ExecutionSpace &execSpace, const DstView &dst, const SrcView &src, const IdxView &idx, const Op &op, const size_t numCols)
KOKKOS_INLINE_FUNCTION void operator()(const size_type k) const
Kokkos::View< Sacado::MP::Vector< DS > **, DP... > DstView
PackArraySingleColumn(const DstView &dst_, const SrcView &src_, const IdxView &idx_, size_t col_)
KOKKOS_INLINE_FUNCTION void operator()(const size_type k) const
DstView::execution_space execution_space
Kokkos::View< Sacado::MP::Vector< DS > *, DP... > DstView
execution_space::size_type size_type
KOKKOS_INLINE_FUNCTION void operator()(const size_type k) const
static void pack(const DstView &dst, const SrcView &src, const IdxView &idx, size_t numCols)