Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
KokkosBlas2_gemv_MP_Vector.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Stokhos Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38//
39// ***********************************************************************
40// @HEADER
41
42#ifndef KOKKOSBLAS2_GEMV_MP_VECTOR_HPP
43#define KOKKOSBLAS2_GEMV_MP_VECTOR_HPP
44
45#include <type_traits>
46#include "Sacado_ConfigDefs.h"
47
49#include "Sacado_MP_Vector.hpp"
52#include "KokkosBlas.hpp"
53
55#include "Kokkos_Core.hpp"
56
57#include "Stokhos_config.h"
58
59#define Sacado_MP_Vector_GEMV_Tile_Size(size) (STOKHOS_GEMV_CACHE_SIZE / size)
60
61// Functor for the update
62template <class AViewType,
63 class XViewType,
64 class YViewType,
65 class IndexType = typename AViewType::size_type>
66struct updateF
67{
68 using AlphaCoeffType = typename AViewType::non_const_value_type;
69 using BetaCoeffType = typename YViewType::non_const_value_type;
70
71 updateF(const AlphaCoeffType &alpha,
72 const AViewType &A,
73 const XViewType &x,
74 const BetaCoeffType &beta,
75 const YViewType &y,
76 const IndexType m_c) : alpha_(alpha), A_(A), x_(x), beta_(beta), y_(y), m_c_(m_c)
77 {
78 }
79
80public:
81 // i_tile is the current tile of the input matrix A
82 KOKKOS_INLINE_FUNCTION void
83 operator()(const IndexType &i_tile) const
84 {
85 const IndexType m = y_.extent(0);
86 const IndexType n = x_.extent(0);
87
88 IndexType i_min = m_c_ * i_tile;
89 bool last_tile = (i_min + m_c_ >= m);
90 IndexType i_max = (last_tile) ? m : (i_min + m_c_);
91
92#ifdef STOKHOS_HAVE_PRAGMA_UNROLL
93#pragma unroll
94#endif
95 if (beta_ == BetaCoeffType(0.))
96 for (IndexType i = i_min; i < i_max; ++i)
97 y_(i) = beta_;
98 else
99 for (IndexType i = i_min; i < i_max; ++i)
100 y_(i) *= beta_;
101
102 for (IndexType j = 0; j < n; ++j)
103 {
104 AlphaCoeffType alphab = alpha_ * x_(j);
105
106 for (IndexType i = i_min; i < i_max; ++i)
107 y_(i) += alphab * A_(i, j);
108 }
109 }
110
111private:
113 typename AViewType::const_type A_;
114 typename XViewType::const_type x_;
116 YViewType y_;
117 const IndexType m_c_;
118};
119
120// Functor for the inner products
121template <class AViewType,
122 class XViewType,
123 class YViewType,
124 class IndexType = typename AViewType::size_type>
125struct innerF
126{
127 using execution_space = typename AViewType::execution_space;
128 using policy_type = Kokkos::TeamPolicy<execution_space>;
129 using member_type = typename policy_type::member_type;
130
131 using AlphaCoeffType = typename AViewType::non_const_value_type;
133
134 innerF(const AlphaCoeffType &alpha,
135 const AViewType &A,
136 const XViewType &x,
137 const YViewType &y,
138 const IndexType n_c) : alpha_(alpha), A_(A), x_(x), y_(y), n_c_(n_c)
139 {
140 }
141
142public:
143 KOKKOS_INLINE_FUNCTION void
144 operator()(const member_type &team) const
145 {
146 const IndexType m = y_.extent(0);
147 const IndexType n = x_.extent(0);
148
149 const int j = team.league_rank();
150 const IndexType j_min = n_c_ * j;
151 const IndexType nj = (j_min + n_c_ > n) ? (n - j_min) : n_c_;
152 const IndexType i_min = j % m;
153
154 for (IndexType i = i_min; i < m; ++i)
155 {
156 Scalar tmp = 0.;
157 Kokkos::parallel_reduce(
158 Kokkos::TeamThreadRange(team, nj), [=](int jj, Scalar &tmp_sum) {
159 tmp_sum += A_(jj + j_min, i) * x_(jj + j_min);
160 },
161 tmp);
162 if (team.team_rank() == 0)
163 {
164 tmp *= alpha_;
165 Kokkos::atomic_add<Scalar>(&y_(i), tmp);
166 }
167 }
168 for (IndexType i = 0; i < i_min; ++i)
169 {
170 Scalar tmp = 0.;
171 Kokkos::parallel_reduce(
172 Kokkos::TeamThreadRange(team, nj), [=](int jj, Scalar &tmp_sum) {
173 tmp_sum += A_(jj + j_min, i) * x_(jj + j_min);
174 },
175 tmp);
176 if (team.team_rank() == 0)
177 {
178 tmp *= alpha_;
179 Kokkos::atomic_add<Scalar>(&y_(i), tmp);
180 }
181 }
182 }
183
184private:
186 typename AViewType::const_type A_;
187 typename XViewType::const_type x_;
188 YViewType y_;
189 const IndexType n_c_;
190};
191
192template <class Scalar,
193 class VA,
194 class VX,
195 class VY>
197 typename VA::const_value_type &alpha,
198 const VA &A,
199 const VX &x,
200 typename VY::const_value_type &beta,
201 const VY &y)
202{
203 using execution_space = typename VA::execution_space;
204 using IndexType = typename VA::size_type;
205 using policy_type = Kokkos::RangePolicy<execution_space, IndexType>;
206
207 // Get the dimensions
208 const size_t m = y.extent(0);
209
210#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
211 const size_t N = execution_space::thread_pool_size();
212#else
213 const size_t N = execution_space::impl_thread_pool_size();
214#endif
215 const size_t m_c_star = Sacado_MP_Vector_GEMV_Tile_Size(sizeof(Scalar));
216 const size_t n_tiles_per_thread = ceil(((double)m) / (N * m_c_star));
217 const size_t m_c = ceil(((double)m) / (N * n_tiles_per_thread));
218 const size_t n_tiles = N * n_tiles_per_thread;
219
220 policy_type range(0, n_tiles);
221
222 using functor_type = updateF<VA, VX, VY, IndexType>;
223 functor_type functor(alpha, A, x, beta, y, m_c);
224
225 Kokkos::parallel_for("KokkosBlas::gemv[Update]", range, functor);
226}
227
228template <class Scalar,
229 class VA,
230 class VX,
231 class VY>
233 typename VA::const_value_type &alpha,
234 const VA &A,
235 const VX &x,
236 typename VY::const_value_type &beta,
237 const VY &y)
238{
239 using execution_space = typename VA::execution_space;
240 using IndexType = typename VA::size_type;
241 using team_policy_type = Kokkos::TeamPolicy<execution_space>;
242
243 // Get the dimensions
244 const size_t m = y.extent(0);
245 const size_t n = x.extent(0);
246
247 const size_t team_size = STOKHOS_GEMV_TEAM_SIZE;
248
249#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
250 const size_t N = execution_space::thread_pool_size();
251#else
252 const size_t N = execution_space::impl_thread_pool_size();
253#endif
254 const size_t m_c_star = Sacado_MP_Vector_GEMV_Tile_Size(sizeof(Scalar));
255 const size_t n_tiles_per_thread = ceil(((double)n) / (N * m_c_star));
256 const size_t m_c = ceil(((double)n) / (N * n_tiles_per_thread));
257 const size_t n_per_tile2 = m_c * team_size;
258
259 const size_t n_i2 = ceil(((double)n) / n_per_tile2);
260
261 team_policy_type team(n_i2, team_size);
262
263 if (beta == Scalar(0.))
264 Kokkos::parallel_for(
265 m, KOKKOS_LAMBDA(const int i) {
266 y(i) = beta;
267 });
268 else
269 Kokkos::parallel_for(
270 m, KOKKOS_LAMBDA(const int i) {
271 y(i) *= beta;
272 });
273
274 using functor_type = innerF<VA, VX, VY, IndexType>;
275 functor_type functor(alpha, A, x, y, n_per_tile2);
276
277 Kokkos::parallel_for("KokkosBlas::gemv[InnerProducts]", team, functor);
278}
279
280namespace KokkosBlas
281{
282template <typename DA, typename... PA,
283 typename DX, typename... PX,
284 typename DY, typename... PY>
285typename std::enable_if<Kokkos::is_view_mp_vector<Kokkos::View<DA, PA...>>::value &&
286 Kokkos::is_view_mp_vector<Kokkos::View<DX, PX...>>::value &&
287 Kokkos::is_view_mp_vector<Kokkos::View<DY, PY...>>::value>::type
288gemv(const char trans[],
289 typename Kokkos::View<DA, PA...>::const_value_type &alpha,
290 const Kokkos::View<DA, PA...> &A,
291 const Kokkos::View<DX, PX...> &x,
292 typename Kokkos::View<DY, PY...>::const_value_type &beta,
293 const Kokkos::View<DY, PY...> &y)
294{
295 typedef typename Kokkos::View<DA, PA...>::value_type Scalar;
296 typedef Kokkos::View<DA, PA...> VA;
297 typedef Kokkos::View<DX, PX...> VX;
298 typedef Kokkos::View<DY, PY...> VY;
299
300 static_assert(VA::rank == 2, "GEMM: A must have rank 2 (be a matrix).");
301 static_assert(VX::rank == 1, "GEMM: x must have rank 1 (be a vector).");
302 static_assert(VY::rank == 1, "GEMM: y must have rank 1 (be a vector).");
303
304 if (trans[0] == 'n' || trans[0] == 'N')
305 update_MP<Scalar, VA, VX, VY>(alpha, A, x, beta, y);
306 else
307 inner_products_MP<Scalar, VA, VX, VY>(alpha, A, x, beta, y);
308}
309
310} // namespace KokkosBlas
311#endif
void update_MP(typename VA::const_value_type &alpha, const VA &A, const VX &x, typename VY::const_value_type &beta, const VY &y)
void inner_products_MP(typename VA::const_value_type &alpha, const VA &A, const VX &x, typename VY::const_value_type &beta, const VY &y)
#define Sacado_MP_Vector_GEMV_Tile_Size(size)
Kokkos::DefaultExecutionSpace execution_space
std::enable_if< Kokkos::is_view_mp_vector< Kokkos::View< DA, PA... > >::value &&Kokkos::is_view_mp_vector< Kokkos::View< DX, PX... > >::value &&Kokkos::is_view_mp_vector< Kokkos::View< DY, PY... > >::value >::type gemv(const char trans[], typename Kokkos::View< DA, PA... >::const_value_type &alpha, const Kokkos::View< DA, PA... > &A, const Kokkos::View< DX, PX... > &x, typename Kokkos::View< DY, PY... >::const_value_type &beta, const Kokkos::View< DY, PY... > &y)
XViewType::const_type x_
innerF(const AlphaCoeffType &alpha, const AViewType &A, const XViewType &x, const YViewType &y, const IndexType n_c)
AlphaCoeffType alpha_
AlphaCoeffType Scalar
Kokkos::TeamPolicy< execution_space > policy_type
typename policy_type::member_type member_type
KOKKOS_INLINE_FUNCTION void operator()(const member_type &team) const
typename AViewType::non_const_value_type AlphaCoeffType
typename AViewType::execution_space execution_space
const IndexType n_c_
AViewType::const_type A_
typename AViewType::non_const_value_type AlphaCoeffType
typename YViewType::non_const_value_type BetaCoeffType
KOKKOS_INLINE_FUNCTION void operator()(const IndexType &i_tile) const
updateF(const AlphaCoeffType &alpha, const AViewType &A, const XViewType &x, const BetaCoeffType &beta, const YViewType &y, const IndexType m_c)
XViewType::const_type x_
AViewType::const_type A_