Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
TestSpMM.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Stokhos Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38//
39// ***********************************************************************
40// @HEADER
41#include <iostream>
42
43// Kokkos CrsMatrix
44#include "KokkosSparse_CrsMatrix.hpp"
45#include "KokkosSparse_spmv.hpp"
46
47
48// Utilities
49#include "Kokkos_Timer.hpp"
50
// Map the lattice coordinate (i,j,k) of an N x N x N grid to a linear index
// with k varying fastest and i slowest:  index = k + N * ( j + N * i ).
//
// All four arguments must share the same integer type IntType; the arithmetic
// is carried out, and the result is returned, in that type.
template< typename IntType >
inline
IntType map_fem_graph_coord( const IntType & N ,
                             const IntType & i ,
                             const IntType & j ,
                             const IntType & k )
{
  return k + N * ( j + N * i );
}

// Build the adjacency graph of an N x N x N grid in which every node is
// connected to itself and to each node whose (i,j,k) coordinates all differ
// by at most one (up to 27 entries per row, fewer on the grid boundary).
//
//   N     : number of grid nodes along each axis.
//   graph : output; replaced by N*N*N rows.  Row map_fem_graph_coord(N,i,j,k)
//           holds the linear indices of the neighbours of node (i,j,k) in
//           ascending order.  Any previous content is discarded.
//
// Returns the total number of entries stored over all rows.
inline
size_t generate_fem_graph( size_t N ,
                           std::vector< std::vector<size_t> > & graph )
{
  // assign() rather than resize(): rows left over from an earlier call must
  // not survive, otherwise the push_back below would append to them.
  graph.assign( N * N * N , std::vector<size_t>() );

  size_t total = 0 ;

  // All index arithmetic stays in size_t so that the linear index cannot
  // overflow a narrower signed type for large N.
  for ( size_t i = 0 ; i < N ; ++i ) {
    // Clamp the +/-1 neighbourhood to the grid without going below zero.
    const size_t i_lo = ( i == 0 ) ? 0 : i - 1 ;
    const size_t i_hi = ( i + 1 < N ) ? i + 1 : i ;

    for ( size_t j = 0 ; j < N ; ++j ) {
      const size_t j_lo = ( j == 0 ) ? 0 : j - 1 ;
      const size_t j_hi = ( j + 1 < N ) ? j + 1 : j ;

      for ( size_t k = 0 ; k < N ; ++k ) {
        const size_t k_lo = ( k == 0 ) ? 0 : k - 1 ;
        const size_t k_hi = ( k + 1 < N ) ? k + 1 : k ;

        const size_t row = map_fem_graph_coord( N , i , j , k );
        std::vector<size_t> & columns = graph[row];

        columns.reserve(27);

        for ( size_t ni = i_lo ; ni <= i_hi ; ++ni ) {
          for ( size_t nj = j_lo ; nj <= j_hi ; ++nj ) {
            for ( size_t nk = k_lo ; nk <= k_hi ; ++nk ) {
              columns.push_back( map_fem_graph_coord( N , ni , nj , nk ) );
            }
          }
        }

        total += columns.size();
      }
    }
  }

  return total ;
}
93
94template <typename ScalarType, typename OrdinalType, typename Device>
95void
96test_spmm(const OrdinalType ensemble_length,
97 const OrdinalType nGrid,
98 const OrdinalType iterCount,
99 std::vector<double>& scalar_perf,
100 std::vector<double>& block_left_perf,
101 std::vector<double>& block_right_perf)
102{
103 typedef ScalarType value_type;
104 typedef OrdinalType ordinal_type;
105 typedef Device execution_space;
106 typedef Kokkos::View< value_type*, execution_space > vector_type;
107 typedef Kokkos::View< value_type**, Kokkos::LayoutLeft, execution_space > left_multivec_type;
108 //typedef Kokkos::View< value_type**, Kokkos::LayoutRight, execution_space > right_multivec_type;
109 typedef KokkosSparse::CrsMatrix< value_type, ordinal_type, execution_space > matrix_type;
110 typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
111 typedef typename matrix_type::values_type matrix_values_type;
112
113 //------------------------------
114 // Generate graph for "FEM" box structure:
115
116 std::vector< std::vector<size_t> > fem_graph;
117 const size_t fem_length = nGrid * nGrid * nGrid;
118 const size_t graph_length = generate_fem_graph( nGrid , fem_graph );
119
120 //------------------------------
121 // Generate input vectors:
122
123 std::vector<vector_type> x(ensemble_length);
124 std::vector<vector_type> y(ensemble_length);
125 for (ordinal_type e=0; e<ensemble_length; ++e) {
126 x[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("x"), fem_length);
127 y[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("y"), fem_length);
128
129 Kokkos::deep_copy( x[e] , value_type(1.0) );
130 Kokkos::deep_copy( y[e] , value_type(0.0) );
131 }
132 left_multivec_type xl(Kokkos::ViewAllocateWithoutInitializing("xl"), fem_length, ensemble_length);
133 left_multivec_type yl(Kokkos::ViewAllocateWithoutInitializing("yl"), fem_length, ensemble_length);
134 // right_multivec_type xr(Kokkos::ViewAllocateWithoutInitializing("xr"), fem_length, ensemble_length);
135 // right_multivec_type yr(Kokkos::ViewAllocateWithoutInitializing("yr"), fem_length, ensemble_length);
136 Kokkos::deep_copy(xl, value_type(1.0));
137 //Kokkos::deep_copy(xr, value_type(1.0));
138 Kokkos::deep_copy(yl, value_type(0.0));
139 //Kokkos::deep_copy(yr, value_type(0.0));
140
141 //------------------------------
142 // Generate matrix
143
144 matrix_graph_type matrix_graph =
145 Kokkos::create_staticcrsgraph<matrix_graph_type>(
146 std::string("test crs graph"), fem_graph);
147 matrix_values_type matrix_values =
148 matrix_values_type(Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length);
149 matrix_type matrix("matrix", fem_length, matrix_values, matrix_graph);
150 Kokkos::deep_copy( matrix_values , value_type(1.0) );
151
152 //------------------------------
153 // Scalar multiply
154
155 {
156 // warm up
157 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
158 for (ordinal_type e=0; e<ensemble_length; ++e) {
159 KokkosSparse::spmv( "N", value_type(1.0), matrix, x[e] , value_type(0.0) , y[e]);
160 }
161 }
162
163 execution_space().fence();
164 Kokkos::Timer clock ;
165 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
166 for (ordinal_type e=0; e<ensemble_length; ++e) {
167 KokkosSparse::spmv( "N", value_type(1.0), matrix, x[e] , value_type(0.0) , y[e]);
168 }
169 }
170 execution_space().fence();
171
172 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
173 const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
174
175 scalar_perf.resize(5);
176 scalar_perf[0] = fem_length;
177 scalar_perf[1] = ensemble_length;
178 scalar_perf[2] = graph_length;
179 scalar_perf[3] = seconds_per_iter;
180 scalar_perf[4] = flops / seconds_per_iter;
181 }
182
183 //------------------------------
184 // Block-left multiply
185
186 {
187 // warm up
188 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
189 KokkosSparse::spmv( "N", value_type(1.0), matrix, xl , value_type(0.0) , yl);
190 }
191
192 execution_space().fence();
193 Kokkos::Timer clock ;
194 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
195 KokkosSparse::spmv( "N", value_type(1.0), matrix, xl , value_type(0.0) , yl);
196 }
197 execution_space().fence();
198
199 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
200 const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
201
202 block_left_perf.resize(5);
203 block_left_perf[0] = fem_length;
204 block_left_perf[1] = ensemble_length;
205 block_left_perf[2] = graph_length;
206 block_left_perf[3] = seconds_per_iter;
207 block_left_perf[4] = flops / seconds_per_iter;
208 }
209
210#if 0
211 //------------------------------
212 // Block-right multiply
213
214 {
215 // warm up
216 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
217 KokkosSparse::spmv( "N", value_type(1.0), matrix, xr , value_type(0.0) , yr);
218 }
219
220 execution_space().fence();
221 Kokkos::Timer clock ;
222 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
223 KokkosSparse::spmv( "N", value_type(1.0), matrix, xr , value_type(0.0) , yr);
224 }
225 execution_space().fence();
226
227 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
228 const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
229
230 block_right_perf.resize(5);
231 block_right_perf[0] = fem_length;
232 block_right_perf[1] = ensemble_length;
233 block_right_perf[2] = graph_length;
234 block_right_perf[3] = seconds_per_iter;
235 block_right_perf[4] = flops / seconds_per_iter;
236 }
237#endif
238
239}
240
241template <typename Scalar, typename Ordinal, typename Device>
243 const Ordinal nIter,
244 const Ordinal ensemble_min,
245 const Ordinal ensemble_max,
246 const Ordinal ensemble_step )
247{
248 std::cout.precision(8);
249 std::cout << std::endl
250 << "\"Grid Size\" , "
251 << "\"FEM Size\" , "
252 << "\"FEM Graph Size\" , "
253 << "\"Ensemble Size\" , "
254 << "\"Scalar SpMM Time\" , "
255 << "\"Scalar SpMM Speedup\" , "
256 << "\"Scalar SpMM GFLOPS\" , "
257 << "\"Block-Left SpMM Speedup\" , "
258 << "\"Block-Left SpMM GFLOPS\" , "
259 //<< "\"Block_Right SpMM Speedup\" , "
260 //<< "\"Block_Right SpMM GFLOPS\" , "
261 << std::endl;
262
263 std::vector<double> perf_scalar, perf_block_left, perf_block_right;
264 for (Ordinal e=ensemble_min; e<=ensemble_max; e+=ensemble_step) {
265
266 test_spmm<Scalar,Ordinal,Device>(
267 e, nGrid, nIter, perf_scalar, perf_block_left, perf_block_right );
268
269 std::cout << nGrid << " , "
270 << perf_scalar[0] << " , "
271 << perf_scalar[2] << " , "
272 << perf_scalar[1] << " , "
273 << perf_scalar[3] << " , "
274 << perf_scalar[4] / perf_scalar[4] << " , "
275 << perf_scalar[4] << " , "
276 << perf_block_left[4]/ perf_scalar[4] << " , "
277 << perf_block_left[4] << " , "
278 //<< perf_block_right[4]/ perf_scalar[4] << " , "
279 //<< perf_block_right[4] << " , "
280 << std::endl;
281
282 }
283}
Kokkos::DefaultExecutionSpace execution_space
void test_spmm(const OrdinalType ensemble_length, const OrdinalType nGrid, const OrdinalType iterCount, std::vector< double > &scalar_perf, std::vector< double > &block_left_perf, std::vector< double > &block_right_perf)
Definition TestSpMM.hpp:96
size_t generate_fem_graph(size_t N, std::vector< std::vector< size_t > > &graph)
Definition TestSpMM.hpp:62
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
Definition TestSpMM.hpp:53
void performance_test_driver(const Ordinal nGrid, const Ordinal nIter, const Ordinal ensemble_min, const Ordinal ensemble_max, const Ordinal ensemble_step)
Definition TestSpMM.hpp:242
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)