107 const OrdinalType dim,
108 const OrdinalType nGrid,
109 const OrdinalType iterCount,
110 std::vector<double>& scalar_perf,
111 std::vector<double>& block_left_perf,
112 std::vector<double>& block_right_perf,
113 std::vector<double>& pce_perf,
114 std::vector<double>& block_pce_perf)
116 typedef ScalarType value_type;
117 typedef OrdinalType ordinal_type;
123 typedef Kokkos::View< value_type*, Kokkos::LayoutLeft, execution_space > scalar_vector_type;
124 typedef Kokkos::View< value_type**, Kokkos::LayoutLeft, execution_space > scalar_left_multi_vector_type;
125 typedef Kokkos::View< value_type**, Kokkos::LayoutRight, execution_space > scalar_right_multi_vector_type;
126 typedef Kokkos::View< pce_type*, Kokkos::LayoutLeft, execution_space > pce_vector_type;
127 typedef Kokkos::View< pce_type**, Kokkos::LayoutLeft, execution_space > pce_multi_vector_type;
129 typedef KokkosSparse::CrsMatrix< value_type, ordinal_type, execution_space > scalar_matrix_type;
130 typedef KokkosSparse::CrsMatrix< pce_type, ordinal_type, execution_space > pce_matrix_type;
131 typedef typename scalar_matrix_type::StaticCrsGraphType matrix_graph_type;
132 typedef typename scalar_matrix_type::values_type scalar_matrix_values_type;
133 typedef typename pce_matrix_type::values_type pce_matrix_values_type;
140 typedef typename pce_type::cijk_type kokkos_cijk_type;
144 using Teuchos::Array;
147 const ordinal_type num_pce_col = 5;
150 Array< RCP<const abstract_basis_type> > bases(dim);
151 for (ordinal_type i=0; i<dim; ++i) {
152 bases[i] = Teuchos::rcp(
new basis_type(order,
true));
154 RCP<const product_basis_type> basis = rcp(
new product_basis_type(bases));
155 RCP<cijk_type> cijk = basis->computeTripleProductTensor();
156 kokkos_cijk_type kokkos_cijk =
157 Stokhos::create_product_tensor<execution_space>(*basis, *cijk);
163 std::vector< std::vector<size_t> > fem_graph;
164 const size_t fem_length = nGrid * nGrid * nGrid;
170 ordinal_type pce_size = basis->size();
171 scalar_left_multi_vector_type xl(Kokkos::ViewAllocateWithoutInitializing(
"scalar left x"), fem_length, pce_size);
172 scalar_left_multi_vector_type yl(Kokkos::ViewAllocateWithoutInitializing(
"scalar right y"), fem_length, pce_size);
173 scalar_right_multi_vector_type xr(Kokkos::ViewAllocateWithoutInitializing(
"scalar right x"), fem_length, pce_size);
174 scalar_right_multi_vector_type yr(Kokkos::ViewAllocateWithoutInitializing(
"scalar right y"), fem_length, pce_size);
175 std::vector<scalar_vector_type> x_col(pce_size), y_col(pce_size);
176 for (ordinal_type i=0; i<pce_size; ++i) {
177 x_col[i] = scalar_vector_type (Kokkos::ViewAllocateWithoutInitializing(
"scalar x col"), fem_length);
178 y_col[i] = scalar_vector_type(Kokkos::ViewAllocateWithoutInitializing(
"scalar y col"), fem_length);
182 pce_vector_type x_pce =
183 Kokkos::make_view<pce_vector_type>(Kokkos::ViewAllocateWithoutInitializing(
"pce x"),
184 kokkos_cijk, fem_length, pce_size);
185 pce_vector_type y_pce =
186 Kokkos::make_view<pce_vector_type>(Kokkos::ViewAllocateWithoutInitializing(
"pce y"),
187 kokkos_cijk, fem_length, pce_size);
188 pce_multi_vector_type x_multi_pce =
189 Kokkos::make_view<pce_multi_vector_type>(Kokkos::ViewAllocateWithoutInitializing(
"pce multi x"),
190 kokkos_cijk, fem_length,
191 num_pce_col, pce_size);
192 pce_multi_vector_type y_multi_pce =
193 Kokkos::make_view<pce_multi_vector_type>(Kokkos::ViewAllocateWithoutInitializing(
"pce multi y"),
194 kokkos_cijk, fem_length,
195 num_pce_col, pce_size);
209 matrix_graph_type matrix_graph =
210 Kokkos::create_staticcrsgraph<matrix_graph_type>(
211 std::string(
"test crs graph"), fem_graph);
212 scalar_matrix_values_type scalar_matrix_values =
213 scalar_matrix_values_type(Kokkos::ViewAllocateWithoutInitializing(
"scalar matrix"), graph_length);
214 pce_matrix_values_type pce_matrix_values =
215 Kokkos::make_view<pce_matrix_values_type>(Kokkos::ViewAllocateWithoutInitializing(
"pce matrix"), kokkos_cijk, graph_length, 1);
216 scalar_matrix_type scalar_matrix(
"scalar matrix", fem_length,
217 scalar_matrix_values, matrix_graph);
218 pce_matrix_type pce_matrix(
"pce matrix", fem_length,
219 pce_matrix_values, matrix_graph);
229 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
230 for (ordinal_type col=0; col<pce_size; ++col) {
236 KokkosSparse::spmv(
"N" , value_type(1.0) , scalar_matrix, x_col[col] , value_type(0.0) ,y_col[col]);
241 Kokkos::Timer clock ;
242 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
243 for (ordinal_type col=0; col<pce_size; ++col) {
249 KokkosSparse::spmv(
"N" , value_type(1.0) , scalar_matrix, x_col[col] , value_type(0.0) ,y_col[col]);
254 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
255 const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
257 scalar_perf.resize(5);
258 scalar_perf[0] = fem_length;
259 scalar_perf[1] = pce_size;
260 scalar_perf[2] = graph_length;
261 scalar_perf[3] = seconds_per_iter;
262 scalar_perf[4] = flops / seconds_per_iter;
270 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
275 Kokkos::Timer clock ;
276 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
281 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
282 const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
284 block_left_perf.resize(5);
285 block_left_perf[0] = fem_length;
286 block_left_perf[1] = pce_size;
287 block_left_perf[2] = graph_length;
288 block_left_perf[3] = seconds_per_iter;
289 block_left_perf[4] = flops / seconds_per_iter;
297 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
302 Kokkos::Timer clock ;
303 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
308 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
309 const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
311 block_right_perf.resize(5);
312 block_right_perf[0] = fem_length;
313 block_right_perf[1] = pce_size;
314 block_right_perf[2] = graph_length;
315 block_right_perf[3] = seconds_per_iter;
316 block_right_perf[4] = flops / seconds_per_iter;
324 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
325 KokkosSparse::spmv(
"N" , value_type(1.0) , pce_matrix, x_pce , value_type(0.0) ,y_pce);
329 Kokkos::Timer clock ;
330 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
331 KokkosSparse::spmv(
"N" , value_type(1.0) , pce_matrix, x_pce , value_type(0.0) ,y_pce);
335 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
336 const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
339 pce_perf[0] = fem_length;
340 pce_perf[1] = pce_size;
341 pce_perf[2] = graph_length;
342 pce_perf[3] = seconds_per_iter;
343 pce_perf[4] = flops / seconds_per_iter;
351 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
352 KokkosSparse::spmv(
"N" , value_type(1.0) , pce_matrix, x_multi_pce , value_type(0.0) ,y_multi_pce);
356 Kokkos::Timer clock ;
357 for (ordinal_type iter = 0; iter < iterCount; ++iter) {
358 KokkosSparse::spmv(
"N" , value_type(1.0) , pce_matrix, x_multi_pce , value_type(0.0) ,y_multi_pce);
362 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
363 const double flops = 1.0e-9 * 2.0 * graph_length * pce_size * num_pce_col;
365 block_pce_perf.resize(5);
366 block_pce_perf[0] = fem_length;
367 block_pce_perf[1] = pce_size;
368 block_pce_perf[2] = graph_length;
369 block_pce_perf[3] = seconds_per_iter;
370 block_pce_perf[4] = flops / seconds_per_iter;