Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
TestStochastic.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 #include <utility>
42 #include <cmath>
43 #include <iostream>
44 
45 #include "Kokkos_Core.hpp"
46 #include "impl/Kokkos_Timer.hpp"
47 
48 #include "Stokhos_Update.hpp"
49 #include "Stokhos_CrsMatrix.hpp"
61 
62 #if defined(HAVE_MPI) && 0
63 #include "Epetra_MpiComm.h"
64 #else
65 #include "Epetra_SerialComm.h"
66 #endif
68 #include "Stokhos_JacobiBasis.hpp"
74 
75 #ifdef HAVE_STOKHOS_KOKKOSLINALG
76 #include "KokkosSparse_CrsMatrix.hpp"
77 #include "KokkosSparse_spmv.hpp"
78 #include "KokkosBlas1_update.hpp"
79 #endif
80 
81 namespace unit_test {
82 
83 template< typename IntType >
84 inline
85 IntType map_fem_graph_coord( const IntType & N ,
86  const IntType & i ,
87  const IntType & j ,
88  const IntType & k )
89 {
90  return k + N * ( j + N * i );
91 }
92 
93 inline
94 size_t generate_fem_graph( size_t N ,
95  std::vector< std::vector<size_t> > & graph )
96 {
97  graph.resize( N * N * N , std::vector<size_t>() );
98 
99  size_t total = 0 ;
100 
101  for ( int i = 0 ; i < (int) N ; ++i ) {
102  for ( int j = 0 ; j < (int) N ; ++j ) {
103  for ( int k = 0 ; k < (int) N ; ++k ) {
104 
105  const size_t row = map_fem_graph_coord((int)N,i,j,k);
106 
107  graph[row].reserve(27);
108 
109  for ( int ii = -1 ; ii < 2 ; ++ii ) {
110  for ( int jj = -1 ; jj < 2 ; ++jj ) {
111  for ( int kk = -1 ; kk < 2 ; ++kk ) {
112  if ( 0 <= i + ii && i + ii < (int) N &&
113  0 <= j + jj && j + jj < (int) N &&
114  0 <= k + kk && k + kk < (int) N ) {
115  size_t col = map_fem_graph_coord((int)N,i+ii,j+jj,k+kk);
116 
117  graph[row].push_back(col);
118  }
119  }}}
120  total += graph[row].size();
121  }}}
122 
123  return total ;
124 }
125 
126 template <typename Scalar> struct ScalarTolerances {};
127 
128 template <> struct ScalarTolerances<float> {
129  typedef float scalar_type;
130  static scalar_type sparse_cijk_tol() { return 1e-5; }
131 };
132 
133 template <> struct ScalarTolerances<double> {
134  typedef double scalar_type;
135  static scalar_type sparse_cijk_tol() { return 1e-12; }
136 };
137 
138 template< typename ScalarType , typename TensorType, class Device >
139 std::vector<double>
141  const std::vector<int> & var_degree ,
142  const int nGrid ,
143  const int iterCount ,
144  const bool symmetric )
145 {
146  typedef ScalarType value_type ;
147  typedef Kokkos::View< value_type** ,
148  Kokkos::LayoutLeft ,
149  Device > block_vector_type ;
150 
152 
154  typedef typename matrix_type::graph_type graph_type ;
155 
156  typedef ScalarType value_type ;
157  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
160  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
162 
163  using Teuchos::rcp;
164  using Teuchos::RCP;
165  using Teuchos::Array;
166 
167  // Create Stochastic Galerkin basis and expansion
168  const size_t num_KL = var_degree.size();
169  Array< RCP<const abstract_basis_type> > bases(num_KL);
170  for (size_t i=0; i<num_KL; i++) {
171  if (symmetric)
172  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
173  else
174  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
175  }
176  RCP<const product_basis_type> basis =
177  rcp(new product_basis_type(
179  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
180 
181  //------------------------------
182  // Generate graph for "FEM" box structure:
183 
184  std::vector< std::vector<size_t> > graph ;
185 
186  const size_t outer_length = nGrid * nGrid * nGrid ;
187  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
188 
189  //------------------------------
190  // Generate CRS block-tensor matrix:
191 
192  matrix_type matrix ;
193 
194  matrix.block =
195  Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
196  *Cijk );
197  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
198 
199  const size_t inner_length = matrix.block.dimension();
200  const size_t inner_length_aligned = matrix.block.aligned_dimension();
201 
202  matrix.values =
203  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
204 
205  block_vector_type x =
206  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
207  block_vector_type y =
208  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
209 
210  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
211 
212  //------------------------------
213  // Generate input multivector:
214 
215  Kokkos::deep_copy( x , ScalarType(1.0) );
216  block_vector_type x0 =
217  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"),
218  inner_length_aligned , outer_length );
219  Kokkos::deep_copy( x0 , ScalarType(1.0) );
220 
221  //------------------------------
222 
223  Device().fence();
224  Kokkos::Impl::Timer clock ;
225  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
226  Kokkos::deep_copy( x, x0 ); // akin to import
227  Stokhos::multiply( matrix , x , y );
228  }
229  Device().fence();
230 
231  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
232  const double flops_per_block = matrix.block.tensor().num_flops();
233  const double flops = 1.0e-9*graph_length*flops_per_block;
234 
235  std::vector<double> perf(6) ;
236 
237  perf[0] = outer_length * inner_length ;
238  perf[1] = seconds_per_iter ;
239  perf[2] = flops / seconds_per_iter;
240  perf[3] = matrix.block.tensor().entry_count();
241  perf[4] = inner_length ;
242  perf[5] = flops_per_block;
243 
244  return perf ;
245 }
246 
247 template< typename ScalarType , class Device >
248 std::vector<double>
250  const std::vector<int> & var_degree ,
251  const int nGrid ,
252  const int iterCount ,
253  const bool symmetric )
254 {
255  typedef ScalarType value_type ;
256  typedef Kokkos::View< value_type**,
257  Kokkos::LayoutLeft ,
258  Device > block_vector_type ;
259 
260  //------------------------------
261 
263  value_type , Device > matrix_type ;
264 
265  typedef typename matrix_type::graph_type graph_type ;
266 
267  typedef ScalarType value_type ;
268  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
271  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
273 
274  using Teuchos::rcp;
275  using Teuchos::RCP;
276  using Teuchos::Array;
277 
278  // Create Stochastic Galerkin basis and expansion
279  const size_t num_KL = var_degree.size();
280  Array< RCP<const abstract_basis_type> > bases(num_KL);
281  for (size_t i=0; i<num_KL; i++) {
282  if (symmetric)
283  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
284  else
285  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
286  }
287  RCP<const product_basis_type> basis =
288  rcp(new product_basis_type(
290  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
291 
292  //------------------------------
293  // Generate FEM graph:
294 
295  std::vector< std::vector<size_t> > fem_graph ;
296 
297  const size_t fem_length = nGrid * nGrid * nGrid ;
298  const size_t fem_graph_length = unit_test::generate_fem_graph( nGrid , fem_graph );
299 
300  const size_t stoch_length = basis->size();
301 
302  //------------------------------
303 
304  block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), stoch_length , fem_length );
305  block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), stoch_length , fem_length );
306 
307  Kokkos::deep_copy( x , ScalarType(1.0) );
308 
309  //------------------------------
310  // Generate CRS matrix of blocks with symmetric diagonal storage
311 
312  matrix_type matrix ;
313 
314  matrix.block = Stokhos::SymmetricDiagonalSpec< Device >( stoch_length );
315  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>(
316  std::string("test product tensor graph") , fem_graph );
317  matrix.values = block_vector_type(
318  Kokkos::ViewAllocateWithoutInitializing("matrix"), matrix.block.matrix_size() , fem_graph_length );
319 
320  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
321 
322  //------------------------------
323 
324  Device().fence();
325  Kokkos::Impl::Timer clock ;
326  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
327  Stokhos::multiply( matrix , x , y );
328  }
329  Device().fence();
330 
331  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
332  const double flops_per_block = 2.0*stoch_length*stoch_length;
333  const double flops = 1e-9*fem_graph_length*flops_per_block;
334 
335  std::vector<double> perf(6);
336  perf[0] = fem_length * stoch_length ;
337  perf[1] = seconds_per_iter;
338  perf[2] = flops / seconds_per_iter;
339  perf[3] = Cijk->num_entries();
340  perf[4] = stoch_length;
341  perf[5] = flops_per_block;
342  return perf;
343 }
344 
345 //----------------------------------------------------------------------------
346 // Flatten to a plain CRS matrix
347 //
348 // Outer DOF == fem
349 // Inner DOF == stochastic
350 
351 template< typename ScalarType , class Device >
352 std::vector<double>
354  const std::vector<int> & var_degree ,
355  const int nGrid ,
356  const int iterCount ,
357  const bool symmetric )
358 {
359  typedef ScalarType value_type ;
360  typedef Kokkos::View< value_type* , Device > vector_type ;
361 
362  //------------------------------
363 
364  typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
365  typedef typename matrix_type::values_type matrix_values_type;
366  typedef typename matrix_type::graph_type matrix_graph_type;
367 
368  typedef ScalarType value_type ;
369  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
372  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
374 
375  using Teuchos::rcp;
376  using Teuchos::RCP;
377  using Teuchos::Array;
378 
379  // Create Stochastic Galerkin basis and expansion
380  const size_t num_KL = var_degree.size();
381  Array< RCP<const abstract_basis_type> > bases(num_KL);
382  for (size_t i=0; i<num_KL; i++) {
383  if (symmetric)
384  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
385  else
386  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
387  }
388  RCP<const product_basis_type> basis =
389  rcp(new product_basis_type(
391  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
392 
393  //------------------------------
394  // Generate FEM graph:
395 
396  std::vector< std::vector<size_t> > fem_graph ;
397 
398  const size_t fem_length = nGrid * nGrid * nGrid ;
399 
400  unit_test::generate_fem_graph( nGrid , fem_graph );
401 
402  //------------------------------
403  // Stochastic graph
404 
405  const size_t stoch_length = basis->size();
406  std::vector< std::vector< int > > stoch_graph( stoch_length );
407 #if defined(HAVE_MPI) && 0
408  Epetra_MpiComm comm(MPI_COMM_WORLD);
409 #else
410  Epetra_SerialComm comm;
411 #endif
412  Teuchos::RCP<Epetra_CrsGraph> cijk_graph = Stokhos::sparse3Tensor2CrsGraph(
413  *basis, *Cijk, comm);
414  for ( size_t i = 0 ; i < stoch_length ; ++i ) {
415  int len = cijk_graph->NumGlobalIndices(i);
416  stoch_graph[i].resize(len);
417  int len2;
418  cijk_graph->ExtractGlobalRowCopy(i, len, len2, &stoch_graph[i][0]);
419  }
420 
421  //------------------------------
422  // Generate flattened graph with FEM outer and stochastic inner
423 
424  const size_t flat_length = fem_length * stoch_length ;
425 
426  std::vector< std::vector<size_t> > flat_graph( flat_length );
427 
428  for ( size_t iOuterRow = 0 ; iOuterRow < fem_length ; ++iOuterRow ) {
429 
430  const size_t iOuterRowNZ = fem_graph[iOuterRow].size();
431 
432  for ( size_t iInnerRow = 0 ; iInnerRow < stoch_length ; ++iInnerRow ) {
433 
434  const size_t iInnerRowNZ = stoch_graph[ iInnerRow ].size(); ;
435  const size_t iFlatRowNZ = iOuterRowNZ * iInnerRowNZ ;
436  const size_t iFlatRow = iInnerRow + iOuterRow * stoch_length ;
437 
438  flat_graph[iFlatRow].resize( iFlatRowNZ );
439 
440  size_t iFlatEntry = 0 ;
441 
442  for ( size_t iOuterEntry = 0 ; iOuterEntry < iOuterRowNZ ; ++iOuterEntry ) {
443 
444  const size_t iOuterCol = fem_graph[iOuterRow][iOuterEntry];
445 
446  for ( size_t iInnerEntry = 0 ; iInnerEntry < iInnerRowNZ ; ++iInnerEntry ) {
447 
448  const size_t iInnerCol = stoch_graph[iInnerRow][iInnerEntry] ;
449  const size_t iFlatColumn = iInnerCol + iOuterCol * stoch_length ;
450 
451  flat_graph[iFlatRow][iFlatEntry] = iFlatColumn ;
452 
453  ++iFlatEntry ;
454  }
455  }
456  }
457  }
458 
459  //------------------------------
460 
461  vector_type x = vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), flat_length );
462  vector_type y = vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), flat_length );
463 
464  Kokkos::deep_copy( x , ScalarType(1.0) );
465 
466  //------------------------------
467 
468  matrix_type matrix ;
469 
470  matrix.graph = Kokkos::create_staticcrsgraph<matrix_graph_type>(
471  std::string("testing") , flat_graph );
472 
473  const size_t flat_graph_length = matrix.graph.entries.extent(0);
474 
475  matrix.values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), flat_graph_length );
476 
477  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
478 
479  //Kokkos::write_matrix_market(matrix, "flat_commuted.mm");
480 
481  //------------------------------
482 
483  Device().fence();
484  Kokkos::Impl::Timer clock ;
485  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
486  Stokhos::multiply( matrix , x , y );
487  }
488  Device().fence();
489 
490  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
491  const double flops = 2.0*1e-9*flat_graph_length / seconds_per_iter;
492 
493  std::vector<double> perf(4);
494  perf[0] = flat_length ;
495  perf[1] = seconds_per_iter;
496  perf[2] = flops;
497  perf[3] = flat_graph_length ;
498  return perf;
499 }
500 
501 //----------------------------------------------------------------------------
502 //----------------------------------------------------------------------------
503 // Flatten to a plain CRS matrix
504 //
505 // Outer DOF == stochastic
506 // Inner DOF == fem
507 
508 template< typename ScalarType , class Device >
509 std::vector<double>
511  const std::vector<int> & var_degree ,
512  const int nGrid ,
513  const int iterCount ,
514  const bool symmetric )
515 {
516  typedef ScalarType value_type ;
517  typedef Kokkos::View< value_type* , Device > vector_type ;
518 
519  //------------------------------
520 
521  typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
522  typedef typename matrix_type::values_type matrix_values_type;
523  typedef typename matrix_type::graph_type matrix_graph_type;
524 
525  typedef ScalarType value_type ;
526  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
529  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
531 
532  using Teuchos::rcp;
533  using Teuchos::RCP;
534  using Teuchos::Array;
535 
536  // Create Stochastic Galerkin basis and expansion
537  const size_t num_KL = var_degree.size();
538  Array< RCP<const abstract_basis_type> > bases(num_KL);
539  for (size_t i=0; i<num_KL; i++) {
540  if (symmetric)
541  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
542  else
543  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
544  }
545  RCP<const product_basis_type> basis =
546  rcp(new product_basis_type(
548  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
549 
550  //------------------------------
551  // Generate FEM graph:
552 
553  std::vector< std::vector<size_t> > fem_graph ;
554 
555  const size_t fem_length = nGrid * nGrid * nGrid ;
556 
557  unit_test::generate_fem_graph( nGrid , fem_graph );
558 
559  //------------------------------
560  // Stochastic graph
561 
562  const size_t stoch_length = basis->size();
563  std::vector< std::vector< int > > stoch_graph( stoch_length );
564 #if defined(HAVE_MPI) && 0
565  Epetra_MpiComm comm(MPI_COMM_WORLD);
566 #else
567  Epetra_SerialComm comm;
568 #endif
569  Teuchos::RCP<Epetra_CrsGraph> cijk_graph = Stokhos::sparse3Tensor2CrsGraph(
570  *basis, *Cijk, comm);
571  for ( size_t i = 0 ; i < stoch_length ; ++i ) {
572  int len = cijk_graph->NumGlobalIndices(i);
573  stoch_graph[i].resize(len);
574  int len2;
575  cijk_graph->ExtractGlobalRowCopy(i, len, len2, &stoch_graph[i][0]);
576  }
577 
578  //------------------------------
579  // Generate flattened graph with stochastic outer and FEM inner
580 
581  const size_t flat_length = fem_length * stoch_length ;
582 
583  std::vector< std::vector<size_t> > flat_graph( flat_length );
584 
585  for ( size_t iOuterRow = 0 ; iOuterRow < stoch_length ; ++iOuterRow ) {
586 
587  const size_t iOuterRowNZ = stoch_graph[iOuterRow].size();
588 
589  for ( size_t iInnerRow = 0 ; iInnerRow < fem_length ; ++iInnerRow ) {
590 
591  const size_t iInnerRowNZ = fem_graph[iInnerRow].size();
592  const size_t iFlatRowNZ = iOuterRowNZ * iInnerRowNZ ;
593  const size_t iFlatRow = iInnerRow + iOuterRow * fem_length ;
594 
595  flat_graph[iFlatRow].resize( iFlatRowNZ );
596 
597  size_t iFlatEntry = 0 ;
598 
599  for ( size_t iOuterEntry = 0 ; iOuterEntry < iOuterRowNZ ; ++iOuterEntry ) {
600 
601  const size_t iOuterCol = stoch_graph[ iOuterRow ][ iOuterEntry ];
602 
603  for ( size_t iInnerEntry = 0 ; iInnerEntry < iInnerRowNZ ; ++iInnerEntry ) {
604 
605  const size_t iInnerCol = fem_graph[ iInnerRow][iInnerEntry];
606  const size_t iFlatColumn = iInnerCol + iOuterCol * fem_length ;
607 
608  flat_graph[iFlatRow][iFlatEntry] = iFlatColumn ;
609  ++iFlatEntry ;
610  }
611  }
612  }
613  }
614 
615  //------------------------------
616 
617  vector_type x = vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), flat_length );
618  vector_type y = vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), flat_length );
619 
620  Kokkos::deep_copy( x , ScalarType(1.0) );
621 
622  //------------------------------
623 
624  matrix_type matrix ;
625 
626  matrix.graph = Kokkos::create_staticcrsgraph<matrix_graph_type>( std::string("testing") , flat_graph );
627 
628  const size_t flat_graph_length = matrix.graph.entries.extent(0);
629 
630  matrix.values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), flat_graph_length );
631 
632  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
633 
634  //Kokkos::write_matrix_market(matrix, "flat_original.mm");
635 
636  //------------------------------
637 
638  Device().fence();
639  Kokkos::Impl::Timer clock ;
640  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
641  Stokhos::multiply( matrix , x , y );
642  }
643  Device().fence();
644 
645  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
646  const double flops = 2.0*1e-9*flat_graph_length / seconds_per_iter;
647 
648  std::vector<double> perf(4);
649  perf[0] = flat_length ;
650  perf[1] = seconds_per_iter;
651  perf[2] = flops;
652  perf[3] = flat_graph_length ;
653  return perf;
654 }
655 
656 template< typename ScalarType , class Device >
657 std::vector<double>
659  const std::vector<int> & var_degree ,
660  const int nGrid ,
661  const int iterCount ,
662  const bool symmetric )
663 {
664  typedef ScalarType value_type ;
665  typedef Kokkos::View< value_type** ,
666  Kokkos::LayoutLeft ,
667  Device > block_vector_type ;
668 
671 
673  typedef typename matrix_type::graph_type graph_type ;
674 
675  typedef ScalarType value_type ;
676  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
679  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
681 
682  using Teuchos::rcp;
683  using Teuchos::RCP;
684  using Teuchos::Array;
685 
686  // Create Stochastic Galerkin basis and expansion
687  const size_t num_KL = var_degree.size();
688  Array< RCP<const abstract_basis_type> > bases(num_KL);
689  for (size_t i=0; i<num_KL; i++) {
690  if (symmetric)
691  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
692  else
693  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
694  }
695  RCP<const product_basis_type> basis =
696  rcp(new product_basis_type(
698  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
699 
700  //------------------------------
701  // Generate graph for "FEM" box structure:
702 
703  std::vector< std::vector<size_t> > graph ;
704 
705  const size_t outer_length = nGrid * nGrid * nGrid ;
706  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
707 
708  //------------------------------
709  // Generate CRS block-tensor matrix:
710 
711  matrix_type matrix ;
712 
713  Teuchos::ParameterList params;
714  params.set("Tile Size", 128);
715  params.set("Max Tiles", 10000);
716  matrix.block =
717  Stokhos::create_stochastic_product_tensor< TensorType >( *basis, *Cijk,
718  params);
719  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
720 
721  const size_t inner_length = matrix.block.dimension();
722  const size_t inner_length_aligned = matrix.block.aligned_dimension();
723 
724  matrix.values =
725  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
726 
727  block_vector_type x =
728  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
729  block_vector_type y =
730  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
731 
732  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
733 
734  //------------------------------
735  // Generate input multivector:
736 
737  Kokkos::deep_copy( x , ScalarType(1.0) );
738 
739  //------------------------------
740 
741  Device().fence();
742  Kokkos::Impl::Timer clock ;
743  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
744  Stokhos::multiply( matrix , x , y );
745  }
746  Device().fence();
747 
748  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
749  const double flops_per_block = matrix.block.tensor().num_flops();
750  const double flops = 1.0e-9*graph_length*flops_per_block;
751 
752  // std::cout << "tensor: flops = " << flops
753  // << " time = " << seconds_per_iter << std::endl;
754 
755  std::vector<double> perf(6) ;
756 
757  perf[0] = outer_length * inner_length ;
758  perf[1] = seconds_per_iter ;
759  perf[2] = flops / seconds_per_iter;
760  perf[3] = matrix.block.tensor().entry_count();
761  perf[4] = inner_length ;
762  perf[5] = flops_per_block;
763 
764  return perf ;
765 }
766 
767 template< typename ScalarType , class Device >
768 std::vector<double>
770  const std::vector<int> & var_degree ,
771  const int nGrid ,
772  const int iterCount ,
773  const bool symmetric )
774 {
775  typedef ScalarType value_type ;
776  typedef Kokkos::View< value_type** ,
777  Kokkos::LayoutLeft ,
778  Device > block_vector_type ;
779 
782 
784  typedef typename matrix_type::graph_type graph_type ;
785 
786  typedef ScalarType value_type ;
787  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
790  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
792 
793  using Teuchos::rcp;
794  using Teuchos::RCP;
795  using Teuchos::Array;
796 
797  // Create Stochastic Galerkin basis and expansion
798  const size_t num_KL = var_degree.size();
799  Array< RCP<const abstract_basis_type> > bases(num_KL);
800  for (size_t i=0; i<num_KL; i++) {
801  if (symmetric)
802  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
803  else
804  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
805  }
806  RCP<const product_basis_type> basis =
807  rcp(new product_basis_type(
809  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
810 
811  //------------------------------
812  // Generate graph for "FEM" box structure:
813 
814  std::vector< std::vector<size_t> > graph ;
815 
816  const size_t outer_length = nGrid * nGrid * nGrid ;
817  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
818 
819  //------------------------------
820  // Generate CRS block-tensor matrix:
821 
822  matrix_type matrix ;
823 
824  Teuchos::ParameterList params;
825  params.set("Tile Size", 128);
826  matrix.block =
827  Stokhos::create_stochastic_product_tensor< TensorType >( *basis, *Cijk,
828  params);
829  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
830 
831  const size_t inner_length = matrix.block.dimension();
832  const size_t inner_length_aligned = matrix.block.aligned_dimension();
833 
834  matrix.values =
835  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
836 
837  block_vector_type x =
838  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
839  block_vector_type y =
840  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
841 
842  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
843 
844  //------------------------------
845  // Generate input multivector:
846 
847  Kokkos::deep_copy( x , ScalarType(1.0) );
848 
849  //------------------------------
850 
851  Device().fence();
852  Kokkos::Impl::Timer clock ;
853  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
854  Stokhos::multiply( matrix , x , y );
855  }
856  Device().fence();
857 
858  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
859  const double flops_per_block = matrix.block.tensor().num_flops();
860  const double flops = 1.0e-9*graph_length*flops_per_block;
861 
862  // std::cout << "tensor: flops = " << flops
863  // << " time = " << seconds_per_iter << std::endl;
864 
865  std::vector<double> perf(6) ;
866 
867  perf[0] = outer_length * inner_length ;
868  perf[1] = seconds_per_iter ;
869  perf[2] = flops / seconds_per_iter;
870  perf[3] = matrix.block.tensor().entry_count();
871  perf[4] = inner_length ;
872  perf[5] = flops_per_block;
873 
874  return perf ;
875 }
876 
877 template< typename ScalarType , class Device >
878 std::vector<double>
880  const std::vector<int> & var_degree ,
881  const int nGrid ,
882  const int iterCount ,
883  const bool symmetric )
884 {
885  typedef ScalarType value_type ;
886  typedef Kokkos::View< value_type** ,
887  Kokkos::LayoutLeft ,
888  Device > block_vector_type ;
889 
892 
894  typedef typename matrix_type::graph_type graph_type ;
895 
896  typedef ScalarType value_type ;
897  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
900  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
902 
903  using Teuchos::rcp;
904  using Teuchos::RCP;
905  using Teuchos::Array;
906 
907  // Create Stochastic Galerkin basis and expansion
908  const size_t num_KL = var_degree.size();
909  Array< RCP<const abstract_basis_type> > bases(num_KL);
910  for (size_t i=0; i<num_KL; i++) {
911  if (symmetric)
912  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
913  else
914  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
915  }
916  RCP<const product_basis_type> basis =
917  rcp(new product_basis_type(
919  RCP<Cijk_type> Cijk =
921 
922  //------------------------------
923  // Generate graph for "FEM" box structure:
924 
925  std::vector< std::vector<size_t> > graph ;
926 
927  const size_t outer_length = nGrid * nGrid * nGrid ;
928  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
929 
930  //------------------------------
931  // Generate CRS block-tensor matrix:
932 
933  matrix_type matrix ;
934 
935  matrix.block =
936  Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
937  *Cijk );
938  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
939 
940  const size_t inner_length = matrix.block.dimension();
941 
942  matrix.values = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length , graph_length );
943 
944  block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length , outer_length );
945  block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length , outer_length );
946 
947  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
948 
949  //------------------------------
950  // Generate input multivector:
951 
952  Kokkos::deep_copy( x , ScalarType(1.0) );
953 
954  //------------------------------
955 
956  Device().fence();
957  Kokkos::Impl::Timer clock ;
958  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
959  Stokhos::multiply( matrix , x , y );
960  }
961  Device().fence();
962 
963  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
964  const double flops_per_block = matrix.block.tensor().num_flops();
965  const double flops = 1.0e-9*graph_length*flops_per_block;
966 
967  // std::cout << "tensor: flops = " << flops
968  // << " time = " << seconds_per_iter << std::endl;
969 
970  std::vector<double> perf(6) ;
971 
972  perf[0] = outer_length * inner_length ;
973  perf[1] = seconds_per_iter ;
974  perf[2] = flops / seconds_per_iter;
975  perf[3] = matrix.block.tensor().num_non_zeros();
976  perf[4] = inner_length ;
977  perf[5] = flops_per_block;
978 
979  return perf ;
980 }
981 
982 template< typename ScalarType , class Device >
983 std::vector<double>
985  const std::vector<int> & var_degree ,
986  const int nGrid ,
987  const int iterCount ,
988  const bool symmetric )
989 {
990  typedef ScalarType value_type ;
991  typedef Kokkos::View< value_type** ,
992  Kokkos::LayoutLeft ,
993  Device > block_vector_type ;
994 
997 
999  typedef typename matrix_type::graph_type graph_type ;
1000 
1001  typedef ScalarType value_type ;
1002  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
1005  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
1007 
1008  using Teuchos::rcp;
1009  using Teuchos::RCP;
1010  using Teuchos::Array;
1011 
1012  // Create Stochastic Galerkin basis and expansion
1013  const size_t num_KL = var_degree.size();
1014  Array< RCP<const abstract_basis_type> > bases(num_KL);
1015  for (size_t i=0; i<num_KL; i++) {
1016  if (symmetric)
1017  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
1018  else
1019  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
1020  }
1021  RCP<const product_basis_type> basis =
1022  rcp(new product_basis_type(
1024  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1025 
1026  //------------------------------
1027  // Generate graph for "FEM" box structure:
1028 
1029  std::vector< std::vector<size_t> > graph ;
1030 
1031  const size_t outer_length = nGrid * nGrid * nGrid ;
1032  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
1033 
1034  //------------------------------
1035  // Generate CRS block-tensor matrix:
1036 
1037  matrix_type matrix ;
1038 
1039  Teuchos::ParameterList params;
1040  params.set("Symmetric", symmetric);
1041  matrix.block =
1042  Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
1043  *Cijk,
1044  params );
1045  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
1046 
1047  const size_t inner_length = matrix.block.tensor().dimension();
1048  const size_t inner_length_aligned = matrix.block.tensor().aligned_dimension();
1049 
1050  matrix.values = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
1051 
1052  block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
1053  block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
1054 
1055  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
1056 
1057  //------------------------------
1058  // Generate input multivector:
1059 
1060  Kokkos::deep_copy( x , ScalarType(1.0) );
1061 
1062  //------------------------------
1063 
1064  Device().fence();
1065  Kokkos::Impl::Timer clock ;
1066  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1067  Stokhos::multiply( matrix , x , y );
1068  }
1069  Device().fence();
1070 
1071  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1072  const double flops_per_block = matrix.block.tensor().num_flops();
1073  const double flops = 1.0e-9*graph_length*flops_per_block;
1074 
1075  // std::cout << "tensor: flops = " << flops
1076  // << " time = " << seconds_per_iter << std::endl;
1077 
1078  std::vector<double> perf(6) ;
1079 
1080  perf[0] = outer_length * inner_length ;
1081  perf[1] = seconds_per_iter ;
1082  perf[2] = flops / seconds_per_iter;
1083  perf[3] = matrix.block.tensor().num_non_zeros();
1084  perf[4] = inner_length ;
1085  perf[5] = flops_per_block;
1086 
1087  return perf ;
1088 }
1089 
1090 template< typename ScalarType , class Device , class SparseMatOps >
1091 std::vector<double>
1093  const std::vector<int> & var_degree ,
1094  const int nGrid ,
1095  const int iterCount ,
1096  const bool test_block ,
1097  const bool symmetric )
1098 {
1099  typedef ScalarType value_type ;
1100  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
1103  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
1105 
1106  using Teuchos::rcp;
1107  using Teuchos::RCP;
1108  using Teuchos::Array;
1109 
1110  // Create Stochastic Galerkin basis and expansion
1111  const size_t num_KL = var_degree.size();
1112  Array< RCP<const abstract_basis_type> > bases(num_KL);
1113  for (size_t i=0; i<num_KL; i++) {
1114  if (symmetric)
1115  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
1116  else
1117  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
1118  }
1119  RCP<const product_basis_type> basis =
1120  rcp(new product_basis_type(
1122  const size_t outer_length = basis->size();
1123  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1124 
1125  //------------------------------
1126 
1127  typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
1128  typedef typename matrix_type::values_type matrix_values_type;
1129  typedef typename matrix_type::graph_type matrix_graph_type;
1130 
1131  //------------------------------
1132  // Generate FEM graph:
1133 
1134  std::vector< std::vector<size_t> > fem_graph ;
1135 
1136  const size_t inner_length = nGrid * nGrid * nGrid ;
1137  const size_t graph_length =
1138  unit_test::generate_fem_graph( nGrid , fem_graph );
1139 
1140  //------------------------------
1141 
1142  typedef Kokkos::View<value_type*,Device> vec_type ;
1143 
1144  std::vector<matrix_type> matrix( outer_length ) ;
1145  std::vector<vec_type> x( outer_length ) ;
1146  std::vector<vec_type> y( outer_length ) ;
1147  std::vector<vec_type> tmp( outer_length ) ;
1148 
1149  for (size_t block=0; block<outer_length; ++block) {
1150  matrix[block].graph = Kokkos::create_staticcrsgraph<matrix_graph_type>( std::string("testing") , fem_graph );
1151 
1152  matrix[block].values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length );
1153 
1154  x[block] = vec_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length );
1155  y[block] = vec_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length );
1156  tmp[block] = vec_type( Kokkos::ViewAllocateWithoutInitializing("tmp"), inner_length );
1157 
1158  Kokkos::deep_copy( matrix[block].values , ScalarType(1.0) );
1159 
1160  Kokkos::deep_copy( x[block] , ScalarType(1.0) );
1161  Kokkos::deep_copy( y[block] , ScalarType(1.0) );
1162  }
1163 
1164  Device().fence();
1165  SparseMatOps smo;
1166  Kokkos::Impl::Timer clock ;
1167  int n_apply = 0;
1168  int n_add = 0;
1169  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1170 
1171  // Original matrix-free multiply algorithm using a block apply
1172  n_apply = 0;
1173  n_add = 0;
1174  typename Cijk_type::k_iterator k_begin = Cijk->k_begin();
1175  typename Cijk_type::k_iterator k_end = Cijk->k_end();
1176  for (typename Cijk_type::k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1177  int nj = Cijk->num_j(k_it);
1178  if (nj > 0) {
1179  int k = index(k_it);
1180  typename Cijk_type::kj_iterator j_begin = Cijk->j_begin(k_it);
1181  typename Cijk_type::kj_iterator j_end = Cijk->j_end(k_it);
1182  std::vector<vec_type> xx(nj), yy(nj);
1183  int jdx = 0;
1184  for (typename Cijk_type::kj_iterator j_it = j_begin; j_it != j_end;
1185  ++j_it) {
1186  int j = index(j_it);
1187  xx[jdx] = x[j];
1188  yy[jdx] = tmp[j];
1189  jdx++;
1190  }
1191  Stokhos::multiply( matrix[k] , xx , yy, smo );
1192  n_apply += nj;
1193  jdx = 0;
1194  for (typename Cijk_type::kj_iterator j_it = j_begin; j_it != j_end;
1195  ++j_it) {
1196  typename Cijk_type::kji_iterator i_begin = Cijk->i_begin(j_it);
1197  typename Cijk_type::kji_iterator i_end = Cijk->i_end(j_it);
1198  for (typename Cijk_type::kji_iterator i_it = i_begin; i_it != i_end;
1199  ++i_it) {
1200  int i = index(i_it);
1201  value_type c = value(i_it);
1202  Stokhos::update( value_type(1.0) , y[i] , c , yy[jdx] );
1203  ++n_add;
1204  }
1205  jdx++;
1206  }
1207  }
1208  }
1209 
1210  }
1211  Device().fence();
1212 
1213  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1214  const double flops = 1.0e-9*(2.0*static_cast<double>(n_apply)*graph_length+
1215  static_cast<double>(n_add)*inner_length);
1216 
1217  // std::cout << "mat-free: flops = " << flops
1218  // << " time = " << seconds_per_iter << std::endl;
1219 
1220  std::vector<double> perf(4);
1221  perf[0] = outer_length * inner_length;
1222  perf[1] = seconds_per_iter ;
1223  perf[2] = flops/seconds_per_iter;
1224  perf[3] = flops;
1225 
1226  return perf;
1227 }
1228 
1229 template< typename ScalarType , class Device , class SparseMatOps >
1230 std::vector<double>
1232  const std::vector<int> & var_degree ,
1233  const int nGrid ,
1234  const int iterCount ,
1235  const bool test_block ,
1236  const bool symmetric )
1237 {
1238  typedef ScalarType value_type ;
1239  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
1242  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
1244 
1245  using Teuchos::rcp;
1246  using Teuchos::RCP;
1247  using Teuchos::Array;
1248 
1249  // Create Stochastic Galerkin basis and expansion
1250  const size_t num_KL = var_degree.size();
1251  Array< RCP<const abstract_basis_type> > bases(num_KL);
1252  for (size_t i=0; i<num_KL; i++) {
1253  if (symmetric)
1254  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
1255  else
1256  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
1257  }
1258  RCP<const product_basis_type> basis =
1259  rcp(new product_basis_type(
1261  const size_t outer_length = basis->size();
1262  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1263 
1264  //------------------------------
1265 
1266  typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
1267  typedef typename matrix_type::values_type matrix_values_type;
1268  typedef typename matrix_type::graph_type matrix_graph_type;
1269 
1270  //------------------------------
1271  // Generate FEM graph:
1272 
1273  std::vector< std::vector<size_t> > fem_graph ;
1274 
1275  const size_t inner_length = nGrid * nGrid * nGrid ;
1276  const size_t graph_length =
1277  unit_test::generate_fem_graph( nGrid , fem_graph );
1278 
1279  //------------------------------
1280 
1281  typedef Kokkos::View<value_type*, Kokkos::LayoutLeft, Device, Kokkos::MemoryUnmanaged> vec_type ;
1282  typedef Kokkos::View<value_type**, Kokkos::LayoutLeft, Device> multi_vec_type ;
1283 
1284  std::vector<matrix_type> matrix( outer_length ) ;
1285  multi_vec_type x( Kokkos::ViewAllocateWithoutInitializing("x"),
1286  inner_length, outer_length ) ;
1287  multi_vec_type y("y", inner_length, outer_length ) ;
1288  multi_vec_type tmp_x( "tmp_x", inner_length, outer_length ) ;
1289  multi_vec_type tmp_y( "tmp_y", inner_length, outer_length ) ;
1290 
1291  Kokkos::deep_copy( x , ScalarType(1.0) );
1292 
1293  for (size_t block=0; block<outer_length; ++block) {
1294  matrix[block].graph = Kokkos::create_staticcrsgraph<matrix_graph_type>(
1295  std::string("testing") , fem_graph );
1296 
1297  matrix[block].values = matrix_values_type( "matrix" , graph_length );
1298 
1299  Kokkos::deep_copy( matrix[block].values , ScalarType(1.0) );
1300  }
1301 
1302  Device().fence();
1303  SparseMatOps smo;
1304  Kokkos::Impl::Timer clock ;
1305  int n_apply = 0;
1306  int n_add = 0;
1307  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1308 
1309  // Original matrix-free multiply algorithm using a block apply
1310  typedef typename Cijk_type::k_iterator k_iterator;
1311  typedef typename Cijk_type::kj_iterator kj_iterator;
1312  typedef typename Cijk_type::kji_iterator kji_iterator;
1313  n_apply = 0;
1314  n_add = 0;
1315  k_iterator k_begin = Cijk->k_begin();
1316  k_iterator k_end = Cijk->k_end();
1317  for (k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1318  unsigned nj = Cijk->num_j(k_it);
1319  if (nj > 0) {
1320  int k = index(k_it);
1321  kj_iterator j_begin = Cijk->j_begin(k_it);
1322  kj_iterator j_end = Cijk->j_end(k_it);
1323  std::vector<int> j_indices(nj);
1324  unsigned jdx = 0;
1325  for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1326  int j = index(j_it);
1327  vec_type xx = Kokkos::subview( x, Kokkos::ALL(), j );
1328  vec_type tt = Kokkos::subview( tmp_x, Kokkos::ALL(), jdx++ );
1329  Kokkos::deep_copy(tt, xx);
1330  }
1331  multi_vec_type tmp_x_view =
1332  Kokkos::subview( tmp_x, Kokkos::ALL(),
1333  std::make_pair(0u,nj));
1334  multi_vec_type tmp_y_view =
1335  Kokkos::subview( tmp_y, Kokkos::ALL(),
1336  std::make_pair(0u,nj));
1337  Stokhos::multiply( matrix[k] , tmp_x_view , tmp_y_view, smo );
1338  n_apply += nj;
1339  jdx = 0;
1340  for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1341  vec_type tmp_y_view =
1342  Kokkos::subview( tmp_y, Kokkos::ALL(), jdx++ );
1343  kji_iterator i_begin = Cijk->i_begin(j_it);
1344  kji_iterator i_end = Cijk->i_end(j_it);
1345  for (kji_iterator i_it = i_begin; i_it != i_end; ++i_it) {
1346  int i = index(i_it);
1347  value_type c = value(i_it);
1348  vec_type y_view = Kokkos::subview( y, Kokkos::ALL(), i );
1349  Stokhos::update( value_type(1.0) , y_view , c , tmp_y_view );
1350  ++n_add;
1351  }
1352  }
1353  }
1354  }
1355 
1356  }
1357  Device().fence();
1358 
1359  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1360  const double flops = 1.0e-9*(2.0*static_cast<double>(n_apply)*graph_length+
1361  static_cast<double>(n_add)*inner_length);
1362 
1363  // std::cout << "mat-free: flops = " << flops
1364  // << " time = " << seconds_per_iter << std::endl;
1365 
1366  std::vector<double> perf(4);
1367  perf[0] = outer_length * inner_length;
1368  perf[1] = seconds_per_iter ;
1369  perf[2] = flops/seconds_per_iter;
1370  perf[3] = flops;
1371 
1372  return perf;
1373 }
1374 
1375 #ifdef HAVE_STOKHOS_KOKKOSLINALG
1376 template< typename ScalarType , class Device >
1377 std::vector<double>
1378 test_original_matrix_free_kokkos(
1379  const std::vector<int> & var_degree ,
1380  const int nGrid ,
1381  const int iterCount ,
1382  const bool test_block ,
1383  const bool symmetric )
1384 {
1385  typedef ScalarType value_type ;
1386  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
1389  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
1391 
1392  using Teuchos::rcp;
1393  using Teuchos::RCP;
1394  using Teuchos::Array;
1395 
1396  // Create Stochastic Galerkin basis and expansion
1397  const size_t num_KL = var_degree.size();
1398  Array< RCP<const abstract_basis_type> > bases(num_KL);
1399  for (size_t i=0; i<num_KL; i++) {
1400  if (symmetric)
1401  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
1402  else
1403  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
1404  }
1405  RCP<const product_basis_type> basis =
1406  rcp(new product_basis_type(
1407  bases, ScalarTolerances<value_type>::sparse_cijk_tol()));
1408  const size_t outer_length = basis->size();
1409  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1410 
1411  //------------------------------
1412 
1413  typedef int ordinal_type;
1414  typedef KokkosSparse::CrsMatrix<value_type,ordinal_type,Device> matrix_type;
1415  typedef typename matrix_type::values_type matrix_values_type;
1416  typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
1417 
1418  //------------------------------
1419  // Generate FEM graph:
1420 
1421  std::vector< std::vector<size_t> > fem_graph ;
1422 
1423  const size_t inner_length = nGrid * nGrid * nGrid ;
1424  const size_t graph_length =
1425  unit_test::generate_fem_graph( nGrid , fem_graph );
1426 
1427  //------------------------------
1428 
1429  typedef Kokkos::View<value_type*,Kokkos::LayoutLeft,Device, Kokkos::MemoryUnmanaged> vec_type ;
1430  typedef Kokkos::View<value_type**, Kokkos::LayoutLeft, Device> multi_vec_type;
1431 
1432  std::vector<matrix_type> matrix( outer_length ) ;
1433  multi_vec_type x( Kokkos::ViewAllocateWithoutInitializing("x"),
1434  inner_length, outer_length ) ;
1435  multi_vec_type y( "y", inner_length, outer_length ) ;
1436  multi_vec_type tmp_x( "tmp_x", inner_length, outer_length ) ;
1437  multi_vec_type tmp_y( "tmp_y", inner_length, outer_length ) ;
1438 
1439  Kokkos::deep_copy( x , ScalarType(1.0) );
1440 
1441  for (size_t block=0; block<outer_length; ++block) {
1442  matrix_graph_type matrix_graph =
1443  Kokkos::create_staticcrsgraph<matrix_graph_type>(
1444  std::string("test crs graph") , fem_graph );
1445 
1446  matrix_values_type matrix_values = matrix_values_type(
1447  Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length );
1448  Kokkos::deep_copy(matrix_values , ScalarType(1.0) );
1449  matrix[block] = matrix_type("matrix", outer_length, matrix_values,
1450  matrix_graph);
1451  }
1452 
1453  Device().fence();
1454  Kokkos::Impl::Timer clock ;
1455  int n_apply = 0;
1456  int n_add = 0;
1457  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1458 
1459  // Original matrix-free multiply algorithm using a block apply
1460  typedef typename Cijk_type::k_iterator k_iterator;
1461  typedef typename Cijk_type::kj_iterator kj_iterator;
1462  typedef typename Cijk_type::kji_iterator kji_iterator;
1463  n_apply = 0;
1464  n_add = 0;
1465  k_iterator k_begin = Cijk->k_begin();
1466  k_iterator k_end = Cijk->k_end();
1467  for (k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1468  unsigned nj = Cijk->num_j(k_it);
1469  if (nj > 0) {
1470  int k = index(k_it);
1471  kj_iterator j_begin = Cijk->j_begin(k_it);
1472  kj_iterator j_end = Cijk->j_end(k_it);
1473  unsigned jdx = 0;
1474  for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1475  int j = index(j_it);
1476  vec_type xx = Kokkos::subview( x, Kokkos::ALL(), j );
1477  vec_type tt = Kokkos::subview( tmp_x, Kokkos::ALL(), jdx++ );
1478  Kokkos::deep_copy(tt, xx);
1479  }
1480  multi_vec_type tmp_x_view =
1481  Kokkos::subview( tmp_x, Kokkos::ALL(),
1482  std::make_pair(0u,nj));
1483  multi_vec_type tmp_y_view =
1484  Kokkos::subview( tmp_y, Kokkos::ALL(),
1485  std::make_pair(0u,nj));
1486  KokkosSparse::spmv( "N" , value_type(1.0) , matrix[k] , tmp_x_view , value_type(0.0) , tmp_y_view );
1487  n_apply += nj;
1488  jdx = 0;
1489  for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1490  vec_type tmp_y_view =
1491  Kokkos::subview( tmp_y, Kokkos::ALL(), jdx++ );
1492  kji_iterator i_begin = Cijk->i_begin(j_it);
1493  kji_iterator i_end = Cijk->i_end(j_it);
1494  for (kji_iterator i_it = i_begin; i_it != i_end; ++i_it) {
1495  int i = index(i_it);
1496  value_type c = value(i_it);
1497  vec_type y_view = Kokkos::subview( y, Kokkos::ALL(), i );
1498  //Stokhos::update( value_type(1.0) , y_view , c , tmp_y_view );
1499  KokkosBlas::update(value_type(1.0) , y_view, c, tmp_y_view, value_type(0.0), y_view);
1500  ++n_add;
1501  }
1502  }
1503  }
1504  }
1505 
1506  }
1507  Device().fence();
1508 
1509  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1510  const double flops = 1.0e-9*(2.0*static_cast<double>(n_apply)*graph_length+
1511  static_cast<double>(n_add)*inner_length);
1512 
1513  // std::cout << "mat-free: flops = " << flops
1514  // << " time = " << seconds_per_iter << std::endl;
1515 
1516  std::vector<double> perf(4);
1517  perf[0] = outer_length * inner_length;
1518  perf[1] = seconds_per_iter ;
1519  perf[2] = flops/seconds_per_iter;
1520  perf[3] = flops;
1521 
1522  return perf;
1523 }
1524 #endif
1525 
1526 template< class Scalar, class Device >
1527 void performance_test_driver_all( const int pdeg ,
1528  const int minvar ,
1529  const int maxvar ,
1530  const int nGrid ,
1531  const int nIter ,
1532  const bool test_block ,
1533  const bool symmetric )
1534 {
1535  //------------------------------
1536  // Generate FEM graph:
1537 
1538  std::vector< std::vector<size_t> > fem_graph ;
1539 
1540  const size_t fem_nonzeros =
1541  unit_test::generate_fem_graph( nGrid , fem_graph );
1542 
1543  //------------------------------
1544 
1545  std::cout.precision(8);
1546 
1547  //------------------------------
1548 
1549  std::cout << std::endl << "\"FEM NNZ = " << fem_nonzeros << "\"" << std::endl;
1550 
1551  std::cout << std::endl
1552  << "\"#nGrid\" , "
1553  << "\"#Variable\" , "
1554  << "\"PolyDegree\" , "
1555  << "\"#Bases\" , "
1556  << "\"#TensorEntry\" , "
1557  << "\"VectorSize\" , "
1558  << "\"Original-Flat MXV-Time\" , "
1559  << "\"Original-Flat MXV-Speedup\" , "
1560  << "\"Original-Flat MXV-GFLOPS\" , "
1561  << "\"Commuted-Flat MXV-Speedup\" , "
1562  << "\"Commuted-Flat MXV-GFLOPS\" , "
1563  << "\"Block-Diagonal MXV-Speedup\" , "
1564  << "\"Block-Diagonal MXV-GFLOPS\" , "
1565  << "\"Block-Crs-Tensor MXV-Speedup\" , "
1566  << "\"Block-Crs-Tensor MXV-GFLOPS\" , "
1567  << std::endl ;
1568 
1569  for ( int nvar = minvar ; nvar <= maxvar ; ++nvar ) {
1570 
1571  std::vector<int> var_degree( nvar , pdeg );
1572 
1573  //------------------------------
1574 
1575  const std::vector<double> perf_flat_original =
1576  test_product_flat_original_matrix<Scalar,Device>(
1577  var_degree , nGrid , nIter , symmetric );
1578 
1579  const std::vector<double> perf_flat_commuted =
1580  test_product_flat_commuted_matrix<Scalar,Device>(
1581  var_degree , nGrid , nIter , symmetric );
1582 
1583  const std::vector<double> perf_matrix =
1584  test_product_tensor_diagonal_matrix<Scalar,Device>(
1585  var_degree , nGrid , nIter , symmetric );
1586 
1587  const std::vector<double> perf_crs_tensor =
1588  test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1589  var_degree , nGrid , nIter , symmetric );
1590 
1591  if ( perf_flat_commuted[0] != perf_flat_original[0] ||
1592  perf_flat_commuted[3] != perf_flat_original[3] ) {
1593  std::cout << "ERROR: Original and commuted matrix sizes do not match"
1594  << std::endl
1595  << " original size = " << perf_flat_original[0]
1596  << " , nonzero = " << perf_flat_original[3]
1597  << std::endl
1598  << " commuted size = " << perf_flat_commuted[0]
1599  << " , nonzero = " << perf_flat_commuted[3]
1600  << std::endl ;
1601  }
1602 
1603  std::cout << nGrid << " , "
1604  << nvar << " , "
1605  << pdeg << " , "
1606  << perf_crs_tensor[4] << " , "
1607  << perf_crs_tensor[3] << " , "
1608  << perf_flat_original[0] << " , "
1609  << perf_flat_original[1] << " , "
1610  << perf_flat_original[1] / perf_flat_original[1] << " , "
1611  << perf_flat_original[2] << " , "
1612  << perf_flat_original[1] / perf_flat_commuted[1] << " , "
1613  << perf_flat_commuted[2] << " , "
1614  << perf_flat_original[1] / perf_matrix[1] << " , "
1615  << perf_matrix[2] << " , "
1616  << perf_flat_original[1] / perf_crs_tensor[1] << " , "
1617  << perf_crs_tensor[2] << " , "
1618  << std::endl ;
1619  }
1620 
1621  //------------------------------
1622 }
1623 
1624 template< class Scalar, class Device , class SparseMatOps >
1625 void performance_test_driver_poly( const int pdeg ,
1626  const int minvar ,
1627  const int maxvar ,
1628  const int nGrid ,
1629  const int nIter ,
1630  const bool test_block ,
1631  const bool symmetric )
1632 {
1633  std::cout.precision(8);
1634 
1635  //------------------------------
1636 
1637  std::vector< std::vector<size_t> > fem_graph ;
1638  const size_t graph_length =
1639  unit_test::generate_fem_graph( nGrid , fem_graph );
1640  std::cout << std::endl << "\"FEM NNZ = " << graph_length << "\"" << std::endl;
1641 
1642  std::cout << std::endl
1643  << "\"#nGrid\" , "
1644  << "\"#Variable\" , "
1645  << "\"PolyDegree\" , "
1646  << "\"#Bases\" , "
1647  << "\"#TensorEntry\" , "
1648  << "\"VectorSize\" , "
1649  << "\"Original-Matrix-Free-Block-MXV-Time\" , "
1650  << "\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1651  << "\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1652  << "\"Block-Crs-Tensor MXV-Speedup\" , "
1653  << "\"Block-Crs-Tensor MXV-GFLOPS\" , "
1654  // << "\"Block-Coo-Tensor MXV-Speedup\" , "
1655  // << "\"Block-Coo-Tensor MXV-GFLOPS\" , "
1656  // << "\"Block-Tiled Crs-Tensor MXV-Speedup\" , "
1657  // << "\"Block-Tiled Crs-3-Tensor MXV-GFLOPS\" , "
1658  << std::endl ;
1659 
1660  for ( int nvar = minvar ; nvar <= maxvar ; ++nvar ) {
1661  std::vector<int> var_degree( nvar , pdeg );
1662 
1663  const std::vector<double> perf_crs_tensor =
1664  test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1665  var_degree , nGrid , nIter , symmetric );
1666 
1667  // const bool Pack = std::is_same<Device,Kokkos::Cuda>::value;
1668  // const std::vector<double> perf_coo_tensor =
1669  // test_product_tensor_matrix<Scalar,Stokhos::CooProductTensor<Scalar,Device,Pack>,Device>(
1670  // var_degree , nGrid , nIter , symmetric );
1671 
1672  // const std::vector<double> perf_tiled_crs_tensor =
1673  // test_simple_tiled_product_tensor_matrix<Scalar,Device>(
1674  // var_degree , nGrid , nIter , symmetric );
1675 
1676  std::vector<double> perf_original_mat_free_block;
1677 #if defined(HAVE_STOKHOS_KOKKOSLINALG)
1678 #if defined( KOKKOS_ENABLE_CUDA )
1679  enum { is_cuda = std::is_same<Device,Kokkos::Cuda>::value };
1680 #else
1681  enum { is_cuda = false };
1682 #endif
1683  if ( is_cuda )
1684  perf_original_mat_free_block =
1685  test_original_matrix_free_kokkos<Scalar,Device>(
1686  var_degree , nGrid , nIter , test_block , symmetric );
1687  else
1688  perf_original_mat_free_block =
1689  test_original_matrix_free_view<Scalar,Device,SparseMatOps>(
1690  var_degree , nGrid , nIter , test_block , symmetric );
1691 #else
1692  perf_original_mat_free_block =
1693  test_original_matrix_free_view<Scalar,Device,SparseMatOps>(
1694  var_degree , nGrid , nIter , test_block , symmetric );
1695 #endif
1696 
1697  std::cout << nGrid << " , "
1698  << nvar << " , "
1699  << pdeg << " , "
1700  << perf_crs_tensor[4] << " , "
1701  << perf_crs_tensor[3] << " , "
1702  << perf_original_mat_free_block[0] << " , "
1703  << perf_original_mat_free_block[1] << " , "
1704  << perf_original_mat_free_block[1] /
1705  perf_original_mat_free_block[1] << " , "
1706  << perf_original_mat_free_block[2] << " , "
1707  << perf_original_mat_free_block[1] / perf_crs_tensor[1] << " , "
1708  << perf_crs_tensor[2] << " , "
1709  // << perf_original_mat_free_block[1] / perf_coo_tensor[1] << " , "
1710  // << perf_coo_tensor[2] << " , "
1711  // << perf_original_mat_free_block[1] / perf_tiled_crs_tensor[1] << " , "
1712  // << perf_tiled_crs_tensor[2]
1713  << std::endl ;
1714  }
1715 
1716  //------------------------------
1717 }
1718 
1719 template< class Scalar, class Device , class SparseMatOps >
1720 void performance_test_driver_poly_deg( const int nvar ,
1721  const int minp ,
1722  const int maxp ,
1723  const int nGrid ,
1724  const int nIter ,
1725  const bool test_block ,
1726  const bool symmetric )
1727 {
1728  bool do_flat_sparse =
1729  std::is_same<typename Device::memory_space,Kokkos::HostSpace>::value ;
1730 
1731  std::cout.precision(8);
1732 
1733  //------------------------------
1734 
1735  std::vector< std::vector<size_t> > fem_graph ;
1736  const size_t graph_length =
1737  unit_test::generate_fem_graph( nGrid , fem_graph );
1738  std::cout << std::endl << "\"FEM NNZ = " << graph_length << "\"" << std::endl;
1739 
1740  std::cout << std::endl
1741  << "\"#nGrid\" , "
1742  << "\"#Variable\" , "
1743  << "\"PolyDegree\" , "
1744  << "\"#Bases\" , "
1745  << "\"#TensorEntry\" , "
1746  << "\"VectorSize\" , "
1747  << "\"Original-Matrix-Free-Block-MXV-Time\" , "
1748  << "\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1749  << "\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1750  << "\"Block-Crs-Tensor MXV-Speedup\" , "
1751  << "\"Block-Crs-Tensor MXV-GFLOPS\" , ";
1752  if (do_flat_sparse)
1753  std::cout << "\"Block-Lexicographic-Sparse-3-Tensor MXV-Speedup\" , "
1754  << "\"Block-Lexicographic-Sparse-3-Tensor MXV-GFLOPS\" , "
1755  << "\"Lexicographic FLOPS / Crs FLOPS\" , ";
1756  std::cout << std::endl ;
1757 
1758  for ( int p = minp ; p <= maxp ; ++p ) {
1759  std::vector<int> var_degree( nvar , p );
1760 
1761  const std::vector<double> perf_crs_tensor =
1762  test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1763  var_degree , nGrid , nIter , symmetric );
1764 
1765  std::vector<double> perf_lexo_sparse_3_tensor;
1766  if (do_flat_sparse) {
1767  perf_lexo_sparse_3_tensor =
1768  test_lexo_block_tensor<Scalar,Device>( var_degree , nGrid , nIter , symmetric );
1769  }
1770 
1771  const std::vector<double> perf_original_mat_free_block =
1772  test_original_matrix_free_vec<Scalar,Device,SparseMatOps>(
1773  var_degree , nGrid , nIter , test_block , symmetric );
1774 
1775  std::cout << nGrid << " , "
1776  << nvar << " , "
1777  << p << " , "
1778  << perf_crs_tensor[4] << " , "
1779  << perf_crs_tensor[3] << " , "
1780  << perf_original_mat_free_block[0] << " , "
1781  << perf_original_mat_free_block[1] << " , "
1782  << perf_original_mat_free_block[1] / perf_original_mat_free_block[1] << " , "
1783  << perf_original_mat_free_block[2] << " , "
1784  << perf_original_mat_free_block[1] / perf_crs_tensor[1] << " , "
1785  << perf_crs_tensor[2] << " , ";
1786  if (do_flat_sparse) {
1787  std::cout << perf_original_mat_free_block[1] / perf_lexo_sparse_3_tensor[1] << " , "
1788  << perf_lexo_sparse_3_tensor[2] << " , "
1789  << perf_lexo_sparse_3_tensor[5] / perf_crs_tensor[5];
1790  }
1791 
1792 
1793  std::cout << std::endl ;
1794  }
1795 
1796  //------------------------------
1797 }
1798 
1799 template< class Scalar, class Device , class SparseMatOps >
1800 void performance_test_driver_linear( const int minvar ,
1801  const int maxvar ,
1802  const int varinc ,
1803  const int nGrid ,
1804  const int nIter ,
1805  const bool test_block ,
1806  const bool symmetric )
1807 {
1808  std::cout.precision(8);
1809 
1810  //------------------------------
1811 
1812  std::vector< std::vector<size_t> > fem_graph ;
1813  const size_t graph_length =
1814  unit_test::generate_fem_graph( nGrid , fem_graph );
1815  std::cout << std::endl << "\"FEM NNZ = " << graph_length << "\"" << std::endl;
1816 
1817  std::cout << std::endl
1818  << "\"#nGrid\" , "
1819  << "\"#Variable\" , "
1820  << "\"PolyDegree\" , "
1821  << "\"#Bases\" , "
1822  << "\"#TensorEntry\" , "
1823  << "\"VectorSize\" , "
1824  << "\"Original-Matrix-Free-Block-MXV-Time\" , "
1825  << "\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1826  << "\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1827  << "\"Block-Crs-Tensor MXV-Speedup\" , "
1828  << "\"Block-Crs-Tensor MXV-GFLOPS\" , "
1829  << "\"Linear-Sparse-3-Tensor MXV-Speedup\" , "
1830  << "\"Linear-Sparse-3-Tensor MXV-GFLOPS\" , "
1831  << std::endl ;
1832 
1833  for ( int nvar = minvar ; nvar <= maxvar ; nvar+=varinc ) {
1834  std::vector<int> var_degree( nvar , 1 );
1835 
1836  const std::vector<double> perf_crs_tensor =
1837  test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1838  var_degree , nGrid , nIter , symmetric );
1839 
1840  const std::vector<double> perf_linear_sparse_3_tensor =
1841  test_linear_tensor<Scalar,Device>( var_degree , nGrid , nIter , symmetric );
1842 
1843  const std::vector<double> perf_original_mat_free_block =
1844  test_original_matrix_free_vec<Scalar,Device,SparseMatOps>(
1845  var_degree , nGrid , nIter , test_block , symmetric );
1846 
1847  std::cout << nGrid << " , "
1848  << nvar << " , "
1849  << 1 << " , "
1850  << perf_crs_tensor[4] << " , "
1851  << perf_crs_tensor[3] << " , "
1852  << perf_original_mat_free_block[0] << " , "
1853  << perf_original_mat_free_block[1] << " , "
1854  << perf_original_mat_free_block[1] / perf_original_mat_free_block[1] << " , "
1855  << perf_original_mat_free_block[2] << " , "
1856  << perf_original_mat_free_block[1] / perf_crs_tensor[1] << " , "
1857  << perf_crs_tensor[2] << " , "
1858  << perf_original_mat_free_block[1] / perf_linear_sparse_3_tensor[1] << " , "
1859  << perf_linear_sparse_3_tensor[2] << " , "
1860  << std::endl ;
1861  }
1862 
1863  //------------------------------
1864 }
1865 
1866 template< class Scalar, class Device >
1868 
1869 //----------------------------------------------------------------------------
1870 
1871 }
std::vector< double > test_tiled_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
void performance_test_driver_linear(const int minvar, const int maxvar, const int varinc, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Bases defined by combinatorial product of polynomial bases.
std::vector< double > test_product_tensor_diagonal_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Multivariate orthogonal polynomial basis generated from a total order tensor product of univariate po...
Data structure storing a sparse 3-tensor C(i,j,k) in a a compressed format.
size_t generate_fem_graph(size_t N, std::vector< std::vector< size_t > > &graph)
void performance_test_driver_poly_deg(const int nvar, const int minp, const int maxp, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Symmetric diagonal storage for a dense matrix.
std::vector< double > test_product_flat_original_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Teuchos::RCP< LTBSparse3Tensor< ordinal_type, value_type > > computeTripleProductTensorLTBBlockLeaf(const TotalOrderBasis< ordinal_type, value_type, LexographicLess< MultiIndex< ordinal_type > > > &product_basis, bool symmetric=false)
std::vector< double > test_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Stokhos::LegendreBasis< int, double > basis_type
Data structure storing a sparse 3-tensor C(i,j,k) in a a tree-based format for lexicographically orde...
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
Definition: csr_vector.h:260
std::vector< double > test_lexo_block_tensor(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
void performance_test_driver_all(const int pdeg, const int minvar, const int maxvar, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Jacobi polynomial basis.
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
std::vector< double > test_simple_tiled_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< XD, XP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< YD, YP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< ZD, ZP... > >::value >::type update(const typename Kokkos::View< XD, XP... >::array_type::non_const_value_type &alpha, const Kokkos::View< XD, XP... > &x, const typename Kokkos::View< YD, YP... >::array_type::non_const_value_type &beta, const Kokkos::View< YD, YP... > &y, const typename Kokkos::View< ZD, ZP... >::array_type::non_const_value_type &gamma, const Kokkos::View< ZD, ZP... > &z)
Stokhos::Sparse3Tensor< int, double > Cijk_type
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
Abstract base class for 1-D orthogonal polynomials.
Teuchos::RCP< Epetra_CrsGraph > sparse3Tensor2CrsGraph(const Stokhos::OrthogPolyBasis< ordinal_type, value_type > &basis, const Stokhos::Sparse3Tensor< ordinal_type, value_type > &Cijk, const Epetra_Comm &comm)
Build an Epetra_CrsGraph from a sparse 3 tensor.
CRS matrix of dense blocks.
A comparison functor implementing a strict weak ordering based lexographic ordering.
Sacado::MP::Vector< storage_type > vec_type
std::vector< double > test_linear_tensor(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
std::vector< double > test_original_matrix_free_vec(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool test_block, const bool symmetric)
void performance_test_driver_poly(const int pdeg, const int minvar, const int maxvar, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
std::vector< double > test_product_flat_commuted_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
Definition: csr_vector.h:267
void update(const ValueType &alpha, VectorType &x, const ValueType &beta, const VectorType &y)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)
std::vector< double > test_original_matrix_free_view(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool test_block, const bool symmetric)