108 const Teuchos::Array<int> & var_degree ,
110 const int iterCount ,
111 const bool print_flag ,
112 const bool test_block ,
115 typedef double value_type ;
123 using Teuchos::Array;
124 using Teuchos::ParameterList;
127 RCP<const Epetra_Comm> globalComm;
129 RCP<const Epetra_MpiComm> globalMpiComm =
132 globalComm = globalMpiComm;
134 RCP<const Epetra_SerialComm> globalSerialComm =
137 globalComm = globalSerialComm;
143 const size_t num_KL = var_degree.size();
144 Array< RCP<const abstract_basis_type> > bases(num_KL);
145 for (
size_t i=0; i<num_KL; i++)
146 bases[i] = rcp(
new basis_type(var_degree[i],
true));
147 RCP<const product_basis_type> basis =
148 rcp(
new product_basis_type(bases, 1e-12));
149 const size_t stoch_length = basis->size();
150 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
153 ParameterList parallelParams;
154 RCP<Stokhos::ParallelData> sg_parallel_data =
156 RCP<const EpetraExt::MultiComm> sg_comm =
157 sg_parallel_data->getMultiComm();
158 RCP<const Epetra_Comm> app_comm =
159 sg_parallel_data->getSpatialComm();
160 RCP<const Stokhos::EpetraSparse3Tensor> epetraCijk =
161 sg_parallel_data->getEpetraCijk();
162 RCP<const Epetra_BlockMap> stoch_row_map =
163 epetraCijk->getStochasticRowMap();
168 Teuchos::Array< Teuchos::Array<int> > fem_graph ;
170 const size_t fem_length = nGrid * nGrid * nGrid ;
176 RCP<const Epetra_Map> x_map = rcp(
new Epetra_Map(
static_cast<int>(fem_length), 0, *app_comm));
177 RCP<const Epetra_Map> sg_x_map =
178 rcp(EpetraExt::BlockUtility::GenerateBlockMap(
179 *x_map, *stoch_row_map, *sg_comm));
182 int *my_GIDs = x_map->MyGlobalElements();
183 int num_my_GIDs = x_map->NumMyElements();
184 for (
int i=0; i<num_my_GIDs; ++i) {
185 int row = my_GIDs[i];
186 int num_indices = fem_graph[row].size();
187 int *indices = &(fem_graph[row][0]);
188 graph->InsertGlobalIndices(row, num_indices, indices);
190 graph->FillComplete();
191 int nnz = graph->NumGlobalNonzeros();
193 RCP<ParameterList> sg_op_params = rcp(
new ParameterList);
194 RCP<Stokhos::MatrixFreeOperator> sg_A =
196 x_map, x_map, sg_x_map, sg_x_map,
198 RCP<Epetra_BlockMap> sg_A_overlap_map =
200 static_cast<int>(stoch_length), 0, *(sg_parallel_data->getStochasticComm())));
201 RCP< Stokhos::EpetraOperatorOrthogPoly > A_sg_blocks =
203 basis, sg_A_overlap_map, x_map, x_map, sg_x_map, sg_comm));
204 for (
unsigned int i=0; i<stoch_length; i++) {
208 A_sg_blocks->setCoeffPtr(i, A);
210 sg_A->setupOperator(A_sg_blocks);
212 RCP<Stokhos::EpetraVectorOrthogPoly> sg_x =
214 basis, stoch_row_map, x_map, sg_x_map, sg_comm));
215 RCP<Stokhos::EpetraVectorOrthogPoly> sg_y =
217 basis, stoch_row_map, x_map, sg_x_map, sg_comm));
223 Teuchos::Time clock(
"apply") ;
225 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
226 sg_A->Apply( *(sg_x->getBlockVector()), *(sg_y->getBlockVector()) );
229 double seconds_per_iter =
230 clock.totalElapsedTime() / ((
double) iterCount );
233 double average_seconds_per_iter;
234 globalComm->SumAll(&seconds_per_iter, &average_seconds_per_iter, 1);
235 average_seconds_per_iter /= globalComm->NumProc();
240 for (Cijk_type::k_iterator k_it=Cijk->k_begin();
241 k_it!=Cijk->k_end(); ++k_it) {
242 for (Cijk_type::kj_iterator j_it = Cijk->j_begin(k_it);
243 j_it != Cijk->j_end(k_it); ++j_it) {
245 for (Cijk_type::kji_iterator i_it = Cijk->i_begin(j_it);
246 i_it != Cijk->i_end(j_it); ++i_it) {
252 const double flops = 1.0e-9*(2.0*
static_cast<double>(n_apply)*nnz +
253 static_cast<double>(n_add)*fem_length);
257 Teuchos::Array<double> perf(8);
258 perf[0] = stoch_length;
259 perf[1] = fem_length;
260 perf[2] = stoch_length * fem_length;
261 perf[3] = Cijk->num_entries();
263 perf[5] = average_seconds_per_iter ;
264 perf[6] = flops/average_seconds_per_iter;
276 const bool test_block ,
278 Teuchos::FancyOStream& out)
285 <<
"\"#nGrid\" , \"NNZ\" "
286 <<
"\"#Variable\" , \"PolyDegree\" , \"#Bases\" , "
287 <<
"\"#TensorEntry\" , "
288 <<
"\"VectorSize\" , "
289 <<
"\"Original-Matrix-Free-Block-MXV-Time\" , "
290 <<
"\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
293 for (
int nvar = minvar ; nvar <= maxvar ; ++nvar ) {
294 Teuchos::Array<int> var_degree( nvar , pdeg );
296 const Teuchos::Array<double> perf_original_mat_free_epetra =
299 out << nGrid <<
" , "
300 << perf_original_mat_free_epetra[4] <<
" , "
301 << nvar <<
" , " << pdeg <<
" , "
302 << perf_original_mat_free_epetra[0] <<
" , "
303 << perf_original_mat_free_epetra[3] <<
" , "
304 << perf_original_mat_free_epetra[2] <<
" , "
305 << perf_original_mat_free_epetra[5] <<
" , "
306 << perf_original_mat_free_epetra[6] <<
" , "
int check(Epetra_CrsGraph &A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int *MyGlobalElements, bool verbose)