244 unsigned int ndot,
unsigned int nloop,
bool use_dynamic)
247 unsigned int sz = (m*k+k*n+m*n)*(1+ndot);
248 Teuchos::BLAS<int,FadType> blas(
false,use_dynamic,sz);
252 for (
unsigned int j=0; j<k; j++) {
253 for (
unsigned int i=0;
i<m;
i++) {
255 for (
unsigned int l=0; l<ndot; l++)
259 for (
unsigned int j=0; j<n; j++) {
260 for (
unsigned int i=0;
i<k;
i++) {
262 for (
unsigned int l=0; l<ndot; l++)
266 for (
unsigned int j=0; j<n; j++) {
267 for (
unsigned int i=0;
i<m;
i++) {
269 for (
unsigned int l=0; l<ndot; l++)
275 for (
unsigned int l=0; l<ndot; l++) {
276 alpha.fastAccessDx(l) = urand.
number();
277 beta.fastAccessDx(l) = urand.
number();
280 Teuchos::Time timer(
"Teuchos Fad GEMM",
false);
282 for (
unsigned int j=0; j<nloop; j++) {
283 blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, k, alpha, &A[0], m,
284 &
B[0], k, beta, &
C[0], m);
288 return timer.totalElapsedTime() / nloop;
363int main(
int argc,
char* argv[]) {
372 Teuchos::CommandLineProcessor clp;
373 clp.setDocString(
"This program tests the speed of differentiating BLAS routines using Fad");
375 clp.setOption(
"m", &m,
"Number of rows");
377 clp.setOption(
"n", &n,
"Number of columns");
379 clp.setOption(
"k", &k,
"Number of columns for GEMM");
381 clp.setOption(
"ndot", &ndot,
"Number of derivative components");
383 clp.setOption(
"nloop", &nloop,
"Number of loops");
385 clp.setOption(
"dynamic", &dynamic,
"Use dynamic allocation");
388 Teuchos::CommandLineProcessor::EParseCommandLineReturn
389 parseReturn= clp.parse(argc, argv);
390 if(parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
392 bool use_dynamic = (dynamic != 0);
394 std::cout.setf(std::ios::scientific);
395 std::cout.precision(
p);
396 std::cout <<
"Times (sec) for m = " << m <<
", n = " << n
397 <<
", ndot = " << ndot <<
", nloop = " << nloop
398 <<
", dynamic = " << use_dynamic <<
": "
402 std::cout <<
"GEMM: " << std::setw(w) << tb << std::endl;
404 t = do_time_sacado_fad_gemm< Sacado::Fad::DVFad<double> >(m,n,k,ndot,nloop,use_dynamic);
405 std::cout <<
"Sacado DVFad GEMM: " << std::setw(w) << t <<
"\t"
406 << std::setw(w) << t/tb << std::endl;
408 t = do_time_sacado_fad_gemm< Sacado::Fad::DFad<double> >(m,n,k,ndot,nloop,use_dynamic);
409 std::cout <<
"Sacado DFad GEMM: " << std::setw(w) << t <<
"\t"
410 << std::setw(w) << t/tb << std::endl;
412 t = do_time_teuchos_fad_gemm< Sacado::Fad::DFad<double> >(m,n,k,ndot,nloop);
413 std::cout <<
"Teuchos DFad GEMM: " << std::setw(w) << t <<
"\t"
414 << std::setw(w) << t/tb << std::endl;
420 t = do_time_teuchos_fad_gemm< Sacado::Fad::DVFad<double> >(m,n,k,ndot,nloop);
421 std::cout <<
"Teuchos DVFad GEMM: " << std::setw(w) << t <<
"\t"
422 << std::setw(w) << t/tb << std::endl;
424 std::cout << std::endl;
427 std::cout <<
"GEMV: " << std::setw(w) << tb << std::endl;
429 t = do_time_sacado_fad_gemv< Sacado::Fad::DVFad<double> >(m,n,ndot,nloop*10,use_dynamic);
430 std::cout <<
"Sacado DVFad GEMV: " << std::setw(w) << t <<
"\t"
431 << std::setw(w) << t/tb << std::endl;
433 t = do_time_sacado_fad_gemv< Sacado::Fad::DFad<double> >(m,n,ndot,nloop*10,use_dynamic);
434 std::cout <<
"Sacado DFad GEMV: " << std::setw(w) << t <<
"\t"
435 << std::setw(w) << t/tb << std::endl;
437 t = do_time_teuchos_fad_gemv< Sacado::Fad::DFad<double> >(m,n,ndot,nloop*10);
438 std::cout <<
"Teuchos DFad GEMV: " << std::setw(w) << t <<
"\t"
439 << std::setw(w) << t/tb << std::endl;
445 t = do_time_teuchos_fad_gemv< Sacado::Fad::DVFad<double> >(m,n,ndot,nloop*10);
446 std::cout <<
"Teuchos DVFad GEMV: " << std::setw(w) << t <<
"\t"
447 << std::setw(w) << t/tb << std::endl;
449 std::cout << std::endl;
452 std::cout <<
"DOT: " << std::setw(w) << tb << std::endl;
454 t = do_time_sacado_fad_dot< Sacado::Fad::DVFad<double> >(m,ndot,nloop*100,use_dynamic);
455 std::cout <<
"Sacado DVFad DOT: " << std::setw(w) << t <<
"\t"
456 << std::setw(w) << t/tb << std::endl;
458 t = do_time_sacado_fad_dot< Sacado::Fad::DFad<double> >(m,ndot,nloop*100,use_dynamic);
459 std::cout <<
"Sacado DFad DOT: " << std::setw(w) << t <<
"\t"
460 << std::setw(w) << t/tb << std::endl;
462 t = do_time_teuchos_fad_dot< Sacado::Fad::DFad<double> >(m,ndot,nloop*100);
463 std::cout <<
"Teuchos DFad DOT: " << std::setw(w) << t <<
"\t"
464 << std::setw(w) << t/tb << std::endl;
470 t = do_time_teuchos_fad_dot< Sacado::Fad::DVFad<double> >(m,ndot,nloop*100);
471 std::cout <<
"Teuchos DVFad DOT: " << std::setw(w) << t <<
"\t"
472 << std::setw(w) << t/tb << std::endl;
475 catch (std::exception& e) {
476 std::cout << e.what() << std::endl;
479 catch (
const char *s) {
480 std::cout << s << std::endl;
484 std::cout <<
"Caught unknown exception!" << std::endl;