1054 Teuchos::LAPACK<int,ScalarType> lapack;
1055 std::vector<int> index(recycleBlocks_);
1056 ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
1057 ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
1066 setParameters(Teuchos::parameterList(*getValidParameters()));
1070 "Belos::RCGSolMgr::solve(): Linear problem is not a valid object.");
1072 "Belos::RCGSolMgr::solve(): Linear problem is not ready, setProblem() has not been called.");
1073 TEUCHOS_TEST_FOR_EXCEPTION((problem_->getLeftPrec() != Teuchos::null)&&(problem_->getRightPrec() != Teuchos::null),
1075 "Belos::RCGSolMgr::solve(): RCG does not support split preconditioning, only set left or right preconditioner.");
1078 Teuchos::RCP<OP> precObj;
1079 if (problem_->getLeftPrec() != Teuchos::null) {
1080 precObj = Teuchos::rcp_const_cast<OP>(problem_->getLeftPrec());
1082 else if (problem_->getRightPrec() != Teuchos::null) {
1083 precObj = Teuchos::rcp_const_cast<OP>(problem_->getRightPrec());
1087 int numRHS2Solve = MVT::GetNumberVecs( *(problem_->getRHS()) );
1088 std::vector<int> currIdx(1);
1092 problem_->setLSIndex( currIdx );
1095 ptrdiff_t dim = MVT::GetGlobalLength( *(problem_->getRHS()) );
1096 if (numBlocks_ > dim) {
1097 numBlocks_ = Teuchos::asSafe<int>(dim);
1098 params_->set(
"Num Blocks", numBlocks_);
1100 "Warning! Requested Krylov subspace dimension is larger than operator dimension!" << std::endl <<
1101 " The maximum number of blocks allowed for the Krylov subspace will be adjusted to " << numBlocks_ << std::endl;
1105 initializeStateStorage();
1108 Teuchos::ParameterList plist;
1109 plist.set(
"Num Blocks",numBlocks_);
1110 plist.set(
"Recycled Blocks",recycleBlocks_);
1113 outputTest_->reset();
1116 bool isConverged =
true;
1120 index.resize(recycleBlocks_);
1121 for (
int i=0; i<recycleBlocks_; ++i) { index[i] = i; }
1122 Teuchos::RCP<const MV> Utmp = MVT::CloneView( *U_, index );
1123 index.resize(recycleBlocks_);
1124 for (
int i=0; i<recycleBlocks_; ++i) { index[i] = i; }
1125 Teuchos::RCP<MV> AUtmp = MVT::CloneViewNonConst( *AU_, index );
1127 problem_->applyOp( *Utmp, *AUtmp );
1129 Teuchos::SerialDenseMatrix<int,ScalarType> UTAUtmp( Teuchos::View, *UTAU_, recycleBlocks_, recycleBlocks_ );
1130 MVT::MvTransMv( one, *Utmp, *AUtmp, UTAUtmp );
1132 Teuchos::SerialDenseMatrix<int,ScalarType> AUTAUtmp( Teuchos::View, *AUTAU_, recycleBlocks_, recycleBlocks_ );
1133 if ( precObj != Teuchos::null ) {
1134 index.resize(recycleBlocks_);
1135 for (
int i=0; i<recycleBlocks_; ++i) { index[i] = i; }
1136 index.resize(recycleBlocks_);
1137 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1138 Teuchos::RCP<MV> PCAU = MVT::CloneViewNonConst( *U1_, index );
1139 OPT::Apply( *precObj, *AUtmp, *PCAU );
1140 MVT::MvTransMv( one, *AUtmp, *PCAU, AUTAUtmp );
1142 MVT::MvTransMv( one, *AUtmp, *AUtmp, AUTAUtmp );
1149 Teuchos::RCP<RCGIter<ScalarType,MV,OP> > rcg_iter;
1154#ifdef BELOS_TEUCHOS_TIME_MONITOR
1155 Teuchos::TimeMonitor slvtimer(*timerSolve_);
1158 while ( numRHS2Solve > 0 ) {
1161 if (printer_->isVerbosity(
Debug ) ) {
1162 if (existU_) printer_->print(
Debug,
"Using recycle space generated from previous call to solve()." );
1163 else printer_->print(
Debug,
"No recycle space exists." );
1167 rcg_iter->resetNumIters();
1170 rcg_iter->setSize( recycleBlocks_, numBlocks_ );
1173 outputTest_->resetNumCalls();
1182 problem_->computeCurrResVec( &*r_ );
1187 Teuchos::SerialDenseMatrix<int,ScalarType> Utr(recycleBlocks_,1);
1188 index.resize(recycleBlocks_);
1189 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1190 Teuchos::RCP<const MV> Utmp = MVT::CloneView( *U_, index );
1191 MVT::MvTransMv( one, *Utmp, *r_, Utr );
1192 Teuchos::SerialDenseMatrix<int,ScalarType> UTAUtmp( Teuchos::View, *UTAU_, recycleBlocks_, recycleBlocks_ );
1193 Teuchos::SerialDenseMatrix<int,ScalarType> LUUTAUtmp( Teuchos::View, *LUUTAU_, recycleBlocks_, recycleBlocks_ );
1194 LUUTAUtmp.assign(UTAUtmp);
1196 lapack.GESV(recycleBlocks_, 1, LUUTAUtmp.values(), LUUTAUtmp.stride(), &(*ipiv_)[0], Utr.values(), Utr.stride(), &info);
1198 "Belos::RCGSolMgr::solve(): LAPACK GESV failed to compute a solution.");
1201 MVT::MvTimesMatAddMv( one, *Utmp, Utr, one, *problem_->getCurrLHSVec() );
1204 index.resize(recycleBlocks_);
1205 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1206 Teuchos::RCP<const MV> AUtmp = MVT::CloneView( *AU_, index );
1207 MVT::MvTimesMatAddMv( -one, *AUtmp, Utr, one, *r_ );
1210 if ( precObj != Teuchos::null ) {
1211 OPT::Apply( *precObj, *r_, *z_ );
1217 MVT::MvTransMv( one, *r_, *z_, *rTz_old_ );
1221 Teuchos::SerialDenseMatrix<int,ScalarType> mu( Teuchos::View, *Delta_, recycleBlocks_, 1);
1222 index.resize(recycleBlocks_);
1223 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1224 Teuchos::RCP<const MV> AUtmp = MVT::CloneView( *AU_, index );
1225 MVT::MvTransMv( one, *AUtmp, *z_, mu );
1226 Teuchos::SerialDenseMatrix<int,ScalarType> LUUTAUtmp( Teuchos::View, *LUUTAU_, recycleBlocks_, recycleBlocks_ );
1229 lapack.GETRS(
TRANS, recycleBlocks_, 1, LUUTAUtmp.values(), LUUTAUtmp.stride(), &(*ipiv_)[0], mu.values(), mu.stride(), &info );
1231 "Belos::RCGSolMgr::solve(): LAPACK GETRS failed to compute a solution.");
1235 Teuchos::RCP<MV> Ptmp = MVT::CloneViewNonConst( *P_, index );
1236 MVT::MvAddMv(one,*z_,zero,*z_,*Ptmp);
1237 MVT::MvTimesMatAddMv( -one, *U_, mu, one, *Ptmp );
1242 Teuchos::RCP<MV> Ptmp = MVT::CloneViewNonConst( *P_, index );
1243 MVT::MvAddMv(one,*z_,zero,*z_,*Ptmp);
1250 index.resize( numBlocks_+1 );
1251 for (
int ii=0; ii<(numBlocks_+1); ++ii) { index[ii] = ii; }
1252 newstate.
P = MVT::CloneViewNonConst( *P_, index );
1253 index.resize( recycleBlocks_ );
1254 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1255 newstate.
U = MVT::CloneViewNonConst( *U_, index );
1256 index.resize( recycleBlocks_ );
1257 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1258 newstate.
AU = MVT::CloneViewNonConst( *AU_, index );
1259 newstate.
Alpha = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>( Teuchos::View, *Alpha_, numBlocks_, 1 ) );
1260 newstate.
Beta = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>( Teuchos::View, *Beta_, numBlocks_, 1 ) );
1261 newstate.
D = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>( Teuchos::View, *D_, numBlocks_, 1 ) );
1262 newstate.
Delta = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>( Teuchos::View, *Delta_, recycleBlocks_, numBlocks_, 0, 1 ) );
1263 newstate.
LUUTAU = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>( Teuchos::View, *LUUTAU_, recycleBlocks_, recycleBlocks_ ) );
1269 newstate.
existU = existU_;
1270 newstate.
ipiv = ipiv_;
1273 rcg_iter->initialize(newstate);
1279 rcg_iter->iterate();
1286 if ( convTest_->getStatus() ==
Passed ) {
1295 else if ( maxIterTest_->getStatus() ==
Passed ) {
1297 isConverged =
false;
1305 else if ( rcg_iter->getCurSubspaceDim() == rcg_iter->getMaxSubspaceDim() ) {
1310 if (recycleBlocks_ > 0) {
1314 Teuchos::SerialDenseMatrix<int,ScalarType> Ftmp( Teuchos::View, *F_, numBlocks_, numBlocks_ );
1315 Teuchos::SerialDenseMatrix<int,ScalarType> Gtmp( Teuchos::View, *G_, numBlocks_, numBlocks_ );
1316 Teuchos::SerialDenseMatrix<int,ScalarType> Dtmp( Teuchos::View, *D_, numBlocks_, 1 );
1317 Teuchos::SerialDenseMatrix<int,ScalarType> Alphatmp( Teuchos::View, *Alpha_, numBlocks_, 1 );
1318 Teuchos::SerialDenseMatrix<int,ScalarType> Betatmp( Teuchos::View, *Beta_, numBlocks_, 1 );
1319 Ftmp.putScalar(zero);
1320 Gtmp.putScalar(zero);
1321 for (
int ii=0;ii<numBlocks_;ii++) {
1322 Gtmp(ii,ii) = (Dtmp(ii,0) / Alphatmp(ii,0))*(1 + Betatmp(ii,0));
1324 Gtmp(ii-1,ii) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1325 Gtmp(ii,ii-1) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1327 Ftmp(ii,ii) = Dtmp(ii,0);
1331 Teuchos::SerialDenseMatrix<int,ScalarType> Ytmp( Teuchos::View, *Y_, numBlocks_, recycleBlocks_ );
1332 getHarmonicVecs(Ftmp,Gtmp,Ytmp);
1335 index.resize( numBlocks_ );
1336 for (
int ii=0; ii<numBlocks_; ++ii) { index[ii] = ii; }
1337 Teuchos::RCP<const MV> Ptmp = MVT::CloneView( *P_, index );
1338 index.resize( recycleBlocks_ );
1339 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1340 Teuchos::RCP<MV> U1tmp = MVT::CloneViewNonConst( *U1_, index );
1341 MVT::MvTimesMatAddMv( one, *Ptmp, Ytmp, zero, *U1tmp );
1346 Teuchos::SerialDenseMatrix<int,ScalarType> GYtmp( Teuchos::View, *GY_, numBlocks_, recycleBlocks_ );
1347 GYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Gtmp,Ytmp,zero);
1348 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TAU1tmp( Teuchos::View, *AU1TAU1_, recycleBlocks_, recycleBlocks_ );
1349 AU1TAU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,GYtmp,zero);
1352 Teuchos::SerialDenseMatrix<int,ScalarType> FYtmp( Teuchos::View, *FY_, numBlocks_, recycleBlocks_ );
1353 FYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Ftmp,Ytmp,zero);
1354 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TU1tmp( Teuchos::View, *AU1TU1_, recycleBlocks_, recycleBlocks_ );
1355 AU1TU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,FYtmp,zero);
1357 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TAPtmp( Teuchos::View, *AU1TAP_, recycleBlocks_, 1 );
1359 AU1TAPtmp.putScalar(zero);
1361 ScalarType alphatmp = -1.0 / Alphatmp(numBlocks_-1,0);
1362 for (
int ii=0; ii<recycleBlocks_; ++ii) {
1363 AU1TAPtmp(ii,0) = Ytmp(numBlocks_-1,ii) * alphatmp;
1371 lastBeta = numBlocks_-1;
1378 Teuchos::SerialDenseMatrix<int,ScalarType> Dtmp( Teuchos::View, *D_, numBlocks_, 1 );
1379 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TAPtmp( Teuchos::View, *AU1TAP_, recycleBlocks_, numBlocks_ );
1380 AU1TAPtmp.scale(Dtmp(0,0));
1382 Teuchos::SerialDenseMatrix<int,ScalarType> Alphatmp( Teuchos::View, *Alpha_, numBlocks_, 1 );
1383 Teuchos::SerialDenseMatrix<int,ScalarType> Betatmp( Teuchos::View, *Beta_, numBlocks_+1, 1 );
1384 Teuchos::SerialDenseMatrix<int,ScalarType> APTAPtmp( Teuchos::View, *APTAP_, numBlocks_, numBlocks_ );
1385 APTAPtmp.putScalar(zero);
1386 for (
int ii=0; ii<numBlocks_; ii++) {
1387 APTAPtmp(ii,ii) = (Dtmp(ii,0) / Alphatmp(ii,0))*(1 + Betatmp(ii+1,0));
1389 APTAPtmp(ii-1,ii) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1390 APTAPtmp(ii,ii-1) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1395 Teuchos::SerialDenseMatrix<int,ScalarType> Ftmp( Teuchos::View, *F_, (numBlocks_+recycleBlocks_), (numBlocks_+recycleBlocks_) );
1396 Teuchos::SerialDenseMatrix<int,ScalarType> F11( Teuchos::View, *F_, recycleBlocks_, recycleBlocks_ );
1397 Teuchos::SerialDenseMatrix<int,ScalarType> F12( Teuchos::View, *F_, recycleBlocks_, numBlocks_, 0, recycleBlocks_ );
1398 Teuchos::SerialDenseMatrix<int,ScalarType> F21( Teuchos::View, *F_, numBlocks_, recycleBlocks_, recycleBlocks_, 0 );
1399 Teuchos::SerialDenseMatrix<int,ScalarType> F22( Teuchos::View, *F_, numBlocks_, numBlocks_, recycleBlocks_, recycleBlocks_ );
1400 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TU1tmp( Teuchos::View, *AU1TU1_, recycleBlocks_, recycleBlocks_ );
1401 F11.assign(AU1TU1tmp);
1402 F12.putScalar(zero);
1403 F21.putScalar(zero);
1404 F22.putScalar(zero);
1405 for(
int ii=0;ii<numBlocks_;ii++) {
1406 F22(ii,ii) = Dtmp(ii,0);
1410 Teuchos::SerialDenseMatrix<int,ScalarType> Gtmp( Teuchos::View, *G_, (numBlocks_+recycleBlocks_), (numBlocks_+recycleBlocks_) );
1411 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TAU1tmp( Teuchos::View, *AU1TAU1_, recycleBlocks_, recycleBlocks_ );
1412 Teuchos::SerialDenseMatrix<int,ScalarType> G11( Teuchos::View, *G_, recycleBlocks_, recycleBlocks_ );
1413 Teuchos::SerialDenseMatrix<int,ScalarType> G12( Teuchos::View, *G_, recycleBlocks_, numBlocks_, 0, recycleBlocks_ );
1414 Teuchos::SerialDenseMatrix<int,ScalarType> G21( Teuchos::View, *G_, numBlocks_, recycleBlocks_, recycleBlocks_, 0 );
1415 Teuchos::SerialDenseMatrix<int,ScalarType> G22( Teuchos::View, *G_, numBlocks_, numBlocks_, recycleBlocks_, recycleBlocks_ );
1416 G11.assign(AU1TAU1tmp);
1417 G12.assign(AU1TAPtmp);
1419 for (
int ii=0;ii<recycleBlocks_;++ii)
1420 for (
int jj=0;jj<numBlocks_;++jj)
1421 G21(jj,ii) = G12(ii,jj);
1422 G22.assign(APTAPtmp);
1425 Teuchos::SerialDenseMatrix<int,ScalarType> Ytmp( Teuchos::View, *Y_, (recycleBlocks_+numBlocks_), recycleBlocks_ );
1426 getHarmonicVecs(Ftmp,Gtmp,Ytmp);
1429 index.resize( numBlocks_ );
1430 for (
int ii=0; ii<numBlocks_; ++ii) { index[ii] = ii+1; }
1431 Teuchos::RCP<const MV> Ptmp = MVT::CloneView( *P_, index );
1432 index.resize( recycleBlocks_ );
1433 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1434 Teuchos::RCP<MV> PY2tmp = MVT::CloneViewNonConst( *PY2_, index );
1435 Teuchos::SerialDenseMatrix<int,ScalarType> Y2( Teuchos::View, *Y_, numBlocks_, recycleBlocks_, recycleBlocks_, 0 );
1436 index.resize( recycleBlocks_ );
1437 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1438 Teuchos::RCP<MV> U1tmp = MVT::CloneViewNonConst( *U1_, index );
1439 index.resize( recycleBlocks_ );
1440 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1441 Teuchos::RCP<MV> U1Y1tmp = MVT::CloneViewNonConst( *U1Y1_, index );
1442 Teuchos::SerialDenseMatrix<int,ScalarType> Y1( Teuchos::View, *Y_, recycleBlocks_, recycleBlocks_ );
1443 MVT::MvTimesMatAddMv( one, *Ptmp, Y2, zero, *PY2tmp );
1444 MVT::MvTimesMatAddMv( one, *U1tmp, Y1, zero, *U1Y1tmp );
1445 MVT::MvAddMv(one,*U1Y1tmp, one, *PY2tmp, *U1tmp);
1450 Teuchos::SerialDenseMatrix<int,ScalarType> GYtmp( Teuchos::View, *GY_, (numBlocks_+recycleBlocks_), recycleBlocks_ );
1451 GYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Gtmp,Ytmp,zero);
1452 AU1TAU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,GYtmp,zero);
1456 AU1TAPtmp.putScalar(zero);
1457 ScalarType alphatmp = -1.0 / Alphatmp(numBlocks_-1,0);
1458 for (
int ii=0; ii<recycleBlocks_; ++ii) {
1459 AU1TAPtmp(ii,0) = Ytmp(numBlocks_+recycleBlocks_-1,ii) * alphatmp;
1463 Teuchos::SerialDenseMatrix<int,ScalarType> FYtmp( Teuchos::View, *FY_, (numBlocks_+recycleBlocks_), recycleBlocks_ );
1464 FYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Ftmp,Ytmp,zero);
1466 AU1TU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,FYtmp,zero);
1469 lastp = numBlocks_+1;
1470 lastBeta = numBlocks_;
1476 Teuchos::SerialDenseMatrix<int,ScalarType> Alphatmp( Teuchos::View, *Alpha_, numBlocks_, 1 );
1477 Teuchos::SerialDenseMatrix<int,ScalarType> Betatmp( Teuchos::View, *Beta_, numBlocks_, 1 );
1478 Teuchos::SerialDenseMatrix<int,ScalarType> Dtmp( Teuchos::View, *D_, numBlocks_, 1 );
1479 Teuchos::SerialDenseMatrix<int,ScalarType> APTAPtmp( Teuchos::View, *APTAP_, numBlocks_, numBlocks_ );
1480 APTAPtmp.putScalar(zero);
1481 for (
int ii=0; ii<numBlocks_; ii++) {
1482 APTAPtmp(ii,ii) = (Dtmp(ii,0) / Alphatmp(ii,0))*(1 + Betatmp(ii,0));
1484 APTAPtmp(ii-1,ii) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1485 APTAPtmp(ii,ii-1) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1489 Teuchos::SerialDenseMatrix<int,ScalarType> L2tmp( Teuchos::View, *L2_, numBlocks_+1, numBlocks_ );
1490 L2tmp.putScalar(zero);
1491 for(
int ii=0;ii<numBlocks_;ii++) {
1492 L2tmp(ii,ii) = 1./Alphatmp(ii,0);
1493 L2tmp(ii+1,ii) = -1./Alphatmp(ii,0);
1497 Teuchos::SerialDenseMatrix<int,ScalarType> AUTAPtmp( Teuchos::View, *AUTAP_, recycleBlocks_, numBlocks_ );
1498 Teuchos::SerialDenseMatrix<int,ScalarType> UTAUtmp( Teuchos::View, *UTAU_, recycleBlocks_, recycleBlocks_ );
1499 Teuchos::SerialDenseMatrix<int,ScalarType> Deltatmp( Teuchos::View, *Delta_, recycleBlocks_, numBlocks_+1 );
1500 Teuchos::SerialDenseMatrix<int,ScalarType> DeltaL2tmp( Teuchos::View, *DeltaL2_, recycleBlocks_, numBlocks_ );
1501 DeltaL2tmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Deltatmp,L2tmp,zero);
1502 AUTAPtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,UTAUtmp,DeltaL2tmp,zero);
1505 Teuchos::SerialDenseMatrix<int,ScalarType> Ftmp( Teuchos::View, *F_, (numBlocks_+recycleBlocks_), (numBlocks_+recycleBlocks_) );
1506 Teuchos::SerialDenseMatrix<int,ScalarType> F11( Teuchos::View, *F_, recycleBlocks_, recycleBlocks_ );
1507 Teuchos::SerialDenseMatrix<int,ScalarType> F12( Teuchos::View, *F_, recycleBlocks_, numBlocks_, 0, recycleBlocks_ );
1508 Teuchos::SerialDenseMatrix<int,ScalarType> F21( Teuchos::View, *F_, numBlocks_, recycleBlocks_, recycleBlocks_, 0 );
1509 Teuchos::SerialDenseMatrix<int,ScalarType> F22( Teuchos::View, *F_, numBlocks_, numBlocks_, recycleBlocks_, recycleBlocks_ );
1510 F11.assign(UTAUtmp);
1511 F12.putScalar(zero);
1512 F21.putScalar(zero);
1513 F22.putScalar(zero);
1514 for(
int ii=0;ii<numBlocks_;ii++) {
1515 F22(ii,ii) = Dtmp(ii,0);
1519 Teuchos::SerialDenseMatrix<int,ScalarType> Gtmp( Teuchos::View, *G_, (numBlocks_+recycleBlocks_), (numBlocks_+recycleBlocks_) );
1520 Teuchos::SerialDenseMatrix<int,ScalarType> G11( Teuchos::View, *G_, recycleBlocks_, recycleBlocks_ );
1521 Teuchos::SerialDenseMatrix<int,ScalarType> G12( Teuchos::View, *G_, recycleBlocks_, numBlocks_, 0, recycleBlocks_ );
1522 Teuchos::SerialDenseMatrix<int,ScalarType> G21( Teuchos::View, *G_, numBlocks_, recycleBlocks_, recycleBlocks_, 0 );
1523 Teuchos::SerialDenseMatrix<int,ScalarType> G22( Teuchos::View, *G_, numBlocks_, numBlocks_, recycleBlocks_, recycleBlocks_ );
1524 Teuchos::SerialDenseMatrix<int,ScalarType> AUTAUtmp( Teuchos::View, *AUTAU_, recycleBlocks_, recycleBlocks_ );
1525 G11.assign(AUTAUtmp);
1526 G12.assign(AUTAPtmp);
1528 for (
int ii=0;ii<recycleBlocks_;++ii)
1529 for (
int jj=0;jj<numBlocks_;++jj)
1530 G21(jj,ii) = G12(ii,jj);
1531 G22.assign(APTAPtmp);
1534 Teuchos::SerialDenseMatrix<int,ScalarType> Ytmp( Teuchos::View, *Y_, (recycleBlocks_+numBlocks_), recycleBlocks_ );
1535 getHarmonicVecs(Ftmp,Gtmp,Ytmp);
1538 index.resize( recycleBlocks_ );
1539 for (
int ii=0; ii<(recycleBlocks_); ++ii) { index[ii] = ii; }
1540 Teuchos::RCP<const MV> Utmp = MVT::CloneView( *U_, index );
1541 index.resize( numBlocks_ );
1542 for (
int ii=0; ii<numBlocks_; ++ii) { index[ii] = ii; }
1543 Teuchos::RCP<const MV> Ptmp = MVT::CloneView( *P_, index );
1544 index.resize( recycleBlocks_ );
1545 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1546 Teuchos::RCP<MV> PY2tmp = MVT::CloneViewNonConst( *PY2_, index );
1547 Teuchos::SerialDenseMatrix<int,ScalarType> Y2( Teuchos::View, *Y_, numBlocks_, recycleBlocks_, recycleBlocks_, 0 );
1548 index.resize( recycleBlocks_ );
1549 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1550 Teuchos::RCP<MV> UY1tmp = MVT::CloneViewNonConst( *U1Y1_, index );
1551 Teuchos::SerialDenseMatrix<int,ScalarType> Y1( Teuchos::View, *Y_, recycleBlocks_, recycleBlocks_ );
1552 index.resize( recycleBlocks_ );
1553 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1554 Teuchos::RCP<MV> U1tmp = MVT::CloneViewNonConst( *U1_, index );
1555 MVT::MvTimesMatAddMv( one, *Ptmp, Y2, zero, *PY2tmp );
1556 MVT::MvTimesMatAddMv( one, *Utmp, Y1, zero, *UY1tmp );
1557 MVT::MvAddMv(one,*UY1tmp, one, *PY2tmp, *U1tmp);
1562 Teuchos::SerialDenseMatrix<int,ScalarType> GYtmp( Teuchos::View, *GY_, (numBlocks_+recycleBlocks_), recycleBlocks_ );
1563 GYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Gtmp,Ytmp,zero);
1564 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TAU1tmp( Teuchos::View, *AU1TAU1_, recycleBlocks_, recycleBlocks_ );
1565 AU1TAU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,GYtmp,zero);
1568 Teuchos::SerialDenseMatrix<int,ScalarType> FYtmp( Teuchos::View, *FY_, (numBlocks_+recycleBlocks_), recycleBlocks_ );
1569 FYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Ftmp,Ytmp,zero);
1570 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TU1tmp( Teuchos::View, *AU1TU1_, recycleBlocks_, recycleBlocks_ );
1571 AU1TU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,FYtmp,zero);
1574 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TUtmp( Teuchos::View, *AU1TU_, recycleBlocks_, recycleBlocks_ );
1575 AU1TUtmp.assign(UTAUtmp);
1578 dold = Dtmp(numBlocks_-1,0);
1585 lastBeta = numBlocks_-1;
1588 Teuchos::SerialDenseMatrix<int,ScalarType> Alphatmp( Teuchos::View, *Alpha_, numBlocks_, 1 );
1589 Teuchos::SerialDenseMatrix<int,ScalarType> Betatmp( Teuchos::View, *Beta_, numBlocks_+1, 1 );
1590 Teuchos::SerialDenseMatrix<int,ScalarType> Dtmp( Teuchos::View, *D_, numBlocks_, 1 );
1591 Teuchos::SerialDenseMatrix<int,ScalarType> APTAPtmp( Teuchos::View, *APTAP_, numBlocks_, numBlocks_ );
1592 for (
int ii=0; ii<numBlocks_; ii++) {
1593 APTAPtmp(ii,ii) = (Dtmp(ii,0) / Alphatmp(ii,0))*(1 + Betatmp(ii+1,0));
1595 APTAPtmp(ii-1,ii) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1596 APTAPtmp(ii,ii-1) = -Dtmp(ii,0)/Alphatmp(ii-1,0);
1600 Teuchos::SerialDenseMatrix<int,ScalarType> L2tmp( Teuchos::View, *L2_, numBlocks_+1, numBlocks_ );
1601 for(
int ii=0;ii<numBlocks_;ii++) {
1602 L2tmp(ii,ii) = 1./Alphatmp(ii,0);
1603 L2tmp(ii+1,ii) = -1./Alphatmp(ii,0);
1608 Teuchos::SerialDenseMatrix<int,ScalarType> DeltaL2( Teuchos::View, *DeltaL2_, recycleBlocks_, numBlocks_ );
1609 Teuchos::SerialDenseMatrix<int,ScalarType> Deltatmp( Teuchos::View, *Delta_, recycleBlocks_, numBlocks_+1 );
1610 DeltaL2.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Deltatmp,L2tmp,zero);
1611 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TUDeltaL2( Teuchos::View, *AU1TUDeltaL2_, recycleBlocks_, numBlocks_ );
1612 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TUtmp( Teuchos::View, *AU1TU_, recycleBlocks_, recycleBlocks_ );
1613 AU1TUDeltaL2.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,AU1TUtmp,DeltaL2,zero);
1614 Teuchos::SerialDenseMatrix<int,ScalarType> Y1( Teuchos::View, *Y_, recycleBlocks_, recycleBlocks_ );
1615 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TAPtmp( Teuchos::View, *AU1TAP_, recycleBlocks_, numBlocks_ );
1616 AU1TAPtmp.putScalar(zero);
1617 AU1TAPtmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Y1,AU1TUDeltaL2,zero);
1618 Teuchos::SerialDenseMatrix<int,ScalarType> Y2( Teuchos::View, *Y_, numBlocks_, recycleBlocks_, recycleBlocks_, 0 );
1619 ScalarType val = dold * (-Betatmp(0,0)/Alphatmp(0,0));
1620 for(
int ii=0;ii<recycleBlocks_;ii++) {
1621 AU1TAPtmp(ii,0) += Y2(numBlocks_-1,ii)*val;
1625 Teuchos::SerialDenseMatrix<int,ScalarType> Y1TAU1TU( Teuchos::View, *GY_, recycleBlocks_, recycleBlocks_ );
1626 Y1TAU1TU.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Y1,AU1TUtmp,zero);
1627 AU1TUtmp.assign(Y1TAU1TU);
1630 Teuchos::SerialDenseMatrix<int,ScalarType> Ftmp( Teuchos::View, *F_, (numBlocks_+recycleBlocks_), (numBlocks_+recycleBlocks_) );
1631 Teuchos::SerialDenseMatrix<int,ScalarType> F11( Teuchos::View, *F_, recycleBlocks_, recycleBlocks_ );
1632 Teuchos::SerialDenseMatrix<int,ScalarType> F12( Teuchos::View, *F_, recycleBlocks_, numBlocks_, 0, recycleBlocks_ );
1633 Teuchos::SerialDenseMatrix<int,ScalarType> F21( Teuchos::View, *F_, numBlocks_, recycleBlocks_, recycleBlocks_, 0 );
1634 Teuchos::SerialDenseMatrix<int,ScalarType> F22( Teuchos::View, *F_, numBlocks_, numBlocks_, recycleBlocks_, recycleBlocks_ );
1635 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TU1tmp( Teuchos::View, *AU1TU1_, recycleBlocks_, recycleBlocks_ );
1636 F11.assign(AU1TU1tmp);
1637 for(
int ii=0;ii<numBlocks_;ii++) {
1638 F22(ii,ii) = Dtmp(ii,0);
1642 Teuchos::SerialDenseMatrix<int,ScalarType> Gtmp( Teuchos::View, *G_, (numBlocks_+recycleBlocks_), (numBlocks_+recycleBlocks_) );
1643 Teuchos::SerialDenseMatrix<int,ScalarType> G11( Teuchos::View, *G_, recycleBlocks_, recycleBlocks_ );
1644 Teuchos::SerialDenseMatrix<int,ScalarType> G12( Teuchos::View, *G_, recycleBlocks_, numBlocks_, 0, recycleBlocks_ );
1645 Teuchos::SerialDenseMatrix<int,ScalarType> G21( Teuchos::View, *G_, numBlocks_, recycleBlocks_, recycleBlocks_, 0 );
1646 Teuchos::SerialDenseMatrix<int,ScalarType> G22( Teuchos::View, *G_, numBlocks_, numBlocks_, recycleBlocks_, recycleBlocks_ );
1647 Teuchos::SerialDenseMatrix<int,ScalarType> AU1TAU1tmp( Teuchos::View, *AU1TAU1_, recycleBlocks_, recycleBlocks_ );
1648 G11.assign(AU1TAU1tmp);
1649 G12.assign(AU1TAPtmp);
1651 for (
int ii=0;ii<recycleBlocks_;++ii)
1652 for (
int jj=0;jj<numBlocks_;++jj)
1653 G21(jj,ii) = G12(ii,jj);
1654 G22.assign(APTAPtmp);
1657 Teuchos::SerialDenseMatrix<int,ScalarType> Ytmp( Teuchos::View, *Y_, (recycleBlocks_+numBlocks_), recycleBlocks_ );
1658 getHarmonicVecs(Ftmp,Gtmp,Ytmp);
1661 index.resize( numBlocks_ );
1662 for (
int ii=0; ii<numBlocks_; ++ii) { index[ii] = ii+1; }
1663 Teuchos::RCP<const MV> Ptmp = MVT::CloneView( *P_, index );
1664 index.resize( recycleBlocks_ );
1665 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1666 Teuchos::RCP<MV> PY2tmp = MVT::CloneViewNonConst( *PY2_, index );
1667 index.resize( recycleBlocks_ );
1668 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1669 Teuchos::RCP<MV> U1tmp = MVT::CloneViewNonConst( *U1_, index );
1670 index.resize( recycleBlocks_ );
1671 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1672 Teuchos::RCP<MV> U1Y1tmp = MVT::CloneViewNonConst( *U1Y1_, index );
1673 MVT::MvTimesMatAddMv( one, *Ptmp, Y2, zero, *PY2tmp );
1674 MVT::MvTimesMatAddMv( one, *U1tmp, Y1, zero, *U1Y1tmp );
1675 MVT::MvAddMv(one,*U1Y1tmp, one, *PY2tmp, *U1tmp);
1680 Teuchos::SerialDenseMatrix<int,ScalarType> GYtmp( Teuchos::View, *GY_, (numBlocks_+recycleBlocks_), recycleBlocks_ );
1681 GYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Gtmp,Ytmp,zero);
1682 AU1TAU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,GYtmp,zero);
1685 Teuchos::SerialDenseMatrix<int,ScalarType> FYtmp( Teuchos::View, *FY_, (numBlocks_+recycleBlocks_), recycleBlocks_ );
1686 FYtmp.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,one,Ftmp,Ytmp,zero);
1687 AU1TU1tmp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,one,Ytmp,FYtmp,zero);
1690 dold = Dtmp(numBlocks_-1,0);
1693 lastp = numBlocks_+1;
1694 lastBeta = numBlocks_;
1704 index[0] = lastp-1; index[1] = lastp;
1705 Teuchos::RCP<const MV> Ptmp2 = MVT::CloneView( *P_, index );
1706 index[0] = 0; index[1] = 1;
1707 MVT::SetBlock(*Ptmp2,index,*P_);
1710 (*Beta_)(0,0) = (*Beta_)(lastBeta,0);
1714 Teuchos::SerialDenseMatrix<int,ScalarType> mu1( Teuchos::View, *Delta_, recycleBlocks_, 1, 0, 0 );
1715 Teuchos::SerialDenseMatrix<int,ScalarType> mu2( Teuchos::View, *Delta_, recycleBlocks_, 1, 0, numBlocks_ );
1720 newstate.
P = Teuchos::null;
1721 index.resize( numBlocks_+1 );
1722 for (
int ii=0; ii<(numBlocks_+1); ++ii) { index[ii] = ii+1; }
1723 newstate.
P = MVT::CloneViewNonConst( *P_, index );
1725 newstate.
Beta = Teuchos::null;
1726 newstate.
Beta = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>( Teuchos::View, *Beta_, numBlocks_, 1, 1, 0 ) );
1728 newstate.
Delta = Teuchos::null;
1729 newstate.
Delta = Teuchos::rcp(
new Teuchos::SerialDenseMatrix<int,ScalarType>( Teuchos::View, *Delta_, recycleBlocks_, numBlocks_, 0, 1 ) );
1734 rcg_iter->initialize(newstate);
1747 TEUCHOS_TEST_FOR_EXCEPTION(
true,std::logic_error,
1748 "Belos::RCGSolMgr::solve(): Invalid return from RCGIter::iterate().");
1751 catch (
const std::exception &e) {
1752 printer_->stream(
Errors) <<
"Error! Caught std::exception in RCGIter::iterate() at iteration "
1753 << rcg_iter->getNumIters() << std::endl
1754 << e.what() << std::endl;
1760 problem_->setCurrLS();
1764 if ( numRHS2Solve > 0 ) {
1767 problem_->setLSIndex( currIdx );
1770 currIdx.resize( numRHS2Solve );
1776 index.resize(recycleBlocks_);
1777 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1778 MVT::SetBlock(*U1_,index,*U_);
1781 if (numRHS2Solve > 0) {
1783 newstate.
P = Teuchos::null;
1784 newstate.
Ap = Teuchos::null;
1785 newstate.
r = Teuchos::null;
1786 newstate.
z = Teuchos::null;
1787 newstate.
U = Teuchos::null;
1788 newstate.
AU = Teuchos::null;
1789 newstate.
Alpha = Teuchos::null;
1790 newstate.
Beta = Teuchos::null;
1791 newstate.
D = Teuchos::null;
1792 newstate.
Delta = Teuchos::null;
1793 newstate.
LUUTAU = Teuchos::null;
1794 newstate.
ipiv = Teuchos::null;
1795 newstate.
rTz_old = Teuchos::null;
1798 index.resize(recycleBlocks_);
1799 for (
int i=0; i<recycleBlocks_; ++i) { index[i] = i; }
1800 Teuchos::RCP<const MV> Utmp = MVT::CloneView( *U_, index );
1801 index.resize(recycleBlocks_);
1802 for (
int i=0; i<recycleBlocks_; ++i) { index[i] = i; }
1803 Teuchos::RCP<MV> AUtmp = MVT::CloneViewNonConst( *AU_, index );
1805 problem_->applyOp( *Utmp, *AUtmp );
1807 Teuchos::SerialDenseMatrix<int,ScalarType> UTAUtmp( Teuchos::View, *UTAU_, recycleBlocks_, recycleBlocks_ );
1808 MVT::MvTransMv( one, *Utmp, *AUtmp, UTAUtmp );
1810 Teuchos::SerialDenseMatrix<int,ScalarType> AUTAUtmp( Teuchos::View, *AUTAU_, recycleBlocks_, recycleBlocks_ );
1811 if ( precObj != Teuchos::null ) {
1812 index.resize(recycleBlocks_);
1813 for (
int i=0; i<recycleBlocks_; ++i) { index[i] = i; }
1814 index.resize(recycleBlocks_);
1815 for (
int ii=0; ii<recycleBlocks_; ++ii) { index[ii] = ii; }
1816 Teuchos::RCP<MV> LeftPCAU = MVT::CloneViewNonConst( *U1_, index );
1817 OPT::Apply( *precObj, *AUtmp, *LeftPCAU );
1818 MVT::MvTransMv( one, *AUtmp, *LeftPCAU, AUTAUtmp );
1820 MVT::MvTransMv( one, *AUtmp, *AUtmp, AUTAUtmp );
1833#ifdef BELOS_TEUCHOS_TIME_MONITOR
1838 Teuchos::TimeMonitor::summarize( printer_->stream(
TimingDetails) );
1842 numIters_ = maxIterTest_->getNumIters();
1846 using Teuchos::rcp_dynamic_cast;
1849 const std::vector<MagnitudeType>* pTestValues =
1850 rcp_dynamic_cast<conv_test_type>(convTest_)->
getTestValue();
1852 TEUCHOS_TEST_FOR_EXCEPTION(pTestValues == NULL, std::logic_error,
1853 "Belos::RCGSolMgr::solve(): The convergence test's getTestValue() "
1854 "method returned NULL. Please report this bug to the Belos developers.");
1856 TEUCHOS_TEST_FOR_EXCEPTION(pTestValues->size() < 1, std::logic_error,
1857 "Belos::RCGSolMgr::solve(): The convergence test's getTestValue() "
1858 "method returned a vector of length zero. Please report this bug to the "
1859 "Belos developers.");
1864 achievedTol_ = *std::max_element (pTestValues->begin(), pTestValues->end());