|
Anasazi Version of the Day
|
00001 00002 00003 00004 #ifndef ANASAZI_SIRTR_HPP 00005 #define ANASAZI_SIRTR_HPP 00006 00007 #include "AnasaziTypes.hpp" 00008 #include "AnasaziRTRBase.hpp" 00009 00010 #include "AnasaziEigensolver.hpp" 00011 #include "AnasaziMultiVecTraits.hpp" 00012 #include "AnasaziOperatorTraits.hpp" 00013 #include "Teuchos_ScalarTraits.hpp" 00014 00015 #include "Teuchos_LAPACK.hpp" 00016 #include "Teuchos_BLAS.hpp" 00017 #include "Teuchos_SerialDenseMatrix.hpp" 00018 #include "Teuchos_ParameterList.hpp" 00019 #include "Teuchos_TimeMonitor.hpp" 00020 00040 // TODO: add randomization 00041 // TODO: add expensive debug checking on Teuchos_Debug 00042 00043 namespace Anasazi { 00044 00045 template <class ScalarType, class MV, class OP> 00046 class SIRTR : public RTRBase<ScalarType,MV,OP> { 00047 public: 00048 00050 00051 00063 SIRTR( const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 00064 const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter, 00065 const Teuchos::RCP<OutputManager<ScalarType> > &printer, 00066 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester, 00067 const Teuchos::RCP<GenOrthoManager<ScalarType,MV,OP> > &ortho, 00068 Teuchos::ParameterList ¶ms 00069 ); 00070 00072 virtual ~SIRTR() {}; 00073 00075 00077 00078 00080 void iterate(); 00081 00083 00085 00086 00088 void currentStatus(std::ostream &os); 00089 00091 00092 private: 00093 // 00094 // Convenience typedefs 00095 // 00096 typedef SolverUtils<ScalarType,MV,OP> Utils; 00097 typedef MultiVecTraits<ScalarType,MV> MVT; 00098 typedef OperatorTraits<ScalarType,MV,OP> OPT; 00099 typedef Teuchos::ScalarTraits<ScalarType> SCT; 00100 typedef typename SCT::magnitudeType MagnitudeType; 00101 typedef Teuchos::ScalarTraits<MagnitudeType> MAT; 00102 enum trRetType { 00103 UNINITIALIZED = 0, 00104 MAXIMUM_ITERATIONS, 00105 NEGATIVE_CURVATURE, 00106 EXCEEDED_TR, 00107 KAPPA_CONVERGENCE, 00108 THETA_CONVERGENCE 00109 }; 00110 // these correspond to above 00111 std::vector<std::string> stopReasons_; 00112 // 00113 // Consts 00114 // 00115 const MagnitudeType ZERO; 00116 const MagnitudeType ONE; 00117 // 00118 // Internal methods 00119 // 00121 void solveTRSubproblem(); 00122 // 00123 // rho_prime 00124 MagnitudeType rho_prime_; 00125 // 00126 // norm of initial gradient: this is used for scaling 00127 MagnitudeType normgradf0_; 00128 // 00129 // tr stopping reason 00130 trRetType innerStop_; 00131 // 00132 // number of inner iterations 00133 int innerIters_, totalInnerIters_; 00134 }; 00135 00136 00137 00138 00140 // Constructor 00141 template <class ScalarType, class MV, class OP> 00142 SIRTR<ScalarType,MV,OP>::SIRTR( 00143 const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 00144 const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter, 00145 const Teuchos::RCP<OutputManager<ScalarType> > &printer, 00146 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester, 00147 const Teuchos::RCP<GenOrthoManager<ScalarType,MV,OP> > &ortho, 00148 Teuchos::ParameterList ¶ms 00149 ) : 00150 RTRBase<ScalarType,MV,OP>(problem,sorter,printer,tester,ortho,params,"SIRTR",true), 00151 ZERO(MAT::zero()), 00152 ONE(MAT::one()), 00153 totalInnerIters_(0) 00154 { 00155 // set up array of stop reasons 00156 stopReasons_.push_back("n/a"); 00157 stopReasons_.push_back("maximum iterations"); 00158 stopReasons_.push_back("negative curvature"); 00159 stopReasons_.push_back("exceeded TR"); 00160 stopReasons_.push_back("kappa convergence"); 00161 stopReasons_.push_back("theta convergence"); 00162 00163 rho_prime_ = params.get("Rho Prime",0.5); 00164 TEST_FOR_EXCEPTION(rho_prime_ <= 0 || rho_prime_ >= 1,std::invalid_argument, 00165 "Anasazi::SIRTR::constructor: rho_prime must be in (0,1)."); 00166 } 00167 00168 00170 // TR subproblem solver 00171 // 00172 // FINISH: 00173 // define pre- and post-conditions 00174 // 00175 // POST: 00176 // delta_,Adelta_,Hdelta_ undefined 00177 // 00178 template <class ScalarType, class MV, class OP> 00179 void SIRTR<ScalarType,MV,OP>::solveTRSubproblem() { 00180 00181 // return one of: 00182 // MAXIMUM_ITERATIONS 00183 // NEGATIVE_CURVATURE 00184 // EXCEEDED_TR 00185 // KAPPA_CONVERGENCE 00186 // THETA_CONVERGENCE 00187 00188 using Teuchos::RCP; 00189 using Teuchos::tuple; 00190 using Teuchos::null; 00191 using Teuchos::TimeMonitor; 00192 using std::endl; 00193 typedef Teuchos::RCP<const MV> PCMV; 00194 typedef Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > PSDM; 00195 00196 innerStop_ = MAXIMUM_ITERATIONS; 00197 00198 const int n = MVT::GetVecLength(*this->eta_); 00199 const int p = this->blockSize_; 00200 const int d = n*p - (p*p+p)/2; 00201 00202 // We have the following: 00203 // 00204 // X'*B*X = I 00205 // X'*A*X = theta_ 00206 // 00207 // We desire to remain in the trust-region: 00208 // { eta : rho_Y(eta) \geq rho_prime } 00209 // where 00210 // rho_Y(eta) = 1/(1+eta'*B*eta) 00211 // Therefore, the trust-region is 00212 // { eta : eta'*B*eta \leq 1/rho_prime - 1 } 00213 // 00214 const double D2 = ONE/rho_prime_ - ONE; 00215 00216 std::vector<MagnitudeType> d_Hd(p), alpha(p), beta(p), z_r(p), zold_rold(p); 00217 std::vector<MagnitudeType> eBe(p), eBd(p), dBd(p), new_eBe(p); 00218 MagnitudeType r0_norm; 00219 00220 MVT::MvInit(*this->eta_ ,0.0); 00221 00222 // 00223 // R_ contains direct residuals: 00224 // R_ = A X_ - B X_ diag(theta_) 00225 // 00226 // r0 = grad f(X) = 2 P_BX A X = 2 P_BX (A X - B X diag(theta_) = 2 proj(R_) 00227 // We will do this in place. 00228 // For seeking the rightmost, we want to maximize f 00229 // This is the same as minimizing -f 00230 // Substitute all f with -f here. In particular, 00231 // grad -f(X) = -grad f(X) 00232 // Hess -f(X) = -Hess f(X) 00233 // 00234 { 00235 TimeMonitor lcltimer( *this->timerOrtho_ ); 00236 this->orthman_->projectGen( 00237 *this->R_, // operating on R 00238 tuple<PCMV>(this->BV_),tuple<PCMV>(this->V_),false, // P_{BV,V}, and <BV,V>_B != I 00239 tuple<PSDM>(null), // don't care about coeffs 00240 null,tuple<PCMV>(null), tuple<PCMV>(this->BV_)); // don't have B*BV, but do have B*V 00241 if (this->leftMost_) { 00242 MVT::MvScale(*this->R_,2.0); 00243 } 00244 else { 00245 MVT::MvScale(*this->R_,-2.0); 00246 } 00247 } 00248 00249 r0_norm = MAT::squareroot( ginner(*this->R_) ); 00250 // 00251 // kappa (linear) convergence 00252 // theta (superlinear) convergence 00253 // 00254 MagnitudeType kconv = r0_norm * this->conv_kappa_; 00255 // FINISH: consider inserting some scaling here 00256 // MagnitudeType tconv = r0_norm * MAT::pow(r0_norm/normgradf0_,this->conv_theta_); 00257 MagnitudeType tconv = MAT::pow(r0_norm,this->conv_theta_+ONE); 00258 if (this->om_->isVerbosity(Debug)) { 00259 this->om_->stream(Debug) 00260 << " >> |r0| : " << r0_norm << endl 00261 << " >> kappa conv : " << kconv << endl 00262 << " >> theta conv : " << tconv << endl; 00263 } 00264 00265 // 00266 // For Olsen preconditioning, the preconditioner is 00267 // Z = P_{Prec^-1 BX, BX} Prec^-1 R 00268 // for efficiency, we compute Prec^-1 BX once here for use later 00269 // Otherwise, we don't need PBX 00270 if (this->hasPrec_ && this->olsenPrec_) 00271 { 00272 std::vector<int> ind(this->blockSize_); 00273 for (int i=0; i<this->blockSize_; ++i) ind[i] = this->numAuxVecs_+i; 00274 Teuchos::RCP<MV> PBX = MVT::CloneViewNonConst(*this->PBV_,ind); 00275 { 00276 TimeMonitor prectimer( *this->timerPrec_ ); 00277 OPT::Apply(*this->Prec_,*this->BX_,*PBX); 00278 this->counterPrec_ += this->blockSize_; 00279 } 00280 PBX = Teuchos::null; 00281 } 00282 00283 // Z = P_{Prec^-1 BV, BV} Prec^-1 R 00284 // Prec^-1 BV in PBV 00285 // or 00286 // Z = P_{BV,BV} Prec^-1 R 00287 if (this->hasPrec_) 00288 { 00289 TimeMonitor prectimer( *this->timerPrec_ ); 00290 OPT::Apply(*this->Prec_,*this->R_,*this->Z_); 00291 this->counterPrec_ += this->blockSize_; 00292 // the orthogonalization time counts under Ortho and under Preconditioning 00293 if (this->olsenPrec_) { 00294 TimeMonitor orthtimer( *this->timerOrtho_ ); 00295 this->orthman_->projectGen( 00296 *this->Z_, // operating on Z 00297 tuple<PCMV>(this->PBV_),tuple<PCMV>(this->V_),false, // P_{PBV,V}, B inner product, and <PBV,V>_B != I 00298 tuple<PSDM>(null), // don't care about coeffs 00299 null,tuple<PCMV>(null), tuple<PCMV>(this->BV_)); // don't have B*PBV or B*Z, but do have B*V 00300 } 00301 else { 00302 TimeMonitor orthtimer( *this->timerOrtho_ ); 00303 this->orthman_->projectGen( 00304 *this->Z_, // operating on Z 00305 tuple<PCMV>(this->BV_),tuple<PCMV>(this->V_),false, // P_{BV,V}, and <BV,V>_B != I 00306 tuple<PSDM>(null), // don't care about coeffs 00307 null,tuple<PCMV>(null), tuple<PCMV>(this->BV_)); // don't have B*BV, but do have B*V 00308 } 00309 ginnersep(*this->R_,*this->Z_,z_r); 00310 } 00311 else { 00312 // Z = R 00313 MVT::MvAddMv(ONE,*this->R_,ZERO,*this->R_,*this->Z_); 00314 ginnersep(*this->R_,z_r); 00315 } 00316 00317 if (this->om_->isVerbosity( Debug )) { 00318 // Check that gradient is B-orthogonal to X 00319 typename RTRBase<ScalarType,MV,OP>::CheckList chk; 00320 chk.checkBR = true; 00321 if (this->hasPrec_) chk.checkZ = true; 00322 this->om_->print( Debug, this->accuracyCheck(chk, "after computing gradient") ); 00323 } 00324 else if (this->om_->isVerbosity( OrthoDetails )) { 00325 // Check that gradient is B-orthogonal to X 00326 typename RTRBase<ScalarType,MV,OP>::CheckList chk; 00327 chk.checkBR = true; 00328 if (this->hasPrec_) chk.checkZ = true; 00329 this->om_->print( OrthoDetails, this->accuracyCheck(chk, "after computing gradient") ); 00330 } 00331 00332 // delta = -z 00333 MVT::MvAddMv(-ONE,*this->Z_,ZERO,*this->Z_,*this->delta_); 00334 00335 if (this->om_->isVerbosity(Debug)) { 00336 // compute the model at eta 00337 // we need Heta, which requires A*eta and B*eta 00338 // we also need A*X 00339 // use Z for storage of these 00340 std::vector<MagnitudeType> eAx(this->blockSize_), 00341 d_eAe(this->blockSize_), 00342 d_eBe(this->blockSize_), 00343 d_mxe(this->blockSize_); 00344 // compute AX and <eta,AX> 00345 { 00346 TimeMonitor lcltimer( *this->timerAOp_ ); 00347 OPT::Apply(*this->AOp_,*this->X_,*this->Z_); 00348 this->counterAOp_ += this->blockSize_; 00349 } 00350 ginnersep(*this->eta_,*this->Z_,eAx); 00351 // compute A*eta and <eta,A*eta> 00352 { 00353 TimeMonitor lcltimer( *this->timerAOp_ ); 00354 OPT::Apply(*this->AOp_,*this->eta_,*this->Z_); 00355 this->counterAOp_ += this->blockSize_; 00356 } 00357 ginnersep(*this->eta_,*this->Z_,d_eAe); 00358 // compute B*eta and <eta,B*eta> 00359 if (this->hasBOp_) { 00360 TimeMonitor lcltimer( *this->timerBOp_ ); 00361 OPT::Apply(*this->BOp_,*this->eta_,*this->Z_); 00362 this->counterBOp_ += this->blockSize_; 00363 } 00364 else { 00365 MVT::MvAddMv(ONE,*this->eta_,ZERO,*this->eta_,*this->Z_); 00366 } 00367 ginnersep(*this->eta_,*this->Z_,d_eBe); 00368 // compute model: 00369 // m_x(eta) = theta + 2*eta'*A*x + eta'*A*eta - eta'*B*eta*theta 00370 if (this->leftMost_) { 00371 for (int j=0; j<this->blockSize_; ++j) { 00372 d_mxe[j] = this->theta_[j] + 2*eAx[j] + d_eAe[j] - d_eBe[j]*this->theta_[j]; 00373 } 00374 } 00375 else { 00376 for (int j=0; j<this->blockSize_; ++j) { 00377 d_mxe[j] = -this->theta_[j] - 2*eAx[j] - d_eAe[j] + d_eBe[j]*this->theta_[j]; 00378 } 00379 } 00380 this->om_->stream(Debug) 00381 << " Debugging checks: SIRTR inner iteration " << innerIters_ << endl 00382 << " >> m_X(eta) : " << std::accumulate(d_mxe.begin(),d_mxe.end(),0.0) << endl; 00383 for (int j=0; j<this->blockSize_; ++j) { 00384 this->om_->stream(Debug) 00385 << " >> m_X(eta_" << j << ") : " << d_mxe[j] << endl; 00386 } 00387 } 00388 00391 // the inner/tCG loop 00392 for (innerIters_=1; innerIters_<=d; ++innerIters_) { 00393 00394 // 00395 // [Hdelta,Adelta,Bdelta] = Hess*delta = 2 Proj(A*delta - B*delta*X'*A*X) 00396 // X'*A*X = diag(theta), so that 00397 // (B*delta)*diag(theta) can be done on the cheap 00398 // 00399 { 00400 TimeMonitor lcltimer( *this->timerAOp_ ); 00401 OPT::Apply(*this->AOp_,*this->delta_,*this->Z_); 00402 this->counterAOp_ += this->blockSize_; 00403 } 00404 if (this->hasBOp_) { 00405 TimeMonitor lcltimer( *this->timerBOp_ ); 00406 OPT::Apply(*this->BOp_,*this->delta_,*this->Hdelta_); 00407 this->counterBOp_ += this->blockSize_; 00408 } 00409 else { 00410 MVT::MvAddMv(ONE,*this->delta_,ZERO,*this->delta_,*this->Hdelta_); 00411 } 00412 // while we have B*delta, compute <eta,B*delta> and <delta,B*delta> 00413 // these will be needed below 00414 ginnersep(*this->eta_ ,*this->Hdelta_,eBd); 00415 ginnersep(*this->delta_,*this->Hdelta_,dBd); 00416 // put 2*A*d - 2*B*d*theta --> Hd 00417 { 00418 std::vector<ScalarType> theta_comp(this->theta_.begin(),this->theta_.end()); 00419 MVT::MvScale(*this->Hdelta_,theta_comp); 00420 } 00421 if (this->leftMost_) { 00422 MVT::MvAddMv( 2.0,*this->Z_,-2.0,*this->Hdelta_,*this->Hdelta_); 00423 } 00424 else { 00425 MVT::MvAddMv(-2.0,*this->Z_, 2.0,*this->Hdelta_,*this->Hdelta_); 00426 } 00427 // apply projector 00428 { 00429 TimeMonitor lcltimer( *this->timerOrtho_ ); 00430 this->orthman_->projectGen( 00431 *this->Hdelta_, // operating on Hdelta 00432 tuple<PCMV>(this->BV_),tuple<PCMV>(this->V_),false, // P_{BV,V}, and <BV,V>_B != I 00433 tuple<PSDM>(null), // don't care about coeffs 00434 null,tuple<PCMV>(null), tuple<PCMV>(this->BV_)); // don't have B*BV, but do have B*V 00435 } 00436 ginnersep(*this->delta_,*this->Hdelta_,d_Hd); 00437 00438 00439 // compute update step 00440 for (unsigned int j=0; j<alpha.size(); ++j) 00441 { 00442 alpha[j] = z_r[j]/d_Hd[j]; 00443 } 00444 00445 // compute new B-norms 00446 for (unsigned int j=0; j<alpha.size(); ++j) 00447 { 00448 new_eBe[j] = eBe[j] + 2*alpha[j]*eBd[j] + alpha[j]*alpha[j]*dBd[j]; 00449 } 00450 00451 if (this->om_->isVerbosity(Debug)) { 00452 for (unsigned int j=0; j<alpha.size(); j++) { 00453 this->om_->stream(Debug) 00454 << " >> z_r[" << j << "] : " << z_r[j] 00455 << " d_Hd[" << j << "] : " << d_Hd[j] << endl 00456 << " >> eBe[" << j << "] : " << eBe[j] 00457 << " neweBe[" << j << "] : " << new_eBe[j] << endl 00458 << " >> eBd[" << j << "] : " << eBd[j] 00459 << " dBd[" << j << "] : " << dBd[j] << endl; 00460 } 00461 } 00462 00463 // check truncation criteria: negative curvature or exceeded trust-region 00464 std::vector<int> trncstep; 00465 trncstep.reserve(p); 00466 // trncstep will contain truncated step, due to 00467 // negative curvature or exceeding implicit trust-region 00468 bool atleastonenegcur = false; 00469 for (unsigned int j=0; j<d_Hd.size(); ++j) { 00470 if (d_Hd[j] <= 0) { 00471 trncstep.push_back(j); 00472 atleastonenegcur = true; 00473 } 00474 else if (new_eBe[j] > D2) { 00475 trncstep.push_back(j); 00476 } 00477 } 00478 00479 if (!trncstep.empty()) 00480 { 00481 // compute step to edge of trust-region, for trncstep vectors 00482 if (this->om_->isVerbosity(Debug)) { 00483 for (unsigned int j=0; j<trncstep.size(); ++j) { 00484 this->om_->stream(Debug) 00485 << " >> alpha[" << trncstep[j] << "] : " << alpha[trncstep[j]] << endl; 00486 } 00487 } 00488 for (unsigned int j=0; j<trncstep.size(); ++j) { 00489 int jj = trncstep[j]; 00490 alpha[jj] = ( -eBd[jj] + MAT::squareroot(eBd[jj]*eBd[jj] + dBd[jj]*(D2-eBe[jj]) ) ) / dBd[jj]; 00491 } 00492 if (this->om_->isVerbosity(Debug)) { 00493 for (unsigned int j=0; j<trncstep.size(); ++j) { 00494 this->om_->stream(Debug) 00495 << " >> tau[" << trncstep[j] << "] : " << alpha[trncstep[j]] << endl; 00496 } 00497 } 00498 if (atleastonenegcur) { 00499 innerStop_ = NEGATIVE_CURVATURE; 00500 } 00501 else { 00502 innerStop_ = EXCEEDED_TR; 00503 } 00504 } 00505 00506 // compute new eta = eta + alpha*delta 00507 // we need delta*diag(alpha) 00508 // do this in situ in delta_ and friends (we will note this for below) 00509 // then set eta_ = eta_ + delta_ 00510 { 00511 std::vector<ScalarType> alpha_comp(alpha.begin(),alpha.end()); 00512 MVT::MvScale(*this->delta_,alpha_comp); 00513 MVT::MvScale(*this->Hdelta_,alpha_comp); 00514 } 00515 MVT::MvAddMv(ONE,*this->delta_ ,ONE,*this->eta_ ,*this->eta_); 00516 00517 // store new eBe 00518 eBe = new_eBe; 00519 00520 // 00521 // print some debugging info 00522 if (this->om_->isVerbosity(Debug)) { 00523 // compute the model at eta 00524 // we need Heta, which requires A*eta and B*eta 00525 // we also need A*X 00526 // use Z for storage of these 00527 std::vector<MagnitudeType> eAx(this->blockSize_), 00528 d_eAe(this->blockSize_), 00529 d_eBe(this->blockSize_), 00530 d_mxe(this->blockSize_); 00531 // compute AX and <eta,AX> 00532 { 00533 TimeMonitor lcltimer( *this->timerAOp_ ); 00534 OPT::Apply(*this->AOp_,*this->X_,*this->Z_); 00535 this->counterAOp_ += this->blockSize_; 00536 } 00537 ginnersep(*this->eta_,*this->Z_,eAx); 00538 // compute A*eta and <eta,A*eta> 00539 { 00540 TimeMonitor lcltimer( *this->timerAOp_ ); 00541 OPT::Apply(*this->AOp_,*this->eta_,*this->Z_); 00542 this->counterAOp_ += this->blockSize_; 00543 } 00544 ginnersep(*this->eta_,*this->Z_,d_eAe); 00545 // compute B*eta and <eta,B*eta> 00546 if (this->hasBOp_) { 00547 TimeMonitor lcltimer( *this->timerBOp_ ); 00548 OPT::Apply(*this->BOp_,*this->eta_,*this->Z_); 00549 this->counterBOp_ += this->blockSize_; 00550 } 00551 else { 00552 MVT::MvAddMv(ONE,*this->eta_,ZERO,*this->eta_,*this->Z_); 00553 } 00554 ginnersep(*this->eta_,*this->Z_,d_eBe); 00555 // compute model: 00556 // m_x(eta) = theta + 2*eta'*A*x + eta'*A*eta - eta'*B*eta*theta 00557 if (this->leftMost_) { 00558 for (int j=0; j<this->blockSize_; ++j) { 00559 d_mxe[j] = this->theta_[j] + 2*eAx[j] + d_eAe[j] - d_eBe[j]*this->theta_[j]; 00560 } 00561 } 00562 else { 00563 for (int j=0; j<this->blockSize_; ++j) { 00564 d_mxe[j] = -this->theta_[j] - 2*eAx[j] - d_eAe[j] + d_eBe[j]*this->theta_[j]; 00565 } 00566 } 00567 this->om_->stream(Debug) 00568 << " Debugging checks: SIRTR inner iteration " << innerIters_ << endl 00569 << " >> m_X(eta) : " << std::accumulate(d_mxe.begin(),d_mxe.end(),0.0) << endl; 00570 for (int j=0; j<this->blockSize_; ++j) { 00571 this->om_->stream(Debug) 00572 << " >> m_X(eta_" << j << ") : " << d_mxe[j] << endl; 00573 } 00574 } 00575 00576 // 00577 // if we found negative curvature or exceeded trust-region, then quit 00578 if (!trncstep.empty()) { 00579 break; 00580 } 00581 00582 // update gradient of m 00583 // R = R + Hdelta*diag(alpha) 00584 // however, Hdelta_ already stores Hdelta*diag(alpha) 00585 // so just add them 00586 MVT::MvAddMv(ONE,*this->Hdelta_,ONE,*this->R_,*this->R_); 00587 { 00588 // re-tangentialize r 00589 TimeMonitor lcltimer( *this->timerOrtho_ ); 00590 this->orthman_->projectGen( 00591 *this->R_, // operating on R 00592 tuple<PCMV>(this->BV_),tuple<PCMV>(this->V_),false, // P_{BV,V}, and <BV,V>_B != I 00593 tuple<PSDM>(null), // don't care about coeffs 00594 null,tuple<PCMV>(null), tuple<PCMV>(this->BV_)); // don't have B*BV, but do have B*V 00595 } 00596 00597 // 00598 // check convergence 00599 MagnitudeType r_norm = MAT::squareroot(ginner(*this->R_,*this->R_)); 00600 00601 // 00602 // check local convergece 00603 // 00604 // kappa (linear) convergence 00605 // theta (superlinear) convergence 00606 // 00607 if (this->om_->isVerbosity(Debug)) { 00608 this->om_->stream(Debug) 00609 << " >> |r" << innerIters_ << "| : " << r_norm << endl; 00610 } 00611 if ( r_norm <= ANASAZI_MIN(tconv,kconv) ) { 00612 if (tconv <= kconv) { 00613 innerStop_ = THETA_CONVERGENCE; 00614 } 00615 else { 00616 innerStop_ = KAPPA_CONVERGENCE; 00617 } 00618 break; 00619 } 00620 00621 // Z = P_{Prec^-1 BV, BV} Prec^-1 R 00622 // Prec^-1 BV in PBV 00623 // or 00624 // Z = P_{BV,BV} Prec^-1 R 00625 zold_rold = z_r; 00626 if (this->hasPrec_) 00627 { 00628 TimeMonitor prectimer( *this->timerPrec_ ); 00629 OPT::Apply(*this->Prec_,*this->R_,*this->Z_); 00630 this->counterPrec_ += this->blockSize_; 00631 // the orthogonalization time counts under Ortho and under Preconditioning 00632 if (this->olsenPrec_) { 00633 TimeMonitor orthtimer( *this->timerOrtho_ ); 00634 this->orthman_->projectGen( 00635 *this->Z_, // operating on Z 00636 tuple<PCMV>(this->PBV_),tuple<PCMV>(this->V_),false, // P_{PBV,V}, B inner product, and <PBV,V>_B != I 00637 tuple<PSDM>(null), // don't care about coeffs 00638 null,tuple<PCMV>(null), tuple<PCMV>(this->BV_)); // don't have B*PBV or B*Z, but do have B*V 00639 } 00640 else { 00641 TimeMonitor orthtimer( *this->timerOrtho_ ); 00642 this->orthman_->projectGen( 00643 *this->Z_, // operating on Z 00644 tuple<PCMV>(this->BV_),tuple<PCMV>(this->V_),false, // P_{BV,V}, and <BV,V>_B != I 00645 tuple<PSDM>(null), // don't care about coeffs 00646 null,tuple<PCMV>(null), tuple<PCMV>(this->BV_)); // don't have B*BV, but do have B*V 00647 } 00648 ginnersep(*this->R_,*this->Z_,z_r); 00649 } 00650 else { 00651 // Z = R 00652 MVT::MvAddMv(ONE,*this->R_,ZERO,*this->R_,*this->Z_); 00653 ginnersep(*this->R_,z_r); 00654 } 00655 00656 // compute new search direction 00657 // below, we need to perform 00658 // delta = -Z + delta*diag(beta) 00659 // however, delta_ currently stores delta*diag(alpha) 00660 // therefore, set 00661 // beta_ to beta/alpha 00662 // so that 00663 // delta_ = delta_*diag(beta_) 00664 // will in fact result in 00665 // delta_ = delta_*diag(beta_) 00666 // = delta*diag(alpha)*diag(beta/alpha) 00667 // = delta*diag(beta) 00668 // i hope this is numerically sound... 00669 for (unsigned int j=0; j<beta.size(); ++j) { 00670 beta[j] = z_r[j]/(zold_rold[j]*alpha[j]); 00671 } 00672 { 00673 std::vector<ScalarType> beta_comp(beta.begin(),beta.end()); 00674 MVT::MvScale(*this->delta_,beta_comp); 00675 } 00676 MVT::MvAddMv(-ONE,*this->Z_,ONE,*this->delta_,*this->delta_); 00677 00678 } 00679 // end of the inner iteration loop 00682 if (innerIters_ > d) innerIters_ = d; 00683 00684 this->om_->stream(Debug) 00685 << " >> stop reason is " << stopReasons_[innerStop_] << endl 00686 << endl; 00687 00688 } // end of solveTRSubproblem 00689 00690 00691 #define SIRTR_GET_TEMP_MV(mv,workspace) \ 00692 { \ 00693 TEST_FOR_EXCEPTION(workspace.size() == 0,std::logic_error,"SIRTR: Request for workspace could not be honored."); \ 00694 mv = workspace.back(); \ 00695 workspace.pop_back(); \ 00696 } 00697 00698 #define SIRTR_RELEASE_TEMP_MV(mv,workspace) \ 00699 { \ 00700 workspace.push_back(mv); \ 00701 mv = Teuchos::null; \ 00702 } 00703 00704 00706 // Eigensolver iterate() method 00707 template <class ScalarType, class MV, class OP> 00708 void SIRTR<ScalarType,MV,OP>::iterate() { 00709 00710 using Teuchos::RCP; 00711 using Teuchos::null; 00712 using Teuchos::tuple; 00713 using Teuchos::TimeMonitor; 00714 using std::endl; 00715 typedef Teuchos::RCP<const MV> PCMV; 00716 typedef Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > PSDM; 00717 00718 // 00719 // Allocate/initialize data structures 00720 // 00721 if (this->initialized_ == false) { 00722 this->initialize(); 00723 } 00724 00725 Teuchos::SerialDenseMatrix<int,ScalarType> AA(this->blockSize_,this->blockSize_), 00726 BB(this->blockSize_,this->blockSize_), 00727 S(this->blockSize_,this->blockSize_); 00728 00729 // we will often exploit temporarily unused storage for workspace 00730 // in order to keep it straight and make for clearer code, 00731 // we will put pointers to available multivectors into the following vector 00732 // when we need them, we get them out, using a meaningfully-named pointer 00733 // when we're done, we put them back 00734 std::vector< RCP<MV> > workspace; 00735 // we only have 7 multivectors, so that is more than the maximum number that 00736 // we could use for temp storage 00737 workspace.reserve(7); 00738 00739 // set iteration details to invalid, as they don't have any meaning right now 00740 innerIters_ = -1; 00741 innerStop_ = UNINITIALIZED; 00742 00743 // allocate temporary space 00744 while (this->tester_->checkStatus(this) != Passed) { 00745 00746 // Print information on current status 00747 if (this->om_->isVerbosity(Debug)) { 00748 this->currentStatus( this->om_->stream(Debug) ); 00749 } 00750 else if (this->om_->isVerbosity(IterationDetails)) { 00751 this->currentStatus( this->om_->stream(IterationDetails) ); 00752 } 00753 00754 // increment iteration counter 00755 this->iter_++; 00756 00757 // solve the trust-region subproblem 00758 solveTRSubproblem(); 00759 totalInnerIters_ += innerIters_; 00760 00761 // perform debugging on eta et al. 00762 if (this->om_->isVerbosity( Debug ) ) { 00763 typename RTRBase<ScalarType,MV,OP>::CheckList chk; 00764 // this is the residual of the model, should still be in the tangent plane 00765 chk.checkBR = true; 00766 chk.checkEta = true; 00767 this->om_->print( Debug, this->accuracyCheck(chk, "in iterate() after solveTRSubproblem()") ); 00768 } 00769 00770 00771 // 00772 // multivectors X, BX (if hasB) and eta contain meaningful information that we need below 00773 // the others will be sacrificed to temporary storage 00774 // we are not allowed to reference these anymore, RELEASE_TEMP_MV will clear the pointers 00775 // the RCP in workspace will keep the MV alive, we will get the MVs back 00776 // as we need them using GET_TEMP_MV 00777 // 00778 // this strategy doesn't cost us much, and it keeps us honest 00779 // 00780 TEST_FOR_EXCEPTION(workspace.size() != 0,std::logic_error,"SIRTR::iterate(): workspace list should be empty."); 00781 SIRTR_RELEASE_TEMP_MV(this->delta_ ,workspace); // workspace size is 1 00782 SIRTR_RELEASE_TEMP_MV(this->Hdelta_,workspace); // workspace size is 2 00783 SIRTR_RELEASE_TEMP_MV(this->R_ ,workspace); // workspace size is 3 00784 SIRTR_RELEASE_TEMP_MV(this->Z_ ,workspace); // workspace size is 4 00785 00786 00787 // compute the retraction of eta: R_X(eta) = X+eta 00788 // we must accept, but we will work out of temporary so that we can multiply back into X below 00789 RCP<MV> XpEta; 00790 SIRTR_GET_TEMP_MV(XpEta,workspace); // workspace size is 3 00791 { 00792 TimeMonitor lcltimer( *this->timerLocalUpdate_ ); 00793 MVT::MvAddMv(ONE,*this->X_,ONE,*this->eta_,*XpEta); 00794 } 00795 00796 // 00797 // perform rayleigh-ritz for XpEta = X+eta 00798 // save an old copy of f(X) for rho analysis below 00799 // 00800 MagnitudeType oldfx = this->fx_; 00801 int rank, ret; 00802 rank = this->blockSize_; 00803 // compute AA = (X+eta)'*A*(X+eta) 00804 // get temporarily storage for A*(X+eta) 00805 RCP<MV> AXpEta; 00806 SIRTR_GET_TEMP_MV(AXpEta,workspace); // workspace size is 2 00807 { 00808 TimeMonitor lcltimer( *this->timerAOp_ ); 00809 OPT::Apply(*this->AOp_,*XpEta,*AXpEta); 00810 this->counterAOp_ += this->blockSize_; 00811 } 00812 // project A onto X+eta 00813 { 00814 TimeMonitor lcltimer( *this->timerLocalProj_ ); 00815 MVT::MvTransMv(ONE,*XpEta,*AXpEta,AA); 00816 } 00817 // compute BB = (X+eta)'*B*(X+eta) 00818 // get temporary storage for B*(X+eta) 00819 RCP<MV> BXpEta; 00820 if (this->hasBOp_) { 00821 SIRTR_GET_TEMP_MV(BXpEta,workspace); // workspace size is 1 00822 { 00823 TimeMonitor lcltimer( *this->timerBOp_ ); 00824 OPT::Apply(*this->BOp_,*XpEta,*BXpEta); 00825 this->counterBOp_ += this->blockSize_; 00826 } 00827 // project B onto X+eta 00828 { 00829 TimeMonitor lcltimer( *this->timerLocalProj_ ); 00830 MVT::MvTransMv(ONE,*XpEta,*BXpEta,BB); 00831 } 00832 } 00833 else { 00834 // project I onto X+eta 00835 TimeMonitor lcltimer( *this->timerLocalProj_ ); 00836 MVT::MvTransMv(ONE,*XpEta,*XpEta,BB); 00837 } 00838 this->om_->stream(Debug) << "AA: " << std::endl << AA << std::endl;; 00839 this->om_->stream(Debug) << "BB: " << std::endl << BB << std::endl;; 00840 // do the direct solve 00841 // save old theta first 00842 std::vector<MagnitudeType> oldtheta(this->theta_); 00843 { 00844 TimeMonitor lcltimer( *this->timerDS_ ); 00845 ret = Utils::directSolver(this->blockSize_,AA,Teuchos::rcpFromRef(BB),S,this->theta_,rank,1); 00846 } 00847 this->om_->stream(Debug) << "S: " << std::endl << S << std::endl;; 00848 TEST_FOR_EXCEPTION(ret != 0,std::logic_error,"Anasazi::SIRTR::iterate(): failure solving projected eigenproblem after retraction. ret == " << ret << "AA: " << AA << std::endl << "BB: " << BB << std::endl); 00849 TEST_FOR_EXCEPTION(rank != this->blockSize_,RTRRitzFailure,"Anasazi::SIRTR::iterate(): retracted iterate failed in Ritz analysis. rank == " << rank); 00850 00851 // 00852 // order the projected ritz values and vectors 00853 // this ensures that the ritz vectors produced below are ordered 00854 { 00855 TimeMonitor lcltimer( *this->timerSort_ ); 00856 std::vector<int> order(this->blockSize_); 00857 // sort the first blockSize_ values in theta_ 00858 this->sm_->sort(this->theta_, Teuchos::rcpFromRef(order), this->blockSize_); // don't catch exception 00859 // apply the same ordering to the primitive ritz vectors 00860 Utils::permuteVectors(order,S); 00861 } 00862 // 00863 // update f(x) 00864 this->fx_ = std::accumulate(this->theta_.begin(),this->theta_.end(),ZERO); 00865 00866 // 00867 // if debugging, do rho analysis before overwriting X,AX,BX 00868 RCP<MV> AX; 00869 SIRTR_GET_TEMP_MV(AX,workspace); // workspace size is 0 00870 if (this->om_->isVerbosity( Debug ) ) { 00871 // 00872 // compute rho 00873 // f(X) - f(X+eta) f(X) - f(X+eta) 00874 // rho = ----------------- = ------------------------- 00875 // m(0) - m(eta) -<2AX,eta> - .5*<Heta,eta> 00876 MagnitudeType rhonum, rhoden, mxeta; 00877 // 00878 // compute rhonum 00879 rhonum = oldfx - this->fx_; 00880 00881 // 00882 // compute rhoden = -<eta,gradfx> - 0.5 <eta,H*eta> 00883 // = -2.0*<eta,AX> - <eta,A*eta> + <eta,B*eta*theta> 00884 // in three steps (3) (1) (2) 00885 // 00886 // first, compute seconder-order decrease in model (steps 1 and 2) 00887 // get temp storage for second order decrease of model 00888 // 00889 // do the first-order decrease last, because we need AX below 00890 { 00891 // compute A*eta and then <eta,A*eta> 00892 TimeMonitor lcltimer( *this->timerAOp_ ); 00893 OPT::Apply(*this->AOp_,*this->eta_,*AX); 00894 this->counterAOp_ += this->blockSize_; 00895 } 00896 // compute A part of second order decrease into rhoden 00897 rhoden = -ginner(*this->eta_,*AX); 00898 if (this->hasBOp_) { 00899 // compute B*eta into AX 00900 TimeMonitor lcltimer( *this->timerBOp_ ); 00901 OPT::Apply(*this->BOp_,*this->eta_,*AX); 00902 this->counterBOp_ += this->blockSize_; 00903 } 00904 else { 00905 // put B*eta==eta into AX 00906 MVT::MvAddMv(ONE,*this->eta_,ZERO,*this->eta_,*AX); 00907 } 00908 // we need this below for computing individual rho, get it now 00909 std::vector<MagnitudeType> eBe(this->blockSize_); 00910 ginnersep(*this->eta_,*AX,eBe); 00911 // scale B*eta by theta 00912 { 00913 std::vector<ScalarType> oldtheta_complex(oldtheta.begin(),oldtheta.end()); 00914 MVT::MvScale( *AX, oldtheta_complex); 00915 } 00916 // accumulate B part of second order decrease into rhoden 00917 rhoden += ginner(*this->eta_,*AX); 00918 { 00919 TimeMonitor lcltimer( *this->timerAOp_ ); 00920 OPT::Apply(*this->AOp_,*this->X_,*AX); 00921 this->counterAOp_ += this->blockSize_; 00922 } 00923 // accumulate first-order decrease of model into rhoden 00924 rhoden += -2.0*ginner(*AX,*this->eta_); 00925 00926 mxeta = oldfx - rhoden; 00927 this->rho_ = rhonum / rhoden; 00928 this->om_->stream(Debug) 00929 << " >> old f(x) is : " << oldfx << endl 00930 << " >> new f(x) is : " << this->fx_ << endl 00931 << " >> m_x(eta) is : " << mxeta << endl 00932 << " >> rhonum is : " << rhonum << endl 00933 << " >> rhoden is : " << rhoden << endl 00934 << " >> rho is : " << this->rho_ << endl; 00935 // compute individual rho 00936 for (int j=0; j<this->blockSize_; ++j) { 00937 this->om_->stream(Debug) 00938 << " >> rho[" << j << "] : " << 1.0/(1.0+eBe[j]) << endl; 00939 } 00940 } 00941 00942 // compute Ritz vectors back into X,BX,AX 00943 { 00944 // release const views to X, BX 00945 this->X_ = Teuchos::null; 00946 this->BX_ = Teuchos::null; 00947 // get non-const views 00948 std::vector<int> ind(this->blockSize_); 00949 for (int i=0; i<this->blockSize_; ++i) ind[i] = this->numAuxVecs_+i; 00950 Teuchos::RCP<MV> X, BX; 00951 X = MVT::CloneViewNonConst(*this->V_,ind); 00952 if (this->hasBOp_) { 00953 BX = MVT::CloneViewNonConst(*this->BV_,ind); 00954 } 00955 // compute ritz vectors, A,B products into X,AX,BX 00956 { 00957 TimeMonitor lcltimer( *this->timerLocalUpdate_ ); 00958 MVT::MvTimesMatAddMv(ONE,* XpEta,S,ZERO,*X); 00959 MVT::MvTimesMatAddMv(ONE,*AXpEta,S,ZERO,*AX); 00960 if (this->hasBOp_) { 00961 MVT::MvTimesMatAddMv(ONE,*BXpEta,S,ZERO,*BX); 00962 } 00963 } 00964 // clear non-const views, restore const views 00965 X = Teuchos::null; 00966 BX = Teuchos::null; 00967 this->X_ = MVT::CloneView(static_cast<const MV&>(*this->V_ ),ind); 00968 this->BX_ = MVT::CloneView(static_cast<const MV&>(*this->BV_),ind); 00969 } 00970 // 00971 // return XpEta and BXpEta to temp storage 00972 SIRTR_RELEASE_TEMP_MV(XpEta,workspace); // workspace size is 1 00973 SIRTR_RELEASE_TEMP_MV(AXpEta,workspace); // workspace size is 2 00974 if (this->hasBOp_) { 00975 SIRTR_RELEASE_TEMP_MV(BXpEta,workspace); // workspace size is 3 00976 } 00977 00978 // 00979 // solveTRSubproblem destroyed R, we must recompute it 00980 // compute R = AX - BX*theta 00981 // 00982 // get R back from temp storage 00983 SIRTR_GET_TEMP_MV(this->R_,workspace); // workspace size is 2 00984 { 00985 TimeMonitor lcltimer( *this->timerCompRes_ ); 00986 MVT::MvAddMv( ONE, *this->BX_, ZERO, *this->BX_, *this->R_ ); 00987 { 00988 std::vector<ScalarType> theta_comp(this->theta_.begin(),this->theta_.end()); 00989 MVT::MvScale( *this->R_, theta_comp ); 00990 } 00991 MVT::MvAddMv( ONE, *AX, -ONE, *this->R_, *this->R_ ); 00992 } 00993 // 00994 // R has been updated; mark the norms as out-of-date 00995 this->Rnorms_current_ = false; 00996 this->R2norms_current_ = false; 00997 00998 // 00999 // we are done with AX, release it 01000 SIRTR_RELEASE_TEMP_MV(AX,workspace); // workspace size is 3 01001 // 01002 // get data back for delta, Hdelta and Z 01003 SIRTR_GET_TEMP_MV(this->delta_,workspace); // workspace size is 2 01004 SIRTR_GET_TEMP_MV(this->Hdelta_,workspace); // workspace size is 1 01005 SIRTR_GET_TEMP_MV(this->Z_,workspace); // workspace size is 0 01006 01007 // 01008 // When required, monitor some orthogonalities 01009 if (this->om_->isVerbosity( Debug ) ) { 01010 // Check almost everything here 01011 typename RTRBase<ScalarType,MV,OP>::CheckList chk; 01012 chk.checkX = true; 01013 chk.checkBX = true; 01014 chk.checkR = true; 01015 this->om_->print( Debug, this->accuracyCheck(chk, "after local update") ); 01016 } 01017 else if (this->om_->isVerbosity( OrthoDetails )) { 01018 typename RTRBase<ScalarType,MV,OP>::CheckList chk; 01019 chk.checkX = true; 01020 chk.checkR = true; 01021 this->om_->print( OrthoDetails, this->accuracyCheck(chk, "after local update") ); 01022 } 01023 01024 } // end while (statusTest == false) 01025 01026 } // end of iterate() 01027 01028 01030 // Print the current status of the solver 01031 template <class ScalarType, class MV, class OP> 01032 void 01033 SIRTR<ScalarType,MV,OP>::currentStatus(std::ostream &os) 01034 { 01035 using std::endl; 01036 os.setf(std::ios::scientific, std::ios::floatfield); 01037 os.precision(6); 01038 os <<endl; 01039 os <<"================================================================================" << endl; 01040 os << endl; 01041 os <<" SIRTR Solver Status" << endl; 01042 os << endl; 01043 os <<"The solver is "<<(this->initialized_ ? "initialized." : "not initialized.") << endl; 01044 os <<"The number of iterations performed is " << this->iter_ << endl; 01045 os <<"The current block size is " << this->blockSize_ << endl; 01046 os <<"The number of auxiliary vectors is " << this->numAuxVecs_ << endl; 01047 os <<"The number of operations A*x is " << this->counterAOp_ << endl; 01048 os <<"The number of operations B*x is " << this->counterBOp_ << endl; 01049 os <<"The number of operations B*x by the orthomanager is " << this->orthman_->getOpCounter() << endl; 01050 os <<"The number of operations Prec*x is " << this->counterPrec_ << endl; 01051 os <<"Parameter rho_prime is " << rho_prime_ << endl; 01052 os <<"Inner stopping condition was " << stopReasons_[innerStop_] << endl; 01053 os <<"Number of inner iterations was " << innerIters_ << endl; 01054 os <<"Total number of inner iterations is " << totalInnerIters_ << endl; 01055 os <<"f(x) is " << this->fx_ << endl; 01056 01057 os.setf(std::ios_base::right, std::ios_base::adjustfield); 01058 01059 if (this->initialized_) { 01060 os << endl; 01061 os <<"CURRENT EIGENVALUE ESTIMATES "<<endl; 01062 os << std::setw(20) << "Eigenvalue" 01063 << std::setw(20) << "Residual(B)" 01064 << std::setw(20) << "Residual(2)" 01065 << endl; 01066 os <<"--------------------------------------------------------------------------------"<<endl; 01067 for (int i=0; i<this->blockSize_; i++) { 01068 os << std::setw(20) << this->theta_[i]; 01069 if (this->Rnorms_current_) os << std::setw(20) << this->Rnorms_[i]; 01070 else os << std::setw(20) << "not current"; 01071 if (this->R2norms_current_) os << std::setw(20) << this->R2norms_[i]; 01072 else os << std::setw(20) << "not current"; 01073 os << endl; 01074 } 01075 } 01076 os <<"================================================================================" << endl; 01077 os << endl; 01078 } 01079 01080 01081 } // end Anasazi namespace 01082 01083 #endif // ANASAZI_SIRTR_HPP
1.7.4