|
Anasazi Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Anasazi: Block Eigensolvers Package 00005 // Copyright (2004) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 00034 /* 00035 LOBPCG contains local storage of up to 10*blockSize_ vectors, representing 10 entities 00036 X,H,P,R 00037 KX,KH,KP (product of K and the above) 00038 MX,MH,MP (product of M and the above, not allocated if we don't have an M matrix) 00039 If full orthogonalization is enabled, one extra multivector of blockSize_ vectors is required to 00040 compute the local update of X and P. 00041 00042 A solver is bound to an eigenproblem at declaration. 00043 Other solver parameters (e.g., block size, auxiliary vectors) can be changed dynamically. 00044 00045 The orthogonalization manager is used to project away from the auxiliary vectors. 00046 If full orthogonalization is enabled, the orthogonalization manager is also used to construct an M orthonormal basis. 00047 The orthogonalization manager is subclass of MatOrthoManager, which LOBPCG assumes to be defined by the M inner product. 00048 LOBPCG will not work correctly if the orthomanager uses a different inner product. 00049 */ 00050 00051 00052 #ifndef ANASAZI_LOBPCG_HPP 00053 #define ANASAZI_LOBPCG_HPP 00054 00055 #include "AnasaziTypes.hpp" 00056 00057 #include "AnasaziEigensolver.hpp" 00058 #include "AnasaziMultiVecTraits.hpp" 00059 #include "AnasaziOperatorTraits.hpp" 00060 #include "Teuchos_ScalarTraits.hpp" 00061 00062 #include "AnasaziMatOrthoManager.hpp" 00063 #include "AnasaziSolverUtils.hpp" 00064 00065 #include "Teuchos_LAPACK.hpp" 00066 #include "Teuchos_BLAS.hpp" 00067 #include "Teuchos_SerialDenseMatrix.hpp" 00068 #include "Teuchos_ParameterList.hpp" 00069 #include "Teuchos_TimeMonitor.hpp" 00070 00091 namespace Anasazi { 00092 00094 00095 00100 template <class ScalarType, class MultiVector> 00101 struct LOBPCGState { 00103 Teuchos::RCP<const MultiVector> V; 00105 Teuchos::RCP<const MultiVector> KV; 00107 Teuchos::RCP<const MultiVector> MV; 00108 00110 Teuchos::RCP<const MultiVector> X; 00112 Teuchos::RCP<const MultiVector> KX; 00114 Teuchos::RCP<const MultiVector> MX; 00115 00117 Teuchos::RCP<const MultiVector> P; 00119 Teuchos::RCP<const MultiVector> KP; 00121 Teuchos::RCP<const MultiVector> MP; 00122 00127 Teuchos::RCP<const MultiVector> H; 00129 Teuchos::RCP<const MultiVector> KH; 00131 Teuchos::RCP<const MultiVector> MH; 00132 00134 Teuchos::RCP<const MultiVector> R; 00135 00137 Teuchos::RCP<const std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > T; 00138 00139 LOBPCGState() : 00140 V(Teuchos::null),KV(Teuchos::null),MV(Teuchos::null), 00141 X(Teuchos::null),KX(Teuchos::null),MX(Teuchos::null), 00142 P(Teuchos::null),KP(Teuchos::null),MP(Teuchos::null), 00143 H(Teuchos::null),KH(Teuchos::null),MH(Teuchos::null), 00144 R(Teuchos::null),T(Teuchos::null) {}; 00145 }; 00146 00148 00150 00151 00165 class LOBPCGRitzFailure : public AnasaziError {public: 00166 LOBPCGRitzFailure(const std::string& what_arg) : AnasaziError(what_arg) 00167 {}}; 00168 00181 class LOBPCGInitFailure : public AnasaziError {public: 00182 LOBPCGInitFailure(const std::string& what_arg) : AnasaziError(what_arg) 00183 {}}; 00184 00201 class LOBPCGOrthoFailure : public AnasaziError {public: 00202 LOBPCGOrthoFailure(const std::string& what_arg) : AnasaziError(what_arg) 00203 {}}; 00204 00206 00207 00208 template <class ScalarType, class MV, class OP> 00209 class LOBPCG : public Eigensolver<ScalarType,MV,OP> { 00210 public: 00211 00213 00214 00222 LOBPCG( const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 00223 const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter, 00224 const Teuchos::RCP<OutputManager<ScalarType> > &printer, 00225 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester, 00226 const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho, 00227 Teuchos::ParameterList ¶ms 00228 ); 00229 00231 virtual ~LOBPCG() {}; 00232 00234 00236 00237 00264 void iterate(); 00265 00291 void initialize(LOBPCGState<ScalarType,MV> newstate); 00292 00296 void initialize(); 00297 00317 bool isInitialized() const; 00318 00329 LOBPCGState<ScalarType,MV> getState() const; 00330 00332 00334 00335 00337 int getNumIters() const; 00338 00340 void resetNumIters(); 00341 00349 Teuchos::RCP<const MV> getRitzVectors(); 00350 00356 std::vector<Value<ScalarType> > getRitzValues(); 00357 00365 std::vector<int> getRitzIndex(); 00366 00367 00373 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getResNorms(); 00374 00375 00381 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRes2Norms(); 00382 00383 00391 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRitzRes2Norms(); 00392 00393 00402 int getCurSubspaceDim() const; 00403 00407 int getMaxSubspaceDim() const; 00408 00410 00412 00413 00415 void setStatusTest(Teuchos::RCP<StatusTest<ScalarType,MV,OP> > test); 00416 00418 Teuchos::RCP<StatusTest<ScalarType,MV,OP> > getStatusTest() const; 00419 00421 const Eigenproblem<ScalarType,MV,OP>& getProblem() const; 00422 00423 00432 void setBlockSize(int blockSize); 00433 00434 00436 int getBlockSize() const; 00437 00438 00450 void setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs); 00451 00453 Teuchos::Array<Teuchos::RCP<const MV> > getAuxVecs() const; 00454 00456 00458 00459 00465 void setFullOrtho(bool fullOrtho); 00466 00468 bool getFullOrtho() const; 00469 00471 bool hasP(); 00472 00474 00476 00477 00479 void currentStatus(std::ostream &os); 00480 00482 00483 private: 00484 // 00485 // 00486 // 00487 void setupViews(); 00488 // 00489 // Convenience typedefs 00490 // 00491 typedef SolverUtils<ScalarType,MV,OP> Utils; 00492 typedef MultiVecTraits<ScalarType,MV> MVT; 00493 typedef OperatorTraits<ScalarType,MV,OP> OPT; 00494 typedef Teuchos::ScalarTraits<ScalarType> SCT; 00495 typedef typename SCT::magnitudeType MagnitudeType; 00496 const MagnitudeType ONE; 00497 const MagnitudeType ZERO; 00498 const MagnitudeType NANVAL; 00499 // 00500 // Internal structs 00501 // 00502 struct CheckList { 00503 bool checkX, checkMX, checkKX; 00504 bool checkH, checkMH; 00505 bool checkP, checkMP, checkKP; 00506 bool checkR, checkQ; 00507 CheckList() : checkX(false),checkMX(false),checkKX(false), 00508 checkH(false),checkMH(false), 00509 checkP(false),checkMP(false),checkKP(false), 00510 checkR(false),checkQ(false) {}; 00511 }; 00512 // 00513 // Internal methods 00514 // 00515 std::string accuracyCheck(const CheckList &chk, const std::string &where) const; 00516 // 00517 // Classes inputed through constructor that define the eigenproblem to be solved. 00518 // 00519 const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > problem_; 00520 const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > sm_; 00521 const Teuchos::RCP<OutputManager<ScalarType> > om_; 00522 Teuchos::RCP<StatusTest<ScalarType,MV,OP> > tester_; 00523 const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > orthman_; 00524 // 00525 // Information obtained from the eigenproblem 00526 // 00527 Teuchos::RCP<const OP> Op_; 00528 Teuchos::RCP<const OP> MOp_; 00529 Teuchos::RCP<const OP> Prec_; 00530 bool hasM_; 00531 // 00532 // Internal timers 00533 // 00534 Teuchos::RCP<Teuchos::Time> timerOp_, timerMOp_, timerPrec_, 00535 timerSort_, 00536 timerLocalProj_, timerDS_, 00537 timerLocalUpdate_, timerCompRes_, 00538 timerOrtho_, timerInit_; 00539 // 00540 // Counters 00541 // 00542 // Number of operator applications 00543 int count_ApplyOp_, count_ApplyM_, count_ApplyPrec_; 00544 00545 // 00546 // Algorithmic parameters. 00547 // 00548 // blockSize_ is the solver block size 00549 int blockSize_; 00550 // 00551 // fullOrtho_ dictates whether the orthogonalization procedures specified by Hetmaniuk and Lehoucq should 00552 // be activated (see citations at the top of this file) 00553 bool fullOrtho_; 00554 00555 // 00556 // Current solver state 00557 // 00558 // initialized_ specifies that the basis vectors have been initialized and the iterate() routine 00559 // is capable of running; _initialize is controlled by the initialize() member method 00560 // For the implications of the state of initialized_, please see documentation for initialize() 00561 bool initialized_; 00562 // 00563 // nevLocal_ reflects how much of the current basis is valid (0 <= nevLocal_ <= 3*blockSize_) 00564 // this tells us how many of the values in theta_ are valid Ritz values 00565 int nevLocal_; 00566 // 00567 // hasP_ tells us whether there is valid data in P (and KP,MP) 00568 bool hasP_; 00569 // 00570 // State Multivecs 00571 // V_, KV_ MV_ and R_ are primary pointers to allocated multivectors 00572 Teuchos::RCP<MV> V_, KV_, MV_, R_; 00573 // the rest are multivector views into V_, KV_ and MV_ 00574 Teuchos::RCP<MV> X_, KX_, MX_, 00575 H_, KH_, MH_, 00576 P_, KP_, MP_; 00577 00578 // 00579 // if fullOrtho_ == true, then we must produce the following on every iteration: 00580 // [newX newP] = [X H P] [CX;CP] 00581 // the structure of [CX;CP] when using full orthogonalization does not allow us to 00582 // do this in situ, and R_ does not have enough storage for newX and newP. therefore, 00583 // we must allocate additional storage for this. 00584 // otherwise, when not using full orthogonalization, the structure 00585 // [newX newP] = [X H P] [CX1 0 ] 00586 // [CX2 CP2] allows us to work using only R as work space 00587 // [CX3 CP3] 00588 Teuchos::RCP<MV> tmpmvec_; 00589 // 00590 // auxiliary vectors 00591 Teuchos::Array<Teuchos::RCP<const MV> > auxVecs_; 00592 int numAuxVecs_; 00593 // 00594 // Number of iterations that have been performed. 00595 int iter_; 00596 // 00597 // Current eigenvalues, residual norms 00598 std::vector<MagnitudeType> theta_, Rnorms_, R2norms_; 00599 // 00600 // are the residual norms current with the residual? 00601 bool Rnorms_current_, R2norms_current_; 00602 00603 }; 00604 00605 00606 00607 00609 // Constructor 00610 template <class ScalarType, class MV, class OP> 00611 LOBPCG<ScalarType,MV,OP>::LOBPCG( 00612 const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 00613 const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter, 00614 const Teuchos::RCP<OutputManager<ScalarType> > &printer, 00615 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester, 00616 const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho, 00617 Teuchos::ParameterList ¶ms 00618 ) : 00619 ONE(Teuchos::ScalarTraits<MagnitudeType>::one()), 00620 ZERO(Teuchos::ScalarTraits<MagnitudeType>::zero()), 00621 NANVAL(Teuchos::ScalarTraits<MagnitudeType>::nan()), 00622 // problem, tools 00623 problem_(problem), 00624 sm_(sorter), 00625 om_(printer), 00626 tester_(tester), 00627 orthman_(ortho), 00628 // timers, counters 00629 timerOp_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Operation Op*x")), 00630 timerMOp_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Operation M*x")), 00631 timerPrec_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Operation Prec*x")), 00632 timerSort_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Sorting eigenvalues")), 00633 timerLocalProj_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Local projection")), 00634 timerDS_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Direct solve")), 00635 timerLocalUpdate_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Local update")), 00636 timerCompRes_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Computing residuals")), 00637 timerOrtho_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Orthogonalization")), 00638 timerInit_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Initialization")), 00639 count_ApplyOp_(0), 00640 count_ApplyM_(0), 00641 count_ApplyPrec_(0), 00642 // internal data 00643 blockSize_(0), 00644 fullOrtho_(params.get("Full Ortho", true)), 00645 initialized_(false), 00646 nevLocal_(0), 00647 hasP_(false), 00648 auxVecs_( Teuchos::Array<Teuchos::RCP<const MV> >(0) ), 00649 numAuxVecs_(0), 00650 iter_(0), 00651 Rnorms_current_(false), 00652 R2norms_current_(false) 00653 { 00654 TEST_FOR_EXCEPTION(problem_ == Teuchos::null,std::invalid_argument, 00655 "Anasazi::LOBPCG::constructor: user passed null problem pointer."); 00656 TEST_FOR_EXCEPTION(sm_ == Teuchos::null,std::invalid_argument, 00657 "Anasazi::LOBPCG::constructor: user passed null sort manager pointer."); 00658 TEST_FOR_EXCEPTION(om_ == Teuchos::null,std::invalid_argument, 00659 "Anasazi::LOBPCG::constructor: user passed null output manager pointer."); 00660 TEST_FOR_EXCEPTION(tester_ == Teuchos::null,std::invalid_argument, 00661 "Anasazi::LOBPCG::constructor: user passed null status test pointer."); 00662 TEST_FOR_EXCEPTION(orthman_ == Teuchos::null,std::invalid_argument, 00663 "Anasazi::LOBPCG::constructor: user passed null orthogonalization manager pointer."); 00664 TEST_FOR_EXCEPTION(problem_->isProblemSet() == false, std::invalid_argument, 00665 "Anasazi::LOBPCG::constructor: problem is not set."); 00666 TEST_FOR_EXCEPTION(problem_->isHermitian() == false, std::invalid_argument, 00667 "Anasazi::LOBPCG::constructor: problem is not Hermitian; LOBPCG requires Hermitian problem."); 00668 00669 // get the problem operators 00670 Op_ = problem_->getOperator(); 00671 TEST_FOR_EXCEPTION(Op_ == Teuchos::null, std::invalid_argument, 00672 "Anasazi::LOBPCG::constructor: problem provides no operator."); 00673 MOp_ = problem_->getM(); 00674 Prec_ = problem_->getPrec(); 00675 hasM_ = (MOp_ != Teuchos::null); 00676 00677 // set the block size and allocate data 00678 int bs = params.get("Block Size", problem_->getNEV()); 00679 setBlockSize(bs); 00680 } 00681 00682 00684 // Set the block size and make necessary adjustments. 00685 template <class ScalarType, class MV, class OP> 00686 void LOBPCG<ScalarType,MV,OP>::setBlockSize (int newBS) 00687 { 00688 // time spent here counts towards timerInit_ 00689 Teuchos::TimeMonitor lcltimer( *timerInit_ ); 00690 00691 // This routine only allocates space; it doesn't not perform any computation 00692 // if size is decreased, take the first newBS vectors of all and leave state as is 00693 // otherwise, grow/allocate space and set solver to unitialized 00694 00695 Teuchos::RCP<const MV> tmp; 00696 // grab some Multivector to Clone 00697 // in practice, getInitVec() should always provide this, but it is possible to use a 00698 // Eigenproblem with nothing in getInitVec() by manually initializing with initialize(); 00699 // in case of that strange scenario, we will try to Clone from R_ because it is smaller 00700 // than V_, and we don't want to keep V_ around longer than necessary 00701 if (blockSize_ > 0) { 00702 tmp = R_; 00703 } 00704 else { 00705 tmp = problem_->getInitVec(); 00706 TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::logic_error, 00707 "Anasazi::LOBPCG::setBlockSize(): eigenproblem did not specify initial vectors to clone from."); 00708 } 00709 00710 TEST_FOR_EXCEPTION(newBS <= 0 || newBS > MVT::GetVecLength(*tmp), std::invalid_argument, "Anasazi::LOBPCG::setBlockSize(): block size must be strictly positive."); 00711 if (newBS == blockSize_) { 00712 // do nothing 00713 return; 00714 } 00715 else if (newBS < blockSize_ && initialized_) { 00716 // 00717 // shrink vectors 00718 00719 // release views so we can modify the bases 00720 X_ = Teuchos::null; 00721 KX_ = Teuchos::null; 00722 MX_ = Teuchos::null; 00723 H_ = Teuchos::null; 00724 KH_ = Teuchos::null; 00725 MH_ = Teuchos::null; 00726 P_ = Teuchos::null; 00727 KP_ = Teuchos::null; 00728 MP_ = Teuchos::null; 00729 00730 // make new indices vectors 00731 std::vector<int> newind(newBS), oldind(newBS); 00732 for (int i=0; i<newBS; i++) { 00733 newind[i] = i; 00734 oldind[i] = i; 00735 } 00736 00737 Teuchos::RCP<MV> newV, newMV, newKV, newR; 00738 Teuchos::RCP<const MV> src; 00739 // allocate R and newV 00740 newR = MVT::Clone(*tmp,newBS); 00741 newV = MVT::Clone(*tmp,newBS*3); 00742 newKV = MVT::Clone(*tmp,newBS*3); 00743 if (hasM_) { 00744 newMV = MVT::Clone(*tmp,newBS*3); 00745 } 00746 00747 // 00748 // if we are initialized, we want to pull the data from V_ into newV: 00749 // bs | bs | bs 00750 // newV = [newX | **** |newP ] 00751 // newKV = [newKX| **** |newKP] 00752 // newMV = [newMX| **** |newMP] 00753 // where 00754 // oldbs | oldbs | oldbs 00755 // V_ = [newX *** | ******* | newP ***] 00756 // KV_ = [newKX *** | ******* | newKP ***] 00757 // MV_ = [newMX *** | ******* | newMP ***] 00758 // 00759 // we don't care to copy the data corresponding to H 00760 // we will not copy the M data if !hasM_, because it doesn't exist 00761 // 00762 00763 // these are shrink operations which preserve their data 00764 theta_.resize(3*newBS); 00765 Rnorms_.resize(newBS); 00766 R2norms_.resize(newBS); 00767 00768 // copy residual vectors: oldind,newind currently contains [0,...,newBS-1] 00769 src = MVT::CloneView(*R_,newind); 00770 MVT::SetBlock(*src,newind,*newR); 00771 // free old memory and point to new memory 00772 R_ = newR; 00773 00774 // copy in order: newX newKX newMX, then newP newKP newMP 00775 // for X: [0,bs-1] <-- [0,bs-1] 00776 src = MVT::CloneView(*V_,oldind); 00777 MVT::SetBlock(*src,newind,*newV); 00778 src = MVT::CloneView(*KV_,oldind); 00779 MVT::SetBlock(*src,newind,*newKV); 00780 if (hasM_) { 00781 src = MVT::CloneView(*MV_,oldind); 00782 MVT::SetBlock(*src,newind,*newMV); 00783 } 00784 // for P: [2*bs, 3*bs-1] <-- [2*oldbs, 2*oldbs+bs-1] 00785 for (int i=0; i<newBS; i++) { 00786 newind[i] += 2*newBS; 00787 oldind[i] += 2*blockSize_; 00788 } 00789 src = MVT::CloneView(*V_,oldind); 00790 MVT::SetBlock(*src,newind,*newV); 00791 src = MVT::CloneView(*KV_,oldind); 00792 MVT::SetBlock(*src,newind,*newKV); 00793 if (hasM_) { 00794 src = MVT::CloneView(*MV_,oldind); 00795 MVT::SetBlock(*src,newind,*newMV); 00796 } 00797 00798 // release temp view 00799 src = Teuchos::null; 00800 00801 // release old allocations and point at new memory 00802 V_ = newV; 00803 KV_ = newKV; 00804 if (hasM_) { 00805 MV_ = newMV; 00806 } 00807 else { 00808 MV_ = V_; 00809 } 00810 } 00811 else { 00812 // newBS > blockSize_ or not initialized 00813 // this is also the scenario for our initial call to setBlockSize(), in the constructor 00814 initialized_ = false; 00815 hasP_ = false; 00816 00817 // release views 00818 X_ = Teuchos::null; 00819 KX_ = Teuchos::null; 00820 MX_ = Teuchos::null; 00821 H_ = Teuchos::null; 00822 KH_ = Teuchos::null; 00823 MH_ = Teuchos::null; 00824 P_ = Teuchos::null; 00825 KP_ = Teuchos::null; 00826 MP_ = Teuchos::null; 00827 00828 // free allocated storage 00829 R_ = Teuchos::null; 00830 V_ = Teuchos::null; 00831 00832 // allocate scalar vectors 00833 theta_.resize(3*newBS,NANVAL); 00834 Rnorms_.resize(newBS,NANVAL); 00835 R2norms_.resize(newBS,NANVAL); 00836 00837 // clone multivectors off of tmp 00838 R_ = MVT::Clone(*tmp,newBS); 00839 V_ = MVT::Clone(*tmp,newBS*3); 00840 KV_ = MVT::Clone(*tmp,newBS*3); 00841 if (hasM_) { 00842 MV_ = MVT::Clone(*tmp,newBS*3); 00843 } 00844 else { 00845 MV_ = V_; 00846 } 00847 } 00848 00849 // allocate tmp space 00850 tmpmvec_ = Teuchos::null; 00851 if (fullOrtho_) { 00852 tmpmvec_ = MVT::Clone(*tmp,newBS); 00853 } 00854 00855 // set new block size 00856 blockSize_ = newBS; 00857 00858 // setup new views 00859 setupViews(); 00860 } 00861 00862 00864 // Setup views into V,KV,MV 00865 template <class ScalarType, class MV, class OP> 00866 void LOBPCG<ScalarType,MV,OP>::setupViews() 00867 { 00868 std::vector<int> ind(blockSize_); 00869 00870 for (int i=0; i<blockSize_; i++) { 00871 ind[i] = i; 00872 } 00873 X_ = MVT::CloneViewNonConst(*V_,ind); 00874 KX_ = MVT::CloneViewNonConst(*KV_,ind); 00875 if (hasM_) { 00876 MX_ = MVT::CloneViewNonConst(*MV_,ind); 00877 } 00878 else { 00879 MX_ = X_; 00880 } 00881 00882 for (int i=0; i<blockSize_; i++) { 00883 ind[i] += blockSize_; 00884 } 00885 H_ = MVT::CloneViewNonConst(*V_,ind); 00886 KH_ = MVT::CloneViewNonConst(*KV_,ind); 00887 if (hasM_) { 00888 MH_ = MVT::CloneViewNonConst(*MV_,ind); 00889 } 00890 else { 00891 MH_ = H_; 00892 } 00893 00894 for (int i=0; i<blockSize_; i++) { 00895 ind[i] += blockSize_; 00896 } 00897 P_ = MVT::CloneViewNonConst(*V_,ind); 00898 KP_ = MVT::CloneViewNonConst(*KV_,ind); 00899 if (hasM_) { 00900 MP_ = MVT::CloneViewNonConst(*MV_,ind); 00901 } 00902 else { 00903 MP_ = P_; 00904 } 00905 } 00906 00907 00909 // Set the auxiliary vectors 00910 template <class ScalarType, class MV, class OP> 00911 void LOBPCG<ScalarType,MV,OP>::setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs) { 00912 typedef typename Teuchos::Array<Teuchos::RCP<const MV> >::iterator tarcpmv; 00913 00914 // set new auxiliary vectors 00915 auxVecs_ = auxvecs; 00916 00917 numAuxVecs_ = 0; 00918 for (tarcpmv i=auxVecs_.begin(); i != auxVecs_.end(); i++) { 00919 numAuxVecs_ += MVT::GetNumberVecs(**i); 00920 } 00921 00922 // If the solver has been initialized, X and P are not necessarily orthogonal to new auxiliary vectors 00923 if (numAuxVecs_ > 0 && initialized_) { 00924 initialized_ = false; 00925 hasP_ = false; 00926 } 00927 00928 if (om_->isVerbosity( Debug ) ) { 00929 // Check almost everything here 00930 CheckList chk; 00931 chk.checkQ = true; 00932 om_->print( Debug, accuracyCheck(chk, ": in setAuxVecs()") ); 00933 } 00934 } 00935 00936 00938 /* Initialize the state of the solver 00939 * 00940 * POST-CONDITIONS: 00941 * 00942 * initialized_ == true 00943 * X is orthonormal, orthogonal to auxVecs_ 00944 * KX = Op*X 00945 * MX = M*X if hasM_ 00946 * theta_ contains Ritz values of X 00947 * R = KX - MX*diag(theta_) 00948 * if hasP() == true, 00949 * P orthogonal to auxVecs_ 00950 * if fullOrtho_ == true, 00951 * P orthonormal and orthogonal to X 00952 * KP = Op*P 00953 * MP = M*P 00954 */ 00955 template <class ScalarType, class MV, class OP> 00956 void LOBPCG<ScalarType,MV,OP>::initialize(LOBPCGState<ScalarType,MV> newstate) 00957 { 00958 // NOTE: memory has been allocated by setBlockSize(). Use SetBlock below; do not Clone 00959 // NOTE: Overall time spent in this routine is counted to timerInit_; portions will also be counted towards other primitives 00960 00961 Teuchos::TimeMonitor inittimer( *timerInit_ ); 00962 00963 std::vector<int> bsind(blockSize_); 00964 for (int i=0; i<blockSize_; i++) bsind[i] = i; 00965 00966 // in LOBPCG, X (the subspace iterate) is primary 00967 // the order of dependence follows like so. 00968 // --init-> X 00969 // --op apply-> MX,KX 00970 // --ritz analysis-> theta 00971 // --optional-> P,MP,KP 00972 // 00973 // if the user specifies all data for a level, we will accept it. 00974 // otherwise, we will generate the whole level, and all subsequent levels. 00975 // 00976 // the data members are ordered based on dependence, and the levels are 00977 // partitioned according to the amount of work required to produce the 00978 // items in a level. 00979 // 00980 // inconsitent multivectors widths and lengths will not be tolerated, and 00981 // will be treated with exceptions. 00982 00983 // set up X, KX, MX: get them from "state" if user specified them 00984 00985 //---------------------------------------- 00986 // set up X, MX, KX 00987 //---------------------------------------- 00988 if (newstate.X != Teuchos::null) { 00989 TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.X) != MVT::GetVecLength(*X_), 00990 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.X not correct." ); 00991 // newstate.X must have blockSize_ vectors; any more will be ignored 00992 TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.X) < blockSize_, 00993 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.X must have at least block size vectors."); 00994 00995 // put X data in X_ 00996 MVT::SetBlock(*newstate.X,bsind,*X_); 00997 00998 // put MX data in MX_ 00999 if (hasM_) { 01000 if (newstate.MX != Teuchos::null) { 01001 TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.MX) != MVT::GetVecLength(*MX_), 01002 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MX not correct." ); 01003 // newstate.MX must have blockSize_ vectors; any more will be ignored 01004 TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MX) < blockSize_, 01005 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MX must have at least block size vectors."); 01006 MVT::SetBlock(*newstate.MX,bsind,*MX_); 01007 } 01008 else { 01009 // user didn't specify MX, compute it 01010 { 01011 Teuchos::TimeMonitor lcltimer( *timerMOp_ ); 01012 OPT::Apply(*MOp_,*X_,*MX_); 01013 count_ApplyM_ += blockSize_; 01014 } 01015 // we generated MX; we will generate R as well 01016 newstate.R = Teuchos::null; 01017 } 01018 } 01019 01020 // put data in KX 01021 if (newstate.KX != Teuchos::null) { 01022 TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.KX) != MVT::GetVecLength(*KX_), 01023 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KX not correct." ); 01024 // newstate.KX must have blockSize_ vectors; any more will be ignored 01025 TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KX) < blockSize_, 01026 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KX must have at least block size vectors."); 01027 MVT::SetBlock(*newstate.KX,bsind,*KX_); 01028 } 01029 else { 01030 // user didn't specify KX, compute it 01031 { 01032 Teuchos::TimeMonitor lcltimer( *timerOp_ ); 01033 OPT::Apply(*Op_,*X_,*KX_); 01034 count_ApplyOp_ += blockSize_; 01035 } 01036 // we generated KX; we will generate R as well 01037 newstate.R = Teuchos::null; 01038 } 01039 } 01040 else { 01041 // user did not specify X 01042 // we will initialize X, compute KX and MX, and compute R 01043 // 01044 // clear state so we won't use any data from it below 01045 newstate.P = Teuchos::null; 01046 newstate.KP = Teuchos::null; 01047 newstate.MP = Teuchos::null; 01048 newstate.R = Teuchos::null; 01049 newstate.T = Teuchos::null; 01050 01051 // generate a basis and projectAndNormalize 01052 Teuchos::RCP<const MV> ivec = problem_->getInitVec(); 01053 TEST_FOR_EXCEPTION(ivec == Teuchos::null,std::logic_error, 01054 "Anasazi::LOBPCG::initialize(): Eigenproblem did not specify initial vectors to clone from."); 01055 01056 int initSize = MVT::GetNumberVecs(*ivec); 01057 if (initSize > blockSize_) { 01058 // we need only the first blockSize_ vectors from ivec; get a view of them 01059 initSize = blockSize_; 01060 std::vector<int> ind(blockSize_); 01061 for (int i=0; i<blockSize_; i++) ind[i] = i; 01062 ivec = MVT::CloneView(*ivec,ind); 01063 } 01064 01065 // assign ivec to first part of X_ 01066 if (initSize > 0) { 01067 std::vector<int> ind(initSize); 01068 for (int i=0; i<initSize; i++) ind[i] = i; 01069 MVT::SetBlock(*ivec,ind,*X_); 01070 } 01071 // fill the rest of X_ with random 01072 if (blockSize_ > initSize) { 01073 std::vector<int> ind(blockSize_ - initSize); 01074 for (int i=0; i<blockSize_ - initSize; i++) ind[i] = initSize + i; 01075 Teuchos::RCP<MV> rX = MVT::CloneViewNonConst(*X_,ind); 01076 MVT::MvRandom(*rX); 01077 rX = Teuchos::null; 01078 } 01079 01080 // put data in MX 01081 if (hasM_) { 01082 Teuchos::TimeMonitor lcltimer( *timerMOp_ ); 01083 OPT::Apply(*MOp_,*X_,*MX_); 01084 count_ApplyM_ += blockSize_; 01085 } 01086 01087 // remove auxVecs from X_ and normalize it 01088 if (numAuxVecs_ > 0) { 01089 Teuchos::TimeMonitor lcltimer( *timerOrtho_ ); 01090 Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummy; 01091 int rank = orthman_->projectAndNormalizeMat(*X_,auxVecs_,dummy,Teuchos::null,MX_); 01092 TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure, 01093 "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank."); 01094 } 01095 else { 01096 Teuchos::TimeMonitor lcltimer( *timerOrtho_ ); 01097 int rank = orthman_->normalizeMat(*X_,Teuchos::null,MX_); 01098 TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure, 01099 "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank."); 01100 } 01101 01102 // put data in KX 01103 { 01104 Teuchos::TimeMonitor lcltimer( *timerOp_ ); 01105 OPT::Apply(*Op_,*X_,*KX_); 01106 count_ApplyOp_ += blockSize_; 01107 } 01108 } // end if (newstate.X != Teuchos::null) 01109 01110 01111 //---------------------------------------- 01112 // set up Ritz values 01113 //---------------------------------------- 01114 theta_.resize(3*blockSize_,NANVAL); 01115 if (newstate.T != Teuchos::null) { 01116 TEST_FOR_EXCEPTION( (signed int)(newstate.T->size()) < blockSize_, 01117 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.T must contain at least block size Ritz values."); 01118 for (int i=0; i<blockSize_; i++) { 01119 theta_[i] = (*newstate.T)[i]; 01120 } 01121 } 01122 else { 01123 // get ritz vecs/vals 01124 Teuchos::SerialDenseMatrix<int,ScalarType> KK(blockSize_,blockSize_), 01125 MM(blockSize_,blockSize_), 01126 S(blockSize_,blockSize_); 01127 { 01128 Teuchos::TimeMonitor lcltimer( *timerLocalProj_ ); 01129 // project K 01130 MVT::MvTransMv(ONE,*X_,*KX_,KK); 01131 // project M 01132 MVT::MvTransMv(ONE,*X_,*MX_,MM); 01133 nevLocal_ = blockSize_; 01134 } 01135 01136 // solve the projected problem 01137 { 01138 Teuchos::TimeMonitor lcltimer( *timerDS_ ); 01139 Utils::directSolver(blockSize_, KK, Teuchos::rcpFromRef(MM), S, theta_, nevLocal_, 1); 01140 TEST_FOR_EXCEPTION(nevLocal_ != blockSize_,LOBPCGInitFailure, 01141 "Anasazi::LOBPCG::initialize(): Initial Ritz analysis did not produce enough Ritz pairs to initialize algorithm."); 01142 } 01143 01144 // We only have blockSize_ ritz pairs, ergo we do not need to select. 01145 // However, we still require them to be ordered correctly 01146 { 01147 Teuchos::TimeMonitor lcltimer( *timerSort_ ); 01148 01149 std::vector<int> order(blockSize_); 01150 // 01151 // sort the first blockSize_ values in theta_ 01152 sm_->sort(theta_, Teuchos::rcpFromRef(order), blockSize_); // don't catch exception 01153 // 01154 // apply the same ordering to the primitive ritz vectors 01155 Utils::permuteVectors(order,S); 01156 } 01157 01158 // update the solution, use R for storage 01159 { 01160 Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ ); 01161 // X <- X*S 01162 MVT::MvAddMv( ONE, *X_, ZERO, *X_, *R_ ); 01163 MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *X_ ); 01164 // KX <- KX*S 01165 MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ ); 01166 MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *KX_ ); 01167 if (hasM_) { 01168 // MX <- MX*S 01169 MVT::MvAddMv( ONE, *MX_, ZERO, *MX_, *R_ ); 01170 MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *MX_ ); 01171 } 01172 } 01173 } 01174 01175 //---------------------------------------- 01176 // compute R 01177 //---------------------------------------- 01178 if (newstate.R != Teuchos::null) { 01179 TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.R) != MVT::GetVecLength(*R_), 01180 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.R not correct." ); 01181 TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.R) < blockSize_, 01182 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.R must have blockSize number of vectors." ); 01183 MVT::SetBlock(*newstate.R,bsind,*R_); 01184 } 01185 else { 01186 Teuchos::TimeMonitor lcltimer( *timerCompRes_ ); 01187 // form R <- KX - MX*T 01188 MVT::MvAddMv(ZERO,*KX_,ONE,*KX_,*R_); 01189 Teuchos::SerialDenseMatrix<int,ScalarType> T(blockSize_,blockSize_); 01190 for (int i=0; i<blockSize_; i++) T(i,i) = theta_[i]; 01191 MVT::MvTimesMatAddMv(-ONE,*MX_,T,ONE,*R_); 01192 } 01193 01194 // R has been updated; mark the norms as out-of-date 01195 Rnorms_current_ = false; 01196 R2norms_current_ = false; 01197 01198 // put data in P,KP,MP: P is not used to set theta 01199 if (newstate.P != Teuchos::null) { 01200 TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.P) < blockSize_ , 01201 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.P must have blockSize number of vectors." ); 01202 TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.P) != MVT::GetVecLength(*P_), 01203 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.P not correct." ); 01204 hasP_ = true; 01205 01206 // set P_ 01207 MVT::SetBlock(*newstate.P,bsind,*P_); 01208 01209 // set/compute KP_ 01210 if (newstate.KP != Teuchos::null) { 01211 TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KP) < blockSize_, 01212 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KP must have blockSize number of vectors." ); 01213 TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.KP) != MVT::GetVecLength(*KP_), 01214 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KP not correct." ); 01215 MVT::SetBlock(*newstate.KP,bsind,*KP_); 01216 } 01217 else { 01218 Teuchos::TimeMonitor lcltimer( *timerOp_ ); 01219 OPT::Apply(*Op_,*P_,*KP_); 01220 count_ApplyOp_ += blockSize_; 01221 } 01222 01223 // set/compute MP_ 01224 if (hasM_) { 01225 if (newstate.MP != Teuchos::null) { 01226 TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MP) < blockSize_, 01227 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MP must have blockSize number of vectors." ); 01228 TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.MP) != MVT::GetVecLength(*MP_), 01229 std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MP not correct." ); 01230 MVT::SetBlock(*newstate.MP,bsind,*MP_); 01231 } 01232 else { 01233 Teuchos::TimeMonitor lcltimer( *timerMOp_ ); 01234 OPT::Apply(*MOp_,*P_,*MP_); 01235 count_ApplyM_ += blockSize_; 01236 } 01237 } 01238 } 01239 else { 01240 hasP_ = false; 01241 } 01242 01243 // finally, we are initialized 01244 initialized_ = true; 01245 01246 if (om_->isVerbosity( Debug ) ) { 01247 // Check almost everything here 01248 CheckList chk; 01249 chk.checkX = true; 01250 chk.checkKX = true; 01251 chk.checkMX = true; 01252 chk.checkP = true; 01253 chk.checkKP = true; 01254 chk.checkMP = true; 01255 chk.checkR = true; 01256 chk.checkQ = true; 01257 om_->print( Debug, accuracyCheck(chk, ": after initialize()") ); 01258 } 01259 01260 } 01261 01262 template <class ScalarType, class MV, class OP> 01263 void LOBPCG<ScalarType,MV,OP>::initialize() 01264 { 01265 LOBPCGState<ScalarType,MV> empty; 01266 initialize(empty); 01267 } 01268 01269 01271 // Instruct the solver to use full orthogonalization 01272 template <class ScalarType, class MV, class OP> 01273 void LOBPCG<ScalarType,MV,OP>::setFullOrtho (bool fullOrtho) 01274 { 01275 if ( fullOrtho_ == true || initialized_ == false || fullOrtho == fullOrtho_ ) { 01276 // state is already orthogonalized or solver is not initialized 01277 fullOrtho_ = fullOrtho; 01278 } 01279 else { 01280 // solver is initialized, state is not fully orthogonalized, and user has requested full orthogonalization 01281 // ergo, we must throw away data in P 01282 fullOrtho_ = true; 01283 hasP_ = false; 01284 } 01285 01286 // the user has called setFullOrtho, so the class has been instantiated 01287 // ergo, the data has already been allocated, i.e., setBlockSize() has been called 01288 // if it is already allocated, it should be the proper size 01289 if (fullOrtho_ && tmpmvec_ == Teuchos::null) { 01290 // allocated the workspace 01291 tmpmvec_ = MVT::Clone(*X_,blockSize_); 01292 } 01293 else if (fullOrtho_==false) { 01294 // free the workspace 01295 tmpmvec_ = Teuchos::null; 01296 } 01297 } 01298 01299 01301 // Perform LOBPCG iterations until the StatusTest tells us to stop. 01302 template <class ScalarType, class MV, class OP> 01303 void LOBPCG<ScalarType,MV,OP>::iterate () 01304 { 01305 // 01306 // Allocate/initialize data structures 01307 // 01308 if (initialized_ == false) { 01309 initialize(); 01310 } 01311 01312 // 01313 // Miscellaneous definitions 01314 const int oneBlock = blockSize_; 01315 const int twoBlocks = 2*blockSize_; 01316 const int threeBlocks = 3*blockSize_; 01317 01318 std::vector<int> indblock1(blockSize_), indblock2(blockSize_), indblock3(blockSize_); 01319 for (int i=0; i<blockSize_; i++) { 01320 indblock1[i] = i; 01321 indblock2[i] = i + blockSize_; 01322 indblock3[i] = i + 2*blockSize_; 01323 } 01324 01325 // 01326 // Define dense projected/local matrices 01327 // KK = Local stiffness matrix (size: 3*blockSize_ x 3*blockSize_) 01328 // MM = Local mass matrix (size: 3*blockSize_ x 3*blockSize_) 01329 // S = Local eigenvectors (size: 3*blockSize_ x 3*blockSize_) 01330 Teuchos::SerialDenseMatrix<int,ScalarType> KK( threeBlocks, threeBlocks ), 01331 MM( threeBlocks, threeBlocks ), 01332 S( threeBlocks, threeBlocks ); 01333 01334 while (tester_->checkStatus(this) != Passed) { 01335 01336 // Print information on current status 01337 if (om_->isVerbosity(Debug)) { 01338 currentStatus( om_->stream(Debug) ); 01339 } 01340 else if (om_->isVerbosity(IterationDetails)) { 01341 currentStatus( om_->stream(IterationDetails) ); 01342 } 01343 01344 // increment iteration counter 01345 iter_++; 01346 01347 // Apply the preconditioner on the residuals: H <- Prec*R 01348 if (Prec_ != Teuchos::null) { 01349 Teuchos::TimeMonitor lcltimer( *timerPrec_ ); 01350 OPT::Apply( *Prec_, *R_, *H_ ); // don't catch the exception 01351 count_ApplyPrec_ += blockSize_; 01352 } 01353 else { 01354 MVT::MvAddMv(ONE,*R_,ZERO,*R_,*H_); 01355 } 01356 01357 // Apply the mass matrix on H 01358 if (hasM_) { 01359 Teuchos::TimeMonitor lcltimer( *timerMOp_ ); 01360 OPT::Apply( *MOp_, *H_, *MH_); // don't catch the exception 01361 count_ApplyM_ += blockSize_; 01362 } 01363 01364 // orthogonalize H against the auxiliary vectors 01365 // optionally: orthogonalize H against X and P ([X P] is already orthonormal) 01366 Teuchos::Array<Teuchos::RCP<const MV> > Q; 01367 Q = auxVecs_; 01368 if (fullOrtho_) { 01369 // X and P are not contiguous, so there is not much point in putting them under 01370 // a single multivector view 01371 Q.push_back(X_); 01372 if (hasP_) { 01373 Q.push_back(P_); 01374 } 01375 } 01376 { 01377 Teuchos::TimeMonitor lcltimer( *timerOrtho_ ); 01378 Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummyC = 01379 Teuchos::tuple<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > >(Teuchos::null); 01380 int rank = orthman_->projectAndNormalizeMat(*H_,Q,dummyC,Teuchos::null,MH_); 01381 // our views are currently in place; it is safe to throw an exception 01382 TEST_FOR_EXCEPTION(rank != blockSize_,LOBPCGOrthoFailure, 01383 "Anasazi::LOBPCG::iterate(): unable to compute orthonormal basis for H."); 01384 } 01385 01386 if (om_->isVerbosity( Debug ) ) { 01387 CheckList chk; 01388 chk.checkH = true; 01389 chk.checkMH = true; 01390 om_->print( Debug, accuracyCheck(chk, ": after ortho H") ); 01391 } 01392 else if (om_->isVerbosity( OrthoDetails ) ) { 01393 CheckList chk; 01394 chk.checkH = true; 01395 chk.checkMH = true; 01396 om_->print( OrthoDetails, accuracyCheck(chk,": after ortho H") ); 01397 } 01398 01399 // Apply the stiffness matrix to H 01400 { 01401 Teuchos::TimeMonitor lcltimer( *timerOp_ ); 01402 OPT::Apply( *Op_, *H_, *KH_); // don't catch the exception 01403 count_ApplyOp_ += blockSize_; 01404 } 01405 01406 if (hasP_) { 01407 nevLocal_ = threeBlocks; 01408 } 01409 else { 01410 nevLocal_ = twoBlocks; 01411 } 01412 01413 // 01414 // we need bases: [X H P] and [H P] (only need the latter if fullOrtho == false) 01415 // we need to perform the following operations: 01416 // X' [KX KH KP] 01417 // X' [MX MH MP] 01418 // H' [KH KP] 01419 // H' [MH MP] 01420 // P' [KP] 01421 // P' [MP] 01422 // [X H P] CX 01423 // [X H P] CP if fullOrtho 01424 // [H P] CP if !fullOrtho 01425 // 01426 // since M[X H P] is potentially the same memory as [X H P], and 01427 // because we are not allowed to have overlapping non-const views of 01428 // a multivector, we will now abandon our non-const views in favor of 01429 // const views 01430 // 01431 X_ = Teuchos::null; 01432 KX_ = Teuchos::null; 01433 MX_ = Teuchos::null; 01434 H_ = Teuchos::null; 01435 KH_ = Teuchos::null; 01436 MH_ = Teuchos::null; 01437 P_ = Teuchos::null; 01438 KP_ = Teuchos::null; 01439 MP_ = Teuchos::null; 01440 Teuchos::RCP<const MV> cX, cH, cXHP, cHP, cK_XHP, cK_HP, cM_XHP, cM_HP, cP, cK_P, cM_P; 01441 { 01442 cX = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock1); 01443 cH = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock2); 01444 01445 std::vector<int> indXHP(nevLocal_); 01446 for (int i=0; i<nevLocal_; i++) { 01447 indXHP[i] = i; 01448 } 01449 cXHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indXHP); 01450 cK_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indXHP); 01451 if (hasM_) { 01452 cM_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indXHP); 01453 } 01454 else { 01455 cM_XHP = cXHP; 01456 } 01457 01458 std::vector<int> indHP(nevLocal_-blockSize_); 01459 for (int i=blockSize_; i<nevLocal_; i++) { 01460 indHP[i-blockSize_] = i; 01461 } 01462 cHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indHP); 01463 cK_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indHP); 01464 if (hasM_) { 01465 cM_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indHP); 01466 } 01467 else { 01468 cM_HP = cHP; 01469 } 01470 01471 if (nevLocal_ == threeBlocks) { 01472 cP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock3); 01473 cK_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indblock3); 01474 if (hasM_) { 01475 cM_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indblock3); 01476 } 01477 else { 01478 cM_P = cP; 01479 } 01480 } 01481 } 01482 01483 // 01484 //---------------------------------------- 01485 // Form "local" mass and stiffness matrices 01486 //---------------------------------------- 01487 { 01488 // We will form only the block upper triangular part of 01489 // [X H P]' K [X H P] and [X H P]' M [X H P] 01490 // Get the necessary views into KK and MM: 01491 // [--K1--] [--M1--] 01492 // KK = [ -K2-] MM = [ -M2-] 01493 // [ K3] [ M3] 01494 // 01495 // It is okay to declare a zero-area view of a Teuchos::SerialDenseMatrix 01496 // 01497 Teuchos::SerialDenseMatrix<int,ScalarType> 01498 K1(Teuchos::View,KK,blockSize_,nevLocal_ ,0*blockSize_,0*blockSize_), 01499 K2(Teuchos::View,KK,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_), 01500 K3(Teuchos::View,KK,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_), 01501 M1(Teuchos::View,MM,blockSize_,nevLocal_ ,0*blockSize_,0*blockSize_), 01502 M2(Teuchos::View,MM,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_), 01503 M3(Teuchos::View,MM,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_); 01504 { 01505 Teuchos::TimeMonitor lcltimer( *timerLocalProj_ ); 01506 MVT::MvTransMv( ONE, *cX, *cK_XHP, K1 ); 01507 MVT::MvTransMv( ONE, *cX, *cM_XHP, M1 ); 01508 MVT::MvTransMv( ONE, *cH, *cK_HP , K2 ); 01509 MVT::MvTransMv( ONE, *cH, *cM_HP , M2 ); 01510 if (nevLocal_ == threeBlocks) { 01511 MVT::MvTransMv( ONE, *cP, *cK_P, K3 ); 01512 MVT::MvTransMv( ONE, *cP, *cM_P, M3 ); 01513 } 01514 } 01515 } 01516 // below, we only need bases [X H P] and [H P] and friends 01517 // furthermore, we only need [H P] and friends if fullOrtho == false 01518 // clear the others now 01519 cX = Teuchos::null; 01520 cH = Teuchos::null; 01521 cP = Teuchos::null; 01522 cK_P = Teuchos::null; 01523 cM_P = Teuchos::null; 01524 if (fullOrtho_ == true) { 01525 cHP = Teuchos::null; 01526 cK_HP = Teuchos::null; 01527 cM_HP = Teuchos::null; 01528 } 01529 01530 // 01531 //--------------------------------------------------- 01532 // Perform a spectral decomposition of (KK,MM) 01533 //--------------------------------------------------- 01534 // 01535 // Get pointers to relevant part of KK, MM and S for Rayleigh-Ritz analysis 01536 Teuchos::SerialDenseMatrix<int,ScalarType> lclKK(Teuchos::View,KK,nevLocal_,nevLocal_), 01537 lclMM(Teuchos::View,MM,nevLocal_,nevLocal_), 01538 lclS(Teuchos::View, S,nevLocal_,nevLocal_); 01539 { 01540 Teuchos::TimeMonitor lcltimer( *timerDS_ ); 01541 int localSize = nevLocal_; 01542 Utils::directSolver(localSize, lclKK, Teuchos::rcpFromRef(lclMM), lclS, theta_, nevLocal_, 0); 01543 // localSize tells directSolver() how big KK,MM are 01544 // however, directSolver() may choose to use only the principle submatrices of KK,MM 01545 // because of loss of MM-orthogonality in the projected eigenvectors 01546 // nevLocal_ tells us how much it used, telling us the effective localSize 01547 // (i.e., how much of KK,MM used by directSolver) 01548 // we will not tolerate any indefiniteness, and will throw an exception if it was 01549 // detected by directSolver 01550 // 01551 if (nevLocal_ != localSize) { 01552 // before throwing the exception, and thereby leaving iterate(), setup the views again 01553 // first, clear the const views 01554 cXHP = Teuchos::null; 01555 cK_XHP = Teuchos::null; 01556 cM_XHP = Teuchos::null; 01557 cHP = Teuchos::null; 01558 cK_HP = Teuchos::null; 01559 cM_HP = Teuchos::null; 01560 setupViews(); 01561 } 01562 TEST_FOR_EXCEPTION(nevLocal_ != localSize, LOBPCGRitzFailure, 01563 "Anasazi::LOBPCG::iterate(): indefiniteness detected in projected mass matrix." ); 01564 } 01565 01566 // 01567 //--------------------------------------------------- 01568 // Sort the ritz values using the sort manager 01569 //--------------------------------------------------- 01570 Teuchos::LAPACK<int,ScalarType> lapack; 01571 Teuchos::BLAS<int,ScalarType> blas; 01572 { 01573 Teuchos::TimeMonitor lcltimer( *timerSort_ ); 01574 01575 std::vector<int> order(nevLocal_); 01576 // 01577 // Sort the first nevLocal_ values in theta_ 01578 sm_->sort(theta_, Teuchos::rcpFromRef(order), nevLocal_); // don't catch exception 01579 // 01580 // Sort the primitive ritz vectors 01581 Utils::permuteVectors(order,lclS); 01582 } 01583 01584 // 01585 //---------------------------------------- 01586 // Compute coefficients for X and P under [X H P] 01587 //---------------------------------------- 01588 // Before computing X,P, optionally perform orthogonalization per Hetmaniuk,Lehoucq paper 01589 // CX will be the coefficients of [X,H,P] for new X, CP for new P 01590 // The paper suggests orthogonalizing CP against CX and orthonormalizing CP, w.r.t. MM 01591 // Here, we will also orthonormalize CX. 01592 // This is accomplished using the Cholesky factorization of [CX CP]^H lclMM [CX CP] 01593 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > CX, CP; 01594 if (fullOrtho_) { 01595 // build orthonormal basis for ( 0 ) that is MM orthogonal to ( S11 ) 01596 // ( S21 ) ( S21 ) 01597 // ( S31 ) ( S31 ) 01598 // Do this using Cholesky factorization of ( S11 0 ) 01599 // ( S21 S21 ) 01600 // ( S31 S31 ) 01601 // ( S11 0 ) 01602 // Build C = ( S21 S21 ) 01603 // ( S31 S31 ) 01604 Teuchos::SerialDenseMatrix<int,ScalarType> C(nevLocal_,twoBlocks), 01605 MMC(nevLocal_,twoBlocks), 01606 CMMC(twoBlocks ,twoBlocks); 01607 01608 // first block of rows: ( S11 0 ) 01609 for (int j=0; j<oneBlock; j++) { 01610 for (int i=0; i<oneBlock; i++) { 01611 // CX 01612 C(i,j) = lclS(i,j); 01613 // CP 01614 C(i,j+oneBlock) = ZERO; 01615 } 01616 } 01617 // second block of rows: (S21 S21) 01618 for (int j=0; j<oneBlock; j++) { 01619 for (int i=oneBlock; i<twoBlocks; i++) { 01620 // CX 01621 C(i,j) = lclS(i,j); 01622 // CP 01623 C(i,j+oneBlock) = lclS(i,j); 01624 } 01625 } 01626 // third block of rows 01627 if (nevLocal_ == threeBlocks) { 01628 for (int j=0; j<oneBlock; j++) { 01629 for (int i=twoBlocks; i<threeBlocks; i++) { 01630 // CX 01631 C(i,j) = lclS(i,j); 01632 // CP 01633 C(i,j+oneBlock) = lclS(i,j); 01634 } 01635 } 01636 } 01637 01638 // compute MMC = lclMM*C 01639 { 01640 int teuchosret; 01641 teuchosret = MMC.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,C,ZERO); 01642 TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error, 01643 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01644 // compute CMMC = C^H*MMC == C^H*lclMM*C 01645 teuchosret = CMMC.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,C,MMC,ZERO); 01646 TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error, 01647 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01648 } 01649 01650 // compute R (cholesky) of CMMC 01651 { 01652 int info; 01653 lapack.POTRF('U',twoBlocks,CMMC.values(),CMMC.stride(),&info); 01654 // our views ARE NOT currently in place; we must reestablish them before throwing an exception 01655 if (info != 0) { 01656 cXHP = Teuchos::null; 01657 cHP = Teuchos::null; 01658 cK_XHP = Teuchos::null; 01659 cK_HP = Teuchos::null; 01660 cM_XHP = Teuchos::null; 01661 cM_HP = Teuchos::null; 01662 setupViews(); 01663 } 01664 TEST_FOR_EXCEPTION(info != 0, LOBPCGOrthoFailure, 01665 "Anasazi::LOBPCG::iterate(): Cholesky factorization failed during full orthogonalization."); 01666 } 01667 // compute C = C inv(R) 01668 blas.TRSM(Teuchos::RIGHT_SIDE,Teuchos::UPPER_TRI,Teuchos::NO_TRANS,Teuchos::NON_UNIT_DIAG, 01669 nevLocal_,twoBlocks,ONE,CMMC.values(),CMMC.stride(),C.values(),C.stride()); 01670 // put C(:,0:oneBlock-1) into CX 01671 CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,0) ); 01672 // put C(:,oneBlock:twoBlocks-1) into CP 01673 CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,oneBlock) ); 01674 01675 // check the results 01676 if (om_->isVerbosity( Debug ) ) { 01677 Teuchos::SerialDenseMatrix<int,ScalarType> tmp1(nevLocal_,oneBlock), 01678 tmp2(oneBlock,oneBlock); 01679 MagnitudeType tmp; 01680 int teuchosret; 01681 std::stringstream os; 01682 os.precision(2); 01683 os.setf(std::ios::scientific, std::ios::floatfield); 01684 01685 os << " Checking Full Ortho: iteration " << iter_ << std::endl; 01686 01687 // check CX^T MM CX == I 01688 // compute tmp1 = MM*CX 01689 teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CX,ZERO); 01690 TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error, 01691 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01692 // compute tmp2 = CX^H*tmp1 == CX^H*MM*CX 01693 teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO); 01694 TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error, 01695 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01696 // subtrace tmp2 - I == CX^H * MM * CX - I 01697 for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE; 01698 tmp = tmp2.normFrobenius(); 01699 os << " >> Error in CX^H MM CX == I : " << tmp << std::endl; 01700 01701 // check CP^T MM CP == I 01702 // compute tmp1 = MM*CP 01703 teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO); 01704 TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error, 01705 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01706 // compute tmp2 = CP^H*tmp1 == CP^H*MM*CP 01707 teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CP,tmp1,ZERO); 01708 TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error, 01709 "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01710 // subtrace tmp2 - I == CP^H * MM * CP - I 01711 for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE; 01712 tmp = tmp2.normFrobenius(); 01713 os << " >> Error in CP^H MM CP == I : " << tmp << std::endl; 01714 01715 // check CX^T MM CP == 0 01716 // compute tmp1 = MM*CP 01717 teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO); 01718 TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01719 // compute tmp2 = CX^H*tmp1 == CX^H*MM*CP 01720 teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO); 01721 TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply"); 01722 // subtrace tmp2 == CX^H * MM * CP 01723 tmp = tmp2.normFrobenius(); 01724 os << " >> Error in CX^H MM CP == 0 : " << tmp << std::endl; 01725 01726 os << std::endl; 01727 om_->print(Debug,os.str()); 01728 } 01729 } 01730 else { 01731 // [S11 ... ...] 01732 // S = [S21 ... ...] 01733 // [S31 ... ...] 01734 // 01735 // CX = [S11] 01736 // [S21] 01737 // [S31] -> X = [X H P] CX 01738 // 01739 // CP = [S21] -> P = [H P] CP 01740 // [S31] 01741 // 01742 CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_ ,oneBlock,0 ,0) ); 01743 CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_-oneBlock,oneBlock,oneBlock,0) ); 01744 } 01745 01746 // 01747 //---------------------------------------- 01748 // Compute new X and new P 01749 //---------------------------------------- 01750 // Note: Use R as a temporary work space and (if full ortho) tmpMV as well 01751 { 01752 Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ ); 01753 01754 // if full ortho, then CX and CP are dense 01755 // we multiply [X H P]*CX into tmpMV 01756 // [X H P]*CP into R 01757 // then put V(:,firstblock) <- tmpMV 01758 // V(:,thirdblock) <- R 01759 // 01760 // if no full ortho, then [H P]*CP doesn't reference first block (X) 01761 // of V, so that we can modify it before computing P 01762 // so we multiply [X H P]*CX into R 01763 // V(:,firstblock) <- R 01764 // multiply [H P]*CP into R 01765 // V(:,thirdblock) <- R 01766 // 01767 // mutatis mutandis for K[XP] and M[XP] 01768 // 01769 // use SetBlock to do the assignments into V_ 01770 // 01771 // in either case, views are only allowed to be overlapping 01772 // if they are const, and it should be assume that SetBlock 01773 // creates a view of the associated part 01774 // 01775 // we have from above const-pointers to [KM]XHP, [KM]HP and (if hasP) [KM]P 01776 // 01777 if (fullOrtho_) { 01778 // X,P 01779 MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*tmpmvec_); 01780 MVT::MvTimesMatAddMv(ONE,*cXHP,*CP,ZERO,*R_); 01781 cXHP = Teuchos::null; 01782 MVT::SetBlock(*tmpmvec_,indblock1,*V_); 01783 MVT::SetBlock(*R_ ,indblock3,*V_); 01784 // KX,KP 01785 MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*tmpmvec_); 01786 MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CP,ZERO,*R_); 01787 cK_XHP = Teuchos::null; 01788 MVT::SetBlock(*tmpmvec_,indblock1,*KV_); 01789 MVT::SetBlock(*R_ ,indblock3,*KV_); 01790 // MX,MP 01791 if (hasM_) { 01792 MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*tmpmvec_); 01793 MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CP,ZERO,*R_); 01794 cM_XHP = Teuchos::null; 01795 MVT::SetBlock(*tmpmvec_,indblock1,*MV_); 01796 MVT::SetBlock(*R_ ,indblock3,*MV_); 01797 } 01798 else { 01799 cM_XHP = Teuchos::null; 01800 } 01801 } 01802 else { 01803 // X,P 01804 MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*R_); 01805 cXHP = Teuchos::null; 01806 MVT::SetBlock(*R_,indblock1,*V_); 01807 MVT::MvTimesMatAddMv(ONE,*cHP,*CP,ZERO,*R_); 01808 cHP = Teuchos::null; 01809 MVT::SetBlock(*R_,indblock3,*V_); 01810 // KX,KP 01811 MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*R_); 01812 cK_XHP = Teuchos::null; 01813 MVT::SetBlock(*R_,indblock1,*KV_); 01814 MVT::MvTimesMatAddMv(ONE,*cK_HP,*CP,ZERO,*R_); 01815 cK_HP = Teuchos::null; 01816 MVT::SetBlock(*R_,indblock3,*KV_); 01817 // MX,MP 01818 if (hasM_) { 01819 MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*R_); 01820 cM_XHP = Teuchos::null; 01821 MVT::SetBlock(*R_,indblock1,*MV_); 01822 MVT::MvTimesMatAddMv(ONE,*cM_HP,*CP,ZERO,*R_); 01823 cM_HP = Teuchos::null; 01824 MVT::SetBlock(*R_,indblock3,*MV_); 01825 } 01826 else { 01827 cM_XHP = Teuchos::null; 01828 cM_HP = Teuchos::null; 01829 } 01830 } 01831 } // end timing block 01832 // done with coefficient matrices 01833 CX = Teuchos::null; 01834 CP = Teuchos::null; 01835 01836 // 01837 // we now have a P direction 01838 hasP_ = true; 01839 01840 // debugging check: all of our const views should have been cleared by now 01841 // if not, we have a logic error above 01842 TEST_FOR_EXCEPTION( cXHP != Teuchos::null || cK_XHP != Teuchos::null || cM_XHP != Teuchos::null 01843 || cHP != Teuchos::null || cK_HP != Teuchos::null || cM_HP != Teuchos::null 01844 || cP != Teuchos::null || cK_P != Teuchos::null || cM_P != Teuchos::null, 01845 std::logic_error, 01846 "Anasazi::BlockKrylovSchur::iterate(): const views were not all cleared! Something went wrong!" ); 01847 01848 // 01849 // recreate our const MV views of X,H,P and friends 01850 setupViews(); 01851 01852 // 01853 // Compute the new residuals, explicitly 01854 { 01855 Teuchos::TimeMonitor lcltimer( *timerCompRes_ ); 01856 MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ ); 01857 Teuchos::SerialDenseMatrix<int,ScalarType> T( blockSize_, blockSize_ ); 01858 for (int i = 0; i < blockSize_; i++) { 01859 T(i,i) = theta_[i]; 01860 } 01861 MVT::MvTimesMatAddMv( -ONE, *MX_, T, ONE, *R_ ); 01862 } 01863 01864 // R has been updated; mark the norms as out-of-date 01865 Rnorms_current_ = false; 01866 R2norms_current_ = false; 01867 01868 // When required, monitor some orthogonalities 01869 if (om_->isVerbosity( Debug ) ) { 01870 // Check almost everything here 01871 CheckList chk; 01872 chk.checkX = true; 01873 chk.checkKX = true; 01874 chk.checkMX = true; 01875 chk.checkP = true; 01876 chk.checkKP = true; 01877 chk.checkMP = true; 01878 chk.checkR = true; 01879 om_->print( Debug, accuracyCheck(chk, ": after local update") ); 01880 } 01881 else if (om_->isVerbosity( OrthoDetails )) { 01882 CheckList chk; 01883 chk.checkX = true; 01884 chk.checkP = true; 01885 chk.checkR = true; 01886 om_->print( OrthoDetails, accuracyCheck(chk, ": after local update") ); 01887 } 01888 } // end while (statusTest == false) 01889 } 01890 01891 01893 // compute/return residual M-norms 01894 template <class ScalarType, class MV, class OP> 01895 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 01896 LOBPCG<ScalarType,MV,OP>::getResNorms() { 01897 if (Rnorms_current_ == false) { 01898 // Update the residual norms 01899 orthman_->norm(*R_,Rnorms_); 01900 Rnorms_current_ = true; 01901 } 01902 return Rnorms_; 01903 } 01904 01905 01907 // compute/return residual 2-norms 01908 template <class ScalarType, class MV, class OP> 01909 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 01910 LOBPCG<ScalarType,MV,OP>::getRes2Norms() { 01911 if (R2norms_current_ == false) { 01912 // Update the residual 2-norms 01913 MVT::MvNorm(*R_,R2norms_); 01914 R2norms_current_ = true; 01915 } 01916 return R2norms_; 01917 } 01918 01919 01920 01921 01923 // Check accuracy, orthogonality, and other debugging stuff 01924 // 01925 // bools specify which tests we want to run (instead of running more than we actually care about) 01926 // 01927 // we don't bother checking the following because they are computed explicitly: 01928 // H == Prec*R 01929 // KH == K*H 01930 // 01931 // 01932 // checkX : X orthonormal 01933 // orthogonal to auxvecs 01934 // checkMX: check MX == M*X 01935 // checkKX: check KX == K*X 01936 // checkP : if fullortho P orthonormal and orthogonal to X 01937 // orthogonal to auxvecs 01938 // checkMP: check MP == M*P 01939 // checkKP: check KP == K*P 01940 // checkH : if fullortho H orthonormal and orthogonal to X and P 01941 // orthogonal to auxvecs 01942 // checkMH: check MH == M*H 01943 // checkR : check R orthogonal to X 01944 // checkQ : check that auxiliary vectors are actually orthonormal 01945 // 01946 // TODO: 01947 // add checkTheta 01948 // 01949 template <class ScalarType, class MV, class OP> 01950 std::string LOBPCG<ScalarType,MV,OP>::accuracyCheck( const CheckList &chk, const std::string &where ) const 01951 { 01952 using std::endl; 01953 01954 std::stringstream os; 01955 os.precision(2); 01956 os.setf(std::ios::scientific, std::ios::floatfield); 01957 MagnitudeType tmp; 01958 01959 os << " Debugging checks: iteration " << iter_ << where << endl; 01960 01961 // X and friends 01962 if (chk.checkX && initialized_) { 01963 tmp = orthman_->orthonormError(*X_); 01964 os << " >> Error in X^H M X == I : " << tmp << endl; 01965 for (Array_size_type i=0; i<auxVecs_.size(); i++) { 01966 tmp = orthman_->orthogError(*X_,*auxVecs_[i]); 01967 os << " >> Error in X^H M Q[" << i << "] == 0 : " << tmp << endl; 01968 } 01969 } 01970 if (chk.checkMX && hasM_ && initialized_) { 01971 tmp = Utils::errorEquality(*X_, *MX_, MOp_); 01972 os << " >> Error in MX == M*X : " << tmp << endl; 01973 } 01974 if (chk.checkKX && initialized_) { 01975 tmp = Utils::errorEquality(*X_, *KX_, Op_); 01976 os << " >> Error in KX == K*X : " << tmp << endl; 01977 } 01978 01979 // P and friends 01980 if (chk.checkP && hasP_ && initialized_) { 01981 if (fullOrtho_) { 01982 tmp = orthman_->orthonormError(*P_); 01983 os << " >> Error in P^H M P == I : " << tmp << endl; 01984 tmp = orthman_->orthogError(*P_,*X_); 01985 os << " >> Error in P^H M X == 0 : " << tmp << endl; 01986 } 01987 for (Array_size_type i=0; i<auxVecs_.size(); i++) { 01988 tmp = orthman_->orthogError(*P_,*auxVecs_[i]); 01989 os << " >> Error in P^H M Q[" << i << "] == 0 : " << tmp << endl; 01990 } 01991 } 01992 if (chk.checkMP && hasM_ && hasP_ && initialized_) { 01993 tmp = Utils::errorEquality(*P_, *MP_, MOp_); 01994 os << " >> Error in MP == M*P : " << tmp << endl; 01995 } 01996 if (chk.checkKP && hasP_ && initialized_) { 01997 tmp = Utils::errorEquality(*P_, *KP_, Op_); 01998 os << " >> Error in KP == K*P : " << tmp << endl; 01999 } 02000 02001 // H and friends 02002 if (chk.checkH && initialized_) { 02003 if (fullOrtho_) { 02004 tmp = orthman_->orthonormError(*H_); 02005 os << " >> Error in H^H M H == I : " << tmp << endl; 02006 tmp = orthman_->orthogError(*H_,*X_); 02007 os << " >> Error in H^H M X == 0 : " << tmp << endl; 02008 if (hasP_) { 02009 tmp = orthman_->orthogError(*H_,*P_); 02010 os << " >> Error in H^H M P == 0 : " << tmp << endl; 02011 } 02012 } 02013 for (Array_size_type i=0; i<auxVecs_.size(); i++) { 02014 tmp = orthman_->orthogError(*H_,*auxVecs_[i]); 02015 os << " >> Error in H^H M Q[" << i << "] == 0 : " << tmp << endl; 02016 } 02017 } 02018 if (chk.checkMH && hasM_ && initialized_) { 02019 tmp = Utils::errorEquality(*H_, *MH_, MOp_); 02020 os << " >> Error in MH == M*H : " << tmp << endl; 02021 } 02022 02023 // R: this is not M-orthogonality, but standard euclidean orthogonality 02024 if (chk.checkR && initialized_) { 02025 Teuchos::SerialDenseMatrix<int,ScalarType> xTx(blockSize_,blockSize_); 02026 MVT::MvTransMv(ONE,*X_,*R_,xTx); 02027 tmp = xTx.normFrobenius(); 02028 MVT::MvTransMv(ONE,*R_,*R_,xTx); 02029 double normR = xTx.normFrobenius(); 02030 os << " >> RelError in X^H R == 0: " << tmp/normR << endl; 02031 } 02032 02033 // Q 02034 if (chk.checkQ) { 02035 for (Array_size_type i=0; i<auxVecs_.size(); i++) { 02036 tmp = orthman_->orthonormError(*auxVecs_[i]); 02037 os << " >> Error in Q[" << i << "]^H M Q[" << i << "] == I : " << tmp << endl; 02038 for (Array_size_type j=i+1; j<auxVecs_.size(); j++) { 02039 tmp = orthman_->orthogError(*auxVecs_[i],*auxVecs_[j]); 02040 os << " >> Error in Q[" << i << "]^H M Q[" << j << "] == 0 : " << tmp << endl; 02041 } 02042 } 02043 } 02044 02045 os << endl; 02046 02047 return os.str(); 02048 } 02049 02050 02052 // Print the current status of the solver 02053 template <class ScalarType, class MV, class OP> 02054 void 02055 LOBPCG<ScalarType,MV,OP>::currentStatus(std::ostream &os) 02056 { 02057 using std::endl; 02058 02059 os.setf(std::ios::scientific, std::ios::floatfield); 02060 os.precision(6); 02061 os <<endl; 02062 os <<"================================================================================" << endl; 02063 os << endl; 02064 os <<" LOBPCG Solver Status" << endl; 02065 os << endl; 02066 os <<"The solver is "<<(initialized_ ? "initialized." : "not initialized.") << endl; 02067 os <<"The number of iterations performed is " << iter_ << endl; 02068 os <<"The current block size is " << blockSize_ << endl; 02069 os <<"The number of auxiliary vectors is " << numAuxVecs_ << endl; 02070 os <<"The number of operations Op*x is " << count_ApplyOp_ << endl; 02071 os <<"The number of operations M*x is " << count_ApplyM_ << endl; 02072 os <<"The number of operations Prec*x is " << count_ApplyPrec_ << endl; 02073 02074 os.setf(std::ios_base::right, std::ios_base::adjustfield); 02075 02076 if (initialized_) { 02077 os << endl; 02078 os <<"CURRENT EIGENVALUE ESTIMATES "<<endl; 02079 os << std::setw(20) << "Eigenvalue" 02080 << std::setw(20) << "Residual(M)" 02081 << std::setw(20) << "Residual(2)" 02082 << endl; 02083 os <<"--------------------------------------------------------------------------------"<<endl; 02084 for (int i=0; i<blockSize_; i++) { 02085 os << std::setw(20) << theta_[i]; 02086 if (Rnorms_current_) os << std::setw(20) << Rnorms_[i]; 02087 else os << std::setw(20) << "not current"; 02088 if (R2norms_current_) os << std::setw(20) << R2norms_[i]; 02089 else os << std::setw(20) << "not current"; 02090 os << endl; 02091 } 02092 } 02093 os <<"================================================================================" << endl; 02094 os << endl; 02095 } 02096 02098 // are we initialized or not? 02099 template <class ScalarType, class MV, class OP> 02100 bool LOBPCG<ScalarType,MV,OP>::isInitialized() const { 02101 return initialized_; 02102 } 02103 02104 02106 // is P valid or not? 02107 template <class ScalarType, class MV, class OP> 02108 bool LOBPCG<ScalarType,MV,OP>::hasP() { 02109 return hasP_; 02110 } 02111 02113 // is full orthogonalization enabled or not? 02114 template <class ScalarType, class MV, class OP> 02115 bool LOBPCG<ScalarType,MV,OP>::getFullOrtho() const { 02116 return(fullOrtho_); 02117 } 02118 02119 02121 // return the current auxilliary vectors 02122 template <class ScalarType, class MV, class OP> 02123 Teuchos::Array<Teuchos::RCP<const MV> > LOBPCG<ScalarType,MV,OP>::getAuxVecs() const { 02124 return auxVecs_; 02125 } 02126 02128 // return the current block size 02129 template <class ScalarType, class MV, class OP> 02130 int LOBPCG<ScalarType,MV,OP>::getBlockSize() const { 02131 return(blockSize_); 02132 } 02133 02135 // return the current eigenproblem 02136 template <class ScalarType, class MV, class OP> 02137 const Eigenproblem<ScalarType,MV,OP>& LOBPCG<ScalarType,MV,OP>::getProblem() const { 02138 return(*problem_); 02139 } 02140 02141 02143 // return the max subspace dimension 02144 template <class ScalarType, class MV, class OP> 02145 int LOBPCG<ScalarType,MV,OP>::getMaxSubspaceDim() const { 02146 return 3*blockSize_; 02147 } 02148 02150 // return the current subspace dimension 02151 template <class ScalarType, class MV, class OP> 02152 int LOBPCG<ScalarType,MV,OP>::getCurSubspaceDim() const { 02153 if (!initialized_) return 0; 02154 return nevLocal_; 02155 } 02156 02157 02159 // return the current ritz residual norms 02160 template <class ScalarType, class MV, class OP> 02161 std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 02162 LOBPCG<ScalarType,MV,OP>::getRitzRes2Norms() 02163 { 02164 return this->getRes2Norms(); 02165 } 02166 02167 02169 // return the current compression indices 02170 template <class ScalarType, class MV, class OP> 02171 std::vector<int> LOBPCG<ScalarType,MV,OP>::getRitzIndex() { 02172 std::vector<int> ret(nevLocal_,0); 02173 return ret; 02174 } 02175 02176 02178 // return the current ritz values 02179 template <class ScalarType, class MV, class OP> 02180 std::vector<Value<ScalarType> > LOBPCG<ScalarType,MV,OP>::getRitzValues() { 02181 std::vector<Value<ScalarType> > ret(nevLocal_); 02182 for (int i=0; i<nevLocal_; i++) { 02183 ret[i].realpart = theta_[i]; 02184 ret[i].imagpart = ZERO; 02185 } 02186 return ret; 02187 } 02188 02190 // Set a new StatusTest for the solver. 02191 template <class ScalarType, class MV, class OP> 02192 void LOBPCG<ScalarType,MV,OP>::setStatusTest(Teuchos::RCP<StatusTest<ScalarType,MV,OP> > test) { 02193 TEST_FOR_EXCEPTION(test == Teuchos::null,std::invalid_argument, 02194 "Anasazi::LOBPCG::setStatusTest() was passed a null StatusTest."); 02195 tester_ = test; 02196 } 02197 02199 // Get the current StatusTest used by the solver. 02200 template <class ScalarType, class MV, class OP> 02201 Teuchos::RCP<StatusTest<ScalarType,MV,OP> > LOBPCG<ScalarType,MV,OP>::getStatusTest() const { 02202 return tester_; 02203 } 02204 02206 // return the current ritz vectors 02207 template <class ScalarType, class MV, class OP> 02208 Teuchos::RCP<const MV> LOBPCG<ScalarType,MV,OP>::getRitzVectors() { 02209 return X_; 02210 } 02211 02212 02214 // reset the iteration counter 02215 template <class ScalarType, class MV, class OP> 02216 void LOBPCG<ScalarType,MV,OP>::resetNumIters() { 02217 iter_=0; 02218 } 02219 02220 02222 // return the number of iterations 02223 template <class ScalarType, class MV, class OP> 02224 int LOBPCG<ScalarType,MV,OP>::getNumIters() const { 02225 return(iter_); 02226 } 02227 02228 02230 // return the state 02231 template <class ScalarType, class MV, class OP> 02232 LOBPCGState<ScalarType,MV> LOBPCG<ScalarType,MV,OP>::getState() const { 02233 LOBPCGState<ScalarType,MV> state; 02234 state.V = V_; 02235 state.KV = KV_; 02236 state.X = X_; 02237 state.KX = KX_; 02238 state.P = P_; 02239 state.KP = KP_; 02240 state.H = H_; 02241 state.KH = KH_; 02242 state.R = R_; 02243 state.T = Teuchos::rcp(new std::vector<MagnitudeType>(theta_)); 02244 if (hasM_) { 02245 state.MV = MV_; 02246 state.MX = MX_; 02247 state.MP = MP_; 02248 state.MH = MH_; 02249 } 02250 else { 02251 state.MX = Teuchos::null; 02252 state.MP = Teuchos::null; 02253 state.MH = Teuchos::null; 02254 } 02255 return state; 02256 } 02257 02258 } // end Anasazi namespace 02259 02260 #endif // ANASAZI_LOBPCG_HPP
1.7.4