|
IFPACK Development
|
00001 /*@HEADER 00002 // *********************************************************************** 00003 // 00004 // Ifpack: Object-Oriented Algebraic Preconditioner Package 00005 // Copyright (2002) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 //@HEADER 00028 */ 00029 00030 #include "Ifpack_ConfigDefs.h" 00031 #include <iomanip> 00032 #include "Epetra_Operator.h" 00033 #include "Epetra_CrsMatrix.h" 00034 #include "Epetra_Comm.h" 00035 #include "Epetra_Map.h" 00036 #include "Epetra_MultiVector.h" 00037 #include "Ifpack_Preconditioner.h" 00038 #include "Ifpack_PointRelaxation.h" 00039 #include "Ifpack_Utils.h" 00040 #include "Ifpack_Condest.h" 00041 00042 static const int IFPACK_JACOBI = 0; 00043 static const int IFPACK_GS = 1; 00044 static const int IFPACK_SGS = 2; 00045 00046 //============================================================================== 00047 Ifpack_PointRelaxation:: 00048 Ifpack_PointRelaxation(const Epetra_RowMatrix* Matrix_in) : 00049 IsInitialized_(false), 00050 IsComputed_(false), 00051 NumInitialize_(0), 00052 NumCompute_(0), 00053 NumApplyInverse_(0), 00054 InitializeTime_(0.0), 00055 ComputeTime_(0.0), 00056 ApplyInverseTime_(0.0), 00057 ComputeFlops_(0.0), 00058 ApplyInverseFlops_(0.0), 00059 NumSweeps_(1), 00060 DampingFactor_(1.0), 00061 UseTranspose_(false), 00062 Condest_(-1.0), 00063 ComputeCondest_(false), 00064 PrecType_(IFPACK_JACOBI), 00065 MinDiagonalValue_(0.0), 00066 NumMyRows_(0), 00067 NumMyNonzeros_(0), 00068 NumGlobalRows_(0), 00069 NumGlobalNonzeros_(0), 00070 Matrix_(Teuchos::rcp(Matrix_in,false)), 00071 IsParallel_(false), 00072 ZeroStartingSolution_(true), 00073 DoBackwardGS_(false) 00074 { 00075 } 00076 00077 //============================================================================== 00078 int Ifpack_PointRelaxation::SetParameters(Teuchos::ParameterList& List) 00079 { 00080 00081 string PT; 00082 if (PrecType_ == IFPACK_JACOBI) 00083 PT = "Jacobi"; 00084 else if (PrecType_ == IFPACK_GS) 00085 PT = "Gauss-Seidel"; 00086 else if (PrecType_ == IFPACK_SGS) 00087 PT = "symmetric Gauss-Seidel"; 00088 00089 PT = List.get("relaxation: type", PT); 00090 00091 if (PT == "Jacobi") 00092 PrecType_ = IFPACK_JACOBI; 00093 else if (PT == "Gauss-Seidel") 00094 PrecType_ = IFPACK_GS; 00095 else if (PT == "symmetric Gauss-Seidel") 00096 PrecType_ = IFPACK_SGS; 00097 else { 00098 IFPACK_CHK_ERR(-2); 00099 } 00100 00101 NumSweeps_ = List.get("relaxation: sweeps",NumSweeps_); 00102 DampingFactor_ = List.get("relaxation: damping factor", 00103 DampingFactor_); 00104 MinDiagonalValue_ = List.get("relaxation: min diagonal value", 00105 MinDiagonalValue_); 00106 ZeroStartingSolution_ = List.get("relaxation: zero starting solution", 00107 ZeroStartingSolution_); 00108 00109 DoBackwardGS_ = List.get("relaxation: backward mode",DoBackwardGS_); 00110 00111 SetLabel(); 00112 00113 return(0); 00114 } 00115 00116 //============================================================================== 00117 const Epetra_Comm& Ifpack_PointRelaxation::Comm() const 00118 { 00119 return(Matrix_->Comm()); 00120 } 00121 00122 //============================================================================== 00123 const Epetra_Map& Ifpack_PointRelaxation::OperatorDomainMap() const 00124 { 00125 return(Matrix_->OperatorDomainMap()); 00126 } 00127 00128 //============================================================================== 00129 const Epetra_Map& Ifpack_PointRelaxation::OperatorRangeMap() const 00130 { 00131 return(Matrix_->OperatorRangeMap()); 00132 } 00133 00134 //============================================================================== 00135 int Ifpack_PointRelaxation::Initialize() 00136 { 00137 IsInitialized_ = false; 00138 00139 if (Matrix_ == Teuchos::null) 00140 IFPACK_CHK_ERR(-2); 00141 00142 if (Time_ == Teuchos::null) 00143 Time_ = Teuchos::rcp( new Epetra_Time(Comm()) ); 00144 00145 if (Matrix().NumGlobalRows() != Matrix().NumGlobalCols()) 00146 IFPACK_CHK_ERR(-2); // only square matrices 00147 00148 NumMyRows_ = Matrix_->NumMyRows(); 00149 NumMyNonzeros_ = Matrix_->NumMyNonzeros(); 00150 NumGlobalRows_ = Matrix_->NumGlobalRows(); 00151 NumGlobalNonzeros_ = Matrix_->NumGlobalNonzeros(); 00152 00153 if (Comm().NumProc() != 1) 00154 IsParallel_ = true; 00155 else 00156 IsParallel_ = false; 00157 00158 ++NumInitialize_; 00159 InitializeTime_ += Time_->ElapsedTime(); 00160 IsInitialized_ = true; 00161 return(0); 00162 } 00163 00164 //============================================================================== 00165 int Ifpack_PointRelaxation::Compute() 00166 { 00167 int ierr = 0; 00168 if (!IsInitialized()) 00169 IFPACK_CHK_ERR(Initialize()); 00170 00171 Time_->ResetStartTime(); 00172 00173 // reset values 00174 IsComputed_ = false; 00175 Condest_ = -1.0; 00176 00177 if (NumSweeps_ == 0) ierr = 1; // Warning: no sweeps performed. 00178 if (NumSweeps_ < 0) 00179 IFPACK_CHK_ERR(-2); // at least one application 00180 00181 Diagonal_ = Teuchos::rcp( new Epetra_Vector(Matrix().RowMatrixRowMap()) ); 00182 00183 if (Diagonal_ == Teuchos::null) 00184 IFPACK_CHK_ERR(-5); 00185 00186 IFPACK_CHK_ERR(Matrix().ExtractDiagonalCopy(*Diagonal_)); 00187 00188 // check diagonal elements, store the inverses, and verify that 00189 // no zeros are around. If an element is zero, then by default 00190 // its inverse is zero as well (that is, the row is ignored). 00191 for (int i = 0 ; i < NumMyRows_ ; ++i) { 00192 double& diag = (*Diagonal_)[i]; 00193 if (IFPACK_ABS(diag) < MinDiagonalValue_) 00194 diag = MinDiagonalValue_; 00195 if (diag != 0.0) 00196 diag = 1.0 / diag; 00197 } 00198 ComputeFlops_ += NumMyRows_; 00199 00200 #if 0 00201 // some methods require the inverse of the diagonal, compute it 00202 // now and store it in `Diagonal_' 00203 if ((PrecType_ == IFPACK_JACOBI) || (PrecType_ == IFPACK_GS)) { 00204 Diagonal_->Reciprocal(*Diagonal_); 00205 // update flops 00206 ComputeFlops_ += NumMyRows_; 00207 } 00208 #endif 00209 00210 // We need to import data from external processors. Here I create an 00211 // Epetra_Import object because I cannot assume that Matrix_ has one. 00212 // This is a bit of waste of resources (but the code is more robust). 00213 // Note that I am doing some strange stuff to set the components of Y 00214 // from Y2 (to save some time). 00215 // 00216 if (IsParallel_ && ((PrecType_ == IFPACK_GS) || (PrecType_ == IFPACK_SGS))) { 00217 Importer_ = Teuchos::rcp( new Epetra_Import(Matrix().RowMatrixColMap(), 00218 Matrix().RowMatrixRowMap()) ); 00219 if (Importer_ == Teuchos::null) IFPACK_CHK_ERR(-5); 00220 } 00221 00222 ++NumCompute_; 00223 ComputeTime_ += Time_->ElapsedTime(); 00224 IsComputed_ = true; 00225 00226 IFPACK_CHK_ERR(ierr); 00227 return(0); 00228 } 00229 00230 //============================================================================== 00231 ostream& Ifpack_PointRelaxation::Print(ostream & os) const 00232 { 00233 00234 double MyMinVal, MyMaxVal; 00235 double MinVal, MaxVal; 00236 00237 if (IsComputed_) { 00238 Diagonal_->MinValue(&MyMinVal); 00239 Diagonal_->MaxValue(&MyMaxVal); 00240 Comm().MinAll(&MyMinVal,&MinVal,1); 00241 Comm().MinAll(&MyMaxVal,&MaxVal,1); 00242 } 00243 00244 if (!Comm().MyPID()) { 00245 os << endl; 00246 os << "================================================================================" << endl; 00247 os << "Ifpack_PointRelaxation" << endl; 00248 os << "Sweeps = " << NumSweeps_ << endl; 00249 os << "damping factor = " << DampingFactor_ << endl; 00250 if (PrecType_ == IFPACK_JACOBI) 00251 os << "Type = Jacobi" << endl; 00252 else if (PrecType_ == IFPACK_GS) 00253 os << "Type = Gauss-Seidel" << endl; 00254 else if (PrecType_ == IFPACK_SGS) 00255 os << "Type = symmetric Gauss-Seidel" << endl; 00256 if (DoBackwardGS_) 00257 os << "Using backward mode (GS only)" << endl; 00258 if (ZeroStartingSolution_) 00259 os << "Using zero starting solution" << endl; 00260 else 00261 os << "Using input starting solution" << endl; 00262 os << "Condition number estimate = " << Condest() << endl; 00263 os << "Global number of rows = " << Matrix_->NumGlobalRows() << endl; 00264 if (IsComputed_) { 00265 os << "Minimum value on stored diagonal = " << MinVal << endl; 00266 os << "Maximum value on stored diagonal = " << MaxVal << endl; 00267 } 00268 os << endl; 00269 os << "Phase # calls Total Time (s) Total MFlops MFlops/s" << endl; 00270 os << "----- ------- -------------- ------------ --------" << endl; 00271 os << "Initialize() " << std::setw(5) << NumInitialize_ 00272 << " " << std::setw(15) << InitializeTime_ 00273 << " 0.0 0.0" << endl; 00274 os << "Compute() " << std::setw(5) << NumCompute_ 00275 << " " << std::setw(15) << ComputeTime_ 00276 << " " << std::setw(15) << 1.0e-6 * ComputeFlops_; 00277 if (ComputeTime_ != 0.0) 00278 os << " " << std::setw(15) << 1.0e-6 * ComputeFlops_ / ComputeTime_ << endl; 00279 else 00280 os << " " << std::setw(15) << 0.0 << endl; 00281 os << "ApplyInverse() " << std::setw(5) << NumApplyInverse_ 00282 << " " << std::setw(15) << ApplyInverseTime_ 00283 << " " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_; 00284 if (ApplyInverseTime_ != 0.0) 00285 os << " " << std::setw(15) << 1.0e-6 * ApplyInverseFlops_ / ApplyInverseTime_ << endl; 00286 else 00287 os << " " << std::setw(15) << 0.0 << endl; 00288 os << "================================================================================" << endl; 00289 os << endl; 00290 } 00291 00292 return(os); 00293 } 00294 00295 //============================================================================== 00296 double Ifpack_PointRelaxation:: 00297 Condest(const Ifpack_CondestType CT, 00298 const int MaxIters, const double Tol, 00299 Epetra_RowMatrix* Matrix_in) 00300 { 00301 if (!IsComputed()) // cannot compute right now 00302 return(-1.0); 00303 00304 // always computes it. Call Condest() with no parameters to get 00305 // the previous estimate. 00306 Condest_ = Ifpack_Condest(*this, CT, MaxIters, Tol, Matrix_in); 00307 00308 return(Condest_); 00309 } 00310 00311 //============================================================================== 00312 void Ifpack_PointRelaxation::SetLabel() 00313 { 00314 string PT; 00315 if (PrecType_ == IFPACK_JACOBI) 00316 PT = "Jacobi"; 00317 else if (PrecType_ == IFPACK_GS){ 00318 PT = "GS"; 00319 if(DoBackwardGS_) 00320 PT = "Backward " + PT; 00321 } 00322 else if (PrecType_ == IFPACK_SGS) 00323 PT = "SGS"; 00324 00325 Label_ = "IFPACK (" + PT + ", sweeps=" + Ifpack_toString(NumSweeps_) 00326 + ", damping=" + Ifpack_toString(DampingFactor_) + ")"; 00327 } 00328 00329 //============================================================================== 00330 // Note that Ifpack_PointRelaxation and Jacobi is much faster than 00331 // Ifpack_AdditiveSchwarz<Ifpack_PointRelaxation> (because of the 00332 // way the matrix-vector produce is performed). 00333 // 00334 // Another ML-related observation is that the starting solution (in Y) 00335 // is NOT supposed to be zero. This may slow down the application of just 00336 // one sweep of Jacobi. 00337 // 00338 int Ifpack_PointRelaxation:: 00339 ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const 00340 { 00341 if (!IsComputed()) 00342 IFPACK_CHK_ERR(-3); 00343 00344 if (X.NumVectors() != Y.NumVectors()) 00345 IFPACK_CHK_ERR(-2); 00346 00347 Time_->ResetStartTime(); 00348 00349 // AztecOO gives X and Y pointing to the same memory location, 00350 // need to create an auxiliary vector, Xcopy 00351 Teuchos::RefCountPtr< const Epetra_MultiVector > Xcopy; 00352 if (X.Pointers()[0] == Y.Pointers()[0]) 00353 Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) ); 00354 else 00355 Xcopy = Teuchos::rcp( &X, false ); 00356 00357 if (ZeroStartingSolution_) 00358 Y.PutScalar(0.0); 00359 00360 // Flops are updated in each of the following. 00361 switch (PrecType_) { 00362 case IFPACK_JACOBI: 00363 IFPACK_CHK_ERR(ApplyInverseJacobi(*Xcopy,Y)); 00364 break; 00365 case IFPACK_GS: 00366 IFPACK_CHK_ERR(ApplyInverseGS(*Xcopy,Y)); 00367 break; 00368 case IFPACK_SGS: 00369 IFPACK_CHK_ERR(ApplyInverseSGS(*Xcopy,Y)); 00370 break; 00371 default: 00372 IFPACK_CHK_ERR(-1); // something wrong 00373 } 00374 00375 ++NumApplyInverse_; 00376 ApplyInverseTime_ += Time_->ElapsedTime(); 00377 return(0); 00378 } 00379 00380 //============================================================================== 00381 // This preconditioner can be much slower than AztecOO and ML versions 00382 // if the matrix-vector product requires all ExtractMyRowCopy() 00383 // (as done through Ifpack_AdditiveSchwarz). 00384 int Ifpack_PointRelaxation:: 00385 ApplyInverseJacobi(const Epetra_MultiVector& RHS, Epetra_MultiVector& LHS) const 00386 { 00387 00388 int NumVectors = LHS.NumVectors(); 00389 Epetra_MultiVector A_times_LHS( LHS.Map(),NumVectors ); 00390 00391 for (int j = 0; j < NumSweeps_ ; j++) { 00392 00393 IFPACK_CHK_ERR(Apply(LHS,A_times_LHS)); 00394 IFPACK_CHK_ERR(A_times_LHS.Update(1.0,RHS,-1.0)); 00395 for (int v = 0 ; v < NumVectors ; ++v) 00396 IFPACK_CHK_ERR(LHS(v)->Multiply(DampingFactor_, *(A_times_LHS(v)), 00397 *Diagonal_, 1.0)); 00398 00399 } 00400 00401 // Flops: 00402 // - matrix vector (2 * NumGlobalNonzeros_) 00403 // - update (2 * NumGlobalRows_) 00404 // - Multiply: 00405 // - DampingFactor (NumGlobalRows_) 00406 // - Diagonal (NumGlobalRows_) 00407 // - A + B (NumGlobalRows_) 00408 // - 1.0 (NumGlobalRows_) 00409 ApplyInverseFlops_ += NumVectors * (6 * NumGlobalRows_ + 2 * NumGlobalNonzeros_); 00410 00411 return(0); 00412 } 00413 00414 //============================================================================== 00415 int Ifpack_PointRelaxation:: 00416 ApplyInverseGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const 00417 { 00418 const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_); 00419 // try to pick the best option; performances may be improved 00420 // if several sweeps are used. 00421 if (CrsMatrix != 0) 00422 { 00423 if (CrsMatrix->StorageOptimized()) 00424 return(ApplyInverseGS_FastCrsMatrix(CrsMatrix, X, Y)); 00425 else 00426 return(ApplyInverseGS_CrsMatrix(CrsMatrix, X, Y)); 00427 } 00428 else 00429 return(ApplyInverseGS_RowMatrix(X, Y)); 00430 } //ApplyInverseGS() 00431 00432 //============================================================================== 00433 int Ifpack_PointRelaxation:: 00434 ApplyInverseGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const 00435 { 00436 int NumVectors = X.NumVectors(); 00437 00438 int Length = Matrix().MaxNumEntries(); 00439 vector<int> Indices(Length); 00440 vector<double> Values(Length); 00441 00442 Teuchos::RefCountPtr< Epetra_MultiVector > Y2; 00443 if (IsParallel_) 00444 Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) ); 00445 else 00446 Y2 = Teuchos::rcp( &Y, false ); 00447 00448 // extract views (for nicer and faster code) 00449 double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr; 00450 X.ExtractView(&x_ptr); 00451 Y.ExtractView(&y_ptr); 00452 Y2->ExtractView(&y2_ptr); 00453 Diagonal_->ExtractView(&d_ptr); 00454 00455 for (int j = 0; j < NumSweeps_ ; j++) { 00456 00457 // data exchange is here, once per sweep 00458 if (IsParallel_) 00459 IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert)); 00460 00461 // FIXME: do I really need this code below? 00462 if (NumVectors == 1) { 00463 00464 double* y0_ptr = y_ptr[0]; 00465 double* y20_ptr = y2_ptr[0]; 00466 double* x0_ptr = x_ptr[0]; 00467 00468 if(!DoBackwardGS_){ 00469 /* Forward Mode */ 00470 for (int i = 0 ; i < NumMyRows_ ; ++i) { 00471 00472 int NumEntries; 00473 int col; 00474 IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries, 00475 &Values[0], &Indices[0])); 00476 00477 double dtemp = 0.0; 00478 for (int k = 0 ; k < NumEntries ; ++k) { 00479 00480 col = Indices[k]; 00481 dtemp += Values[k] * y20_ptr[col]; 00482 } 00483 00484 y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp); 00485 } 00486 } 00487 else { 00488 /* Backward Mode */ 00489 for (int i = NumMyRows_ - 1 ; i > -1 ; --i) { 00490 00491 int NumEntries; 00492 int col; 00493 IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries, 00494 &Values[0], &Indices[0])); 00495 double dtemp = 0.0; 00496 for (int k = 0 ; k < NumEntries ; ++k) { 00497 00498 col = Indices[k]; 00499 dtemp += Values[k] * y20_ptr[i]; 00500 } 00501 00502 y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp); 00503 } 00504 } 00505 00506 // using Export() sounded quite expensive 00507 if (IsParallel_) 00508 for (int i = 0 ; i < NumMyRows_ ; ++i) 00509 y0_ptr[i] = y20_ptr[i]; 00510 00511 } 00512 else { 00513 if(!DoBackwardGS_){ 00514 /* Forward Mode */ 00515 for (int i = 0 ; i < NumMyRows_ ; ++i) { 00516 00517 int NumEntries; 00518 int col; 00519 IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries, 00520 &Values[0], &Indices[0])); 00521 00522 for (int m = 0 ; m < NumVectors ; ++m) { 00523 00524 double dtemp = 0.0; 00525 for (int k = 0 ; k < NumEntries ; ++k) { 00526 00527 col = Indices[k]; 00528 dtemp += Values[k] * y2_ptr[m][col]; 00529 } 00530 00531 y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp); 00532 } 00533 } 00534 } 00535 else { 00536 /* Backward Mode */ 00537 for (int i = NumMyRows_ - 1 ; i > -1 ; --i) { 00538 int NumEntries; 00539 int col; 00540 IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries, 00541 &Values[0], &Indices[0])); 00542 00543 for (int m = 0 ; m < NumVectors ; ++m) { 00544 00545 double dtemp = 0.0; 00546 for (int k = 0 ; k < NumEntries ; ++k) { 00547 00548 col = Indices[k]; 00549 dtemp += Values[k] * y2_ptr[m][col]; 00550 } 00551 00552 y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp); 00553 00554 } 00555 } 00556 } 00557 00558 // using Export() sounded quite expensive 00559 if (IsParallel_) 00560 for (int m = 0 ; m < NumVectors ; ++m) 00561 for (int i = 0 ; i < NumMyRows_ ; ++i) 00562 y_ptr[m][i] = y2_ptr[m][i]; 00563 00564 } 00565 } 00566 00567 ApplyInverseFlops_ += NumVectors * (4 * NumGlobalRows_ + 2 * NumGlobalNonzeros_); 00568 00569 return(0); 00570 } //ApplyInverseGS_RowMatrix() 00571 00572 //============================================================================== 00573 int Ifpack_PointRelaxation:: 00574 ApplyInverseGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const 00575 { 00576 int NumVectors = X.NumVectors(); 00577 00578 int* Indices; 00579 double* Values; 00580 00581 Teuchos::RefCountPtr< Epetra_MultiVector > Y2; 00582 if (IsParallel_) { 00583 Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) ); 00584 } 00585 else 00586 Y2 = Teuchos::rcp( &Y, false ); 00587 00588 double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr; 00589 X.ExtractView(&x_ptr); 00590 Y.ExtractView(&y_ptr); 00591 Y2->ExtractView(&y2_ptr); 00592 Diagonal_->ExtractView(&d_ptr); 00593 00594 for (int iter = 0 ; iter < NumSweeps_ ; ++iter) { 00595 00596 // only one data exchange per sweep 00597 if (IsParallel_) 00598 IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert)); 00599 00600 if(!DoBackwardGS_){ 00601 /* Forward Mode */ 00602 for (int i = 0 ; i < NumMyRows_ ; ++i) { 00603 00604 int NumEntries; 00605 int col; 00606 double diag = d_ptr[i]; 00607 00608 IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices)); 00609 00610 for (int m = 0 ; m < NumVectors ; ++m) { 00611 00612 double dtemp = 0.0; 00613 00614 for (int k = 0; k < NumEntries; ++k) { 00615 00616 col = Indices[k]; 00617 dtemp += Values[k] * y2_ptr[m][col]; 00618 } 00619 00620 y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; 00621 } 00622 } 00623 } 00624 else { 00625 /* Backward Mode */ 00626 for (int i = NumMyRows_ - 1 ; i > -1 ; --i) { 00627 00628 int NumEntries; 00629 int col; 00630 double diag = d_ptr[i]; 00631 00632 IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices)); 00633 00634 for (int m = 0 ; m < NumVectors ; ++m) { 00635 00636 double dtemp = 0.0; 00637 for (int k = 0; k < NumEntries; ++k) { 00638 00639 col = Indices[k]; 00640 dtemp += Values[k] * y2_ptr[m][col]; 00641 } 00642 00643 y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; 00644 00645 } 00646 } 00647 } 00648 00649 if (IsParallel_) 00650 for (int m = 0 ; m < NumVectors ; ++m) 00651 for (int i = 0 ; i < NumMyRows_ ; ++i) 00652 y_ptr[m][i] = y2_ptr[m][i]; 00653 } 00654 00655 ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_); 00656 return(0); 00657 } //ApplyInverseGS_CrsMatrix() 00658 00659 // 00660 //============================================================================== 00661 // ApplyInverseGS_FastCrsMatrix() requires Epetra_CrsMatrix + OptimizeStorage() 00662 00663 int Ifpack_PointRelaxation:: 00664 ApplyInverseGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const 00665 { 00666 int* IndexOffset; 00667 int* Indices; 00668 double* Values; 00669 IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values)); 00670 00671 int NumVectors = X.NumVectors(); 00672 00673 Teuchos::RefCountPtr< Epetra_MultiVector > Y2; 00674 if (IsParallel_) { 00675 Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) ); 00676 } 00677 else 00678 Y2 = Teuchos::rcp( &Y, false ); 00679 00680 double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr; 00681 X.ExtractView(&x_ptr); 00682 Y.ExtractView(&y_ptr); 00683 Y2->ExtractView(&y2_ptr); 00684 Diagonal_->ExtractView(&d_ptr); 00685 00686 for (int iter = 0 ; iter < NumSweeps_ ; ++iter) { 00687 00688 // only one data exchange per sweep 00689 if (IsParallel_) 00690 IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert)); 00691 00692 if(!DoBackwardGS_){ 00693 /* Forward Mode */ 00694 for (int i = 0 ; i < NumMyRows_ ; ++i) { 00695 00696 int col; 00697 double diag = d_ptr[i]; 00698 00699 for (int m = 0 ; m < NumVectors ; ++m) { 00700 00701 double dtemp = 0.0; 00702 00703 for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) { 00704 00705 col = Indices[k]; 00706 dtemp += Values[k] * y2_ptr[m][col]; 00707 } 00708 00709 y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; 00710 } 00711 } 00712 } 00713 else { 00714 /* Backward Mode */ 00715 for (int i = NumMyRows_ - 1 ; i > -1 ; --i) { 00716 00717 int col; 00718 double diag = d_ptr[i]; 00719 00720 for (int m = 0 ; m < NumVectors ; ++m) { 00721 00722 double dtemp = 0.0; 00723 for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) { 00724 00725 col = Indices[k]; 00726 dtemp += Values[k] * y2_ptr[m][col]; 00727 } 00728 00729 y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; 00730 00731 } 00732 } 00733 } 00734 00735 00736 if (IsParallel_) 00737 for (int m = 0 ; m < NumVectors ; ++m) 00738 for (int i = 0 ; i < NumMyRows_ ; ++i) 00739 y_ptr[m][i] = y2_ptr[m][i]; 00740 } 00741 00742 ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_); 00743 return(0); 00744 } //ApplyInverseGS_FastCrsMatrix() 00745 00746 //============================================================================== 00747 int Ifpack_PointRelaxation:: 00748 ApplyInverseSGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const 00749 { 00750 const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_); 00751 // try to pick the best option; performances may be improved 00752 // if several sweeps are used. 00753 if (CrsMatrix != 0) 00754 { 00755 if (CrsMatrix->StorageOptimized()) 00756 return(ApplyInverseSGS_FastCrsMatrix(CrsMatrix, X, Y)); 00757 else 00758 return(ApplyInverseSGS_CrsMatrix(CrsMatrix, X, Y)); 00759 } 00760 else 00761 return(ApplyInverseSGS_RowMatrix(X, Y)); 00762 } 00763 00764 //============================================================================== 00765 int Ifpack_PointRelaxation:: 00766 ApplyInverseSGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const 00767 { 00768 int NumVectors = X.NumVectors(); 00769 int Length = Matrix().MaxNumEntries(); 00770 vector<int> Indices(Length); 00771 vector<double> Values(Length); 00772 00773 Teuchos::RefCountPtr< Epetra_MultiVector > Y2; 00774 if (IsParallel_) { 00775 Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) ); 00776 } 00777 else 00778 Y2 = Teuchos::rcp( &Y, false ); 00779 00780 double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr; 00781 X.ExtractView(&x_ptr); 00782 Y.ExtractView(&y_ptr); 00783 Y2->ExtractView(&y2_ptr); 00784 Diagonal_->ExtractView(&d_ptr); 00785 00786 for (int iter = 0 ; iter < NumSweeps_ ; ++iter) { 00787 00788 // only one data exchange per sweep 00789 if (IsParallel_) 00790 IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert)); 00791 00792 for (int i = 0 ; i < NumMyRows_ ; ++i) { 00793 00794 int NumEntries; 00795 int col; 00796 double diag = d_ptr[i]; 00797 00798 IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries, 00799 &Values[0], &Indices[0])); 00800 00801 for (int m = 0 ; m < NumVectors ; ++m) { 00802 00803 double dtemp = 0.0; 00804 00805 for (int k = 0 ; k < NumEntries ; ++k) { 00806 00807 col = Indices[k]; 00808 dtemp += Values[k] * y2_ptr[m][col]; 00809 } 00810 00811 y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; 00812 } 00813 } 00814 00815 for (int i = NumMyRows_ - 1 ; i > -1 ; --i) { 00816 00817 int NumEntries; 00818 int col; 00819 double diag = d_ptr[i]; 00820 00821 IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries, 00822 &Values[0], &Indices[0])); 00823 00824 for (int m = 0 ; m < NumVectors ; ++m) { 00825 00826 double dtemp = 0.0; 00827 for (int k = 0 ; k < NumEntries ; ++k) { 00828 00829 col = Indices[k]; 00830 dtemp += Values[k] * y2_ptr[m][col]; 00831 } 00832 00833 y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; 00834 00835 } 00836 } 00837 00838 if (IsParallel_) 00839 for (int m = 0 ; m < NumVectors ; ++m) 00840 for (int i = 0 ; i < NumMyRows_ ; ++i) 00841 y_ptr[m][i] = y2_ptr[m][i]; 00842 } 00843 00844 ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_); 00845 return(0); 00846 } 00847 00848 //============================================================================== 00849 int Ifpack_PointRelaxation:: 00850 ApplyInverseSGS_CrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const 00851 { 00852 int NumVectors = X.NumVectors(); 00853 00854 int* Indices; 00855 double* Values; 00856 00857 Teuchos::RefCountPtr< Epetra_MultiVector > Y2; 00858 if (IsParallel_) { 00859 Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) ); 00860 } 00861 else 00862 Y2 = Teuchos::rcp( &Y, false ); 00863 00864 double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr; 00865 X.ExtractView(&x_ptr); 00866 Y.ExtractView(&y_ptr); 00867 Y2->ExtractView(&y2_ptr); 00868 Diagonal_->ExtractView(&d_ptr); 00869 00870 for (int iter = 0 ; iter < NumSweeps_ ; ++iter) { 00871 00872 // only one data exchange per sweep 00873 if (IsParallel_) 00874 IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert)); 00875 00876 for (int i = 0 ; i < NumMyRows_ ; ++i) { 00877 00878 int NumEntries; 00879 int col; 00880 double diag = d_ptr[i]; 00881 00882 IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices)); 00883 00884 for (int m = 0 ; m < NumVectors ; ++m) { 00885 00886 double dtemp = 0.0; 00887 00888 for (int k = 0; k < NumEntries; ++k) { 00889 00890 col = Indices[k]; 00891 dtemp += Values[k] * y2_ptr[m][col]; 00892 } 00893 00894 y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; 00895 } 00896 } 00897 00898 for (int i = NumMyRows_ - 1 ; i > -1 ; --i) { 00899 00900 int NumEntries; 00901 int col; 00902 double diag = d_ptr[i]; 00903 00904 IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices)); 00905 00906 for (int m = 0 ; m < NumVectors ; ++m) { 00907 00908 double dtemp = 0.0; 00909 for (int k = 0; k < NumEntries; ++k) { 00910 00911 col = Indices[k]; 00912 dtemp += Values[k] * y2_ptr[m][col]; 00913 } 00914 00915 y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; 00916 00917 } 00918 } 00919 00920 if (IsParallel_) 00921 for (int m = 0 ; m < NumVectors ; ++m) 00922 for (int i = 0 ; i < NumMyRows_ ; ++i) 00923 y_ptr[m][i] = y2_ptr[m][i]; 00924 } 00925 00926 ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_); 00927 return(0); 00928 } 00929 00930 //============================================================================== 00931 // this requires Epetra_CrsMatrix + OptimizeStorage() 00932 int Ifpack_PointRelaxation:: 00933 ApplyInverseSGS_FastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const 00934 { 00935 int* IndexOffset; 00936 int* Indices; 00937 double* Values; 00938 IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values)); 00939 00940 int NumVectors = X.NumVectors(); 00941 00942 Teuchos::RefCountPtr< Epetra_MultiVector > Y2; 00943 if (IsParallel_) { 00944 Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) ); 00945 } 00946 else 00947 Y2 = Teuchos::rcp( &Y, false ); 00948 00949 double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr; 00950 X.ExtractView(&x_ptr); 00951 Y.ExtractView(&y_ptr); 00952 Y2->ExtractView(&y2_ptr); 00953 Diagonal_->ExtractView(&d_ptr); 00954 00955 for (int iter = 0 ; iter < NumSweeps_ ; ++iter) { 00956 00957 // only one data exchange per sweep 00958 if (IsParallel_) 00959 IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert)); 00960 00961 for (int i = 0 ; i < NumMyRows_ ; ++i) { 00962 00963 int col; 00964 double diag = d_ptr[i]; 00965 00966 // no need to extract row i 00967 //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices)); 00968 00969 for (int m = 0 ; m < NumVectors ; ++m) { 00970 00971 double dtemp = 0.0; 00972 00973 for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) { 00974 00975 col = Indices[k]; 00976 dtemp += Values[k] * y2_ptr[m][col]; 00977 } 00978 00979 y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; 00980 } 00981 } 00982 00983 for (int i = NumMyRows_ - 1 ; i > -1 ; --i) { 00984 00985 int col; 00986 double diag = d_ptr[i]; 00987 00988 // no need to extract row i 00989 //IFPACK_CHK_ERR(A->ExtractMyRowView(i, NumEntries, Values, Indices)); 00990 00991 for (int m = 0 ; m < NumVectors ; ++m) { 00992 00993 double dtemp = 0.0; 00994 for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) { 00995 00996 col = Indices[k]; 00997 dtemp += Values[k] * y2_ptr[m][col]; 00998 } 00999 01000 y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; 01001 01002 } 01003 } 01004 01005 if (IsParallel_) 01006 for (int m = 0 ; m < NumVectors ; ++m) 01007 for (int i = 0 ; i < NumMyRows_ ; ++i) 01008 y_ptr[m][i] = y2_ptr[m][i]; 01009 } 01010 01011 ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_); 01012 return(0); 01013 }
1.7.4