|
IFPACK Development
|
00001 /*@HEADER 00002 // *********************************************************************** 00003 // 00004 // Ifpack: Object-Oriented Algebraic Preconditioner Package 00005 // Copyright (2002) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 //@HEADER 00028 */ 00029 00030 #include "Ifpack_ConfigDefs.h" 00031 #include "Ifpack_Preconditioner.h" 00032 #include "Ifpack_ILUT.h" 00033 #include "Ifpack_Condest.h" 00034 #include "Ifpack_Utils.h" 00035 #include "Ifpack_HashTable.h" 00036 #include "Epetra_SerialComm.h" 00037 #include "Epetra_Comm.h" 00038 #include "Epetra_Map.h" 00039 #include "Epetra_RowMatrix.h" 00040 #include "Epetra_CrsMatrix.h" 00041 #include "Epetra_Vector.h" 00042 #include "Epetra_MultiVector.h" 00043 #include "Epetra_Util.h" 00044 #include "Teuchos_ParameterList.hpp" 00045 #include "Teuchos_RefCountPtr.hpp" 00046 #include <functional> 00047 00048 using namespace Teuchos; 00049 00050 //============================================================================== 00051 // FIXME: allocate Comm_ and Time_ the first Initialize() call 00052 Ifpack_ILUT::Ifpack_ILUT(const Epetra_RowMatrix* A) : 00053 A_(*A), 00054 Comm_(A->Comm()), 00055 Condest_(-1.0), 00056 Relax_(0.), 00057 Athresh_(0.0), 00058 Rthresh_(1.0), 00059 LevelOfFill_(1.0), 00060 DropTolerance_(1e-12), 00061 IsInitialized_(false), 00062 IsComputed_(false), 00063 UseTranspose_(false), 00064 NumMyRows_(-1), 00065 NumInitialize_(0), 00066 NumCompute_(0), 00067 NumApplyInverse_(0), 00068 InitializeTime_(0.0), 00069 ComputeTime_(0.0), 00070 ApplyInverseTime_(0.0), 00071 ComputeFlops_(0.0), 00072 ApplyInverseFlops_(0.0), 00073 Time_(Comm()), 00074 GlobalNonzeros_(0) 00075 { 00076 // do nothing here.. 00077 } 00078 00079 //============================================================================== 00080 Ifpack_ILUT::~Ifpack_ILUT() 00081 { 00082 Destroy(); 00083 } 00084 00085 //============================================================================== 00086 void Ifpack_ILUT::Destroy() 00087 { 00088 IsInitialized_ = false; 00089 IsComputed_ = false; 00090 } 00091 00092 //========================================================================== 00093 int Ifpack_ILUT::SetParameters(Teuchos::ParameterList& List) 00094 { 00095 try 00096 { 00097 LevelOfFill_ = List.get<double>("fact: ilut level-of-fill", LevelOfFill()); 00098 if (LevelOfFill_ <= 0.0) 00099 IFPACK_CHK_ERR(-2); // must be greater than 0.0 00100 00101 Athresh_ = List.get<double>("fact: absolute threshold", Athresh_); 00102 Rthresh_ = List.get<double>("fact: relative threshold", Rthresh_); 00103 Relax_ = List.get<double>("fact: relax value", Relax_); 00104 DropTolerance_ = List.get<double>("fact: drop tolerance", DropTolerance_); 00105 00106 Label_ = "IFPACK ILUT (fill=" + Ifpack_toString(LevelOfFill()) 00107 + ", relax=" + Ifpack_toString(RelaxValue()) 00108 + ", athr=" + Ifpack_toString(AbsoluteThreshold()) 00109 + ", rthr=" + Ifpack_toString(RelativeThreshold()) 00110 + ", droptol=" + Ifpack_toString(DropTolerance()) 00111 + ")"; 00112 return(0); 00113 } 00114 catch (...) 00115 { 00116 cerr << "Caught an exception while parsing the parameter list" << endl; 00117 cerr << "This typically means that a parameter was set with the" << endl; 00118 cerr << "wrong type (for example, int instead of double). " << endl; 00119 cerr << "please check the documentation for the type required by each parameer." << endl; 00120 IFPACK_CHK_ERR(-1); 00121 } 00122 00123 return(0); 00124 } 00125 00126 //========================================================================== 00127 int Ifpack_ILUT::Initialize() 00128 { 00129 // delete previously allocated factorization 00130 Destroy(); 00131 00132 Time_.ResetStartTime(); 00133 00134 // check only in serial 00135 if (Comm().NumProc() == 1 && Matrix().NumMyRows() != Matrix().NumMyCols()) 00136 IFPACK_CHK_ERR(-2); 00137 00138 NumMyRows_ = Matrix().NumMyRows(); 00139 00140 // nothing else to do here 00141 IsInitialized_ = true; 00142 ++NumInitialize_; 00143 InitializeTime_ += Time_.ElapsedTime(); 00144 00145 return(0); 00146 } 00147 00148 //========================================================================== 00149 class Ifpack_AbsComp 00150 { 00151 public: 00152 inline bool operator()(const double& x, const double& y) 00153 { 00154 return(IFPACK_ABS(x) > IFPACK_ABS(y)); 00155 } 00156 }; 00157 00158 //========================================================================== 00159 // MS // completely rewritten the algorithm on 25-Jan-05, using more STL 00160 // MS // and in a nicer and cleaner way. Also, it is more efficient. 00161 // MS // WARNING: Still not fully tested! 00162 int Ifpack_ILUT::Compute() 00163 { 00164 if (!IsInitialized()) 00165 IFPACK_CHK_ERR(Initialize()); 00166 00167 Time_.ResetStartTime(); 00168 IsComputed_ = false; 00169 00170 NumMyRows_ = A_.NumMyRows(); 00171 int Length = A_.MaxNumEntries(); 00172 vector<int> RowIndicesL(Length); 00173 vector<double> RowValuesL(Length); 00174 vector<int> RowIndicesU(Length); 00175 vector<double> RowValuesU(Length); 00176 bool distributed = (Comm().NumProc() > 1)?true:false; 00177 00178 if (distributed) 00179 { 00180 SerialComm_ = rcp(new Epetra_SerialComm); 00181 SerialMap_ = rcp(new Epetra_Map(NumMyRows_, 0, *SerialComm_)); 00182 assert (SerialComm_.get() != 0); 00183 assert (SerialMap_.get() != 0); 00184 } 00185 else 00186 SerialMap_ = rcp(const_cast<Epetra_Map*>(&A_.RowMatrixRowMap()), false); 00187 00188 int RowNnzU; 00189 00190 L_ = rcp(new Epetra_CrsMatrix(Copy, *SerialMap_, 0)); 00191 U_ = rcp(new Epetra_CrsMatrix(Copy, *SerialMap_, 0)); 00192 00193 if ((L_.get() == 0) || (U_.get() == 0)) 00194 IFPACK_CHK_ERR(-5); // memory allocation error 00195 00196 // insert first row in U_ and L_ 00197 IFPACK_CHK_ERR(A_.ExtractMyRowCopy(0,Length,RowNnzU, 00198 &RowValuesU[0],&RowIndicesU[0])); 00199 00200 if (distributed) 00201 { 00202 int count = 0; 00203 for (int i = 0 ;i < RowNnzU ; ++i) 00204 { 00205 if (RowIndicesU[i] < NumMyRows_){ 00206 RowIndicesU[count] = RowIndicesU[i]; 00207 RowValuesU[count] = RowValuesU[i]; 00208 ++count; 00209 } 00210 else 00211 continue; 00212 } 00213 RowNnzU = count; 00214 } 00215 00216 // modify diagonal 00217 for (int i = 0 ;i < RowNnzU ; ++i) { 00218 if (RowIndicesU[i] == 0) { 00219 double& v = RowValuesU[i]; 00220 v = AbsoluteThreshold() * EPETRA_SGN(v) + RelativeThreshold() * v; 00221 break; 00222 } 00223 } 00224 00225 IFPACK_CHK_ERR(U_->InsertGlobalValues(0,RowNnzU,&(RowValuesU[0]), 00226 &(RowIndicesU[0]))); 00227 // FIXME: DOES IT WORK IN PARALLEL ?? 00228 RowValuesU[0] = 1.0; 00229 RowIndicesU[0] = 0; 00230 IFPACK_CHK_ERR(L_->InsertGlobalValues(0,1,&(RowValuesU[0]), 00231 &(RowIndicesU[0]))); 00232 00233 int max_keys = (int) 10 * A_.MaxNumEntries() * LevelOfFill() ; 00234 Ifpack_HashTable SingleRowU(max_keys , 1); 00235 Ifpack_HashTable SingleRowL(max_keys , 1); 00236 00237 int hash_size = SingleRowU.getRecommendedHashSize(max_keys) ; 00238 vector<int> keys; keys.reserve(hash_size * 10); 00239 vector<double> values; values.reserve(hash_size * 10); 00240 vector<double> AbsRow; AbsRow.reserve(hash_size * 10); 00241 00242 // =================== // 00243 // start factorization // 00244 // =================== // 00245 00246 double this_proc_flops = 0.0; 00247 00248 for (int row_i = 1 ; row_i < NumMyRows_ ; ++row_i) 00249 { 00250 // get row `row_i' of the matrix, store in U pointers 00251 IFPACK_CHK_ERR(A_.ExtractMyRowCopy(row_i,Length,RowNnzU, 00252 &RowValuesU[0],&RowIndicesU[0])); 00253 00254 if (distributed) 00255 { 00256 int count = 0; 00257 for (int i = 0 ;i < RowNnzU ; ++i) 00258 { 00259 if (RowIndicesU[i] < NumMyRows_){ 00260 RowIndicesU[count] = RowIndicesU[i]; 00261 RowValuesU[count] = RowValuesU[i]; 00262 ++count; 00263 } 00264 else 00265 continue; 00266 } 00267 RowNnzU = count; 00268 } 00269 00270 int NnzLower = 0; 00271 int NnzUpper = 0; 00272 00273 for (int i = 0 ;i < RowNnzU ; ++i) { 00274 if (RowIndicesU[i] < row_i) 00275 NnzLower++; 00276 else if (RowIndicesU[i] == row_i) { 00277 // add threshold 00278 NnzUpper++; 00279 double& v = RowValuesU[i]; 00280 v = AbsoluteThreshold() * EPETRA_SGN(v) + RelativeThreshold() * v; 00281 } 00282 else 00283 NnzUpper++; 00284 } 00285 00286 int FillL = (int)(LevelOfFill() * NnzLower); 00287 int FillU = (int)(LevelOfFill() * NnzUpper); 00288 if (FillL == 0) FillL = 1; 00289 if (FillU == 0) FillU = 1; 00290 00291 // convert line `row_i' into STL map for fast access 00292 SingleRowU.reset(); 00293 00294 for (int i = 0 ; i < RowNnzU ; ++i) { 00295 SingleRowU.set(RowIndicesU[i], RowValuesU[i]); 00296 } 00297 00298 // for the multipliers 00299 SingleRowL.reset(); 00300 00301 int start_col = NumMyRows_; 00302 for (int i = 0 ; i < RowNnzU ; ++i) 00303 start_col = EPETRA_MIN(start_col, RowIndicesU[i]); 00304 00305 int flops = 0; 00306 00307 for (int col_k = start_col ; col_k < row_i ; ++col_k) { 00308 00309 int* ColIndicesK; 00310 double* ColValuesK; 00311 int ColNnzK; 00312 00313 double xxx = SingleRowU.get(col_k); 00314 // This factorization is too "relaxed". Leaving it as it is, as Tifpack 00315 // does it correctly. 00316 if (IFPACK_ABS(xxx) > DropTolerance()) { 00317 IFPACK_CHK_ERR(U_->ExtractGlobalRowView(col_k, ColNnzK, ColValuesK, 00318 ColIndicesK)); 00319 00320 // FIXME: can keep trace of diagonals 00321 double DiagonalValueK = 0.0; 00322 for (int i = 0 ; i < ColNnzK ; ++i) { 00323 if (ColIndicesK[i] == col_k) { 00324 DiagonalValueK = ColValuesK[i]; 00325 break; 00326 } 00327 } 00328 00329 // FIXME: this should never happen! 00330 if (DiagonalValueK == 0.0) 00331 DiagonalValueK = AbsoluteThreshold(); 00332 00333 double yyy = xxx / DiagonalValueK ; 00334 SingleRowL.set(col_k, yyy); 00335 ++flops; 00336 00337 for (int j = 0 ; yyy != 0.0 && j < ColNnzK ; ++j) 00338 { 00339 int col_j = ColIndicesK[j]; 00340 00341 if (col_j < col_k) continue; 00342 00343 SingleRowU.set(col_j, -yyy * ColValuesK[j], true); 00344 flops += 2; 00345 } 00346 } 00347 } 00348 00349 this_proc_flops += 1.0 * flops; 00350 00351 double cutoff = DropTolerance(); 00352 double DiscardedElements = 0.0; 00353 int count; 00354 00355 // drop elements to satisfy LevelOfFill(), start with L 00356 count = 0; 00357 int sizeL = SingleRowL.getNumEntries(); 00358 keys.resize(sizeL); 00359 values.resize(sizeL); 00360 00361 AbsRow.resize(sizeL); 00362 00363 SingleRowL.arrayify( 00364 keys.size() ? &keys[0] : 0, 00365 values.size() ? &values[0] : 0 00366 ); 00367 for (int i = 0; i < sizeL; ++i) 00368 if (IFPACK_ABS(values[i]) > DropTolerance()) { 00369 AbsRow[count++] = IFPACK_ABS(values[i]); 00370 } 00371 00372 if (count > FillL) { 00373 nth_element(AbsRow.begin(), AbsRow.begin() + FillL, AbsRow.begin() + count, 00374 greater<double>()); 00375 cutoff = AbsRow[FillL]; 00376 } 00377 00378 for (int i = 0; i < sizeL; ++i) { 00379 if (IFPACK_ABS(values[i]) >= cutoff) { 00380 IFPACK_CHK_ERR(L_->InsertGlobalValues(row_i,1, &values[i], (int*)&keys[i])); 00381 } 00382 else 00383 DiscardedElements += values[i]; 00384 } 00385 00386 // FIXME: DOES IT WORK IN PARALLEL ??? 00387 // add 1 to the diagonal 00388 double dtmp = 1.0; 00389 IFPACK_CHK_ERR(L_->InsertGlobalValues(row_i,1, &dtmp, &row_i)); 00390 00391 // same business with U_ 00392 count = 0; 00393 int sizeU = SingleRowU.getNumEntries(); 00394 AbsRow.resize(sizeU + 1); 00395 keys.resize(sizeU + 1); 00396 values.resize(sizeU + 1); 00397 00398 SingleRowU.arrayify(&keys[0], &values[0]); 00399 00400 for (int i = 0; i < sizeU; ++i) 00401 if (keys[i] >= row_i && IFPACK_ABS(values[i]) > DropTolerance()) 00402 { 00403 AbsRow[count++] = IFPACK_ABS(values[i]); 00404 } 00405 00406 if (count > FillU) { 00407 nth_element(AbsRow.begin(), AbsRow.begin() + FillU, AbsRow.begin() + count, 00408 greater<double>()); 00409 cutoff = AbsRow[FillU]; 00410 } 00411 00412 // sets the factors in U_ 00413 for (int i = 0; i < sizeU; ++i) 00414 { 00415 int col = keys[i]; 00416 double val = values[i]; 00417 00418 if (col >= row_i) { 00419 if (IFPACK_ABS(val) >= cutoff || row_i == col) { 00420 IFPACK_CHK_ERR(U_->InsertGlobalValues(row_i,1, &val, &col)); 00421 } 00422 else 00423 DiscardedElements += val; 00424 } 00425 } 00426 00427 // FIXME: not so sure of that! 00428 if (RelaxValue() != 0.0) { 00429 DiscardedElements *= RelaxValue(); 00430 IFPACK_CHK_ERR(U_->InsertGlobalValues(row_i,1, &DiscardedElements, 00431 &row_i)); 00432 } 00433 } 00434 00435 double tf; 00436 Comm().SumAll(&this_proc_flops, &tf, 1); 00437 ComputeFlops_ += tf; 00438 00439 IFPACK_CHK_ERR(L_->FillComplete()); 00440 IFPACK_CHK_ERR(U_->FillComplete()); 00441 00442 #if 0 00443 // to check the complete factorization 00444 Epetra_Vector LHS(A_.RowMatrixRowMap()); 00445 Epetra_Vector RHS1(A_.RowMatrixRowMap()); 00446 Epetra_Vector RHS2(A_.RowMatrixRowMap()); 00447 Epetra_Vector RHS3(A_.RowMatrixRowMap()); 00448 LHS.Random(); 00449 00450 cout << "A = " << A_.NumGlobalNonzeros() << endl; 00451 cout << "L = " << L_->NumGlobalNonzeros() << endl; 00452 cout << "U = " << U_->NumGlobalNonzeros() << endl; 00453 00454 A_.Multiply(false,LHS,RHS1); 00455 U_->Multiply(false,LHS,RHS2); 00456 L_->Multiply(false,RHS2,RHS3); 00457 00458 RHS1.Update(-1.0, RHS3, 1.0); 00459 double Norm; 00460 RHS1.Norm2(&Norm); 00461 #endif 00462 00463 int MyNonzeros = L_->NumGlobalNonzeros() + U_->NumGlobalNonzeros(); 00464 Comm().SumAll(&MyNonzeros, &GlobalNonzeros_, 1); 00465 00466 IsComputed_ = true; 00467 00468 ++NumCompute_; 00469 ComputeTime_ += Time_.ElapsedTime(); 00470 00471 return(0); 00472 00473 } 00474 00475 //============================================================================= 00476 int Ifpack_ILUT::ApplyInverse(const Epetra_MultiVector& X, 00477 Epetra_MultiVector& Y) const 00478 { 00479 if (!IsComputed()) 00480 IFPACK_CHK_ERR(-2); // compute preconditioner first 00481 00482 if (X.NumVectors() != Y.NumVectors()) 00483 IFPACK_CHK_ERR(-3); // Return error: X and Y not the same size 00484 00485 Time_.ResetStartTime(); 00486 00487 // NOTE: L_ and U_ are based on SerialMap_, while Xcopy is based 00488 // on A.Map()... which are in general different. However, Solve() 00489 // does not seem to care... which is fine with me. 00490 // 00491 // AztecOO gives X and Y pointing to the same memory location, 00492 // need to create an auxiliary vector, Xcopy 00493 Teuchos::RefCountPtr<const Epetra_MultiVector> Xcopy; 00494 if (X.Pointers()[0] == Y.Pointers()[0]) 00495 Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) ); 00496 else 00497 Xcopy = Teuchos::rcp( &X, false ); 00498 00499 if (!UseTranspose_) 00500 { 00501 // solves LU Y = X 00502 IFPACK_CHK_ERR(L_->Solve(false,false,false,*Xcopy,Y)); 00503 IFPACK_CHK_ERR(U_->Solve(true,false,false,Y,Y)); 00504 } 00505 else 00506 { 00507 // solves U(trans) L(trans) Y = X 00508 IFPACK_CHK_ERR(U_->Solve(true,true,false,*Xcopy,Y)); 00509 IFPACK_CHK_ERR(L_->Solve(false,true,false,Y,Y)); 00510 } 00511 00512 ++NumApplyInverse_; 00513 ApplyInverseFlops_ += X.NumVectors() * 2 * GlobalNonzeros_; 00514 ApplyInverseTime_ += Time_.ElapsedTime(); 00515 00516 return(0); 00517 00518 } 00519 //============================================================================= 00520 // This function finds X such that LDU Y = X or U(trans) D L(trans) Y = X for multiple RHS 00521 int Ifpack_ILUT::Apply(const Epetra_MultiVector& X, 00522 Epetra_MultiVector& Y) const 00523 { 00524 return(-98); 00525 } 00526 00527 //============================================================================= 00528 double Ifpack_ILUT::Condest(const Ifpack_CondestType CT, 00529 const int MaxIters, const double Tol, 00530 Epetra_RowMatrix* Matrix_in) 00531 { 00532 if (!IsComputed()) // cannot compute right now 00533 return(-1.0); 00534 00535 // NOTE: this is computing the *local* condest 00536 if (Condest_ == -1.0) 00537 Condest_ = Ifpack_Condest(*this, CT, MaxIters, Tol, Matrix_in); 00538 00539 return(Condest_); 00540 } 00541 00542 //============================================================================= 00543 std::ostream& 00544 Ifpack_ILUT::Print(std::ostream& os) const 00545 { 00546 if (!Comm().MyPID()) { 00547 os << endl; 00548 os << "================================================================================" << endl; 00549 os << "Ifpack_ILUT: " << Label() << endl << endl; 00550 os << "Level-of-fill = " << LevelOfFill() << endl; 00551 os << "Absolute threshold = " << AbsoluteThreshold() << endl; 00552 os << "Relative threshold = " << RelativeThreshold() << endl; 00553 os << "Relax value = " << RelaxValue() << endl; 00554 os << "Condition number estimate = " << Condest() << endl; 00555 os << "Global number of rows = " << A_.NumGlobalRows() << endl; 00556 if (IsComputed_) { 00557 os << "Number of nonzeros in A = " << A_.NumGlobalNonzeros() << endl; 00558 os << "Number of nonzeros in L + U = " << NumGlobalNonzeros() 00559 << " ( = " << 100.0 * NumGlobalNonzeros() / A_.NumGlobalNonzeros() 00560 << " % of A)" << endl; 00561 os << "nonzeros / rows = " 00562 << 1.0 * NumGlobalNonzeros() / U_->NumGlobalRows() << endl; 00563 } 00564 os << endl; 00565 os << "Phase # calls Total Time (s) Total MFlops MFlops/s" << endl; 00566 os << "----- ------- -------------- ------------ --------" << endl; 00567 os << "Initialize() " << std::setw(5) << NumInitialize() 00568 << " " << std::setw(15) << InitializeTime() 00569 << " 0.0 0.0" << endl; 00570 os << "Compute() " << std::setw(5) << NumCompute() 00571 << " " << std::setw(15) << ComputeTime() 00572 << " " << std::setw(15) << 1.0e-6 * ComputeFlops(); 00573 if (ComputeTime() != 0.0) 00574 os << " " << std::setw(15) << 1.0e-6 * ComputeFlops() / ComputeTime() << endl; 00575 else 00576 os << " " << std::setw(15) << 0.0 << endl; 00577 os << "ApplyInverse() " << std::setw(5) << NumApplyInverse() 00578 << " " << std::setw(15) << ApplyInverseTime() 00579 << " " << std::setw(15) << 1.0e-6 * ApplyInverseFlops(); 00580 if (ApplyInverseTime() != 0.0) 00581 os << " " << std::setw(15) << 1.0e-6 * ApplyInverseFlops() / ApplyInverseTime() << endl; 00582 else 00583 os << " " << std::setw(15) << 0.0 << endl; 00584 os << "================================================================================" << endl; 00585 os << endl; 00586 } 00587 00588 return(os); 00589 }
1.7.4