|
Anasazi Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Anasazi: Block Eigensolvers Package 00005 // Copyright (2010) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 #ifndef __TSQR_Test_DistTest_hpp 00030 #define __TSQR_Test_DistTest_hpp 00031 00032 #include <Tsqr_Config.hpp> 00033 #include <Tsqr_Random_NormalGenerator.hpp> 00034 #include <Tsqr_verifyTimerConcept.hpp> 00035 00036 #include <Tsqr_generateStack.hpp> 00037 #include <Tsqr_DistTsqr.hpp> 00038 #include <Tsqr_GlobalVerify.hpp> 00039 #include <Tsqr_printGlobalMatrix.hpp> 00040 00041 #include <algorithm> 00042 #include <iomanip> 00043 #include <iostream> 00044 #include <vector> 00045 00048 00049 namespace TSQR { 00050 namespace Test { 00051 00055 template< class Ordinal, class Scalar > 00056 class DistTsqrVerifier { 00057 TSQR::Random::NormalGenerator< Ordinal, Scalar > gen_; 00058 Teuchos::RCP< MessengerBase< Ordinal > > const ordinalComm_; 00059 Teuchos::RCP< MessengerBase< Scalar > > const scalarComm_; 00060 std::string scalarTypeName_; 00061 std::ostream& out_; 00062 std::ostream& err_; 00063 const bool testFactorExplicit_, testFactorImplicit_; 00064 const bool humanReadable_, printMatrices_, debug_; 00065 00066 public: 00067 typedef Ordinal ordinal_type; 00068 typedef Scalar scalar_type; 00069 typedef typename ScalarTraits< scalar_type >::magnitude_type magnitude_type; 00070 typedef typename std::pair< magnitude_type, magnitude_type > result_type; 00071 typedef Matrix< ordinal_type, scalar_type > matrix_type; 00072 00093 DistTsqrVerifier (const Teuchos::RCP< MessengerBase< Ordinal > >& ordinalComm, 00094 const Teuchos::RCP< MessengerBase< Scalar > >& scalarComm, 00095 const std::vector<int>& seed, 00096 const std::string& scalarTypeName, 00097 std::ostream& out, 00098 std::ostream& err, 00099 const bool testFactorExplicit, 00100 const bool testFactorImplicit, 00101 const bool humanReadable, 00102 const bool printMatrices, 00103 const bool debug) : 00104 gen_ (seed), 00105 ordinalComm_ (ordinalComm), 00106 scalarComm_ (scalarComm), 00107 scalarTypeName_ (scalarTypeName), 00108 out_ (out), 00109 err_ (err), 00110 testFactorExplicit_ (testFactorExplicit), 00111 testFactorImplicit_ (testFactorImplicit), 00112 humanReadable_ (humanReadable), 00113 printMatrices_ (printMatrices), 00114 debug_ (debug) 00115 {} 00116 00138 DistTsqrVerifier (const Teuchos::RCP< MessengerBase< Ordinal > >& ordinalComm, 00139 const Teuchos::RCP< MessengerBase< Scalar > >& scalarComm, 00140 const std::string& scalarTypeName, 00141 std::ostream& out, 00142 std::ostream& err, 00143 const bool testFactorExplicit, 00144 const bool testFactorImplicit, 00145 const bool humanReadable, 00146 const bool printMatrices, 00147 const bool debug) : 00148 ordinalComm_ (ordinalComm), 00149 scalarComm_ (scalarComm), 00150 scalarTypeName_ (scalarTypeName), 00151 out_ (out), 00152 err_ (err), 00153 testFactorExplicit_ (testFactorExplicit), 00154 testFactorImplicit_ (testFactorImplicit), 00155 humanReadable_ (humanReadable), 00156 printMatrices_ (printMatrices), 00157 debug_ (debug) 00158 {} 00159 00166 void 00167 getSeed (std::vector<int>& seed) const 00168 { 00169 gen_.getSeed (seed); 00170 } 00171 00176 void 00177 verify (const Ordinal numCols) 00178 { 00179 using std::endl; 00180 00181 const int myRank = scalarComm_->rank(); 00182 if (debug_) 00183 { 00184 scalarComm_->barrier(); 00185 if (myRank == 0) 00186 err_ << "Verifying DistTsqr:" << endl; 00187 scalarComm_->barrier(); 00188 } 00189 00190 // Generate test problem. 00191 Matrix< Ordinal, Scalar > A_local, Q_local, R; 00192 testProblem (A_local, Q_local, R, numCols); 00193 if (debug_) 00194 { 00195 scalarComm_->barrier(); 00196 if (myRank == 0) 00197 err_ << "-- Generated test problem." << endl; 00198 scalarComm_->barrier(); 00199 } 00200 00201 // Set up TSQR implementation. 00202 DistTsqr< Ordinal, Scalar > par (scalarComm_); 00203 if (debug_) 00204 { 00205 scalarComm_->barrier(); 00206 if (myRank == 0) 00207 err_ << "-- DistTsqr object initialized" << endl << endl; 00208 } 00209 00210 // Whether we've printed column headers yet (only matters for 00211 // non-humanReadable output). 00212 bool printedHeaders = false; 00213 00214 // Test DistTsqr::factor() and DistTsqr::explicit_Q(). 00215 if (testFactorImplicit_) 00216 { 00217 // Factor the matrix A (copied into R, which will be 00218 // overwritten on output) 00219 typedef typename DistTsqr< Ordinal, Scalar >::FactorOutput 00220 factor_output_type; 00221 factor_output_type factorOutput = par.factor (R.view()); 00222 if (debug_) 00223 { 00224 scalarComm_->barrier(); 00225 if (myRank == 0) 00226 err_ << "-- Finished DistTsqr::factor" << endl; 00227 } 00228 // Compute the explicit Q factor 00229 par.explicit_Q (numCols, Q_local.get(), Q_local.lda(), factorOutput); 00230 if (debug_) 00231 { 00232 scalarComm_->barrier(); 00233 if (myRank == 0) 00234 err_ << "-- Finished DistTsqr::explicit_Q" << endl; 00235 } 00236 // Verify the factorization 00237 result_type result = 00238 global_verify (numCols, numCols, A_local.get(), A_local.lda(), 00239 Q_local.get(), Q_local.lda(), R.get(), R.lda(), 00240 scalarComm_.get()); 00241 if (debug_) 00242 { 00243 scalarComm_->barrier(); 00244 if (myRank == 0) 00245 err_ << "-- Finished global_verify" << endl; 00246 } 00247 reportResults ("DistTsqr", numCols, result, (! printedHeaders)); 00248 if (! printedHeaders) 00249 printedHeaders = true; 00250 } 00251 00252 // Test DistTsqr::factorExplicit() 00253 if (testFactorExplicit_) 00254 { 00255 // Factor the matrix and compute the explicit Q factor, both 00256 // in a single operation. 00257 par.factorExplicit (R.view(), Q_local.view()); 00258 if (debug_) 00259 { 00260 scalarComm_->barrier(); 00261 if (myRank == 0) 00262 err_ << "-- Finished DistTsqr::factorExplicit" << endl; 00263 } 00264 00265 if (printMatrices_) 00266 { 00267 if (myRank == 0) 00268 err_ << std::endl << "Computed Q factor:" << std::endl; 00269 printGlobalMatrix (err_, Q_local, scalarComm_.get(), ordinalComm_.get()); 00270 if (myRank == 0) 00271 { 00272 err_ << std::endl << "Computed R factor:" << std::endl; 00273 print_local_matrix (err_, R.nrows(), R.ncols(), R.get(), R.lda()); 00274 err_ << std::endl; 00275 } 00276 } 00277 00278 // Verify the factorization 00279 result_type result = 00280 global_verify (numCols, numCols, A_local.get(), A_local.lda(), 00281 Q_local.get(), Q_local.lda(), R.get(), R.lda(), 00282 scalarComm_.get()); 00283 if (debug_) 00284 { 00285 scalarComm_->barrier(); 00286 if (myRank == 0) 00287 err_ << "-- Finished global_verify" << endl; 00288 } 00289 reportResults ("DistTsqrRB", numCols, result, (! printedHeaders)); 00290 if (! printedHeaders) 00291 printedHeaders = true; 00292 } 00293 } 00294 00295 private: 00302 void 00303 reportResults (const std::string& method, 00304 const Ordinal numCols, 00305 const result_type& result, 00306 const bool printHeaders) 00307 { 00308 using std::endl; 00309 00310 const int numProcs = scalarComm_->size(); 00311 const int myRank = scalarComm_->rank(); 00312 00313 if (myRank == 0) 00314 { 00315 if (humanReadable_) 00316 { 00317 out_ << method << " accuracy results:" << endl 00318 << "Scalar type = " << scalarTypeName_ << endl 00319 << "Number of columns = " << numCols << endl 00320 << "Number of (MPI) processes = " << numProcs << endl 00321 << "Relative residual $\\|A - Q*R\\|_2 / \\|A\\|_2$ = " 00322 << result.first << endl 00323 << "Relative orthogonality $\\|I - Q^T*Q\\|_2$ = " 00324 << result.second << endl; 00325 } 00326 else 00327 { 00328 // Use scientific notation for floating-point numbers 00329 out_ << std::scientific; 00330 00331 if (printHeaders) 00332 out_ << "%method,scalarType,numCols,numProcs,relResid,relOrthog" << endl; 00333 00334 out_ << method 00335 << "," << scalarTypeName_ 00336 << "," << numCols 00337 << "," << numProcs 00338 << "," << result.first 00339 << "," << result.second 00340 << endl; 00341 } 00342 } 00343 } 00344 00345 void 00346 testProblem (Matrix< Ordinal, Scalar >& A_local, 00347 Matrix< Ordinal, Scalar >& Q_local, 00348 Matrix< Ordinal, Scalar >& R, 00349 const Ordinal numCols) 00350 { 00351 const Ordinal numRowsLocal = numCols; 00352 00353 // A_local: Space for the matrix A to factor -- local to each 00354 // processor. 00355 // 00356 // A_global: Global matrix (only nonempty on Proc 0); only 00357 // used temporarily. 00358 Matrix< Ordinal, Scalar > A_global; 00359 00360 // This modifies A_local on all procs, and A_global on Proc 0. 00361 par_tsqr_test_problem (gen_, A_local, A_global, numCols, scalarComm_); 00362 00363 if (printMatrices_) 00364 { 00365 const int myRank = scalarComm_->rank(); 00366 00367 if (myRank == 0) 00368 err_ << "Input matrix A:" << std::endl; 00369 printGlobalMatrix (err_, A_local, scalarComm_.get(), ordinalComm_.get()); 00370 if (myRank == 0) 00371 err_ << std::endl; 00372 } 00373 00374 // Copy the test problem input into R, since the factorization 00375 // will overwrite it in place with the final R factor. 00376 R.reshape (numCols, numCols); 00377 R.fill (Scalar(0)); 00378 R.copy (A_local); 00379 00380 // Prepare space in which to construct the explicit Q factor 00381 // (local component on this processor) 00382 Q_local.reshape (numRowsLocal, numCols); 00383 Q_local.fill (Scalar(0)); 00384 } 00385 }; 00386 00387 00391 template< class Ordinal, class Scalar, class TimerType > 00392 class DistTsqrBenchmarker { 00393 TSQR::Random::NormalGenerator< Ordinal, Scalar > gen_; 00394 Teuchos::RCP< MessengerBase< Scalar > > scalarComm_; 00395 Teuchos::RCP< MessengerBase< double > > doubleComm_; 00396 std::string scalarTypeName_; 00397 00398 std::ostream& out_; 00399 std::ostream& err_; 00400 const bool testFactorExplicit_, testFactorImplicit_; 00401 const bool humanReadable_, debug_; 00402 00403 public: 00404 typedef Ordinal ordinal_type; 00405 typedef Scalar scalar_type; 00406 typedef typename ScalarTraits< scalar_type >::magnitude_type magnitude_type; 00407 typedef TimerType timer_type; 00408 00432 DistTsqrBenchmarker (const Teuchos::RCP< MessengerBase< Scalar > >& scalarComm, 00433 const Teuchos::RCP< MessengerBase< double > >& doubleComm, 00434 const std::vector<int>& seed, 00435 const std::string& scalarTypeName, 00436 std::ostream& out, 00437 std::ostream& err, 00438 const bool testFactorExplicit, 00439 const bool testFactorImplicit, 00440 const bool humanReadable, 00441 const bool debug) : 00442 gen_ (seed), 00443 scalarComm_ (scalarComm), 00444 doubleComm_ (doubleComm), 00445 scalarTypeName_ (scalarTypeName), 00446 out_ (out), 00447 err_ (err), 00448 testFactorExplicit_ (testFactorExplicit), 00449 testFactorImplicit_ (testFactorImplicit), 00450 humanReadable_ (humanReadable), 00451 debug_ (debug) 00452 {} 00453 00478 DistTsqrBenchmarker (const Teuchos::RCP< MessengerBase< Scalar > >& scalarComm, 00479 const Teuchos::RCP< MessengerBase< double > >& doubleComm, 00480 const std::string& scalarTypeName, 00481 std::ostream& out, 00482 std::ostream& err, 00483 const bool testFactorExplicit, 00484 const bool testFactorImplicit, 00485 const bool humanReadable, 00486 const bool debug) : 00487 scalarComm_ (scalarComm), 00488 doubleComm_ (doubleComm), 00489 scalarTypeName_ (scalarTypeName), 00490 out_ (out), 00491 err_ (err), 00492 testFactorExplicit_ (testFactorExplicit), 00493 testFactorImplicit_ (testFactorImplicit), 00494 humanReadable_ (humanReadable), 00495 debug_ (debug) 00496 {} 00497 00504 void 00505 getSeed (std::vector<int>& seed) const 00506 { 00507 gen_.getSeed (seed); 00508 } 00509 00516 void 00517 benchmark (const int numTrials, const Ordinal numCols) 00518 { 00519 using std::endl; 00520 00521 // Set up test problem. 00522 Matrix< Ordinal, Scalar > A_local, Q_local, R; 00523 testProblem (A_local, Q_local, R, numCols); 00524 00525 // Set up TSQR implementation. 00526 DistTsqr< Ordinal, Scalar > par (scalarComm_); 00527 00528 // Whether we've printed column headers yet (only matters for 00529 // non-humanReadable output). 00530 bool printedHeaders = false; 00531 00532 if (testFactorImplicit_) 00533 { 00534 std::string timerName ("DistTsqr"); 00535 00536 // Benchmark DistTsqr (factor() and explicit_Q()) for 00537 // numTrials trials. 00538 timer_type timer (timerName); 00539 timer.start(); 00540 for (int trialNum = 0; trialNum < numTrials; ++trialNum) 00541 { 00542 // Factor the matrix A (copied into R, which will be 00543 // overwritten on output) 00544 typedef typename DistTsqr< Ordinal, Scalar >::FactorOutput 00545 factor_output_type; 00546 factor_output_type factorOutput = par.factor (R.view()); 00547 00548 // Compute the explicit Q factor 00549 par.explicit_Q (numCols, Q_local.get(), Q_local.lda(), factorOutput); 00550 } 00551 // Cumulative timing on this MPI process. 00552 // "Cumulative" means the elapsed time of numTrials executions. 00553 const double localCumulativeTiming = timer.stop(); 00554 00555 // reportResults() must be called on all processes, since this 00556 // figures out the min and max timings over all processes. 00557 reportResults (timerName, numTrials, numCols, localCumulativeTiming, 00558 (! printedHeaders)); 00559 if (! printedHeaders) 00560 printedHeaders = true; 00561 } 00562 00563 if (testFactorExplicit_) 00564 { 00565 std::string timerName ("DistTsqrRB"); 00566 00567 // Benchmark DistTsqr::factorExplicit() for numTrials trials. 00568 timer_type timer (timerName); 00569 timer.start(); 00570 for (int trialNum = 0; trialNum < numTrials; ++trialNum) 00571 { 00572 par.factorExplicit (R.view(), Q_local.view()); 00573 } 00574 // Cumulative timing on this MPI process. 00575 // "Cumulative" means the elapsed time of numTrials executions. 00576 const double localCumulativeTiming = timer.stop(); 00577 00578 // Report cumulative (not per-invocation) timing results 00579 reportResults (timerName, numTrials, numCols, 00580 localCumulativeTiming, (! printedHeaders)); 00581 if (! printedHeaders) 00582 printedHeaders = true; 00583 00584 // Per-invocation timings (for factorExplicit() benchmark 00585 // only). localTimings were computed on this MPI process; 00586 // globalTimings are statistical summaries of those over 00587 // all MPI processes. We only collect that data for 00588 // factorExplicit(). 00589 std::vector< TimeStats > localTimings; 00590 std::vector< TimeStats > globalTimings; 00591 par.getFactorExplicitTimings (localTimings); 00592 for (std::vector< TimeStats >::size_type k = 0; k < localTimings.size(); ++k) 00593 globalTimings.push_back (TimeStats::globalTimeStats (doubleComm_.get(), localTimings[k])); 00594 std::vector< std::string > timingLabels; 00595 par.getFactorExplicitTimingLabels (timingLabels); 00596 00597 if (humanReadable_) 00598 out_ << timerName << " per-invocation benchmark results:" << endl; 00599 00600 const std::string labelLabel ("label,scalarType"); 00601 for (std::vector< std::string >::size_type k = 0; k < timingLabels.size(); ++k) 00602 { 00603 // Only print column headers once 00604 const bool printHeaders = (k == 0); 00605 globalTimings[k].print (out_, humanReadable_, 00606 timingLabels[k] + "," + scalarTypeName_, 00607 labelLabel, printHeaders); 00608 } 00609 } 00610 } 00611 00612 private: 00625 void 00626 reportResults (const std::string& method, 00627 const int numTrials, 00628 const ordinal_type numCols, 00629 const double localTiming, 00630 const bool printHeaders) 00631 { 00632 using std::endl; 00633 00634 // Find min and max timing over all MPI processes 00635 TimeStats localStats; 00636 localStats.update (localTiming); 00637 TimeStats globalStats = TimeStats::globalTimeStats (doubleComm_.get(), localStats); 00638 00639 // Only Rank 0 prints the final results. 00640 const bool printResults = (doubleComm_->rank() == 0); 00641 if (printResults) 00642 { 00643 const int numProcs = doubleComm_->size(); 00644 if (humanReadable_) 00645 { 00646 out_ << method << " cumulative benchmark results (total time over all trials):" << endl 00647 << "Scalar type = " << scalarTypeName_ << endl 00648 << "Number of columns = " << numCols << endl 00649 << "Number of (MPI) processes = " << numProcs << endl 00650 << "Number of trials = " << numTrials << endl 00651 << "Min timing (in seconds) = " << globalStats.min() << endl 00652 << "Mean timing (in seconds) = " << globalStats.mean() << endl 00653 << "Max timing (in seconds) = " << globalStats.max() << endl 00654 << endl; 00655 } 00656 else 00657 { 00658 // Use scientific notation for floating-point numbers 00659 out_ << std::scientific; 00660 00661 if (printHeaders) 00662 out_ << "%method,scalarType,numCols,numProcs,numTrials,min,mean,max" << endl; 00663 00664 out_ << method 00665 << "," << scalarTypeName_ 00666 << "," << numCols 00667 << "," << numProcs 00668 << "," << numTrials 00669 << "," << globalStats.min() 00670 << "," << globalStats.mean() 00671 << "," << globalStats.max() 00672 << endl; 00673 } 00674 } 00675 } 00676 00677 void 00678 testProblem (Matrix< Ordinal, Scalar >& A_local, 00679 Matrix< Ordinal, Scalar >& Q_local, 00680 Matrix< Ordinal, Scalar >& R, 00681 const Ordinal numCols) 00682 { 00683 const Ordinal numRowsLocal = numCols; 00684 00685 // A_local: Space for the matrix A to factor -- local to each 00686 // processor. 00687 // 00688 // A_global: Global matrix (only nonempty on Proc 0); only 00689 // used temporarily. 00690 Matrix< Ordinal, Scalar > A_global; 00691 00692 // This modifies A_local on all procs, and A_global on Proc 0. 00693 par_tsqr_test_problem (gen_, A_local, A_global, numCols, scalarComm_); 00694 00695 // Copy the test problem input into R, since the factorization 00696 // will overwrite it in place with the final R factor. 00697 R.reshape (numCols, numCols); 00698 R.copy (A_local); 00699 00700 // Prepare space in which to construct the explicit Q factor 00701 // (local component on this processor) 00702 Q_local.reshape (numRowsLocal, numCols); 00703 Q_local.fill (Scalar(0)); 00704 } 00705 00708 static void 00709 conceptChecks () 00710 { 00711 verifyTimerConcept< timer_type >(); 00712 } 00713 }; 00714 00715 00716 } // namespace Test 00717 } // namespace TSQR 00718 00719 #endif // __TSQR_Test_DistTest_hpp
1.7.4