|
Anasazi Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Anasazi: Block Eigensolvers Package 00005 // Copyright (2010) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 #include <Tsqr_Config.hpp> 00030 #include <Tsqr_SeqTest.hpp> 00031 00032 #include <Tsqr_Random_NormalGenerator.hpp> 00033 #include <Tsqr_nodeTestProblem.hpp> 00034 #include <Tsqr_verifyTimerConcept.hpp> 00035 00036 #include <Tsqr_Blas.hpp> 00037 #include <Tsqr_Lapack.hpp> 00038 #include <Tsqr_LocalVerify.hpp> 00039 #include <Tsqr_Matrix.hpp> 00040 #include <Tsqr_ScalarTraits.hpp> 00041 #include <Tsqr_SequentialTsqr.hpp> 00042 #include <Tsqr_Util.hpp> 00043 00044 #include <algorithm> 00045 #include <cstring> // size_t definition 00046 #include <fstream> 00047 #include <iomanip> 00048 #include <iostream> 00049 #include <limits> 00050 #include <sstream> 00051 #include <stdexcept> 00052 #include <vector> 00053 00056 00057 namespace TSQR { 00058 namespace Test { 00059 00063 template< class Ordinal, class Scalar > 00064 static void 00065 verifySeqTsqrTemplate (std::ostream& out, 00066 TSQR::Random::NormalGenerator< Ordinal, Scalar >& generator, 00067 const std::string& datatype, 00068 const std::string& shortDatatype, 00069 const Ordinal nrows, 00070 const Ordinal ncols, 00071 const size_t cache_block_size, 00072 const bool contiguous_cache_blocks, 00073 const bool save_matrices, 00074 const bool human_readable, 00075 const bool b_debug) 00076 { 00077 typedef typename ScalarTraits< Scalar >::magnitude_type magnitude_type; 00078 using std::cerr; 00079 using std::endl; 00080 using std::pair; 00081 using std::string; 00082 00083 SequentialTsqr< Ordinal, Scalar > actor (cache_block_size); 00084 Ordinal numCacheBlocks; 00085 00086 if (b_debug) 00087 { 00088 cerr << "Sequential TSQR test problem:" << endl 00089 << "* " << nrows << " x " << ncols << endl 00090 << "* Cache block of " << actor.cache_block_size() << " bytes" << endl; 00091 if (contiguous_cache_blocks) 00092 cerr << "* Contiguous cache blocks" << endl; 00093 } 00094 00095 Matrix< Ordinal, Scalar > A (nrows, ncols); 00096 Matrix< Ordinal, Scalar > A_copy (nrows, ncols); 00097 Matrix< Ordinal, Scalar > Q (nrows, ncols); 00098 Matrix< Ordinal, Scalar > R (ncols, ncols); 00099 if (std::numeric_limits< Scalar >::has_quiet_NaN) 00100 { 00101 A.fill (std::numeric_limits< Scalar>::quiet_NaN()); 00102 A_copy.fill (std::numeric_limits< Scalar >::quiet_NaN()); 00103 Q.fill (std::numeric_limits< Scalar >::quiet_NaN()); 00104 R.fill (std::numeric_limits< Scalar >::quiet_NaN()); 00105 } 00106 const Ordinal lda = nrows; 00107 const Ordinal ldq = nrows; 00108 const Ordinal ldr = ncols; 00109 00110 // Create a test problem 00111 nodeTestProblem (generator, nrows, ncols, A.get(), A.lda(), true); 00112 00113 if (save_matrices) 00114 { 00115 string filename = "A_" + shortDatatype + ".txt"; 00116 if (b_debug) 00117 cerr << "-- Saving test problem to \"" << filename << "\"" << endl; 00118 std::ofstream fileOut (filename.c_str()); 00119 print_local_matrix (fileOut, nrows, ncols, A.get(), A.lda()); 00120 fileOut.close(); 00121 } 00122 00123 if (b_debug) 00124 cerr << "-- Generated test problem" << endl; 00125 00126 // Copy A into A_copy, since TSQR overwrites the input. If 00127 // specified, rearrange the data in A_copy so that the data in 00128 // each cache block is contiguously stored. 00129 if (! contiguous_cache_blocks) 00130 { 00131 A_copy.copy (A); 00132 if (b_debug) 00133 cerr << "-- Copied test problem from A into A_copy" << endl; 00134 } 00135 else 00136 { 00137 actor.cache_block (nrows, ncols, A_copy.get(), A.get(), A.lda()); 00138 if (b_debug) 00139 cerr << "-- Reorganized test matrix to have contiguous " 00140 "cache blocks" << endl; 00141 00142 // Verify cache blocking, when in debug mode. 00143 if (b_debug) 00144 { 00145 Matrix< Ordinal, Scalar > A2 (nrows, ncols); 00146 if (std::numeric_limits< Scalar >::has_quiet_NaN) 00147 A2.fill (std::numeric_limits< Scalar >::quiet_NaN()); 00148 00149 actor.un_cache_block (nrows, ncols, A2.get(), A2.lda(), A_copy.get()); 00150 if (A == A2) 00151 { 00152 if (b_debug) 00153 cerr << "-- Cache blocking test succeeded!" << endl; 00154 } 00155 else 00156 throw std::logic_error ("Cache blocking failed"); 00157 } 00158 } 00159 00160 // Fill R with zeros, since the factorization may not overwrite 00161 // the strict lower triangle of R. 00162 R.fill (Scalar(0)); 00163 00164 // Count the number of cache blocks that factor() will use. 00165 // This is only for diagnostic purposes. 00166 numCacheBlocks = 00167 actor.factor_num_cache_blocks (nrows, ncols, A_copy.get(), 00168 A_copy.lda(), contiguous_cache_blocks); 00169 // In debug mode, report how many cache blocks factor() will use. 00170 if (b_debug) 00171 cerr << "-- Number of cache blocks factor() will use: " 00172 << numCacheBlocks << endl << endl; 00173 00174 // Factor the matrix and compute the explicit Q factor 00175 typedef typename SequentialTsqr< Ordinal, Scalar >::FactorOutput 00176 factor_output_type; 00177 factor_output_type factorOutput = 00178 actor.factor (nrows, ncols, A_copy.get(), A_copy.lda(), 00179 R.get(), R.lda(), contiguous_cache_blocks); 00180 if (b_debug) 00181 cerr << "-- Finished SequentialTsqr::factor" << endl; 00182 00183 if (save_matrices) 00184 { 00185 string filename = "R_" + shortDatatype + ".txt"; 00186 if (b_debug) 00187 cerr << "-- Saving R factor to \"" << filename << "\"" << endl; 00188 std::ofstream fileOut (filename.c_str()); 00189 print_local_matrix (fileOut, ncols, ncols, R.get(), R.lda()); 00190 fileOut.close(); 00191 } 00192 00193 actor.explicit_Q (nrows, ncols, A_copy.get(), lda, factorOutput, 00194 ncols, Q.get(), Q.lda(), contiguous_cache_blocks); 00195 if (b_debug) 00196 cerr << "-- Finished SequentialTsqr::explicit_Q" << endl; 00197 00198 // "Un"-cache-block the output, if contiguous cache blocks were 00199 // used. This is only necessary because local_verify() doesn't 00200 // currently support contiguous cache blocks. 00201 if (contiguous_cache_blocks) 00202 { 00203 // Use A_copy as temporary storage for un-cache-blocking Q. 00204 actor.un_cache_block (nrows, ncols, A_copy.get(), A_copy.lda(), Q.get()); 00205 Q.copy (A_copy); 00206 if (b_debug) 00207 cerr << "-- Un-cache-blocked output Q factor" << endl; 00208 } 00209 00210 if (save_matrices) 00211 { 00212 string filename = "Q_" + shortDatatype + ".txt"; 00213 if (b_debug) 00214 cerr << "-- Saving Q factor to \"" << filename << "\"" << endl; 00215 std::ofstream fileOut (filename.c_str()); 00216 print_local_matrix (fileOut, nrows, ncols, Q.get(), Q.lda()); 00217 fileOut.close(); 00218 } 00219 00220 // Print out the R factor 00221 if (false && b_debug) 00222 { 00223 cerr << endl << "-- R factor:" << endl; 00224 print_local_matrix (cerr, ncols, ncols, R.get(), R.lda()); 00225 cerr << endl; 00226 } 00227 00228 // Validate the factorization 00229 pair< magnitude_type, magnitude_type > results = 00230 local_verify (nrows, ncols, A.get(), lda, Q.get(), ldq, R.get(), ldr); 00231 if (b_debug) 00232 cerr << "-- Finished local_verify" << endl; 00233 00234 // Print the results 00235 if (human_readable) 00236 out << "Sequential (cache-blocked) TSQR:" << endl 00237 << "Datatype: " << datatype << endl 00238 << "Relative residual: " << results.first << endl 00239 << "Relative orthogonality: " << results.second 00240 << endl << endl; 00241 else 00242 out << "SeqTSQR" 00243 << "," << datatype 00244 << "," << nrows 00245 << "," << ncols 00246 << "," << actor.cache_block_size() 00247 << "," << numCacheBlocks 00248 << "," << contiguous_cache_blocks 00249 << "," << results.first 00250 << "," << results.second 00251 << endl; 00252 } 00253 00254 00255 void 00256 verifySeqTsqr (std::ostream& out, 00257 const int nrows, 00258 const int ncols, 00259 const size_t cache_block_size, 00260 const bool test_complex_arithmetic, 00261 const bool save_matrices, 00262 const bool contiguous_cache_blocks, 00263 const bool human_readable, 00264 const bool b_debug) 00265 { 00266 using TSQR::Random::NormalGenerator; 00267 using std::complex; 00268 using std::string; 00269 using std::vector; 00270 00271 // 00272 // We do tests one after another, using the seed from the 00273 // previous test in the current test, so that the pseudorandom 00274 // streams used by the tests are independent. 00275 // 00276 00277 // On output: Seed for the next pseudorandom number generator. 00278 vector< int > iseed(4); 00279 string datatype; // name of the current datatype being tested 00280 string shortDatatype; // one-letter version of datatype 00281 00282 // First test. The PRNG seeds itself with a default value. 00283 // This will be the same each time, so if you want 00284 // nondeterministic behavior, you should pick the seed values 00285 // yourself. 00286 NormalGenerator< int, float > normgenS; 00287 datatype = "float"; 00288 shortDatatype = "S"; 00289 verifySeqTsqrTemplate (out, normgenS, datatype, shortDatatype, nrows, ncols, 00290 cache_block_size, contiguous_cache_blocks, 00291 save_matrices, human_readable, b_debug); 00292 // Fetch the pseudorandom seed from the previous test. 00293 normgenS.getSeed (iseed); 00294 NormalGenerator< int, double > normgenD (iseed); 00295 // Next test. 00296 datatype = "double"; 00297 shortDatatype = "D"; 00298 verifySeqTsqrTemplate (out, normgenD, datatype, shortDatatype, nrows, ncols, 00299 cache_block_size, contiguous_cache_blocks, 00300 save_matrices, human_readable, b_debug); 00301 00302 if (test_complex_arithmetic) 00303 { 00304 normgenD.getSeed (iseed); 00305 NormalGenerator< int, complex<float> > normgenC (iseed); 00306 datatype = "complex<float>"; 00307 shortDatatype = "C"; 00308 verifySeqTsqrTemplate (out, normgenC, datatype, shortDatatype, nrows, ncols, 00309 cache_block_size, contiguous_cache_blocks, 00310 save_matrices, human_readable, b_debug); 00311 normgenC.getSeed (iseed); 00312 NormalGenerator< int, complex<double> > normgenZ (iseed); 00313 datatype = "complex<double>"; 00314 shortDatatype = "Z"; 00315 verifySeqTsqrTemplate (out, normgenZ, datatype, shortDatatype, nrows, ncols, 00316 cache_block_size, contiguous_cache_blocks, 00317 save_matrices, human_readable, b_debug); 00318 } 00319 } 00320 00321 00322 00323 template< class Ordinal, class Scalar > 00324 static void 00325 verifyLapackTemplate (TSQR::Random::NormalGenerator< Ordinal, Scalar >& generator, 00326 const std::string& datatype, 00327 const Ordinal nrows, 00328 const Ordinal ncols, 00329 const bool human_readable, 00330 const bool b_debug) 00331 { 00332 typedef typename ScalarTraits< Scalar >::magnitude_type magnitude_type; 00333 using std::ostringstream; 00334 using std::cerr; 00335 using std::cout; 00336 using std::endl; 00337 00338 // Initialize LAPACK. 00339 LAPACK< Ordinal, Scalar > lapack; 00340 00341 if (b_debug) 00342 cerr << "LAPACK test problem:" << endl 00343 << "* " << nrows << " x " << ncols << endl; 00344 00345 Matrix< Ordinal, Scalar > A (nrows, ncols); 00346 Matrix< Ordinal, Scalar > A_copy (nrows, ncols); 00347 Matrix< Ordinal, Scalar > Q (nrows, ncols); 00348 Matrix< Ordinal, Scalar > R (ncols, ncols); 00349 if (std::numeric_limits< Scalar >::has_quiet_NaN) 00350 { 00351 A.fill (std::numeric_limits< Scalar>::quiet_NaN()); 00352 A_copy.fill (std::numeric_limits< Scalar >::quiet_NaN()); 00353 Q.fill (std::numeric_limits< Scalar >::quiet_NaN()); 00354 R.fill (std::numeric_limits< Scalar >::quiet_NaN()); 00355 } 00356 const Ordinal lda = nrows; 00357 const Ordinal ldq = nrows; 00358 const Ordinal ldr = ncols; 00359 00360 // Create a test problem 00361 nodeTestProblem (generator, nrows, ncols, A.get(), A.lda(), true); 00362 00363 if (b_debug) 00364 cerr << "-- Generated test problem" << endl; 00365 00366 // Copy A into A_copy, since LAPACK QR overwrites the input. 00367 A_copy.copy (A); 00368 if (b_debug) 00369 cerr << "-- Copied test problem from A into A_copy" << endl; 00370 00371 // Now determine the required workspace for the factorization. 00372 const Ordinal lwork = lworkQueryLapackQr (lapack, nrows, ncols, A_copy.lda()); 00373 std::vector< Scalar > work (lwork); 00374 std::vector< Scalar > tau (ncols); 00375 00376 // Fill R with zeros, since the factorization may not overwrite 00377 // the strict lower triangle of R. 00378 R.fill (Scalar(0)); 00379 00380 // Compute the QR factorization 00381 int info = 0; // INFO is always an int 00382 lapack.GEQRF (nrows, ncols, A_copy.get(), A_copy.lda(), 00383 &tau[0], &work[0], lwork, &info); 00384 if (info != 0) 00385 { 00386 ostringstream os; 00387 os << "LAPACK QR factorization (_GEQRF) failed: INFO = " << info; 00388 throw std::runtime_error (os.str()); 00389 } 00390 00391 // Copy out the R factor from A_copy (where we computed the QR 00392 // factorization in place) into R. 00393 copy_upper_triangle (ncols, ncols, R.get(), ldr, A_copy.get(), lda); 00394 00395 if (b_debug) 00396 { 00397 cerr << endl << "-- R factor:" << endl; 00398 print_local_matrix (cerr, ncols, ncols, R.get(), R.lda()); 00399 cerr << endl; 00400 } 00401 00402 // The explicit Q factor will be computed in place, so copy the 00403 // result of the factorization into Q. 00404 Q.copy (A_copy); 00405 00406 // Compute the explicit Q factor 00407 lapack.ORGQR (nrows, ncols, ncols, Q.get(), ldq, &tau[0], &work[0], lwork, &info); 00408 if (info != 0) 00409 { 00410 ostringstream os; 00411 os << "LAPACK explicit Q computation (_ORGQR) failed: INFO = " << info; 00412 throw std::runtime_error (os.str()); 00413 } 00414 00415 // Validate the factorization 00416 std::pair< magnitude_type, magnitude_type > results = 00417 local_verify (nrows, ncols, A.get(), lda, Q.get(), ldq, R.get(), ldr); 00418 00419 // Print the results 00420 if (human_readable) 00421 cout << "LAPACK QR (DGEQRF and DORGQR):" << endl 00422 << "Datatype: " << datatype << endl 00423 << "Relative residual: " << results.first << endl 00424 << "Relative orthogonality: " << results.second << endl; 00425 else 00426 cout << "LAPACK" 00427 << "," << datatype 00428 << "," << nrows 00429 << "," << ncols 00430 << "," << size_t(0) // cache_block_size 00431 << "," << false // contiguous_cache_blocks 00432 << "," << results.first 00433 << "," << results.second 00434 << endl; 00435 } 00436 00437 00438 void 00439 verifyLapack (const int nrows, 00440 const int ncols, 00441 const bool test_complex_arithmetic, 00442 const bool human_readable, 00443 const bool b_debug) 00444 { 00445 using TSQR::Random::NormalGenerator; 00446 using std::complex; 00447 using std::string; 00448 using std::vector; 00449 00450 // 00451 // We do tests one after another, using the seed from the 00452 // previous test in the current test, so that the pseudorandom 00453 // streams used by the tests are independent. 00454 // 00455 00456 // On output: Seed for the next pseudorandom number generator. 00457 vector< int > iseed(4); 00458 string datatype; // name of the current datatype being tested 00459 00460 // First test. The PRNG seeds itself with a default value. 00461 // This will be the same each time, so if you want 00462 // nondeterministic behavior, you should pick the seed values 00463 // yourself. 00464 NormalGenerator< int, float > normgenS; 00465 datatype = "float"; 00466 verifyLapackTemplate (normgenS, datatype, nrows, ncols, 00467 human_readable, b_debug); 00468 // Fetch the pseudorandom seed from the previous test. 00469 normgenS.getSeed (iseed); 00470 NormalGenerator< int, double > normgenD (iseed); 00471 // Next test. 00472 datatype = "double"; 00473 verifyLapackTemplate (normgenD, datatype, nrows, ncols, 00474 human_readable, b_debug); 00475 00476 if (test_complex_arithmetic) 00477 { 00478 normgenD.getSeed (iseed); 00479 NormalGenerator< int, complex<float> > normgenC (iseed); 00480 datatype = "complex<float>"; 00481 verifyLapackTemplate (normgenC, datatype, nrows, ncols, 00482 human_readable, b_debug); 00483 normgenC.getSeed (iseed); 00484 NormalGenerator< int, complex<double> > normgenZ (iseed); 00485 datatype = "complex<double>"; 00486 verifyLapackTemplate (normgenZ, datatype, nrows, ncols, 00487 human_readable, b_debug); 00488 } 00489 } 00490 00491 00492 } // namespace Test 00493 } // namespace TSQR
1.7.4