|
Anasazi Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Anasazi: Block Eigensolvers Package 00005 // Copyright (2010) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 #ifndef __TSQR_TbbTsqr_hpp 00030 #define __TSQR_TbbTsqr_hpp 00031 00032 #include <TbbTsqr_TbbParallelTsqr.hpp> 00033 #include <Tsqr_TimeStats.hpp> 00034 #include <Teuchos_Time.hpp> 00035 // #include <TbbRecursiveTsqr.hpp> 00036 00037 #include <stdexcept> 00038 #include <string> 00039 #include <utility> // std::pair 00040 #include <vector> 00041 00044 00045 namespace TSQR { 00046 namespace TBB { 00047 00061 template< class LocalOrdinal, class Scalar, class TimerType = Teuchos::Time > 00062 class TbbTsqr { 00063 private: 00064 // Note: this is NOT a use of the pImpl idiom. TbbRecursiveTsqr 00065 // is a nonparallel implementation that emulates the control 00066 // flow of the parallel implementation TbbParallelTsqr. The 00067 // latter depends on the Intel Threading Building Blocks 00068 // library. 00069 // 00070 //TbbRecursiveTsqr< LocalOrdinal, Scalar > impl_; 00071 TbbParallelTsqr< LocalOrdinal, Scalar, TimerType > impl_; 00072 00073 // Collected running statistcs on various computations 00074 mutable TimeStats factorStats_, applyStats_, explicitQStats_, cacheBlockStats_, unCacheBlockStats_; 00075 00076 // Timers for various computations 00077 mutable TimerType factorTimer_, applyTimer_, explicitQTimer_, cacheBlockTimer_, unCacheBlockTimer_; 00078 00079 public: 00080 typedef Scalar scalar_type; 00081 typedef typename ScalarTraits< Scalar >::magnitude_type magnitude_type; 00082 typedef LocalOrdinal ordinal_type; 00083 // typedef typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::FactorOutput FactorOutput; 00084 typedef typename TbbParallelTsqr< LocalOrdinal, Scalar, TimerType >::FactorOutput FactorOutput; 00085 00087 size_t ncores() const { return impl_.ncores(); } 00088 00090 size_t cache_block_size() const { return impl_.cache_block_size(); } 00091 00104 TbbTsqr (const size_t numCores, 00105 const size_t cacheBlockSize = 0) : 00106 impl_ (numCores, cacheBlockSize), 00107 factorTimer_ ("TbbTsqr::factor"), 00108 applyTimer_ ("TbbTsqr::apply"), 00109 explicitQTimer_ ("TbbTsqr::explicit_Q"), 00110 cacheBlockTimer_ ("TbbTsqr::cache_block"), 00111 unCacheBlockTimer_ ("TbbTsqr::un_cache_block") 00112 {} 00113 00116 static bool QR_produces_R_factor_with_nonnegative_diagonal() { 00117 typedef TbbParallelTsqr< LocalOrdinal, Scalar, TimerType > impl_type; 00118 return impl_type::QR_produces_R_factor_with_nonnegative_diagonal(); 00119 } 00120 00121 void 00122 cache_block (const LocalOrdinal nrows, 00123 const LocalOrdinal ncols, 00124 Scalar A_out[], 00125 const Scalar A_in[], 00126 const LocalOrdinal lda_in) const 00127 { 00128 cacheBlockTimer_.start(true); 00129 impl_.cache_block (nrows, ncols, A_out, A_in, lda_in); 00130 cacheBlockStats_.update (cacheBlockTimer_.stop()); 00131 } 00132 00133 void 00134 un_cache_block (const LocalOrdinal nrows, 00135 const LocalOrdinal ncols, 00136 Scalar A_out[], 00137 const LocalOrdinal lda_out, 00138 const Scalar A_in[]) const 00139 { 00140 unCacheBlockTimer_.start(true); 00141 impl_.un_cache_block (nrows, ncols, A_out, lda_out, A_in); 00142 unCacheBlockStats_.update (unCacheBlockTimer_.stop()); 00143 } 00144 00145 void 00146 fill_with_zeros (const LocalOrdinal nrows, 00147 const LocalOrdinal ncols, 00148 Scalar C[], 00149 const LocalOrdinal ldc, 00150 const bool contiguous_cache_blocks = false) const 00151 { 00152 impl_.fill_with_zeros (nrows, ncols, C, ldc, contiguous_cache_blocks); 00153 } 00154 00155 template< class MatrixViewType > 00156 MatrixViewType 00157 top_block (const MatrixViewType& C, 00158 const bool contiguous_cache_blocks = false) const 00159 { 00160 return impl_.top_block (C, contiguous_cache_blocks); 00161 } 00162 00198 FactorOutput 00199 factor (const LocalOrdinal nrows, 00200 const LocalOrdinal ncols, 00201 Scalar A[], 00202 const LocalOrdinal lda, 00203 Scalar R[], 00204 const LocalOrdinal ldr, 00205 const bool contiguous_cache_blocks = false) 00206 { 00207 factorTimer_.start(true); 00208 return impl_.factor (nrows, ncols, A, lda, R, ldr, contiguous_cache_blocks); 00209 factorStats_.update (factorTimer_.stop()); 00210 } 00211 00245 void 00246 apply (const ApplyType& apply_type, 00247 const LocalOrdinal nrows, 00248 const LocalOrdinal ncols_Q, 00249 const Scalar Q[], 00250 const LocalOrdinal ldq, 00251 const FactorOutput& factor_output, 00252 const LocalOrdinal ncols_C, 00253 Scalar C[], 00254 const LocalOrdinal ldc, 00255 const bool contiguous_cache_blocks = false) 00256 { 00257 applyTimer_.start(true); 00258 impl_.apply (apply_type, nrows, ncols_Q, Q, ldq, factor_output, 00259 ncols_C, C, ldc, contiguous_cache_blocks); 00260 applyStats_.update (applyTimer_.stop()); 00261 } 00262 00289 void 00290 explicit_Q (const LocalOrdinal nrows, 00291 const LocalOrdinal ncols_Q_in, 00292 const Scalar Q_in[], 00293 const LocalOrdinal ldq_in, 00294 const FactorOutput& factor_output, 00295 const LocalOrdinal ncols_Q_out, 00296 Scalar Q_out[], 00297 const LocalOrdinal ldq_out, 00298 const bool contiguous_cache_blocks = false) 00299 { 00300 explicitQTimer_.start(true); 00301 impl_.explicit_Q (nrows, ncols_Q_in, Q_in, ldq_in, factor_output, 00302 ncols_Q_out, Q_out, ldq_out, contiguous_cache_blocks); 00303 explicitQStats_.update (explicitQTimer_.stop()); 00304 } 00305 00310 void 00311 Q_times_B (const LocalOrdinal nrows, 00312 const LocalOrdinal ncols, 00313 Scalar Q[], 00314 const LocalOrdinal ldq, 00315 const Scalar B[], 00316 const LocalOrdinal ldb, 00317 const bool contiguous_cache_blocks = false) const 00318 { 00319 impl_.Q_times_B (nrows, ncols, Q, ldq, B, ldb, contiguous_cache_blocks); 00320 } 00321 00329 LocalOrdinal 00330 reveal_R_rank (const LocalOrdinal ncols, 00331 Scalar R[], 00332 const LocalOrdinal ldr, 00333 Scalar U[], 00334 const LocalOrdinal ldu, 00335 const magnitude_type tol) const 00336 { 00337 return impl_.reveal_R_rank (ncols, R, ldr, U, ldu, tol); 00338 } 00339 00351 LocalOrdinal 00352 reveal_rank (const LocalOrdinal nrows, 00353 const LocalOrdinal ncols, 00354 Scalar Q[], 00355 const LocalOrdinal ldq, 00356 Scalar R[], 00357 const LocalOrdinal ldr, 00358 const magnitude_type tol, 00359 const bool contiguous_cache_blocks = false) 00360 { 00361 return impl_.reveal_rank (nrows, ncols, Q, ldq, R, ldr, tol, 00362 contiguous_cache_blocks); 00363 } 00364 00365 double 00366 min_seq_factor_timing () const { return impl_.min_seq_factor_timing(); } 00367 double 00368 max_seq_factor_timing () const { return impl_.max_seq_factor_timing(); } 00369 double 00370 min_seq_apply_timing () const { return impl_.min_seq_apply_timing(); } 00371 double 00372 max_seq_apply_timing () const { return impl_.max_seq_apply_timing(); } 00373 00374 void getStats (std::vector< TimeStats >& stats) { 00375 const int numStats = 5; 00376 stats.resize (numStats); 00377 stats[0] = factorStats_; 00378 stats[1] = applyStats_; 00379 stats[2] = explicitQStats_; 00380 stats[3] = cacheBlockStats_; 00381 stats[4] = unCacheBlockStats_; 00382 } 00383 00384 void getStatsLabels (std::vector< std::string >& labels) { 00385 const int numStats = 5; 00386 labels.resize (numStats); 00387 labels[0] = factorTimer_.name(); 00388 labels[1] = applyTimer_.name(); 00389 labels[2] = explicitQTimer_.name(); 00390 labels[3] = cacheBlockTimer_.name(); 00391 labels[4] = unCacheBlockTimer_.name(); 00392 } 00393 00394 }; // class TbbTsqr 00395 00396 } // namespace TBB 00397 } // namespace TSQR 00398 00399 #endif // __TSQR_TbbTsqr_hpp
1.7.4