|
Anasazi Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Anasazi: Block Eigensolvers Package 00005 // Copyright (2010) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 #ifndef __TSQR_CacheBlocker_hpp 00030 #define __TSQR_CacheBlocker_hpp 00031 00032 #include <Tsqr_CacheBlockingStrategy.hpp> 00033 #include <Tsqr_MatView.hpp> 00034 #include <Tsqr_Util.hpp> 00035 00036 // #include <iostream> 00037 #include <sstream> 00038 #include <stdexcept> 00039 00040 // #define CACHE_BLOCKER_DEBUG 1 00041 // #ifdef CACHE_BLOCKER_DEBUG 00042 // # undef CACHE_BLOCKER_DEBUG 00043 // #endif // CACHE_BLOCKER_DEBUG 00044 #ifdef CACHE_BLOCKER_DEBUG 00045 # include <iostream> 00046 using std::cerr; 00047 using std::endl; 00048 #endif // CACHE_BLOCKER_DEBUG 00049 00052 00053 namespace TSQR { 00054 00063 template< class Ordinal, class Scalar > // class CBS=CacheBlockingStrategy< Ordinal, Scalar > 00064 class CacheBlocker { 00065 private: 00066 typedef MatView< Ordinal, Scalar > mat_view; 00067 typedef ConstMatView< Ordinal, Scalar > const_mat_view; 00068 00069 void 00070 validate () 00071 { 00072 if (nrows_cache_block_ < ncols_) 00073 { 00074 std::ostringstream os; 00075 os << "Cache block is too small: only " << nrows_cache_block_ 00076 << " rows fit, but need at least as many rows as columns (= " 00077 << ncols_ << ")"; 00078 throw std::logic_error (os.str()); 00079 } 00080 } 00081 00082 public: 00090 CacheBlocker (const Ordinal num_rows, 00091 const Ordinal num_cols, 00092 const CacheBlockingStrategy< Ordinal, Scalar >& strategy) : 00093 nrows_ (num_rows), 00094 ncols_ (num_cols), 00095 strategy_ (strategy) 00096 { 00097 #ifdef CACHE_BLOCKER_DEBUG 00098 cerr << "CacheBlocker:" << endl 00099 << "# rows = " << nrows() << endl 00100 << "# cols = " << ncols() << endl 00101 << "Cache block size (bytes) = " << strategy_.cache_block_size() 00102 << endl << endl; 00103 #endif 00104 const Ordinal temp_nrows_cache_block = 00105 strategy_.cache_block_num_rows (ncols()); 00106 #ifdef CACHE_BLOCKER_DEBUG 00107 cerr << "Strategy says: # rows per cache block = " 00108 << temp_nrows_cache_block << endl; 00109 #endif 00110 nrows_cache_block_ = temp_nrows_cache_block; 00111 validate (); 00112 } 00113 00115 CacheBlocker (const CacheBlocker& rhs) : 00116 nrows_ (rhs.nrows()), 00117 ncols_ (rhs.ncols()), 00118 strategy_ (rhs.strategy_) 00119 {} 00120 00122 CacheBlocker& operator= (const CacheBlocker& rhs) { 00123 nrows_ = rhs.nrows(); 00124 ncols_ = rhs.ncols(); 00125 strategy_ = rhs.strategy_; 00126 return *this; 00127 } 00128 00130 size_t cache_block_size () const { return strategy_.cache_block_size(); } 00131 00133 Ordinal nrows () const { return nrows_; } 00134 00136 Ordinal ncols () const { return ncols_; } 00137 00145 template< class MatrixViewType > 00146 MatrixViewType 00147 split_top_block (MatrixViewType& A, const bool contiguous_cache_blocks) const 00148 { 00149 typedef typename MatrixViewType::ordinal_type ordinal_type; 00150 const ordinal_type nrows_top = 00151 strategy_.top_block_split_nrows (A.nrows(), ncols(), 00152 nrows_cache_block()); 00153 // split_top() modifies A 00154 return A.split_top (nrows_top, contiguous_cache_blocks); 00155 } 00156 00159 template< class MatrixViewType > 00160 MatrixViewType 00161 top_block (const MatrixViewType& A, const bool contiguous_cache_blocks) const 00162 { 00163 typedef typename MatrixViewType::ordinal_type ordinal_type; 00164 const ordinal_type nrows_top = 00165 strategy_.top_block_split_nrows (A.nrows(), ncols(), 00166 nrows_cache_block()); 00167 MatrixViewType A_copy (A); 00168 return A_copy.split_top (nrows_top, contiguous_cache_blocks); 00169 } 00170 00171 template< class MatrixViewType > 00172 MatrixViewType 00173 split_bottom_block (MatrixViewType& A, const bool contiguous_cache_blocks) const 00174 { 00175 typedef typename MatrixViewType::ordinal_type ordinal_type; 00176 const ordinal_type nrows_bottom = 00177 strategy_.bottom_block_split_nrows (A.nrows(), ncols(), 00178 nrows_cache_block()); 00179 // split_bottom() modifies A 00180 return A.split_bottom (nrows_bottom, contiguous_cache_blocks); 00181 } 00182 00191 template< class MatrixViewType > 00192 void 00193 fill_with_zeros (MatrixViewType A, 00194 const bool contiguous_cache_blocks) const 00195 { 00196 // Note: if the cache blocks are stored contiguously, A.lda() 00197 // won't be the correct leading dimension of A, but it won't 00198 // matter: we only ever operate on A_cur here, and A_cur's 00199 // leading dimension is set correctly by split_top_block(). 00200 while (! A.empty()) 00201 { 00202 // This call modifies A, but that's OK since we passed the 00203 // input view by copy, not by reference. 00204 MatrixViewType A_cur = split_top_block (A, contiguous_cache_blocks); 00205 A_cur.fill (Scalar(0)); 00206 } 00207 } 00208 00226 void 00227 cache_block (const Ordinal num_rows, 00228 const Ordinal num_cols, 00229 Scalar A_out[], 00230 const Scalar A_in[], 00231 const Ordinal lda_in) const 00232 { 00233 // We say "*_rest" because it points to the remaining part of 00234 // the matrix left to cache block; at the beginning, the 00235 // "remaining" part is the whole matrix, but that will change as 00236 // the algorithm progresses. 00237 const_mat_view A_in_rest (num_rows, num_cols, A_in, lda_in); 00238 // Leading dimension doesn't matter since A_out will be cache blocked. 00239 mat_view A_out_rest (num_rows, num_cols, A_out, lda_in); 00240 00241 while (! A_in_rest.empty()) 00242 { 00243 if (A_out_rest.empty()) 00244 throw std::logic_error("A_out_rest is empty, but A_in_rest is not"); 00245 00246 // This call modifies A_in_rest. 00247 const_mat_view A_in_cur = split_top_block (A_in_rest, false); 00248 00249 // This call modifies A_out_rest. 00250 mat_view A_out_cur = split_top_block (A_out_rest, true); 00251 00252 copy_matrix (A_in_cur.nrows(), num_cols, A_out_cur.get(), 00253 A_out_cur.lda(), A_in_cur.get(), A_in_cur.lda()); 00254 } 00255 } 00256 00259 void 00260 un_cache_block (const Ordinal num_rows, 00261 const Ordinal num_cols, 00262 Scalar A_out[], 00263 const Ordinal lda_out, 00264 const Scalar A_in[]) const 00265 { 00266 // We say "*_rest" because it points to the remaining part of 00267 // the matrix left to cache block; at the beginning, the 00268 // "remaining" part is the whole matrix, but that will change as 00269 // the algorithm progresses. 00270 // 00271 // Leading dimension doesn't matter since A_in is cache blocked. 00272 const_mat_view A_in_rest (num_rows, num_cols, A_in, lda_out); 00273 mat_view A_out_rest (num_rows, num_cols, A_out, lda_out); 00274 00275 while (! A_in_rest.empty()) 00276 { 00277 if (A_out_rest.empty()) 00278 throw std::logic_error("A_out_rest is empty, but A_in_rest is not"); 00279 00280 // This call modifies A_in_rest. 00281 const_mat_view A_in_cur = split_top_block (A_in_rest, true); 00282 00283 // This call modifies A_out_rest. 00284 mat_view A_out_cur = split_top_block (A_out_rest, false); 00285 00286 copy_matrix (A_in_cur.nrows(), num_cols, A_out_cur.get(), 00287 A_out_cur.lda(), A_in_cur.get(), A_in_cur.lda()); 00288 } 00289 } 00290 00291 00292 void 00293 fill_with_zeros (const Ordinal num_rows, 00294 const Ordinal num_cols, 00295 Scalar A[], 00296 const Ordinal lda, 00297 const bool contiguous_cache_blocks) 00298 { 00299 // We say "A_rest" because it points to the remaining part of 00300 // the matrix left to process; at the beginning, the "remaining" 00301 // part is the whole matrix, but that will change as the 00302 // algorithm progresses. 00303 // 00304 // Note: if the cache blocks are stored contiguously, lda won't 00305 // be the correct leading dimension of A, but it won't matter: 00306 // we only ever operate on A_cur here, and A_cur's leading 00307 // dimension is set correctly by A_rest.split_top(). 00308 mat_view A_rest (num_rows, num_cols, A, lda); 00309 00310 while (! A_rest.empty()) 00311 { 00312 // This call modifies A_rest. 00313 mat_view A_cur = split_top_block (A_rest, contiguous_cache_blocks); 00314 A_cur.fill (Scalar(0)); 00315 } 00316 } 00317 00318 00319 private: 00320 Ordinal nrows_; 00321 Ordinal ncols_; 00322 CacheBlockingStrategy< Ordinal, Scalar > strategy_; 00323 Ordinal nrows_cache_block_; 00324 00330 size_t nrows_cache_block () const { return nrows_cache_block_; } 00331 }; 00332 00333 } // namespace TSQR 00334 00335 00336 #endif // __TSQR_CacheBlocker_hpp
1.7.4