|
Anasazi Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Anasazi: Block Eigensolvers Package 00005 // Copyright (2010) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 #ifndef __TSQR_TbbRecursiveTsqr_hpp 00030 #define __TSQR_TbbRecursiveTsqr_hpp 00031 00032 #include <Tsqr_ApplyType.hpp> 00033 #include <Tsqr_CacheBlocker.hpp> 00034 #include <Tsqr_SequentialTsqr.hpp> 00035 #include <TbbTsqr_Partitioner.hpp> 00036 00037 #include <stdexcept> 00038 #include <string> 00039 #include <utility> // std::pair 00040 #include <vector> 00041 00044 00045 namespace TSQR { 00046 namespace TBB { 00047 00048 template< class LocalOrdinal, class Scalar > 00049 class TbbRecursiveTsqr { 00050 public: 00057 TbbRecursiveTsqr (const size_t num_cores = 1, 00058 const size_t cache_block_size = 0); 00059 00063 size_t ncores() const { return ncores_; } 00064 00066 size_t cache_block_size() const { return seq_.cache_block_size(); } 00067 00069 typedef typename SequentialTsqr< LocalOrdinal, Scalar >::FactorOutput SeqOutput; 00072 typedef std::vector< std::vector< Scalar > > ParOutput; 00076 typedef typename std::pair< std::vector< SeqOutput >, ParOutput > FactorOutput; 00077 00081 void 00082 cache_block (const LocalOrdinal nrows, 00083 const LocalOrdinal ncols, 00084 Scalar A_out[], 00085 const Scalar A_in[], 00086 const LocalOrdinal lda_in) const; 00087 00092 void 00093 un_cache_block (const LocalOrdinal nrows, 00094 const LocalOrdinal ncols, 00095 Scalar A_out[], 00096 const LocalOrdinal lda_out, 00097 const Scalar A_in[]) const; 00098 00105 FactorOutput 00106 factor (const LocalOrdinal nrows, 00107 const LocalOrdinal ncols, 00108 Scalar A[], 00109 const LocalOrdinal lda, 00110 Scalar R[], 00111 const LocalOrdinal ldr, 00112 const bool contiguous_cache_blocks = false); 00113 00117 void 00118 apply (const std::string& op, 00119 const LocalOrdinal nrows, 00120 const LocalOrdinal ncols_C, 00121 Scalar C[], 00122 const LocalOrdinal ldc, 00123 const LocalOrdinal ncols_Q, 00124 const Scalar Q[], 00125 const LocalOrdinal ldq, 00126 const FactorOutput& factor_output, 00127 const bool contiguous_cache_blocks = false); 00128 00131 void 00132 explicit_Q (const LocalOrdinal nrows, 00133 const LocalOrdinal ncols_Q_in, 00134 const Scalar Q_in[], 00135 const LocalOrdinal ldq_in, 00136 const LocalOrdinal ncols_Q_out, 00137 Scalar Q_out[], 00138 const LocalOrdinal ldq_out, 00139 const FactorOutput& factor_output, 00140 const bool contiguous_cache_blocks = false); 00141 00142 private: 00143 size_t ncores_; 00144 TSQR::SequentialTsqr< LocalOrdinal, Scalar > seq_; 00145 Partitioner< LocalOrdinal, Scalar > partitioner_; 00146 00147 typedef MatView< LocalOrdinal, Scalar > mat_view; 00148 typedef ConstMatView< LocalOrdinal, Scalar > const_mat_view; 00149 typedef std::pair< const_mat_view, const_mat_view > const_split_t; 00150 typedef std::pair< mat_view, mat_view > split_t; 00151 typedef std::pair< const_mat_view, mat_view > top_blocks_t; 00152 typedef std::vector< top_blocks_t > array_top_blocks_t; 00153 00154 void 00155 explicit_Q_helper (const size_t P_first, 00156 const size_t P_last, 00157 MatView< LocalOrdinal, Scalar >& Q_out, 00158 const bool contiguous_cache_blocks); 00159 00162 MatView< LocalOrdinal, Scalar > 00163 factor_helper (const size_t P_first, 00164 const size_t P_last, 00165 const size_t depth, 00166 MatView< LocalOrdinal, Scalar > A, 00167 std::vector< SeqOutput >& seq_outputs, 00168 ParOutput& par_outputs, 00169 Scalar R[], 00170 const LocalOrdinal ldr, 00171 const bool contiguous_cache_blocks); 00172 00173 bool 00174 apply_helper_empty (const size_t P_first, 00175 const size_t P_last, 00176 const_mat_view &Q, 00177 mat_view& C) const; 00178 00182 void 00183 build_partition_array (const size_t P_first, 00184 const size_t P_last, 00185 array_top_blocks_t& top_blocks, 00186 const_mat_view& Q, 00187 mat_view& C, 00188 const bool contiguous_cache_blocks) const; 00189 00192 void 00193 apply_helper (const size_t P_first, 00194 const size_t P_last, 00195 const_mat_view Q, 00196 mat_view C, 00197 array_top_blocks_t& top_blocks, 00198 const FactorOutput& factor_output, 00199 const bool contiguous_cache_blocks); 00200 00204 std::pair< ConstMatView< LocalOrdinal, Scalar >, MatView< LocalOrdinal, Scalar > > 00205 apply_transpose_helper (const std::string& op, 00206 const size_t P_first, 00207 const size_t P_last, 00208 const_mat_view Q, 00209 mat_view C, 00210 const FactorOutput& factor_output, 00211 const bool contiguous_cache_blocks); 00212 00213 void 00214 factor_pair (const size_t P_top, 00215 const size_t P_bot, 00216 mat_view& A_top, 00217 mat_view& A_bot, 00218 std::vector< std::vector< Scalar > >& par_outputs, 00219 const bool contiguous_cache_blocks); 00220 00221 void 00222 apply_pair (const std::string& trans, 00223 const size_t P_top, 00224 const size_t P_bot, 00225 const_mat_view& Q_bot, 00226 const std::vector< std::vector< Scalar > >& tau_arrays, 00227 mat_view& C_top, 00228 mat_view& C_bot, 00229 const bool contiguous_cache_blocks); 00230 00231 void 00232 cache_block_helper (MatView< LocalOrdinal, Scalar >& A_out, 00233 ConstMatView< LocalOrdinal, Scalar >& A_in, 00234 const size_t P_first, 00235 const size_t P_last) const; 00236 00237 void 00238 un_cache_block_helper (MatView< LocalOrdinal, Scalar >& A_out, 00239 const ConstMatView< LocalOrdinal, Scalar >& A_in, 00240 const size_t P_first, 00241 const size_t P_last) const; 00242 00243 }; // class TbbRecursiveTsqr 00244 } // namespace TBB 00245 } // namespace TSQR 00246 00247 #include <TSQR/TBB/TbbRecursiveTsqr_Def.hpp> 00248 00249 #endif // __TSQR_TbbRecursiveTsqr_hpp
1.7.4