|
Anasazi Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Anasazi: Block Eigensolvers Package 00005 // Copyright (2010) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 #ifndef __TSQR_TBB_ApplyTask_hpp 00030 #define __TSQR_TBB_ApplyTask_hpp 00031 00032 #include <tbb/task.h> 00033 #include <TbbTsqr_Partitioner.hpp> 00034 #include <Tsqr_SequentialTsqr.hpp> 00035 00038 00039 namespace TSQR { 00040 namespace TBB { 00041 00042 template< class LocalOrdinal, class Scalar, class TimerType > 00043 class ApplyTask : public tbb::task { 00044 public: 00045 typedef MatView< LocalOrdinal, Scalar > mat_view; 00046 typedef ConstMatView< LocalOrdinal, Scalar > const_mat_view; 00047 typedef std::pair< mat_view, mat_view > split_t; 00048 typedef std::pair< const_mat_view, const_mat_view > const_split_t; 00049 typedef std::pair< const_mat_view, mat_view > top_blocks_t; 00050 typedef std::vector< top_blocks_t > array_top_blocks_t; 00051 00053 typedef typename SequentialTsqr< LocalOrdinal, Scalar >::FactorOutput SeqOutput; 00056 typedef std::vector< std::vector< Scalar > > ParOutput; 00060 typedef typename std::pair< std::vector< SeqOutput >, ParOutput > FactorOutput; 00061 00062 ApplyTask (const size_t P_first__, 00063 const size_t P_last__, 00064 ConstMatView< LocalOrdinal, Scalar > Q, 00065 MatView< LocalOrdinal, Scalar > C, 00066 array_top_blocks_t& top_blocks, 00067 const FactorOutput& factor_output, 00068 const SequentialTsqr< LocalOrdinal, Scalar >& seq, 00069 double& my_seq_timing, 00070 double& min_seq_timing, 00071 double& max_seq_timing, 00072 const bool contiguous_cache_blocks) : 00073 P_first_ (P_first__), 00074 P_last_ (P_last__), 00075 Q_ (Q), 00076 C_ (C), 00077 top_blocks_ (top_blocks), 00078 factor_output_ (factor_output), 00079 seq_ (seq), 00080 apply_type_ (ApplyType::NoTranspose), // FIXME: modify to support Q^T and Q^H 00081 my_seq_timing_ (my_seq_timing), 00082 min_seq_timing_ (min_seq_timing), 00083 max_seq_timing_ (max_seq_timing), 00084 contiguous_cache_blocks_ (contiguous_cache_blocks) 00085 {} 00086 00087 tbb::task* execute () { 00088 if (P_first_ > P_last_ || Q_.empty() || C_.empty()) 00089 return NULL; 00090 else if (P_first_ == P_last_) 00091 { 00092 TimerType timer(""); 00093 timer.start(); 00094 const std::vector< SeqOutput >& seq_outputs = factor_output_.first; 00095 seq_.apply (apply_type_, Q_.nrows(), Q_.ncols(), 00096 Q_.get(), Q_.lda(), seq_outputs[P_first_], 00097 C_.ncols(), C_.get(), C_.lda(), 00098 contiguous_cache_blocks_); 00099 my_seq_timing_ = timer.stop(); 00100 return NULL; 00101 } 00102 else 00103 { 00104 // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last] 00105 const size_t P_mid = (P_first_ + P_last_) / 2; 00106 const_split_t Q_split = 00107 partitioner_.split (Q_, P_first_, P_mid, P_last_, 00108 contiguous_cache_blocks_); 00109 split_t C_split = 00110 partitioner_.split (C_, P_first_, P_mid, P_last_, 00111 contiguous_cache_blocks_); 00112 00113 double top_timing; 00114 double top_min_timing = 0.0; 00115 double top_max_timing = 0.0; 00116 double bot_timing; 00117 double bot_min_timing = 0.0; 00118 double bot_max_timing = 0.0; 00119 00120 apply_pair (P_first_, P_mid+1); 00121 ApplyTask& topTask = *new( allocate_child() ) 00122 ApplyTask (P_first_, P_mid, Q_split.first, C_split.first, 00123 top_blocks_, factor_output_, seq_, 00124 top_timing, top_min_timing, top_max_timing, 00125 contiguous_cache_blocks_); 00126 ApplyTask& botTask = *new( allocate_child() ) 00127 ApplyTask (P_mid+1, P_last_, Q_split.second, C_split.second, 00128 top_blocks_, factor_output_, seq_, 00129 bot_timing, bot_min_timing, bot_max_timing, 00130 contiguous_cache_blocks_); 00131 00132 set_ref_count (3); // 3 children (2 + 1 for the wait) 00133 spawn (topTask); 00134 spawn_and_wait_for_all (botTask); 00135 00136 top_min_timing = (top_min_timing == 0.0) ? top_timing : top_min_timing; 00137 top_max_timing = (top_max_timing == 0.0) ? top_timing : top_max_timing; 00138 00139 bot_min_timing = (bot_min_timing == 0.0) ? bot_timing : bot_min_timing; 00140 bot_max_timing = (bot_max_timing == 0.0) ? bot_timing : bot_max_timing; 00141 00142 min_seq_timing_ = std::min (top_min_timing, bot_min_timing); 00143 max_seq_timing_ = std::min (top_max_timing, bot_max_timing); 00144 00145 return NULL; 00146 } 00147 } 00148 00149 private: 00150 size_t P_first_, P_last_; 00151 const_mat_view Q_; 00152 mat_view C_; 00153 array_top_blocks_t& top_blocks_; 00154 const FactorOutput& factor_output_; 00155 SequentialTsqr< LocalOrdinal, Scalar > seq_; 00156 TSQR::ApplyType apply_type_; 00157 TSQR::Combine< LocalOrdinal, Scalar > combine_; 00158 Partitioner< LocalOrdinal, Scalar > partitioner_; 00159 double& my_seq_timing_; 00160 double& min_seq_timing_; 00161 double& max_seq_timing_; 00162 bool contiguous_cache_blocks_; 00163 00164 void 00165 apply_pair (const size_t P_top, 00166 const size_t P_bot) 00167 { 00168 if (P_top == P_bot) 00169 throw std::logic_error("apply_pair: should never get here!"); 00170 00171 const_mat_view& Q_bot = top_blocks_[P_bot].first; 00172 mat_view& C_top = top_blocks_[P_top].second; 00173 mat_view& C_bot = top_blocks_[P_bot].second; 00174 00175 const ParOutput& par_output = factor_output_.second; 00176 const std::vector< Scalar >& tau = par_output[P_bot]; 00177 std::vector< Scalar > work (C_top.ncols()); 00178 combine_.apply_pair (apply_type_, C_top.ncols(), Q_bot.ncols(), 00179 Q_bot.get(), Q_bot.lda(), &tau[0], 00180 C_top.get(), C_top.lda(), 00181 C_bot.get(), C_bot.lda(), &work[0]); 00182 } 00183 00184 }; 00185 00186 } // namespace TBB 00187 } // namespace TSQR 00188 00189 00190 #endif // __TSQR_TBB_ApplyTask_hpp
1.7.4