|
Anasazi Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Anasazi: Block Eigensolvers Package 00005 // Copyright (2010) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 #ifndef __TSQR_TBB_RevealRankTask_hpp 00030 #define __TSQR_TBB_RevealRankTask_hpp 00031 00032 #include <tbb/task.h> 00033 #include <TbbTsqr_Partitioner.hpp> 00034 #include <Tsqr_SequentialTsqr.hpp> 00035 00038 00039 namespace TSQR { 00040 namespace TBB { 00041 00042 template< class LocalOrdinal, class Scalar > 00043 class RevealRankTask : public tbb::task { 00044 public: 00045 typedef MatView< LocalOrdinal, Scalar > mat_view; 00046 typedef ConstMatView< LocalOrdinal, Scalar > const_mat_view; 00047 typedef std::pair< mat_view, mat_view > split_type; 00048 typedef SequentialTsqr< LocalOrdinal, Scalar > seq_tsqr_type; 00049 00050 RevealRankTask (const size_t P_first, 00051 const size_t P_last, 00052 const mat_view& Q, 00053 const const_mat_view& U, 00054 const seq_tsqr_type& seq, 00055 const bool contiguous_cache_blocks) : 00056 P_first_ (P_first), 00057 P_last_ (P_last), 00058 Q_ (Q), 00059 U_ (U), 00060 seq_ (seq), 00061 contiguous_cache_blocks_ (contiguous_cache_blocks) 00062 {} 00063 00064 tbb::task* execute () { 00065 using tbb::task; 00066 00067 if (P_first_ > P_last_ || Q_.empty()) 00068 return NULL; // shouldn't get here, but just in case... 00069 else if (P_first_ == P_last_) 00070 { 00071 // Use SequentialTsqr to compute Q*U for this core's local 00072 // part of Q. The method is called "Q_times_B" so that it 00073 // doesn't suggest any orthogonality of the B input 00074 // matrix, though in this case B is U and U is orthogonal. 00075 seq_.Q_times_B (Q_.nrows(), Q_.ncols(), Q_.get(), Q_.lda(), 00076 U_.get(), U_.lda(), contiguous_cache_blocks_); 00077 return NULL; 00078 } 00079 else 00080 { 00081 // "c": continuation task 00082 tbb::empty_task& c = *new( allocate_continuation() ) tbb::empty_task; 00083 00084 // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last] 00085 const size_t P_mid = (P_first_ + P_last_) / 2; 00086 split_type out_split = 00087 partitioner_.split (Q_, P_first_, P_mid, P_last_, 00088 contiguous_cache_blocks_); 00089 00090 RevealRankTask& topTask = *new( c.allocate_child() ) 00091 RevealRankTask (P_first_, P_mid, out_split.first, seq_, 00092 contiguous_cache_blocks_); 00093 RevealRankTask& botTask = *new( c.allocate_child() ) 00094 RevealRankTask (P_mid+1, P_last_, out_split.second, seq_, 00095 contiguous_cache_blocks_); 00096 // Set reference count of parent (in this case, the 00097 // continuation task) to 2 (since 2 children -- no 00098 // additional task since no waiting). 00099 c.set_ref_count (2); 00100 c.spawn (botTask); 00101 return &topTask; // scheduler bypass optimization 00102 } 00103 } 00104 00105 private: 00106 size_t P_first_, P_last_; 00107 mat_view Q_; 00108 const_mat_view U_; 00109 SequentialTsqr< LocalOrdinal, Scalar > seq_; 00110 Partitioner< LocalOrdinal, Scalar > partitioner_; 00111 bool contiguous_cache_blocks_; 00112 }; 00113 00114 } // namespace TBB 00115 } // namespace TSQR 00116 00117 00118 #endif // __TSQR_TBB_RevealRankTask_hpp
1.7.4