|
Anasazi Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Anasazi: Block Eigensolvers Package 00005 // Copyright (2010) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 #ifndef __TSQR_CombineDefault_hpp 00030 #define __TSQR_CombineDefault_hpp 00031 00032 #include <Tsqr_ApplyType.hpp> 00033 #include <Tsqr_Lapack.hpp> 00034 #include <Tsqr_Matrix.hpp> 00035 #include <Tsqr_ScalarTraits.hpp> 00036 00037 #include <algorithm> 00038 #include <sstream> 00039 #include <stdexcept> 00040 00043 00044 namespace TSQR { 00045 00057 template< class Ordinal, class Scalar > 00058 class CombineDefault { 00059 private: 00060 typedef LAPACK< Ordinal, Scalar > lapack_type; 00061 00062 public: 00063 typedef Scalar scalar_type; 00064 typedef typename ScalarTraits< Scalar >::magnitude_type magnitude_type; 00065 typedef Ordinal ordinal_type; 00066 00067 CombineDefault () {} 00068 00072 static bool QR_produces_R_factor_with_nonnegative_diagonal() 00073 { 00074 return lapack_type::QR_produces_R_factor_with_nonnegative_diagonal(); 00075 } 00076 00077 void 00078 factor_first (const Ordinal nrows, 00079 const Ordinal ncols, 00080 Scalar A[], 00081 const Ordinal lda, 00082 Scalar tau[], 00083 Scalar work[]) 00084 { 00085 // info must be an int, not a LocalOrdinal, since LAPACK 00086 // routines always (???) use int for the INFO output argument. 00087 int info = 0; 00088 lapack_.GEQR2 (nrows, ncols, A, lda, tau, work, &info); 00089 if (info != 0) 00090 { 00091 std::ostringstream os; 00092 os << "TSQR::CombineDefault::factor_first(): LAPACK\'s " 00093 << "GEQR2 failed with INFO = " << info; 00094 throw std::logic_error (os.str()); 00095 } 00096 } 00097 00098 void 00099 apply_first (const ApplyType& applyType, 00100 const Ordinal nrows, 00101 const Ordinal ncols_C, 00102 const Ordinal ncols_A, 00103 const Scalar A[], 00104 const Ordinal lda, 00105 const Scalar tau[], 00106 Scalar C[], 00107 const Ordinal ldc, 00108 Scalar work[]) 00109 { 00110 int info = 0; 00111 // LAPACK has the nice feature that it only reads the first 00112 // letter of input strings that specify things like which side 00113 // to which to apply the operator, or whether to apply the 00114 // transpose. That means we can make the strings more verbose, 00115 // as in "Left" here for the SIDE parameter. 00116 lapack_.ORM2R ("Left", applyType.toString().c_str(), 00117 nrows, ncols_C, ncols_A, 00118 A, lda, tau, 00119 C, ldc, work, &info); 00120 if (info != 0) 00121 { 00122 std::ostringstream os; 00123 os << "TSQR::CombineDefault::apply_first(): LAPACK\'s " 00124 << "ORM2R failed with INFO = " << info; 00125 throw std::logic_error (os.str()); 00126 } 00127 } 00128 00129 void 00130 apply_inner (const ApplyType& apply_type, 00131 const Ordinal m, 00132 const Ordinal ncols_C, 00133 const Ordinal ncols_Q, 00134 const Scalar A[], 00135 const Ordinal lda, 00136 const Scalar tau[], 00137 Scalar C_top[], 00138 const Ordinal ldc_top, 00139 Scalar C_bot[], 00140 const Ordinal ldc_bot, 00141 Scalar work[]) 00142 { 00143 typedef ConstMatView< Ordinal, Scalar > const_matview_type; 00144 typedef MatView< Ordinal, Scalar > matview_type; 00145 const Ordinal numRows = m + ncols_Q; 00146 00147 A_buf_.reshape (numRows, ncols_Q); 00148 A_buf_.fill (Scalar(0)); 00149 const_matview_type A_bot (m, ncols_Q, A, lda); 00150 matview_type A_buf_bot (m, ncols_Q, &A_buf_(ncols_Q, 0), A_buf_.lda()); 00151 A_buf_bot.copy (A_bot); 00152 00153 C_buf_.reshape (numRows, ncols_C); 00154 C_buf_.fill (Scalar(0)); 00155 matview_type C_buf_top (ncols_Q, ncols_C, &C_buf_(0, 0), C_buf_.lda()); 00156 matview_type C_buf_bot (m, ncols_C, &C_buf_(ncols_Q, 0), C_buf_.lda()); 00157 matview_type C_top_view (ncols_Q, ncols_C, C_top, ldc_top); 00158 matview_type C_bot_view (m, ncols_C, C_bot, ldc_bot); 00159 C_buf_top.copy (C_top_view); 00160 C_buf_bot.copy (C_bot_view); 00161 00162 int info = 0; 00163 lapack_.ORM2R ("Left", apply_type.toString().c_str(), 00164 numRows, ncols_C, ncols_Q, 00165 A_buf_.get(), A_buf_.lda(), tau, 00166 C_buf_.get(), C_buf_.lda(), 00167 work, &info); 00168 if (info != 0) 00169 { 00170 std::ostringstream os; 00171 os << "TSQR::CombineDefault::apply_inner(): LAPACK\'s " 00172 << "ORM2R failed with INFO = " << info; 00173 throw std::logic_error (os.str()); 00174 } 00175 // Copy back the results. 00176 C_top_view.copy (C_buf_top); 00177 C_bot_view.copy (C_buf_bot); 00178 } 00179 00180 void 00181 factor_inner (const Ordinal m, 00182 const Ordinal n, 00183 Scalar R[], 00184 const Ordinal ldr, 00185 Scalar A[], 00186 const Ordinal lda, 00187 Scalar tau[], 00188 Scalar work[]) 00189 { 00190 const Ordinal numRows = m + n; 00191 00192 A_buf_.reshape (numRows, n); 00193 A_buf_.fill (Scalar(0)); 00194 // R might be a view of the upper triangle of a cache block, but 00195 // we only want to include the upper triangle in the 00196 // factorization. Thus, only copy the upper triangle of R into 00197 // the appropriate place in the buffer. 00198 copy_upper_triangle (n, n, &A_buf_(0, 0), A_buf_.lda(), R, ldr); 00199 copy_matrix (m, n, &A_buf_(n, 0), A_buf_.lda(), A, lda); 00200 00201 int info = 0; 00202 lapack_.GEQR2 (numRows, n, A_buf_.get(), A_buf_.lda(), tau, work, &info); 00203 if (info != 0) 00204 throw std::logic_error ("TSQR::CombineDefault: GEQR2 failed"); 00205 00206 // Copy back the results. R might be a view of the upper 00207 // triangle of a cache block, so only copy into the upper 00208 // triangle of R. 00209 copy_upper_triangle (n, n, R, ldr, &A_buf_(0, 0), A_buf_.lda()); 00210 copy_matrix (m, n, A, lda, &A_buf_(n, 0), A_buf_.lda()); 00211 } 00212 00213 void 00214 factor_pair (const Ordinal n, 00215 Scalar R_top[], 00216 const Ordinal ldr_top, 00217 Scalar R_bot[], 00218 const Ordinal ldr_bot, 00219 Scalar tau[], 00220 Scalar work[]) 00221 { 00222 const Ordinal numRows = Ordinal(2) * n; 00223 00224 A_buf_.reshape (numRows, n); 00225 A_buf_.fill (Scalar(0)); 00226 // Copy the inputs into the compute buffer. Only touch the 00227 // upper triangles of R_top and R_bot, since they each may be 00228 // views of some cache block (where the strict lower triangle 00229 // contains things we don't want to include in the 00230 // factorization). 00231 copy_upper_triangle (n, n, &A_buf_(0, 0), A_buf_.lda(), R_top, ldr_top); 00232 copy_upper_triangle (n, n, &A_buf_(n, 0), A_buf_.lda(), R_bot, ldr_bot); 00233 00234 int info = 0; 00235 lapack_.GEQR2 (numRows, n, A_buf_.get(), A_buf_.lda(), tau, work, &info); 00236 if (info != 0) 00237 { 00238 std::ostringstream os; 00239 os << "TSQR::CombineDefault::factor_pair(): " 00240 << "GEQR2 failed with INFO = " << info; 00241 throw std::logic_error (os.str()); 00242 } 00243 00244 // Copy back the results. Only read the upper triangles of the 00245 // two n by n row blocks of A_buf_ (this means we don't have to 00246 // zero out the strict lower triangles), and only touch the 00247 // upper triangles of R_top and R_bot. 00248 copy_upper_triangle (n, n, R_top, ldr_top, &A_buf_(0, 0), A_buf_.lda()); 00249 copy_upper_triangle (n, n, R_bot, ldr_bot, &A_buf_(n, 0), A_buf_.lda()); 00250 } 00251 00252 void 00253 apply_pair (const ApplyType& apply_type, 00254 const Ordinal ncols_C, 00255 const Ordinal ncols_Q, 00256 const Scalar R_bot[], 00257 const Ordinal ldr_bot, 00258 const Scalar tau[], 00259 Scalar C_top[], 00260 const Ordinal ldc_top, 00261 Scalar C_bot[], 00262 const Ordinal ldc_bot, 00263 Scalar work[]) 00264 { 00265 const Ordinal numRows = Ordinal(2) * ncols_Q; 00266 00267 A_buf_.reshape (numRows, ncols_Q); 00268 A_buf_.fill (Scalar(0)); 00269 copy_upper_triangle (ncols_Q, ncols_Q, 00270 &A_buf_(ncols_Q, 0), A_buf_.lda(), 00271 R_bot, ldr_bot); 00272 C_buf_.reshape (numRows, ncols_C); 00273 copy_matrix (ncols_Q, ncols_C, &C_buf_(0, 0), C_buf_.lda(), C_top, ldc_top); 00274 copy_matrix (ncols_Q, ncols_C, &C_buf_(ncols_Q, 0), C_buf_.lda(), C_bot, ldc_bot); 00275 00276 int info = 0; 00277 lapack_.ORM2R ("Left", apply_type.toString().c_str(), 00278 numRows, ncols_C, ncols_Q, 00279 A_buf_.get(), A_buf_.lda(), tau, 00280 C_buf_.get(), C_buf_.lda(), 00281 work, &info); 00282 if (info != 0) 00283 throw std::logic_error ("TSQR::CombineDefault: ORM2R failed"); 00284 00285 // Copy back the results. 00286 copy_matrix (ncols_Q, ncols_C, C_top, ldc_top, &C_buf_(0, 0), C_buf_.lda()); 00287 copy_matrix (ncols_Q, ncols_C, C_bot, ldc_bot, &C_buf_(ncols_Q, 0), C_buf_.lda()); 00288 } 00289 00290 private: 00291 LAPACK< Ordinal, Scalar > lapack_; 00292 Matrix< Ordinal, Scalar > A_buf_; 00293 Matrix< Ordinal, Scalar > C_buf_; 00294 }; 00295 00296 00297 } // namespace TSQR 00298 00299 #endif // __TSQR_CombineDefault_hpp
1.7.4