|
Anasazi Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Anasazi: Block Eigensolvers Package 00005 // Copyright (2010) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 #ifndef __TSQR_TBB_TbbRecursiveTsqr_Def_hpp 00030 #define __TSQR_TBB_TbbRecursiveTsqr_Def_hpp 00031 00032 #include <TbbTsqr_TbbRecursiveTsqr.hpp> 00033 #include <Tsqr_ScalarTraits.hpp> 00034 #include <Tsqr_Util.hpp> 00035 00036 // #define TBB_DEBUG 1 00037 #ifdef TBB_DEBUG 00038 # include <iostream> 00039 using std::cerr; 00040 using std::endl; 00041 #endif // TBB_DEBUG 00042 00045 00046 namespace TSQR { 00047 namespace TBB { 00048 00049 template< class LocalOrdinal, class Scalar > 00050 void 00051 TbbRecursiveTsqr< LocalOrdinal, Scalar >:: 00052 explicit_Q_helper (const size_t P_first, 00053 const size_t P_last, 00054 MatView< LocalOrdinal, Scalar >& Q_out, 00055 const bool contiguous_cache_blocks) 00056 { 00057 if (P_first > P_last || Q_out.empty()) 00058 return; 00059 else if (P_first == P_last) 00060 { 00061 CacheBlocker< LocalOrdinal, Scalar > 00062 blocker (Q_out.nrows(), Q_out.ncols(), 00063 seq_.cache_blocking_strategy()); 00064 #ifdef TBB_DEBUG 00065 cerr << "explicit_Q_helper: On P_first = " << P_first 00066 << ", filling Q_out with zeros:" << endl 00067 << "Q_out is " << Q_out.nrows() << " x " << Q_out.ncols() 00068 << " with leading dimension " << Q_out.lda() << endl; 00069 #endif // TBB_DEBUG 00070 // Fill my partition with zeros. 00071 blocker.fill_with_zeros (Q_out, contiguous_cache_blocks); 00072 00073 // If our partition is the first (topmost), fill it with 00074 // the first Q_out.ncols() columns of the identity matrix. 00075 if (P_first == 0) 00076 { 00077 // Fetch the topmost cache block of my partition. Its 00078 // leading dimension should be set correctly by 00079 // top_block(). 00080 mat_view Q_out_top = 00081 blocker.top_block (Q_out, contiguous_cache_blocks); 00082 00083 for (LocalOrdinal j = 0; j < Q_out_top.ncols(); ++j) 00084 Q_out_top(j,j) = Scalar(1); 00085 } 00086 } 00087 else 00088 { 00089 // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last] 00090 const size_t P_mid = (P_first + P_last) / 2; 00091 split_t Q_out_split = 00092 partitioner_.split (Q_out, P_first, P_mid, P_last, 00093 contiguous_cache_blocks); 00094 explicit_Q_helper (P_first, P_mid, Q_out_split.first, 00095 contiguous_cache_blocks); 00096 explicit_Q_helper (P_mid+1, P_last, Q_out_split.second, 00097 contiguous_cache_blocks); 00098 } 00099 } 00100 00101 00102 template< class LocalOrdinal, class Scalar > 00103 MatView< LocalOrdinal, Scalar > 00104 TbbRecursiveTsqr< LocalOrdinal, Scalar >:: 00105 factor_helper (const size_t P_first, 00106 const size_t P_last, 00107 const size_t depth, 00108 MatView< LocalOrdinal, Scalar > A, 00109 std::vector< typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::SeqOutput >& seq_outputs, 00110 typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::ParOutput& par_outputs, 00111 Scalar R[], 00112 const LocalOrdinal ldr, 00113 const bool contiguous_cache_blocks) 00114 { 00115 mat_view A_top; 00116 if (P_first > P_last || A.empty()) 00117 return A; 00118 else if (P_first == P_last) 00119 { 00120 std::pair< SeqOutput, MatView< LocalOrdinal, Scalar > > results = 00121 seq_.factor (A.nrows(), A.ncols(), A.get(), A.lda(), contiguous_cache_blocks); 00122 seq_outputs[P_first] = results.first; 00123 A_top = A; 00124 } 00125 else 00126 { 00127 // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last] 00128 const size_t P_mid = (P_first + P_last) / 2; 00129 split_t A_split = 00130 partitioner_.split (A, P_first, P_mid, P_last, 00131 contiguous_cache_blocks); 00132 A_top = factor_helper (P_first, P_mid, depth+1, A_split.first, 00133 seq_outputs, par_outputs, R, ldr, 00134 contiguous_cache_blocks); 00135 mat_view A_bot = 00136 factor_helper (P_mid+1, P_last, depth+1, A_split.second, 00137 seq_outputs, par_outputs, R, ldr, 00138 contiguous_cache_blocks); 00139 // Combine the two results 00140 factor_pair (P_first, P_mid+1, A_top, A_bot, par_outputs, 00141 contiguous_cache_blocks); 00142 } 00143 00144 // If we're completely done, extract the final R factor from 00145 // the topmost partition. 00146 if (depth == 0) 00147 { 00148 #ifdef TBB_DEBUG 00149 cerr << "factor_helper: On P_first = " << P_first 00150 << ", extracting R:" << endl 00151 << "A_top is " << A_top.nrows() << " x " << A_top.ncols() 00152 << " with leading dimension " << A_top.lda(); 00153 #endif // TBB_DEBUG 00154 seq_.extract_R (A_top.nrows(), A_top.ncols(), A_top.get(), 00155 A_top.lda(), R, ldr, contiguous_cache_blocks); 00156 } 00157 return A_top; 00158 } 00159 00160 00161 template< class LocalOrdinal, class Scalar > 00162 bool 00163 TbbRecursiveTsqr< LocalOrdinal, Scalar >:: 00164 apply_helper_empty (const size_t P_first, 00165 const size_t P_last, 00166 ConstMatView< LocalOrdinal, Scalar >& Q, 00167 MatView< LocalOrdinal, Scalar >& C) const 00168 { 00169 if (Q.empty()) 00170 { 00171 if (! C.empty()) 00172 throw std::logic_error("Q is empty but C is not!"); 00173 else 00174 return true; 00175 } 00176 else if (C.empty()) 00177 { 00178 if (! Q.empty()) 00179 throw std::logic_error("C is empty but Q is not!"); 00180 else 00181 return true; 00182 } 00183 else if (P_first > P_last) 00184 return true; 00185 else 00186 return false; 00187 } 00188 00189 00190 template< class LocalOrdinal, class Scalar > 00191 void 00192 TbbRecursiveTsqr< LocalOrdinal, Scalar >:: 00193 build_partition_array (const size_t P_first, 00194 const size_t P_last, 00195 typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::array_top_blocks_t& top_blocks, 00196 ConstMatView< LocalOrdinal, Scalar >& Q, 00197 MatView< LocalOrdinal, Scalar >& C, 00198 const bool contiguous_cache_blocks) const 00199 { 00200 #ifdef TBB_DEBUG 00201 cerr << "build_partition_array: [" << P_first << ", " << P_last << "]:" << endl 00202 << "Q is " << Q.nrows() << " x " << Q.ncols() << " w/ LDA = " 00203 << Q.lda() << endl << "C is " << C.nrows() << " x " << C.ncols() 00204 << " w/ LDA = " << C.lda() << endl; 00205 #endif // TBB_DEBUG 00206 00207 if (P_first > P_last) 00208 return; 00209 else if (P_first == P_last) 00210 { 00211 CacheBlocker< LocalOrdinal, Scalar > blocker (Q.nrows(), Q.ncols(), seq_.cache_blocking_strategy()); 00212 const_mat_view Q_top = blocker.top_block (Q, contiguous_cache_blocks); 00213 mat_view C_top = blocker.top_block (C, contiguous_cache_blocks); 00214 top_blocks[P_first] = 00215 std::make_pair (const_mat_view (Q_top.ncols(), Q_top.ncols(), Q_top.get(), Q_top.lda()), 00216 mat_view (C_top.ncols(), C_top.ncols(), C_top.get(), C_top.lda())); 00217 } 00218 else 00219 { 00220 // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last] 00221 const size_t P_mid = (P_first + P_last) / 2; 00222 const_split_t Q_split = 00223 partitioner_.split (Q, P_first, P_mid, P_last, 00224 contiguous_cache_blocks); 00225 split_t C_split = 00226 partitioner_.split (C, P_first, P_mid, P_last, 00227 contiguous_cache_blocks); 00228 build_partition_array (P_first, P_mid, top_blocks, Q_split.first, 00229 C_split.first, contiguous_cache_blocks); 00230 build_partition_array (P_mid+1, P_last, top_blocks, Q_split.second, 00231 C_split.second, contiguous_cache_blocks); 00232 } 00233 } 00234 00235 00236 template< class LocalOrdinal, class Scalar > 00237 void 00238 TbbRecursiveTsqr< LocalOrdinal, Scalar >:: 00239 apply_helper (const size_t P_first, 00240 const size_t P_last, 00241 ConstMatView< LocalOrdinal, Scalar > Q, 00242 MatView< LocalOrdinal, Scalar > C, 00243 typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::array_top_blocks_t& top_blocks, 00244 const FactorOutput& factor_output, 00245 const bool contiguous_cache_blocks) 00246 { 00247 typedef std::pair< const_mat_view, mat_view > apply_t; 00248 #ifdef TBB_DEBUG 00249 cerr << "apply_helper: [" << P_first << ", " << P_last << "]:" << endl 00250 << "Q is " << Q.nrows() << " x " << Q.ncols() << " w/ LDA = " 00251 << Q.lda() << endl << "C is " << C.nrows() << " x " << C.ncols() 00252 << " w/ LDA = " << C.lda() << endl; 00253 #endif // TBB_DEBUG 00254 00255 if (apply_helper_empty (P_first, P_last, Q, C)) 00256 return; 00257 else if (P_first == P_last) 00258 { 00259 const std::vector< SeqOutput >& seq_outputs = factor_output.first; 00260 seq_.apply ("N", Q.nrows(), Q.ncols(), Q.get(), Q.lda(), 00261 seq_outputs[P_first], C.ncols(), C.get(), 00262 C.lda(), contiguous_cache_blocks); 00263 #ifdef TBB_DEBUG 00264 cerr << "BOO!!!" << endl; 00265 #endif // TBB_DEBUG 00266 } 00267 else 00268 { 00269 // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last] 00270 const size_t P_mid = (P_first + P_last) / 2; 00271 const_split_t Q_split = 00272 partitioner_.split (Q, P_first, P_mid, P_last, 00273 contiguous_cache_blocks); 00274 split_t C_split = 00275 partitioner_.split (C, P_first, P_mid, P_last, 00276 contiguous_cache_blocks); 00277 const ParOutput& par_output = factor_output.second; 00278 00279 apply_pair ("N", P_first, P_mid+1, top_blocks[P_mid+1].first, 00280 par_output, top_blocks[P_first].second, 00281 top_blocks[P_mid+1].second, contiguous_cache_blocks); 00282 apply_helper (P_first, P_mid, Q_split.first, C_split.first, 00283 top_blocks, factor_output, contiguous_cache_blocks); 00284 apply_helper (P_mid+1, P_last, Q_split.second, C_split.second, 00285 top_blocks, factor_output, contiguous_cache_blocks); 00286 } 00287 } 00288 00289 00290 template< class LocalOrdinal, class Scalar > 00291 typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::top_blocks_t 00292 TbbRecursiveTsqr< LocalOrdinal, Scalar >:: 00293 apply_transpose_helper (const std::string& op, 00294 const size_t P_first, 00295 const size_t P_last, 00296 ConstMatView< LocalOrdinal, Scalar > Q, 00297 MatView< LocalOrdinal, Scalar > C, 00298 const typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::FactorOutput& factor_output, 00299 const bool contiguous_cache_blocks) 00300 { 00301 if (apply_helper_empty (P_first, P_last, Q, C)) 00302 return std::make_pair (Q, C); 00303 else if (P_first == P_last) 00304 { 00305 const std::vector< SeqOutput >& seq_outputs = factor_output.first; 00306 seq_.apply (op, Q.nrows(), Q.ncols(), Q.get(), Q.lda(), 00307 seq_outputs[P_first], C.ncols(), C.get(), 00308 C.lda(), contiguous_cache_blocks); 00309 return std::make_pair (Q, C); 00310 } 00311 else 00312 { 00313 // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last] 00314 const size_t P_mid = (P_first + P_last) / 2; 00315 00316 const_split_t Q_split = 00317 partitioner_.split (Q, P_first, P_mid, P_last, 00318 contiguous_cache_blocks); 00319 split_t C_split = 00320 partitioner_.split (C, P_first, P_mid, P_last, 00321 contiguous_cache_blocks); 00322 const ParOutput& par_output = factor_output.second; 00323 top_blocks_t Top = 00324 apply_transpose_helper (op, P_first, P_mid, Q_split.first, 00325 C_split.first, factor_output, 00326 contiguous_cache_blocks); 00327 top_blocks_t Bottom = 00328 apply_transpose_helper (op, P_mid+1, P_last, Q_split.second, 00329 C_split.second, factor_output, 00330 contiguous_cache_blocks); 00331 apply_pair (op, P_first, P_mid+1, Bottom.first, 00332 par_output, Top.second, Bottom.second, 00333 contiguous_cache_blocks); 00334 return Top; 00335 } 00336 } 00337 00338 00339 template< class LocalOrdinal, class Scalar > 00340 void 00341 TbbRecursiveTsqr< LocalOrdinal, Scalar >:: 00342 factor_pair (const size_t P_top, 00343 const size_t P_bot, 00344 mat_view& A_top, 00345 mat_view& A_bot, 00346 std::vector< std::vector< Scalar > >& par_outputs, 00347 const bool contiguous_cache_blocks) 00348 { 00349 if (P_top == P_bot) 00350 { 00351 throw std::logic_error("factor_pair: should never get here!"); 00352 return; // to pacify the compiler 00353 } 00354 // We only read and write the upper ncols x ncols triangle of 00355 // each block. 00356 const LocalOrdinal ncols = A_top.ncols(); 00357 if (A_bot.ncols() != ncols) 00358 throw std::logic_error("A_bot.ncols() != A_top.ncols()"); 00359 00360 std::vector< Scalar >& tau = par_outputs[P_bot]; 00361 std::vector< Scalar > work (ncols); 00362 00363 TSQR::Combine< LocalOrdinal, Scalar > combine_; 00364 combine_.factor_pair (ncols, A_top.get(), A_top.lda(), 00365 A_bot.get(), A_bot.lda(), &tau[0], &work[0]); 00366 } 00367 00368 template< class LocalOrdinal, class Scalar > 00369 void 00370 TbbRecursiveTsqr< LocalOrdinal, Scalar >:: 00371 apply_pair (const std::string& trans, 00372 const size_t P_top, 00373 const size_t P_bot, 00374 ConstMatView< LocalOrdinal, Scalar >& Q_bot, 00375 const std::vector< std::vector< Scalar > >& tau_arrays, 00376 MatView< LocalOrdinal, Scalar >& C_top, 00377 MatView< LocalOrdinal, Scalar >& C_bot, 00378 const bool contiguous_cache_blocks) 00379 { 00380 if (P_top == P_bot) 00381 { 00382 throw std::logic_error("apply_pair: should never get here!"); 00383 return; // to pacify the compiler 00384 } 00385 const std::vector< Scalar >& tau = tau_arrays[P_bot]; 00386 std::vector< Scalar > work (C_top.ncols()); 00387 00388 TSQR::Combine< LocalOrdinal, Scalar > combine_; 00389 combine_.apply_pair (trans.c_str(), C_top.ncols(), Q_bot.ncols(), 00390 Q_bot.get(), Q_bot.lda(), &tau[0], 00391 C_top.get(), C_top.lda(), 00392 C_bot.get(), C_bot.lda(), &work[0]); 00393 } 00394 00395 template< class LocalOrdinal, class Scalar > 00396 void 00397 TbbRecursiveTsqr< LocalOrdinal, Scalar >:: 00398 cache_block_helper (MatView< LocalOrdinal, Scalar >& A_out, 00399 ConstMatView< LocalOrdinal, Scalar >& A_in, 00400 const size_t P_first, 00401 const size_t P_last) const 00402 { 00403 if (P_first > P_last) 00404 return; 00405 else if (P_first == P_last) 00406 seq_.cache_block (A_out.nrows(), A_out.ncols(), A_out.get(), 00407 A_in.get(), A_in.lda()); 00408 else 00409 { 00410 const size_t P_mid = (P_first + P_last) / 2; 00411 const_split_t A_in_split = 00412 partitioner_.split (A_in, P_first, P_mid, P_last, false); 00413 split_t A_out_split = 00414 partitioner_.split (A_out, P_first, P_mid, P_last, true); 00415 cache_block_helper (A_out_split.first, A_in_split.first, 00416 P_first, P_mid); 00417 cache_block_helper (A_out_split.second, A_in_split.second, 00418 P_mid+1, P_last); 00419 } 00420 } 00421 00422 template< class LocalOrdinal, class Scalar > 00423 void 00424 TbbRecursiveTsqr< LocalOrdinal, Scalar >:: 00425 un_cache_block_helper (MatView< LocalOrdinal, Scalar >& A_out, 00426 const ConstMatView< LocalOrdinal, Scalar >& A_in, 00427 const size_t P_first, 00428 const size_t P_last) const 00429 { 00430 if (P_first > P_last) 00431 return; 00432 else if (P_first == P_last) 00433 seq_.un_cache_block (A_out.nrows(), A_out.ncols(), A_out.get(), 00434 A_out.lda(), A_in.get()); 00435 else 00436 { 00437 const size_t P_mid = (P_first + P_last) / 2; 00438 const const_split_t A_in_split = 00439 partitioner_.split (A_in, P_first, P_mid, P_last, true); 00440 split_t A_out_split = 00441 partitioner_.split (A_out, P_first, P_mid, P_last, false); 00442 00443 un_cache_block_helper (A_out_split.first, A_in_split.first, 00444 P_first, P_mid); 00445 un_cache_block_helper (A_out_split.second, A_in_split.second, 00446 P_mid+1, P_last); 00447 } 00448 } 00449 00452 00453 template< class LocalOrdinal, class Scalar > 00454 TbbRecursiveTsqr< LocalOrdinal, Scalar >:: 00455 TbbRecursiveTsqr (const size_t num_cores, 00456 const size_t cache_block_size) 00457 : seq_ (cache_block_size) 00458 { 00459 if (num_cores < 1) 00460 ncores_ = 1; // default is no parallelism 00461 else 00462 ncores_ = num_cores; 00463 } 00464 00465 template< class LocalOrdinal, class Scalar > 00466 void 00467 TbbRecursiveTsqr< LocalOrdinal, Scalar >:: 00468 cache_block (const LocalOrdinal nrows, 00469 const LocalOrdinal ncols, 00470 Scalar A_out[], 00471 const Scalar A_in[], 00472 const LocalOrdinal lda_in) const 00473 { 00474 const_mat_view A_in_view (nrows, ncols, A_in, lda_in); 00475 // Leading dimension doesn't matter, since we're going to cache block it. 00476 mat_view A_out_view (nrows, ncols, A_out, lda_in); 00477 cache_block_helper (A_out_view, A_in_view, 0, ncores()-1); 00478 } 00479 00480 template< class LocalOrdinal, class Scalar > 00481 void 00482 TbbRecursiveTsqr< LocalOrdinal, Scalar >:: 00483 un_cache_block (const LocalOrdinal nrows, 00484 const LocalOrdinal ncols, 00485 Scalar A_out[], 00486 const LocalOrdinal lda_out, 00487 const Scalar A_in[]) const 00488 { 00489 // Leading dimension doesn't matter, since it's cache-blocked. 00490 const_mat_view A_in_view (nrows, ncols, A_in, lda_out); 00491 mat_view A_out_view (nrows, ncols, A_out, lda_out); 00492 un_cache_block_helper (A_out_view, A_in_view, 0, ncores()-1); 00493 } 00494 00495 template< class LocalOrdinal, class Scalar > 00496 typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::FactorOutput 00497 TbbRecursiveTsqr< LocalOrdinal, Scalar >:: 00498 factor (const LocalOrdinal nrows, 00499 const LocalOrdinal ncols, 00500 Scalar A[], 00501 const LocalOrdinal lda, 00502 Scalar R[], 00503 const LocalOrdinal ldr, 00504 const bool contiguous_cache_blocks) 00505 { 00506 mat_view A_view (nrows, ncols, A, lda); 00507 std::vector< SeqOutput > seq_outputs (ncores()); 00508 ParOutput par_outputs (ncores(), std::vector< Scalar >(ncols)); 00509 (void) factor_helper (0, ncores()-1, 0, A_view, seq_outputs, 00510 par_outputs, R, ldr, contiguous_cache_blocks); 00511 return std::make_pair (seq_outputs, par_outputs); 00512 } 00513 00514 template< class LocalOrdinal, class Scalar > 00515 void 00516 TbbRecursiveTsqr< LocalOrdinal, Scalar >:: 00517 apply (const std::string& op, 00518 const LocalOrdinal nrows, 00519 const LocalOrdinal ncols_C, 00520 Scalar C[], 00521 const LocalOrdinal ldc, 00522 const LocalOrdinal ncols_Q, 00523 const Scalar Q[], 00524 const LocalOrdinal ldq, 00525 const typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::FactorOutput& factor_output, 00526 const bool contiguous_cache_blocks) 00527 { 00528 const ApplyType apply_type (op); 00529 if (apply_type == ApplyType::ConjugateTranspose && 00530 ScalarTraits< Scalar >::is_complex) 00531 throw std::logic_error("Applying Q^H for complex scalar types " 00532 "not yet implemented"); 00533 00534 const_mat_view Q_view (nrows, ncols_Q, Q, ldq); 00535 mat_view C_view (nrows, ncols_C, C, ldc); 00536 if (! apply_type.transposed()) 00537 { 00538 array_top_blocks_t top_blocks (ncores()); 00539 build_partition_array (0, ncores()-1, top_blocks, Q_view, 00540 C_view, contiguous_cache_blocks); 00541 apply_helper (0, ncores()-1, Q_view, C_view, top_blocks, 00542 factor_output, contiguous_cache_blocks); 00543 } 00544 else 00545 apply_transpose_helper (op, 0, ncores()-1, Q_view, C_view, 00546 factor_output, contiguous_cache_blocks); 00547 } 00548 00549 00550 template< class LocalOrdinal, class Scalar > 00551 void 00552 TbbRecursiveTsqr< LocalOrdinal, Scalar >:: 00553 explicit_Q (const LocalOrdinal nrows, 00554 const LocalOrdinal ncols_Q_in, 00555 const Scalar Q_in[], 00556 const LocalOrdinal ldq_in, 00557 const LocalOrdinal ncols_Q_out, 00558 Scalar Q_out[], 00559 const LocalOrdinal ldq_out, 00560 const typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::FactorOutput& factor_output, 00561 const bool contiguous_cache_blocks) 00562 { 00563 if (ncols_Q_out != ncols_Q_in) 00564 throw std::logic_error("FIXME Currently, explicit_Q() only works for ncols_Q_out == ncols_Q_in"); 00565 00566 const_mat_view Q_in_view (nrows, ncols_Q_in, Q_in, ldq_in); 00567 mat_view Q_out_view (nrows, ncols_Q_out, Q_out, ldq_out); 00568 00569 explicit_Q_helper (0, ncores()-1, Q_out_view, contiguous_cache_blocks); 00570 apply ("N", nrows, ncols_Q_out, Q_out, ldq_out, ncols_Q_in, 00571 Q_in, ldq_in, factor_output, contiguous_cache_blocks); 00572 } 00573 00574 } // namespace TBB 00575 } // namespace TSQR 00576 00577 00578 #endif // __TSQR_TBB_TbbRecursiveTsqr_Def_hpp
1.7.4