|
Tpetra Matrix/Vector Services Version of the Day
|
00001 //@HEADER 00002 // ************************************************************************ 00003 // 00004 // Tpetra: Templated Linear Algebra Services Package 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // ************************************************************************ 00027 //@HEADER 00028 00029 #ifndef TPETRA_CRSMATRIXMULTIPLYOP_DEF_HPP 00030 #define TPETRA_CRSMATRIXMULTIPLYOP_DEF_HPP 00031 00032 #include "Tpetra_CrsMatrix.hpp" 00033 00034 #ifdef DOXYGEN_USE_ONLY 00035 #include "Tpetra_CrsMatrixMultiplyOp_decl.hpp" 00036 #endif 00037 00043 namespace Tpetra { 00044 00045 template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00046 CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::CrsMatrixMultiplyOp(const Teuchos::RCP<const CrsMatrix<MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> > &A) 00047 : matrix_(A) { 00048 // we don't require that A is fill complete; we will query for the importer/exporter at apply()-time 00049 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING 00050 importTimer_ = Teuchos::TimeMonitor::getNewTimer( "CrsMatrixMultiplyOp::import" ); 00051 exportTimer_ = Teuchos::TimeMonitor::getNewTimer( "CrsMatrixMultiplyOp::export" ); 00052 #endif 00053 } 00054 00055 template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00056 CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::~CrsMatrixMultiplyOp() { 00057 } 00058 00059 template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00060 void 00061 CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::apply( 00062 const MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> & X_in, 00063 MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> &Y_in, 00064 Teuchos::ETransp mode, OpScalar alpha, OpScalar beta) const 00065 { 00066 TEST_FOR_EXCEPTION(!matrix_->isFillComplete(), std::runtime_error, 00067 Teuchos::typeName(*this) << "::apply(): underlying matrix is not fill-complete."); 00068 TEST_FOR_EXCEPTION(X_in.getNumVectors() != Y_in.getNumVectors(), std::runtime_error, 00069 Teuchos::typeName(*this) << "::apply(X,Y): X and Y must have the same number of vectors."); 00070 TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<OpScalar>::isComplex && mode == Teuchos::TRANS, std::logic_error, 00071 Teuchos::typeName(*this) << "::apply() does not currently support transposed multiplications for complex scalar types."); 00072 if (mode == Teuchos::NO_TRANS) { 00073 applyNonTranspose(X_in, Y_in, alpha, beta); 00074 } 00075 else { 00076 applyTranspose(X_in, Y_in, alpha, beta); 00077 } 00078 } 00079 00081 template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00082 void 00083 CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::applyNonTranspose( 00084 const MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> & X_in, 00085 MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> & Y_in, 00086 OpScalar alpha, OpScalar beta) const 00087 { 00088 typedef Teuchos::ScalarTraits<OpScalar> ST; 00089 using Teuchos::null; 00090 00091 const int myImageID = Teuchos::rank(*matrix_->getComm()); 00092 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP 00093 Teuchos::RCP<Teuchos::FancyOStream> out = Teuchos::VerboseObjectBase::getDefaultOStream(); 00094 if (myImageID == 0) { 00095 *out << "Entering CrsMatrixMultiplyOp::applyNonTranspose()" << std::endl 00096 << "Column Map: " << std::endl; 00097 } 00098 *out << matrix_->getColMap() << std::endl; 00099 if (myImageID == 0) { 00100 *out << "Initial input: " << std::endl; 00101 } 00102 X_in.describe(*out,Teuchos::VERB_EXTREME); 00103 #endif 00104 00105 const size_t numVectors = X_in.getNumVectors(); 00106 // because of Views, it is difficult to determine if X and Y point to the same data. 00107 // however, if they reference the exact same object, we will do the user the favor of copying X into new storage (with a warning) 00108 // we ony need to do this if we have trivial importers; otherwise, we don't actually apply the operator from X into Y 00109 const Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer = matrix_->getGraph()->getImporter(); 00110 const Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter = matrix_->getGraph()->getExporter(); 00111 // access X indirectly, in case we need to create temporary storage 00112 Teuchos::RCP<const MV> X; 00113 00114 // some parameters for below 00115 const bool Y_is_replicated = !Y_in.isDistributed(), 00116 Y_is_overwritten = (beta == ST::zero()); 00117 if (Y_is_replicated && myImageID > 0) { 00118 beta = ST::zero(); 00119 } 00120 00121 // currently, cannot multiply from multivector of non-constant stride 00122 if (X_in.isConstantStride() == false && importer == null) { 00123 // generate a strided copy of X_in 00124 X = Teuchos::rcp(new MV(X_in)); 00125 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP 00126 if (myImageID == 0) *out << "X is not constant stride, duplicating X results in a strided copy" << std::endl; 00127 X->describe(*out,Teuchos::VERB_EXTREME); 00128 #endif 00129 } 00130 else { 00131 // just temporary, so this non-owning RCP is okay 00132 X = Teuchos::rcp(&X_in, false); 00133 } 00134 00135 // set up import/export temporary multivectors 00136 if (importer != null) { 00137 if (importMV_ != null && importMV_->getNumVectors() != numVectors) importMV_ = null; 00138 if (importMV_ == null) { 00139 importMV_ = Teuchos::rcp( new MV(matrix_->getColMap(),numVectors) ); 00140 } 00141 } 00142 if (exporter != null) { 00143 if (exportMV_ != null && exportMV_->getNumVectors() != numVectors) exportMV_ = null; 00144 if (exportMV_ == null) { 00145 exportMV_ = Teuchos::rcp( new MV(matrix_->getRowMap(),numVectors) ); 00146 } 00147 } 00148 00149 // If we have a non-trivial importer, we must import elements that are permuted or are on other processors 00150 if (importer != null) { 00151 { 00152 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING 00153 Teuchos::TimeMonitor lcltimer(*importTimer_); 00154 #endif 00155 importMV_->doImport(X_in, *importer, INSERT); 00156 } 00157 // multiply out of importMV_ 00158 X = importMV_; 00159 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP 00160 if (myImageID == 0) { 00161 *out << "Performed import of X using importer..." << std::endl; 00162 } 00163 X->describe(*out,Teuchos::VERB_EXTREME); 00164 #endif 00165 } 00166 00167 // If we have a non-trivial exporter, we must export elements that are permuted or go to other processors 00168 // We will compute solution into the to-be-exported MV 00169 if (exporter != null) { 00170 // Do actual computation 00171 matrix_->template multiply<OpScalar,OpScalar>(*X, *exportMV_, Teuchos::NO_TRANS, alpha, ST::zero()); 00172 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP 00173 if (myImageID == 0) *out << "Export vector after multiply()..." << std::endl; 00174 exportMV_->describe(*out,Teuchos::VERB_EXTREME); 00175 #endif 00176 if (Y_is_overwritten) Y_in.putScalar(ST::zero()); 00177 else Y_in.scale(beta); 00178 { 00179 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING 00180 Teuchos::TimeMonitor lcltimer(*exportTimer_); 00181 #endif 00182 Y_in.doExport(*exportMV_, *exporter, ADD); 00183 } 00184 } 00185 // otherwise, multiply into Y 00186 else { 00187 // can't multiply in-situ; can't mutiply into non-strided multivector 00188 if (Y_in.isConstantStride() == false || X.getRawPtr() == &Y_in) { 00189 // generate a strided copy of Y 00190 MV Y(Y_in); 00191 matrix_->template multiply<OpScalar,OpScalar>(*X, Y, Teuchos::NO_TRANS, alpha, beta); 00192 Y_in = Y; 00193 } 00194 else { 00195 matrix_->template multiply<OpScalar,OpScalar>(*X, Y_in, Teuchos::NO_TRANS, alpha, beta); 00196 } 00197 } 00198 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP 00199 if (myImageID == 0) *out << "Y_in vector after multiply/export..." << std::endl; 00200 Y_in.describe(*out,Teuchos::VERB_EXTREME); 00201 #endif 00202 // Handle case of rangemap being a local replicated map: in this case, sum contributions from each processor 00203 if (Y_is_replicated) { 00204 Y_in.reduce(); 00205 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP 00206 if (myImageID == 0) *out << "Output vector is local; result after reduce()..." << std::endl; 00207 Y_in.describe(*out,Teuchos::VERB_EXTREME); 00208 #endif 00209 } 00210 } 00211 00213 template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00214 void 00215 CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::applyTranspose( 00216 const MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> & X_in, 00217 MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> & Y_in, 00218 OpScalar alpha, OpScalar beta) const 00219 { 00220 typedef Teuchos::ScalarTraits<OpScalar> ST; 00221 using Teuchos::null; 00222 00223 int myImageID = Teuchos::rank(*matrix_->getComm()); 00224 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP 00225 Teuchos::RCP<Teuchos::FancyOStream> out = Teuchos::VerboseObjectBase::getDefaultOStream(); 00226 if (myImageID == 0) { 00227 *out << "Entering CrsMatrixMultiplyOp::applyTranspose()" << std::endl 00228 << "Column Map: " << std::endl; 00229 } 00230 *out << matrix_->getColMap() << std::endl; 00231 if (myImageID == 0) { 00232 *out << "Initial input: " << std::endl; 00233 } 00234 X_in.describe(*out,Teuchos::VERB_EXTREME); 00235 #endif 00236 00237 const size_t numVectors = X_in.getNumVectors(); 00238 // because of Views, it is difficult to determine if X and Y point to the same data. 00239 // however, if they reference the exact same object, we will do the user the favor of copying X into new storage (with a warning) 00240 // we ony need to do this if we have trivial importers; otherwise, we don't actually apply the operator from X into Y 00241 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer = matrix_->getGraph()->getImporter(); 00242 Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter = matrix_->getGraph()->getExporter(); 00243 // access X indirectly, in case we need to create temporary storage 00244 Teuchos::RCP<const MV> X; 00245 00246 // some parameters for below 00247 const bool Y_is_replicated = !Y_in.isDistributed(), 00248 Y_is_overwritten = (beta == ST::zero()); 00249 if (Y_is_replicated && myImageID > 0) { 00250 beta = ST::zero(); 00251 } 00252 00253 // currently, cannot multiply from multivector of non-constant stride 00254 if (X_in.isConstantStride() == false && importer==null) { 00255 // generate a strided copy of X_in 00256 X = Teuchos::rcp(new MV(X_in)); 00257 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP 00258 if (myImageID == 0) *out << "X is not constant stride, duplicating X results in a strided copy" << std::endl; 00259 X->describe(*out,Teuchos::VERB_EXTREME); 00260 #endif 00261 } 00262 else { 00263 // just temporary, so this non-owning RCP is okay 00264 X = Teuchos::rcp(&X_in, false); 00265 } 00266 00267 // set up import/export temporary multivectors 00268 if (importer != null) { 00269 if (importMV_ != null && importMV_->getNumVectors() != numVectors) importMV_ = null; 00270 if (importMV_ == null) { 00271 importMV_ = Teuchos::rcp( new MV(matrix_->getColMap(),numVectors) ); 00272 } 00273 } 00274 if (exporter != null) { 00275 if (exportMV_ != null && exportMV_->getNumVectors() != numVectors) exportMV_ = null; 00276 if (exportMV_ == null) { 00277 exportMV_ = Teuchos::rcp( new MV(matrix_->getRowMap(),numVectors) ); 00278 } 00279 } 00280 00281 // If we have a non-trivial exporter, we must import elements that are permuted or are on other processors 00282 if (exporter != null) { 00283 { 00284 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING 00285 Teuchos::TimeMonitor lcltimer(*importTimer_); 00286 #endif 00287 exportMV_->doImport(X_in,*exporter,INSERT); 00288 } 00289 // multiply out of exportMV_ 00290 X = exportMV_; 00291 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP 00292 if (myImageID == 0) { 00293 *out << "Performed import of X using exporter..." << std::endl; 00294 } 00295 X->describe(*out,Teuchos::VERB_EXTREME); 00296 #endif 00297 } 00298 00299 00300 // If we have a non-trivial importer, we must export elements that are permuted or belong to other processors 00301 // We will compute solution into the to-be-exported MV; get a view 00302 if (importer != null) { 00303 // Do actual computation 00304 matrix_->template multiply<OpScalar,OpScalar>(*X, *importMV_, Teuchos::CONJ_TRANS, alpha, ST::zero()); 00305 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP 00306 if (myImageID == 0) *out << "Import vector after multiply()..." << std::endl; 00307 importMV_->describe(*out,Teuchos::VERB_EXTREME); 00308 #endif 00309 if (Y_is_overwritten) Y_in.putScalar(ST::zero()); 00310 else Y_in.scale(beta); 00311 // 00312 { 00313 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING 00314 Teuchos::TimeMonitor lcltimer(*importTimer_); 00315 #endif 00316 Y_in.doExport(*importMV_,*importer,ADD); 00317 } 00318 } 00319 // otherwise, multiply into Y 00320 else { 00321 // can't multiply in-situ; can't multiply into non-strided multivector 00322 if (Y_in.isConstantStride() == false || X.getRawPtr() == &Y_in) { 00323 // generate a strided copy of Y 00324 MV Y(Y_in); 00325 matrix_->template multiply<OpScalar,OpScalar>(*X, Y, Teuchos::CONJ_TRANS, alpha, beta); 00326 Y_in = Y; 00327 } 00328 else { 00329 matrix_->template multiply<OpScalar,OpScalar>(*X, Y_in, Teuchos::CONJ_TRANS, alpha, beta); 00330 } 00331 } 00332 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP 00333 if (myImageID == 0) *out << "Y_in vector after multiply/export..." << std::endl; 00334 Y_in.describe(*out,Teuchos::VERB_EXTREME); 00335 #endif 00336 // Handle case of rangemap being a local replicated map: in this case, sum contributions from each processor 00337 if (Y_is_replicated) { 00338 Y_in.reduce(); 00339 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP 00340 if (myImageID == 0) *out << "Output vector is local; result after reduce()..." << std::endl; 00341 Y_in.describe(*out,Teuchos::VERB_EXTREME); 00342 #endif 00343 } 00344 } 00345 00346 template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00347 bool 00348 CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::hasTransposeApply() const { 00349 return true; 00350 } 00351 00352 template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00353 const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & 00354 CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getDomainMap() const { 00355 return matrix_->getDomainMap(); 00356 } 00357 00358 template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00359 const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & 00360 CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getRangeMap() const { 00361 return matrix_->getRangeMap(); 00362 } 00363 00364 } // Tpetra namespace 00365 00366 template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps> 00367 Teuchos::RCP< Tpetra::CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> > 00368 Tpetra::createCrsMatrixMultiplyOp(const Teuchos::RCP<const Tpetra::CrsMatrix<MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> > &A) { 00369 return Teuchos::rcp(new Tpetra::CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>(A) ); 00370 } 00371 00372 // 00373 // Explicit instantiation macro 00374 // 00375 // Must be expanded from within the Tpetra namespace! 00376 // 00377 00379 #define TPETRA_CRSMATRIX_MULTIPLYOP_INSTANT(OPSCALAR,MATSCALAR,LO,GO,NODE) \ 00380 \ 00381 template class CrsMatrixMultiplyOp< OPSCALAR , MATSCALAR , LO , GO , NODE >; \ 00382 \ 00383 template Teuchos::RCP< Tpetra::CrsMatrixMultiplyOp<OPSCALAR,MATSCALAR,LO,GO,NODE> > \ 00384 createCrsMatrixMultiplyOp(const Teuchos::RCP<const Tpetra::CrsMatrix<MATSCALAR,LO,GO,NODE> > &A); \ 00385 \ 00386 template void CrsMatrix<MATSCALAR,LO,GO,NODE>::multiply<OPSCALAR,OPSCALAR>( \ 00387 const MultiVector<OPSCALAR,LO,GO,NODE> &X, \ 00388 MultiVector<OPSCALAR,LO,GO,NODE> &Y, \ 00389 Teuchos::ETransp mode, \ 00390 OPSCALAR alpha, OPSCALAR beta \ 00391 ) const; \ 00392 00393 #endif // TPETRA_CRSMATRIXMULTIPLYOP_DEF_HPP
1.7.4