|
Anasazi Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Anasazi: Block Eigensolvers Package 00005 // Copyright (2010) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 #ifndef __Tsqr_Random_GlobalMatrix_hpp 00030 #define __Tsqr_Random_GlobalMatrix_hpp 00031 00032 #include "Tsqr_Blas.hpp" 00033 #include "Tsqr_Matrix.hpp" 00034 #include "Tsqr_Random_MatrixGenerator.hpp" 00035 #include "Tsqr_ScalarTraits.hpp" 00036 #include "Tsqr_RMessenger.hpp" 00037 00038 #include <algorithm> 00039 #include <functional> 00040 #include <iostream> 00041 #include <sstream> 00042 #include <stdexcept> 00043 #include <vector> 00044 00047 00048 namespace TSQR { 00049 namespace Random { 00050 00051 template< class MatrixViewType > 00052 static void 00053 scaleMatrix (MatrixViewType& A, 00054 const typename MatrixViewType::scalar_type& denom) 00055 { 00056 typedef typename MatrixViewType::ordinal_type ordinal_type; 00057 typedef typename MatrixViewType::scalar_type scalar_type; 00058 00059 const ordinal_type nrows = A.nrows(); 00060 const ordinal_type ncols = A.ncols(); 00061 const ordinal_type lda = A.lda(); 00062 00063 if (nrows == lda) 00064 { 00065 // This works because storage in A is contiguous. 00066 const ordinal_type nelts = nrows * ncols; 00067 scalar_type* const A_ptr = A.get(); 00068 std::transform (A_ptr, A_ptr + nelts, A_ptr, std::bind2nd(std::divides<double>(), denom)); 00069 } 00070 else 00071 { 00072 for (ordinal_type j = 0; j < ncols; ++j) 00073 { 00074 scalar_type* const A_j = &A(0,j); 00075 std::transform (A_j, A_j + nrows, A_j, std::bind2nd(std::divides<double>(), denom)); 00076 } 00077 } 00078 } 00079 00080 template< class MatrixViewType, class Generator > 00081 void 00082 randomGlobalMatrix (Generator* const pGenerator, 00083 MatrixViewType& A_local, 00084 const typename ScalarTraits< typename MatrixViewType::scalar_type >::magnitude_type singular_values[], 00085 MessengerBase< typename MatrixViewType::ordinal_type >* const ordinalMessenger, 00086 MessengerBase< typename MatrixViewType::scalar_type >* const scalarMessenger) 00087 { 00088 typedef typename MatrixViewType::ordinal_type ordinal_type; 00089 typedef typename MatrixViewType::scalar_type scalar_type; 00090 typedef typename ScalarTraits< scalar_type >::magnitude_type magnitude_type; 00091 using std::vector; 00092 00093 const bool b_local_debug = false; 00094 00095 const int rootProc = 0; 00096 const int nprocs = ordinalMessenger->size(); 00097 const int myRank = ordinalMessenger->rank(); 00098 BLAS< ordinal_type, scalar_type > blas; 00099 00100 const ordinal_type nrowsLocal = A_local.nrows(); 00101 const ordinal_type ncols = A_local.ncols(); 00102 00103 // Theory: Suppose there are P processors. Proc q wants an m_q by n 00104 // component of the matrix A, which we write as A_q. On Proc 0, we 00105 // generate random m_q by n orthogonal matrices Q_q (in explicit 00106 // form), and send Q_q to Proc q. The m by n matrix [Q_0; Q_1; ...; 00107 // Q_{P-1}] is not itself orthogonal. However, the m by n matrix 00108 // Q = [Q_0 / P; Q_1 / P; ...; Q_{P-1} / P] is orthogonal: 00109 // 00110 // \sum_{q = 0}^{P-1} (Q_q^T * Q_q) / P = I. 00111 00112 if (myRank == rootProc) 00113 { 00114 typedef Random::MatrixGenerator< ordinal_type, scalar_type, Generator > matgen_type; 00115 matgen_type matGen (*pGenerator); 00116 00117 // Generate a random ncols by ncols upper triangular matrix 00118 // R with the given singular values. 00119 Matrix< ordinal_type, scalar_type > R (ncols, ncols, scalar_type(0)); 00120 matGen.fill_random_R (ncols, R.get(), R.lda(), singular_values); 00121 00122 // Broadcast R to all the processors. 00123 scalarMessenger->broadcast (R.get(), ncols*ncols, rootProc); 00124 00125 // Generate (for myself) a random nrowsLocal x ncols 00126 // orthogonal matrix, stored in explicit form. 00127 Matrix< ordinal_type, scalar_type > Q_local (nrowsLocal, ncols); 00128 matGen.explicit_Q (nrowsLocal, ncols, Q_local.get(), Q_local.lda()); 00129 00130 // Scale the (local) orthogonal matrix by the number of 00131 // processors P, to make the columns of the global matrix Q 00132 // orthogonal. (Otherwise the norm of each column will be P 00133 // instead of 1.) 00134 const scalar_type P = static_cast< scalar_type > (nprocs); 00135 // Do overflow check. If casting P back to scalar_type 00136 // doesn't produce the same value as nprocs, the cast 00137 // overflowed. 00138 if (static_cast< int > (P) != nprocs) 00139 throw std::runtime_error ("Casting nprocs to Scalar failed"); 00140 00141 scaleMatrix (Q_local, P); 00142 00143 // A_local := Q_local * R 00144 blas.GEMM ("N", "N", nrowsLocal, ncols, ncols, 00145 scalar_type(1), Q_local.get(), Q_local.lda(), 00146 R.get(), R.lda(), 00147 scalar_type(0), A_local.get(), A_local.lda()); 00148 00149 for (int recvProc = 1; recvProc < nprocs; ++recvProc) 00150 { 00151 // Ask the receiving processor how big (i.e., how many rows) 00152 // its local component of the matrix is. 00153 ordinal_type nrowsRemote = 0; 00154 ordinalMessenger->recv (&nrowsRemote, 1, recvProc, 0); 00155 00156 if (b_local_debug) 00157 { 00158 std::ostringstream os; 00159 os << "For Proc " << recvProc << ": local block is " 00160 << nrowsRemote << " by " << ncols << std::endl; 00161 std::cerr << os.str(); 00162 } 00163 00164 // Make sure Q_local is big enough to hold the data for 00165 // the current receiver proc. 00166 Q_local.reshape (nrowsRemote, ncols); 00167 00168 // Compute a random nrowsRemote * ncols orthogonal 00169 // matrix Q_local, for the current receiving processor. 00170 matGen.explicit_Q (nrowsRemote, ncols, Q_local.get(), Q_local.lda()); 00171 00172 // Send Q_local to the current receiving processor. 00173 scalarMessenger->send (Q_local.get(), nrowsRemote*ncols, recvProc, 0); 00174 } 00175 } 00176 else 00177 { 00178 // Receive the R factor from Proc 0. There's only 1 R 00179 // factor for all the processes. 00180 Matrix< ordinal_type, scalar_type > R (ncols, ncols, scalar_type (0)); 00181 scalarMessenger->broadcast (R.get(), ncols*ncols, rootProc); 00182 00183 // Q_local (nrows_local by ncols, random orthogonal matrix) 00184 // will be received from Proc 0, where it was generated. 00185 const ordinal_type recvSize = nrowsLocal * ncols; 00186 Matrix< ordinal_type, scalar_type > Q_local (nrowsLocal, ncols); 00187 00188 // Tell Proc 0 how many rows there are in the random orthogonal 00189 // matrix I want to receive from Proc 0. 00190 ordinalMessenger->send (&nrowsLocal, 1, rootProc, 0); 00191 00192 // Receive the orthogonal matrix from Proc 0. 00193 scalarMessenger->recv (Q_local.get(), recvSize, rootProc, 0); 00194 00195 // Scale the (local) orthogonal matrix by the number of 00196 // processors, to make the global matrix Q orthogonal. 00197 const scalar_type P = static_cast< scalar_type > (nprocs); 00198 // Do overflow check. If casting P back to scalar_type 00199 // doesn't produce the same value as nprocs, the cast 00200 // overflowed. 00201 if (static_cast< int > (P) != nprocs) 00202 throw std::runtime_error ("Casting nprocs to Scalar failed"); 00203 scaleMatrix (Q_local, P); 00204 00205 // A_local := Q_local * R 00206 blas.GEMM ("N", "N", nrowsLocal, ncols, ncols, 00207 scalar_type(1), Q_local.get(), Q_local.lda(), 00208 R.get(), R.lda(), 00209 scalar_type(0), A_local.get(), A_local.lda()); 00210 } 00211 } 00212 } // namespace Random 00213 } // namespace TSQR 00214 00215 #endif // __Tsqr_Random_GlobalMatrix_hpp
1.7.4