|
Anasazi Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Anasazi: Block Eigensolvers Package 00005 // Copyright (2010) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 #ifndef __TSQR_Tsqr_GlobalVerify_hpp 00030 #define __TSQR_Tsqr_GlobalVerify_hpp 00031 00032 #include <Tsqr_LocalVerify.hpp> 00033 #include <Tsqr_MessengerBase.hpp> 00034 #include <Tsqr_ScalarTraits.hpp> 00035 #include <Tsqr_Blas.hpp> 00036 #include <Tsqr_Util.hpp> 00037 00038 #include <utility> // std::pair 00039 #include <vector> 00040 00043 00044 namespace TSQR { 00045 00046 // Unfortunately, you need c++0x support to have default template 00047 // arguments of template functions. Otherwise we would make this a 00048 // template function and set the default value of isComplex to 00049 // ScalarTraits< Scalar >::is_complex. Also, C++ doesn't like 00050 // partial specialization of template functions, for no good reason. 00051 // So we have to make this a class. 00052 template< class Scalar, bool isComplex = ScalarTraits< Scalar >::is_complex > 00053 class GlobalSummer { 00054 public: 00055 typedef Scalar scalar_type; 00056 typedef typename ScalarTraits< Scalar >::magnitude_type magnitude_type; 00057 00058 static magnitude_type 00059 sum (const Scalar& localSum, 00060 MessengerBase< Scalar >* const messenger); 00061 }; 00062 00063 // Complex-arithmetic "forward declaration" 00064 template< class Scalar > 00065 class GlobalSummer< Scalar, true > { 00066 public: 00067 typedef Scalar scalar_type; 00068 typedef typename ScalarTraits< Scalar >::magnitude_type magnitude_type; 00069 00070 static magnitude_type 00071 sum (const Scalar& localSum, 00072 MessengerBase< Scalar >* const messenger); 00073 }; 00074 00075 // Real-arithmetic "forward declaration" 00076 template< class Scalar > 00077 class GlobalSummer< Scalar, false > { 00078 public: 00079 typedef Scalar scalar_type; 00080 typedef typename ScalarTraits< Scalar >::magnitude_type magnitude_type; 00081 00082 static magnitude_type 00083 sum (const Scalar& localSum, 00084 MessengerBase< Scalar >* const messenger); 00085 }; 00086 00087 // Complex-arithmetic case 00088 template< class Scalar > 00089 typename ScalarTraits< Scalar >::magnitude_type 00090 GlobalSummer< Scalar, true >::sum (const Scalar& localSum, 00091 MessengerBase< Scalar >* const messenger) 00092 { 00093 // In order to use a MessengerBase<Scalar> on magnitude_type 00094 // values, we have to convert local_result to a Scalar, and then 00095 // convert back the result. We convert by setting the real 00096 // component of the Scalar to the magnitude_type. This isn't 00097 // guaranteed to work if magnitude_type has a greater dynamic 00098 // range than Scalar. That's possible, but that's not how we do 00099 // things with ScalarTraits< std::complex< T > >, and that's not 00100 // how LAPACK does it either, so it's fair to assume that 00101 // magnitude_type and the individual components of Scalar have the 00102 // same dynamic range. 00103 const magnitude_type localSumAbs = ScalarTraits< Scalar >::abs (localSum); 00104 const Scalar localSumAsScalar (localSumAbs, magnitude_type(0)); 00105 const Scalar globalSumAsScalar = messenger->globalSum (localSumAsScalar); 00106 const magnitude_type globalSum = ScalarTraits< Scalar >::abs (globalSumAsScalar); 00107 return globalSum; 00108 } 00109 00110 // Real-arithmetic case 00111 template< class Scalar > 00112 typename ScalarTraits< Scalar >::magnitude_type 00113 GlobalSummer< Scalar, false >::sum (const Scalar& localSum, 00114 MessengerBase< Scalar >* const messenger) 00115 { 00116 const Scalar localSumAsScalar (localSum); 00117 const Scalar globalSumAsScalar = messenger->globalSum (localSumAsScalar); 00118 const magnitude_type globalSum = ScalarTraits< Scalar >::abs (globalSumAsScalar); 00119 return globalSum; 00120 } 00121 00122 template< class LocalOrdinal, class Scalar > 00123 typename ScalarTraits< Scalar >::magnitude_type 00124 global_frobenius_norm (const LocalOrdinal nrows_local, 00125 const LocalOrdinal ncols, 00126 const Scalar A_local[], 00127 const LocalOrdinal lda_local, 00128 MessengerBase< Scalar >* const messenger) 00129 { 00130 typedef typename ScalarTraits< Scalar >::magnitude_type magnitude_type; 00131 00132 // FIXME (mfh 20 Apr 2010) This is currently implemented using an 00133 // all-reduction. This may result in different processors getting 00134 // slightly different answers, due to floating-point arithmetic 00135 // roundoff. We might not want this if we are using this function 00136 // to test a routine. 00137 00138 magnitude_type local_result (0); 00139 for (LocalOrdinal j = 0; j < ncols; j++) 00140 { 00141 const Scalar* const cur_col = &A_local[j*lda_local]; 00142 for (LocalOrdinal i = 0; i < nrows_local; ++i) 00143 { 00144 const magnitude_type abs_xi = ScalarTraits< Scalar >::abs (cur_col[i]); 00145 local_result = local_result + abs_xi * abs_xi; 00146 } 00147 } 00148 // GlobalSummmer() is a hack to let us use a Scalar - type 00149 // MessengerBase with magnitude_type inputs and outputs. 00150 // Otherwise we would need to carry around a MessengerBase< 00151 // magnitude_type > object as well. 00152 const magnitude_type globalResult = 00153 GlobalSummer< Scalar, ScalarTraits< Scalar >::is_complex >::sum (local_result, messenger); 00154 return sqrt (globalResult); 00155 } 00156 00157 template< class LocalOrdinal, class Scalar > 00158 std::pair< typename ScalarTraits< Scalar >::magnitude_type, typename ScalarTraits< Scalar >::magnitude_type > 00159 global_verify (const LocalOrdinal nrows_local, 00160 const LocalOrdinal ncols, 00161 const Scalar A_local[], 00162 const LocalOrdinal lda_local, 00163 const Scalar Q_local[], 00164 const LocalOrdinal ldq_local, 00165 const Scalar R[], 00166 const LocalOrdinal ldr, 00167 MessengerBase< Scalar >* const messenger) 00168 { 00169 typedef typename ScalarTraits< Scalar >::magnitude_type magnitude_type; 00170 using std::make_pair; 00171 using std::pair; 00172 using std::vector; 00173 00174 const magnitude_type ZERO (0); 00175 const magnitude_type ONE (1); 00176 BLAS< LocalOrdinal, Scalar > blas; 00177 00178 // 00179 // Compute $\| I - Q^T * Q \|_F$ 00180 // 00181 00182 // Compute Q_local^T * Q_local (this node's component of Q^T*Q) 00183 vector< Scalar > Temp (ncols*ncols, std::numeric_limits< Scalar >::quiet_NaN()); 00184 const LocalOrdinal ld_temp = ncols; 00185 00186 if (ScalarTraits< Scalar >::is_complex) 00187 blas.GEMM ("C", "N", ncols, ncols, nrows_local, 00188 ONE, Q_local, ldq_local, Q_local, ldq_local, 00189 ZERO, &Temp[0], ld_temp); 00190 else 00191 blas.GEMM ("T", "N", ncols, ncols, nrows_local, 00192 ONE, Q_local, ldq_local, Q_local, ldq_local, 00193 ZERO, &Temp[0], ld_temp); 00194 00195 // Reduce over all the processors to get the global Q^T*Q in Temp2. 00196 vector< Scalar > Temp2 (ncols*ncols, std::numeric_limits< Scalar >::quiet_NaN()); 00197 messenger->globalVectorSum (&Temp[0], &Temp2[0], ncols*ncols); 00198 00199 // Compute I-(Q^T*Q) redundantly on all processors 00200 for (LocalOrdinal j = 0; j < ncols; j++) 00201 Temp2[j + j*ld_temp] = ONE - Temp2[j + j*ld_temp]; 00202 00203 // Compute the Frobenius norm of I - Q^T*Q, redundantly on all processors. 00204 const magnitude_type Orthog_F = 00205 local_frobenius_norm (ncols, ncols, &Temp2[0], ld_temp); 00206 00207 // Compute the Frobenius norm of A. 00208 const magnitude_type A_F = 00209 global_frobenius_norm (nrows_local, ncols, &A_local[0], lda_local, messenger); 00210 00211 // 00212 // Compute $\| A - Q*R \|_F$ 00213 // 00214 00215 vector< Scalar > Resid (nrows_local * ncols, std::numeric_limits< Scalar >::quiet_NaN()); 00216 const LocalOrdinal ld_resid = nrows_local; 00217 00218 // Resid := A (deep copy) 00219 copy_matrix (nrows_local, ncols, &Resid[0], ld_resid, A_local, lda_local); 00220 00221 // Resid := Resid - Q*R 00222 blas.GEMM ("N", "N", nrows_local, ncols, ncols, 00223 -ONE, Q_local, ldq_local, R, ldr, 00224 ONE, &Resid[0], ld_resid); 00225 00226 const magnitude_type Resid_F = 00227 global_frobenius_norm (nrows_local, ncols, &Resid[0], ld_resid, messenger); 00228 00229 return make_pair (Resid_F / A_F, Orthog_F / A_F); 00230 } 00231 00232 } // namespace TSQR 00233 00234 #endif // __TSQR_Tsqr_GlobalVerify_hpp 00235
1.7.4