|
DenseLinAlgPack: Concreate C++ Classes for Dense Blas-Compatible Linear Algebra Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Moocho: Multi-functional Object-Oriented arCHitecture for Optimization 00005 // Copyright (2003) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 #include "DenseLinAlgPack_DMatrixOp.hpp" 00030 #include "DenseLinAlgPack_BLAS_Cpp.hpp" 00031 00032 namespace { 00033 00034 using DenseLinAlgPack::DVector; 00035 using DenseLinAlgPack::DVectorSlice; 00036 using DenseLinAlgPack::DMatrix; 00037 using DenseLinAlgPack::DMatrixSlice; 00038 using DenseLinAlgPack::col; 00039 using DenseLinAlgPack::value_type; 00040 using DenseLinAlgPack::assert_gms_sizes; 00041 using DenseLinAlgPack::DMatrixSliceTriEle; 00042 using DenseLinAlgPack::DMatrixSliceTri; 00043 using DenseLinAlgPack::DMatrixSliceSym; 00044 using DenseLinAlgPack::assign; 00045 using BLAS_Cpp::Transp; 00046 using BLAS_Cpp::no_trans; 00047 using BLAS_Cpp::trans; 00048 using BLAS_Cpp::Uplo; 00049 using BLAS_Cpp::upper; 00050 using BLAS_Cpp::lower; 00051 00052 } // end namespace 00053 00054 // /////////////////////////////////////////////////////////////////////////////////// 00055 // Assignment Fucntions 00056 00057 namespace { 00058 00059 // implementation: gms_lhs = alpha (elementwise) 00060 inline void i_assign(DMatrixSlice* gms_lhs, value_type alpha) { 00061 for(DMatrixSlice::size_type i = 1; i <= gms_lhs->cols(); ++i) 00062 gms_lhs->col(i) = alpha; 00063 } 00064 00065 // implementaion: gms_lhs = gms_rhs. Most basic copy function for rectangular matrices. 00066 inline void i_assign_basic(DMatrixSlice* gms_lhs, const DMatrixSlice& gms_rhs 00067 , BLAS_Cpp::Transp trans_rhs) 00068 { 00069 for(DMatrixSlice::size_type i = 1; i <= gms_lhs->cols(); ++i) 00070 gms_lhs->col(i) = col(gms_rhs,trans_rhs,i); 00071 } 00072 00073 // implementaion: gms_lhs = op(gms_rhs). Checks for overlap and creates temporaries accordingly. 00074 inline void i_assign(DMatrixSlice* gms_lhs, const DMatrixSlice& gms_rhs, BLAS_Cpp::Transp trans_rhs) 00075 { 00076 switch(gms_lhs->overlap(gms_rhs)) { 00077 case DenseLinAlgPack::NO_OVERLAP: // no overlap so just perform the copy 00078 i_assign_basic(gms_lhs,gms_rhs,trans_rhs); 00079 return; 00080 case DenseLinAlgPack::SAME_MEM: 00081 if(trans_rhs == BLAS_Cpp::no_trans) return; // assignment to self, nothing to do. 00082 default: // either same memory that needs to be transposed or some overlap so just generate temp. 00083 DMatrix temp = gms_rhs; 00084 i_assign_basic(gms_lhs,temp(),trans_rhs); 00085 return; 00086 } 00087 } 00088 00089 // Copy one triangular region into another. Does not check sizes or aliasing of argument matrices. 00090 // A row of a upper triangular region corresponds to a col of a BLAS_Cpp::lower triangular region. 00091 inline void i_assign_basic(DMatrixSliceTriEle* tri_lhs, const DMatrixSliceTriEle& tri_rhs) 00092 { 00093 DMatrixSlice::size_type n = tri_lhs->gms().cols(); 00094 00095 // Access BLAS_Cpp::lower tri by col and upper tri by row 00096 BLAS_Cpp::Transp 00097 trans_lhs = (tri_lhs->uplo() == BLAS_Cpp::lower) ? BLAS_Cpp::no_trans : BLAS_Cpp::trans, 00098 trans_rhs = (tri_rhs.uplo() == BLAS_Cpp::lower) ? BLAS_Cpp::no_trans : BLAS_Cpp::trans; 00099 00100 for(int i = 1; i <= n; ++i) { // Only copy the part of the vec in tri region. 00101 col(tri_lhs->gms(),trans_lhs,i)(i,n) = col(tri_rhs.gms(),trans_rhs,i)(i,n); 00102 } 00103 } 00104 00105 } // end namespace 00106 00107 // gm_lhs = alpha (elementwise) 00108 void DenseLinAlgPack::assign(DMatrix* gm_lhs, value_type alpha) 00109 { 00110 if(!gm_lhs->rows()) gm_lhs->resize(1,1,alpha); 00111 i_assign(&(*gm_lhs)(),alpha); 00112 } 00113 00114 // gm_lhs = op(gms_rhs) 00115 void DenseLinAlgPack::assign(DMatrix* gm_lhs, const DMatrixSlice& gms_rhs, BLAS_Cpp::Transp trans_rhs) 00116 { 00117 if(gm_lhs->overlap(gms_rhs) == SAME_MEM && trans_rhs == BLAS_Cpp::no_trans) return; // assignment to self 00118 if(gm_lhs->overlap(gms_rhs) != NO_OVERLAP) { 00119 // some overlap so we must create a copy 00120 DMatrix tmp(gms_rhs); 00121 resize_gm_lhs(gm_lhs,gms_rhs.rows(),gms_rhs.cols(),trans_rhs); 00122 i_assign(&(*gm_lhs)(), tmp(), trans_rhs); 00123 } 00124 else { 00125 // no overlap so just assign 00126 resize_gm_lhs(gm_lhs,gms_rhs.rows(),gms_rhs.cols(),trans_rhs); 00127 i_assign(&(*gm_lhs)(), gms_rhs, trans_rhs); 00128 } 00129 } 00130 00131 // gms_lhs = alpha (elementwise) 00132 void DenseLinAlgPack::assign(DMatrixSlice* gms_lhs, value_type alpha) 00133 { 00134 TEST_FOR_EXCEPTION( 00135 !gms_lhs->rows(), std::length_error 00136 ,"Error, assign(gms_lhs,alpha): You can not assign Scalar to an unsized DMatrixSlice" ); 00137 i_assign(gms_lhs, alpha); 00138 } 00139 00140 // gms_lhs = op(gms_rhs) 00141 void DenseLinAlgPack::assign(DMatrixSlice* gms_lhs, const DMatrixSlice& gms_rhs, BLAS_Cpp::Transp trans_rhs) 00142 { 00143 assert_gms_lhs(*gms_lhs,gms_rhs.rows(),gms_rhs.cols(),trans_rhs); 00144 i_assign(gms_lhs, gms_rhs, trans_rhs); 00145 } 00146 00147 // tri_lhs = alpha (elementwise) 00148 void DenseLinAlgPack::assign(DMatrixSliceTriEle* tri_lhs, value_type alpha) 00149 { 00150 TEST_FOR_EXCEPTION( 00151 !tri_lhs->gms().rows(), std::length_error 00152 ,"Error, assign(tri_lhs,alpha): You can not assign a scalar to an unsized triangular DMatrixSlice" ); 00153 assert_gms_square(tri_lhs->gms()); 00154 DMatrixSlice::size_type n = tri_lhs->gms().cols(); 00155 // access BLAS_Cpp::lower tri by col and upper tri by row 00156 BLAS_Cpp::Transp 00157 trans_lhs = (tri_lhs->uplo() == BLAS_Cpp::lower) ? BLAS_Cpp::no_trans : BLAS_Cpp::trans; 00158 for(int i = 1; i <= n; ++i) 00159 col( tri_lhs->gms(), trans_lhs , i )(i,n) = alpha; 00160 } 00161 00162 // tri_lhs = tri_rhs 00163 void DenseLinAlgPack::assign(DMatrixSliceTriEle* tri_lhs, const DMatrixSliceTriEle& tri_rhs) 00164 { 00165 assert_gms_lhs(tri_lhs->gms(),tri_rhs.gms().rows(),tri_rhs.gms().cols(),BLAS_Cpp::no_trans); 00166 00167 switch(tri_lhs->gms().overlap(tri_rhs.gms())) { 00168 case SAME_MEM: 00169 if(tri_lhs->uplo() == tri_rhs.uplo()) return; // Assignment to self to exit 00170 00171 case SOME_OVERLAP: 00172 // Test for the special case where the upper tri region (above diagonal) of a 00173 // matrix is being copied into the BLAS_Cpp::lower tri region (below the diagonal) of 00174 // the same matrix or visa-versa. No temp is need in this case 00175 if(tri_lhs->uplo() != tri_rhs.uplo()) { 00176 const DMatrixSlice *up = (tri_lhs->uplo() == upper) 00177 ? &tri_lhs->gms() : &tri_rhs.gms(); 00178 const DMatrixSlice *lo = (tri_rhs.uplo() == BLAS_Cpp::lower) 00179 ? &tri_lhs->gms() : &tri_rhs.gms(); 00180 if(lo->col_ptr(1) + lo->max_rows() - 1 == up->col_ptr(1)) { // false if SAME_MEM 00181 // One triangular region is being copied into another so no temp needed. 00182 i_assign_basic(tri_lhs, tri_rhs); 00183 return; 00184 } 00185 } 00186 // Give up and copy the vs_rhs as a temp. 00187 { 00188 DMatrix temp(tri_rhs.gms()); 00189 i_assign_basic(tri_lhs, tri_ele(temp(),tri_rhs.uplo())); 00190 return; 00191 } 00192 00193 case NO_OVERLAP: // no overlap so just perform the copy 00194 i_assign_basic(tri_lhs, tri_rhs); 00195 return; 00196 } 00197 } 00198 00199 // ///////////////////////////////////////////////////////////////////////////////////////// 00200 // Element-wise Algebraic Operations 00201 00202 void DenseLinAlgPack::Mt_S(DMatrixSlice* gms_lhs, value_type alpha) 00203 { 00204 TEST_FOR_EXCEPTION( 00205 !gms_lhs->rows(), std::length_error 00206 ,"Error, Mt_S(gms_lhs,alpha): You can not scale an unsized DMatrixSlice" ); 00207 for(int j = 1; j <= gms_lhs->cols(); ++j) 00208 Vt_S(&gms_lhs->col(j), alpha); 00209 } 00210 00211 void DenseLinAlgPack::M_diagVtM( DMatrixSlice* gms_lhs, const DVectorSlice& vs_rhs 00212 , const DMatrixSlice& gms_rhs, BLAS_Cpp::Transp trans_rhs ) 00213 { 00214 Mp_M_assert_sizes(gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans 00215 , gms_rhs.rows(), gms_rhs.cols(), trans_rhs); 00216 for(DMatrixSlice::size_type j = 1; j <= gms_lhs->cols(); ++j) 00217 prod( &gms_lhs->col(j), vs_rhs, col(gms_rhs,trans_rhs,j) ); 00218 } 00219 00220 void DenseLinAlgPack::Mt_S(DMatrixSliceTriEle* tri_lhs, value_type alpha) { 00221 TEST_FOR_EXCEPTION( 00222 !tri_lhs->gms().rows(), std::length_error 00223 ,"Error, Mt_S(tri_lhs,alpha): You can not scale an unsized triangular DMatrixSlice" ); 00224 BLAS_Cpp::Transp 00225 trans_lhs = (tri_lhs->uplo() == BLAS_Cpp::lower) ? BLAS_Cpp::no_trans : BLAS_Cpp::trans; 00226 00227 DMatrixSlice::size_type n = tri_lhs->gms().cols(); 00228 for(DMatrixSlice::size_type j = 1; j <= n; ++j) 00229 Vt_S( &col( tri_lhs->gms(), trans_lhs , j )(j,n), alpha ); 00230 } 00231 00232 void DenseLinAlgPack::Mp_StM(DMatrixSliceTriEle* tri_lhs, value_type alpha, const DMatrixSliceTriEle& tri_ele_rhs) 00233 { 00234 Mp_M_assert_sizes(tri_lhs->gms().rows(), tri_lhs->gms().cols(), BLAS_Cpp::no_trans 00235 , tri_ele_rhs.gms().rows(), tri_ele_rhs.gms().cols(), BLAS_Cpp::no_trans); 00236 00237 BLAS_Cpp::Transp 00238 trans_lhs = (tri_lhs->uplo() == BLAS_Cpp::lower) ? BLAS_Cpp::no_trans : BLAS_Cpp::trans, 00239 trans_rhs = (tri_ele_rhs.uplo() == BLAS_Cpp::lower) ? BLAS_Cpp::no_trans : BLAS_Cpp::trans; 00240 00241 DMatrixSlice::size_type n = tri_lhs->gms().cols(); 00242 for(DMatrixSlice::size_type j = 1; j <= n; ++j) 00243 Vp_StV( &col(tri_lhs->gms(),trans_lhs,j)(j,n), alpha, col(tri_ele_rhs.gms(),trans_rhs,j)(j,n) ); 00244 } 00245 00246 // LinAlgOpPack Compatable (compile time polymorphism) 00247 00248 void DenseLinAlgPack::Mp_StM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSlice& gms_rhs 00249 , BLAS_Cpp::Transp trans_rhs) 00250 { 00251 Mp_M_assert_sizes(gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans 00252 , gms_rhs.rows(), gms_rhs.cols(), trans_rhs); 00253 for(DMatrixSlice::size_type j = 1; j <= gms_lhs->cols(); ++j) 00254 Vp_StV( &gms_lhs->col(j), alpha, col(gms_rhs,trans_rhs,j) ); 00255 } 00256 00257 namespace { 00258 // Implement upper and lower copies for a symmetric matrix 00259 // Does not check sizes. 00260 // inline 00261 void i_Mp_StSM( DenseLinAlgPack::DMatrixSlice* gms_lhs, DenseLinAlgPack::value_type alpha 00262 , const DenseLinAlgPack::DMatrixSliceTriEle& tri_ele_rhs) 00263 { 00264 using DenseLinAlgPack::Mp_StM; 00265 using DenseLinAlgPack::tri_ele; 00266 const DenseLinAlgPack::size_type n = gms_lhs->rows(); // same as cols 00267 Mp_StM( const_cast<DenseLinAlgPack::DMatrixSliceTriEle*>( 00268 &tri_ele((*gms_lhs)(2,n,1,n-1), BLAS_Cpp::lower ) ) 00269 , alpha, tri_ele_rhs ); 00270 Mp_StM( const_cast<DenseLinAlgPack::DMatrixSliceTriEle*>( 00271 &tri_ele((*gms_lhs)(1,n-1,2,n), BLAS_Cpp::upper ) ) 00272 , alpha, tri_ele_rhs ); 00273 } 00274 } // end namespace 00275 00276 void DenseLinAlgPack::Mp_StM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceSym& sym_rhs 00277 , BLAS_Cpp::Transp trans_rhs ) 00278 { 00279 Mp_M_assert_sizes(gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans 00280 , sym_rhs.rows(), sym_rhs.cols(), trans_rhs); 00281 Vp_StV( &gms_lhs->diag(), alpha, sym_rhs.gms().diag() ); 00282 const size_type n = gms_lhs->rows(); // same as cols 00283 switch(sym_rhs.uplo()) { 00284 case BLAS_Cpp::lower: 00285 i_Mp_StSM( gms_lhs, alpha, tri_ele(sym_rhs.gms()(2,n,1,n-1),BLAS_Cpp::lower) ); 00286 break; 00287 case BLAS_Cpp::upper: 00288 i_Mp_StSM( gms_lhs, alpha, tri_ele(sym_rhs.gms()(1,n-1,2,n),BLAS_Cpp::upper) ); 00289 break; 00290 } 00291 } 00292 00293 void DenseLinAlgPack::Mp_StM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceTri& tri_rhs 00294 , BLAS_Cpp::Transp trans_rhs) 00295 { 00296 using BLAS_Cpp::trans; 00297 using BLAS_Cpp::no_trans; 00298 using BLAS_Cpp::lower; 00299 using BLAS_Cpp::upper; 00300 Mp_M_assert_sizes(gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans 00301 , tri_rhs.rows(), tri_rhs.cols(), trans_rhs ); 00302 // diagonal 00303 if( tri_rhs.diag() == BLAS_Cpp::nonunit ) 00304 Vp_StV( &gms_lhs->diag(), alpha, tri_rhs.gms().diag() ); 00305 else 00306 Vp_S( &gms_lhs->diag(), alpha ); 00307 // upper or lower triangle 00308 const size_type n = gms_lhs->rows(); // same as cols 00309 if( n == 1 ) 00310 return; // There is not lower or upper triangular region 00311 const DMatrixSliceTriEle 00312 tri = ( tri_rhs.uplo() == lower 00313 ? tri_ele(tri_rhs.gms()(2,n,1,n-1),lower) 00314 : tri_ele(tri_rhs.gms()(1,n-1,2,n),upper) ); 00315 const BLAS_Cpp::Uplo 00316 as_uplo = ( ( tri_rhs.uplo() == lower && trans_rhs == no_trans ) 00317 || ( tri_rhs.uplo() == upper && trans_rhs == trans ) 00318 ? lower : upper ); 00319 switch(as_uplo) { 00320 case lower: 00321 Mp_StM( const_cast<DenseLinAlgPack::DMatrixSliceTriEle*>( 00322 &tri_ele((*gms_lhs)(2,n,1,n-1),lower) ) 00323 , alpha, tri ); 00324 break; 00325 case upper: 00326 Mp_StM( const_cast<DenseLinAlgPack::DMatrixSliceTriEle*>( 00327 &tri_ele((*gms_lhs)(1,n-1,2,n),upper) ) 00328 , alpha, tri ); 00329 break; 00330 } 00331 } 00332 00333 // ///////////////////////////////////////////////////////////////////////////////////// 00334 // Level-2 BLAS (vector-matrtix) Liner Algebra Operations 00335 00336 void DenseLinAlgPack::Vp_StMtV(DVectorSlice* vs_lhs, value_type alpha, const DMatrixSlice& gms_rhs1 00337 , BLAS_Cpp::Transp trans_rhs1, const DVectorSlice& vs_rhs2, value_type beta) 00338 { 00339 Vp_MtV_assert_sizes(vs_lhs->dim(), gms_rhs1.rows() , gms_rhs1.cols(), trans_rhs1 00340 , vs_rhs2.dim()); 00341 BLAS_Cpp::gemv(trans_rhs1,gms_rhs1.rows(),gms_rhs1.cols(),alpha,gms_rhs1.col_ptr(1) 00342 ,gms_rhs1.max_rows(), vs_rhs2.raw_ptr(),vs_rhs2.stride(),beta,vs_lhs->raw_ptr() 00343 ,vs_lhs->stride()); 00344 } 00345 00346 void DenseLinAlgPack::Vp_StMtV(DVectorSlice* vs_lhs, value_type alpha, const DMatrixSliceSym& sym_rhs1 00347 , BLAS_Cpp::Transp trans_rhs1, const DVectorSlice& vs_rhs2, value_type beta) 00348 { 00349 assert_gms_square(sym_rhs1.gms()); 00350 Vp_MtV_assert_sizes(vs_lhs->dim(), sym_rhs1.gms().rows(), sym_rhs1.gms().cols(), trans_rhs1 00351 , vs_rhs2.dim()); 00352 BLAS_Cpp::symv(sym_rhs1.uplo(),sym_rhs1.gms().rows(),alpha,sym_rhs1.gms().col_ptr(1) 00353 ,sym_rhs1.gms().max_rows(),vs_rhs2.raw_ptr(),vs_rhs2.stride(),beta 00354 ,vs_lhs->raw_ptr(),vs_lhs->stride()); 00355 } 00356 00357 void DenseLinAlgPack::V_MtV(DVector* v_lhs, const DMatrixSliceTri& tri_rhs1, BLAS_Cpp::Transp trans_rhs1 00358 , const DVectorSlice& vs_rhs2) 00359 { 00360 assert_gms_square(tri_rhs1.gms()); 00361 MtV_assert_sizes(tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1, vs_rhs2.dim()); 00362 v_lhs->resize(tri_rhs1.gms().rows()); 00363 (*v_lhs) = vs_rhs2; 00364 BLAS_Cpp::trmv(tri_rhs1.uplo(),trans_rhs1,tri_rhs1.diag(),tri_rhs1.gms().rows() 00365 ,tri_rhs1.gms().col_ptr(1),tri_rhs1.gms().max_rows(), v_lhs->raw_ptr(),1); 00366 } 00367 00368 void DenseLinAlgPack::V_MtV(DVectorSlice* vs_lhs, const DMatrixSliceTri& tri_rhs1, BLAS_Cpp::Transp trans_rhs1 00369 , const DVectorSlice& vs_rhs2) 00370 { 00371 assert_gms_square(tri_rhs1.gms()); 00372 MtV_assert_sizes(tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1, vs_rhs2.dim()); 00373 Vp_V_assert_sizes( vs_lhs->dim(), tri_rhs1.gms().rows() ); 00374 (*vs_lhs) = vs_rhs2; 00375 BLAS_Cpp::trmv(tri_rhs1.uplo(),trans_rhs1,tri_rhs1.diag(),tri_rhs1.gms().rows() 00376 ,tri_rhs1.gms().col_ptr(1),tri_rhs1.gms().max_rows(), vs_lhs->raw_ptr(),vs_lhs->stride()); 00377 } 00378 00379 void DenseLinAlgPack::Vp_StMtV(DVectorSlice* vs_lhs, value_type alpha, const DMatrixSliceTri& tri_rhs1 00380 , BLAS_Cpp::Transp trans_rhs1, const DVectorSlice& vs_rhs2, value_type beta) 00381 { 00382 Vp_MtV_assert_sizes(vs_lhs->dim(),tri_rhs1.gms().rows(),tri_rhs1.gms().cols() 00383 ,trans_rhs1,vs_rhs2.dim() ); 00384 00385 // If op(gms_rhs2) == gms_lhs and beta = 0.0 then this is a direct call to the BLAS. 00386 if( vs_lhs->overlap(vs_rhs2) == SAME_MEM && beta == 0.0 ) 00387 { 00388 V_MtV(vs_lhs, tri_rhs1, trans_rhs1, vs_rhs2); 00389 if(alpha != 1.0) Vt_S(vs_lhs,alpha); 00390 } 00391 else { 00392 // This is something else so the alias problem is not handled. 00393 if(beta != 1.0) Vt_S(vs_lhs,beta); 00394 DVector tmp; 00395 V_MtV(&tmp, tri_rhs1, trans_rhs1, vs_rhs2); 00396 Vp_StV(vs_lhs,alpha,tmp()); 00397 } 00398 } 00399 00400 void DenseLinAlgPack::V_InvMtV(DVector* v_lhs, const DMatrixSliceTri& tri_rhs1, BLAS_Cpp::Transp trans_rhs1 00401 , const DVectorSlice& vs_rhs2) 00402 { 00403 assert_gms_square(tri_rhs1.gms()); 00404 MtV_assert_sizes(tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1, vs_rhs2.dim()); 00405 v_lhs->resize(tri_rhs1.gms().rows()); 00406 (*v_lhs) = vs_rhs2; 00407 BLAS_Cpp::trsv(tri_rhs1.uplo(),trans_rhs1,tri_rhs1.diag(),tri_rhs1.gms().rows() 00408 ,tri_rhs1.gms().col_ptr(1),tri_rhs1.gms().max_rows(), v_lhs->raw_ptr(),1); 00409 } 00410 00411 void DenseLinAlgPack::V_InvMtV(DVectorSlice* vs_lhs, const DMatrixSliceTri& tri_rhs1, BLAS_Cpp::Transp trans_rhs1 00412 , const DVectorSlice& vs_rhs2) 00413 { 00414 assert_gms_square(tri_rhs1.gms()); 00415 MtV_assert_sizes(tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1, vs_rhs2.dim()); 00416 Vp_V_assert_sizes( vs_lhs->dim(), tri_rhs1.gms().rows() ); 00417 (*vs_lhs) = vs_rhs2; 00418 BLAS_Cpp::trsv(tri_rhs1.uplo(),trans_rhs1,tri_rhs1.diag(),tri_rhs1.gms().rows() 00419 ,tri_rhs1.gms().col_ptr(1),tri_rhs1.gms().max_rows(), vs_lhs->raw_ptr(),vs_lhs->stride()); 00420 } 00421 00422 00423 void DenseLinAlgPack::ger( 00424 value_type alpha, const DVectorSlice& vs_rhs1, const DVectorSlice& vs_rhs2 00425 , DMatrixSlice* gms_lhs ) 00426 { 00427 Vp_MtV_assert_sizes( vs_rhs2.dim(), gms_lhs->rows(), gms_lhs->cols() 00428 , BLAS_Cpp::no_trans, vs_rhs1.dim() ); 00429 BLAS_Cpp::ger( 00430 gms_lhs->rows(), gms_lhs->cols(), alpha 00431 ,vs_rhs1.raw_ptr(), vs_rhs1.stride() 00432 ,vs_rhs2.raw_ptr(), vs_rhs2.stride() 00433 ,gms_lhs->col_ptr(1), gms_lhs->max_rows() ); 00434 } 00435 00436 void DenseLinAlgPack::syr(value_type alpha, const DVectorSlice& vs_rhs, DMatrixSliceSym* sym_lhs) 00437 { 00438 assert_gms_square(sym_lhs->gms()); 00439 MtV_assert_sizes( sym_lhs->gms().rows(), sym_lhs->gms().cols() 00440 , BLAS_Cpp::no_trans, vs_rhs.dim() ); 00441 BLAS_Cpp::syr( sym_lhs->uplo(), vs_rhs.dim(), alpha, vs_rhs.raw_ptr() 00442 , vs_rhs.stride(), sym_lhs->gms().col_ptr(1), sym_lhs->gms().max_rows() ); 00443 } 00444 00445 void DenseLinAlgPack::syr2(value_type alpha, const DVectorSlice& vs_rhs1, const DVectorSlice& vs_rhs2 00446 , DMatrixSliceSym* sym_lhs) 00447 { 00448 assert_gms_square(sym_lhs->gms()); 00449 VopV_assert_sizes( vs_rhs1.dim(), vs_rhs2.dim() ); 00450 MtV_assert_sizes( sym_lhs->gms().rows(), sym_lhs->gms().cols() 00451 , BLAS_Cpp::no_trans, vs_rhs1.dim() ); 00452 BLAS_Cpp::syr2( sym_lhs->uplo(), vs_rhs1.dim(), alpha, vs_rhs1.raw_ptr() 00453 , vs_rhs1.stride(), vs_rhs2.raw_ptr(), vs_rhs2.stride() 00454 , sym_lhs->gms().col_ptr(1), sym_lhs->gms().max_rows() ); 00455 } 00456 00457 // ////////////////////////////////////////////////////////////////////////////////////////// 00458 // Level-3 BLAS (matrix-matrix) Linear Algebra Operations 00459 00460 // //////////////////////////// 00461 // Rectangular Matrices 00462 00463 void DenseLinAlgPack::Mp_StMtM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSlice& gms_rhs1 00464 , BLAS_Cpp::Transp trans_rhs1, const DMatrixSlice& gms_rhs2 00465 , BLAS_Cpp::Transp trans_rhs2, value_type beta) 00466 { 00467 Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans 00468 , gms_rhs1.rows(), gms_rhs1.cols(), trans_rhs1 00469 , gms_rhs2.rows(), gms_rhs2.cols(), trans_rhs2); 00470 BLAS_Cpp::gemm(trans_rhs1,trans_rhs2,gms_lhs->rows(),gms_lhs->cols() 00471 ,cols(gms_rhs1.rows(),gms_rhs1.cols(),trans_rhs1) 00472 ,alpha,gms_rhs1.col_ptr(1),gms_rhs1.max_rows() 00473 ,gms_rhs2.col_ptr(1),gms_rhs2.max_rows() 00474 ,beta,gms_lhs->col_ptr(1),gms_lhs->max_rows() ); 00475 } 00476 00477 // //////////////////////////// 00478 // Symmetric Matrices 00479 00480 namespace { 00481 00482 // implementation: gms_lhs = alpha * sym_rhs * gms_rhs + beta * gms_lhs (left) (BLAS xSYMM). 00483 // or gms_lhs = alpha * gms_rhs * sym_rhs + beta * gms_lhs (right). 00484 // does not check sizes. 00485 void i_symm(BLAS_Cpp::Side side, value_type alpha, const DMatrixSliceSym& sym_rhs 00486 , const DMatrixSlice& gms_rhs, value_type beta, DMatrixSlice* gms_lhs) 00487 { 00488 BLAS_Cpp::symm(side,sym_rhs.uplo(),gms_lhs->rows(),gms_lhs->cols() 00489 ,alpha,sym_rhs.gms().col_ptr(1),sym_rhs.gms().max_rows() 00490 ,gms_rhs.col_ptr(1),gms_rhs.max_rows() 00491 ,beta,gms_lhs->col_ptr(1),gms_lhs->max_rows() ); 00492 } 00493 00494 } // end namespace 00495 00496 void DenseLinAlgPack::Mp_StMtM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceSym& sym_rhs1 00497 , BLAS_Cpp::Transp trans_rhs1, const DMatrixSlice& gms_rhs2 00498 , BLAS_Cpp::Transp trans_rhs2, value_type beta) 00499 { 00500 Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans 00501 , sym_rhs1.gms().rows(), sym_rhs1.gms().cols(), trans_rhs1 00502 , gms_rhs2.rows(), gms_rhs2.cols(), trans_rhs2); 00503 if(trans_rhs2 == BLAS_Cpp::no_trans) { 00504 i_symm(BLAS_Cpp::left,alpha,sym_rhs1,gms_rhs2,beta,gms_lhs); 00505 } 00506 else { 00507 // must make a temporary copy to call the BLAS 00508 DMatrix tmp; 00509 assign(&tmp,gms_rhs2,trans_rhs2); 00510 i_symm(BLAS_Cpp::left,alpha,sym_rhs1,tmp(),beta,gms_lhs); 00511 } 00512 } 00513 00514 void DenseLinAlgPack::Mp_StMtM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSlice& gms_rhs1 00515 , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceSym& sym_rhs2 00516 , BLAS_Cpp::Transp trans_rhs2, value_type beta) 00517 { 00518 Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans 00519 , gms_rhs1.rows(), gms_rhs1.cols(), trans_rhs1 00520 , sym_rhs2.gms().rows(), sym_rhs2.gms().cols(), trans_rhs2 ); 00521 if(trans_rhs1 == BLAS_Cpp::no_trans) { 00522 i_symm(BLAS_Cpp::right,alpha,sym_rhs2,gms_rhs1,beta,gms_lhs); 00523 } 00524 else { 00525 // must make a temporary copy to call the BLAS 00526 DMatrix tmp; 00527 assign(&tmp,gms_rhs1,trans_rhs1); 00528 i_symm(BLAS_Cpp::right,alpha,sym_rhs2,tmp(),beta,gms_lhs); 00529 } 00530 } 00531 00532 void DenseLinAlgPack::syrk(BLAS_Cpp::Transp trans, value_type alpha, const DMatrixSlice& gms_rhs 00533 , value_type beta, DMatrixSliceSym* sym_lhs) 00534 { 00535 Mp_MtM_assert_sizes( sym_lhs->gms().rows(), sym_lhs->gms().cols(), BLAS_Cpp::no_trans 00536 , gms_rhs.rows(), gms_rhs.cols(), trans 00537 , gms_rhs.rows(), gms_rhs.cols(), trans_not(trans) ); 00538 BLAS_Cpp::syrk(sym_lhs->uplo(),trans,sym_lhs->gms().rows() 00539 ,cols(gms_rhs.rows(), gms_rhs.cols(), trans),alpha 00540 ,gms_rhs.col_ptr(1),gms_rhs.max_rows(),beta 00541 ,sym_lhs->gms().col_ptr(1),sym_lhs->gms().max_rows() ); 00542 } 00543 00544 void DenseLinAlgPack::syr2k(BLAS_Cpp::Transp trans,value_type alpha, const DMatrixSlice& gms_rhs1 00545 , const DMatrixSlice& gms_rhs2, value_type beta, DMatrixSliceSym* sym_lhs) 00546 { 00547 Mp_MtM_assert_sizes( sym_lhs->gms().rows(), sym_lhs->gms().cols(), BLAS_Cpp::no_trans 00548 , gms_rhs1.rows(), gms_rhs1.cols(), trans 00549 , gms_rhs2.rows(), gms_rhs2.cols(), trans_not(trans) ); 00550 BLAS_Cpp::syr2k(sym_lhs->uplo(),trans,sym_lhs->gms().rows() 00551 ,cols(gms_rhs1.rows(), gms_rhs1.cols(), trans),alpha 00552 ,gms_rhs1.col_ptr(1),gms_rhs1.max_rows() 00553 ,gms_rhs2.col_ptr(1),gms_rhs2.max_rows(),beta 00554 ,sym_lhs->gms().col_ptr(1),sym_lhs->gms().max_rows() ); 00555 } 00556 00557 // //////////////////////////// 00558 // Triangular Matrices 00559 00560 // ToDo: Finish the definitions below. 00561 00562 namespace { 00563 00564 // implementation: gms_lhs = alpha * op(tri_rhs) * gms_lhs (left) (BLAS xTRMM). 00565 // or gms_lhs = alpha * gms_rhs * op(tri_rhs) (right). 00566 // does not check sizes. 00567 inline 00568 void i_trmm(BLAS_Cpp::Side side, BLAS_Cpp::Transp trans, value_type alpha, const DMatrixSliceTri& tri_rhs 00569 , DMatrixSlice* gms_lhs) 00570 { 00571 BLAS_Cpp::trmm(side,tri_rhs.uplo(),trans,tri_rhs.diag(),gms_lhs->rows(),gms_lhs->cols() 00572 ,alpha,tri_rhs.gms().col_ptr(1),tri_rhs.gms().max_rows() 00573 ,gms_lhs->col_ptr(1),gms_lhs->max_rows() ); 00574 } 00575 00576 // implementation: gms_lhs = alpha * op(tri_rhs) * op(gms_rhs) (left) 00577 // or gms_lhs = alpha * op(gms_rhs) * op(tri_rhs) (right) 00578 // Takes care of temporaries but does not check sizes. 00579 inline 00580 void i_trmm_alt( BLAS_Cpp::Side side, value_type alpha, const DMatrixSliceTri& tri_rhs 00581 , BLAS_Cpp::Transp trans_tri_rhs, const DMatrixSlice& gms_rhs 00582 , BLAS_Cpp::Transp trans_gms_rhs, DMatrixSlice* gms_lhs ) 00583 { 00584 assign(gms_lhs,gms_rhs,trans_gms_rhs); 00585 i_trmm( side, trans_tri_rhs, alpha, tri_rhs, gms_lhs ); 00586 } 00587 00588 // implementation: gms_lhs = alpha * inv(op(tri_rhs)) * gms_lhs (left) (BLAS xTRSM). 00589 // or gms_lhs = alpha * gms_rhs * inv(op(tri_rhs)) (right). 00590 // does not check sizes. 00591 inline 00592 void i_trsm(BLAS_Cpp::Side side, BLAS_Cpp::Transp trans, value_type alpha, const DMatrixSliceTri& tri_rhs 00593 , DMatrixSlice* gms_lhs) 00594 { 00595 BLAS_Cpp::trsm(side,tri_rhs.uplo(),trans,tri_rhs.diag(),gms_lhs->rows(),gms_lhs->cols() 00596 ,alpha,tri_rhs.gms().col_ptr(1),tri_rhs.gms().max_rows() 00597 ,gms_lhs->col_ptr(1),gms_lhs->max_rows() ); 00598 } 00599 00600 // implementation: gms_lhs = alpha * inv(op(tri_rhs)) * op(gms_rhs) (left) 00601 // or gms_lhs = alpha * op(gms_rhs) * inv(op(tri_rhs)) (right) 00602 // Takes care of temporaries but does not check sizes. 00603 inline 00604 void i_trsm_alt(BLAS_Cpp::Side side, value_type alpha, const DMatrixSliceTri& tri_rhs 00605 , BLAS_Cpp::Transp trans_tri_rhs, const DMatrixSlice& gms_rhs, BLAS_Cpp::Transp trans_gms_rhs 00606 , DMatrixSlice* gms_lhs) 00607 { 00608 assign(gms_lhs,gms_rhs,trans_gms_rhs); 00609 i_trsm( side, trans_tri_rhs, alpha, tri_rhs, gms_lhs ); 00610 } 00611 00612 } // end namespace 00613 00614 void DenseLinAlgPack::M_StMtM(DMatrix* gm_lhs, value_type alpha, const DMatrixSliceTri& tri_rhs1 00615 , BLAS_Cpp::Transp trans_rhs1, const DMatrixSlice& gms_rhs2 00616 , BLAS_Cpp::Transp trans_rhs2) 00617 { 00618 MtM_assert_sizes( tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1 00619 , gms_rhs2.rows(), gms_rhs2.cols(), trans_rhs2 ); 00620 gm_lhs->resize( rows(tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1) 00621 , cols(gms_rhs2.rows(), gms_rhs2.cols(), trans_rhs2) ); 00622 i_trmm_alt(BLAS_Cpp::left,alpha,tri_rhs1,trans_rhs1,gms_rhs2,trans_rhs2,&(*gm_lhs)()); 00623 } 00624 00625 void DenseLinAlgPack::M_StMtM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceTri& tri_rhs1 00626 , BLAS_Cpp::Transp trans_rhs1, const DMatrixSlice& gms_rhs2 00627 , BLAS_Cpp::Transp trans_rhs2) 00628 { 00629 Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans 00630 , tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1 00631 , gms_rhs2.rows(), gms_rhs2.cols(), trans_rhs2 ); 00632 i_trmm_alt(BLAS_Cpp::left,alpha,tri_rhs1,trans_rhs1,gms_rhs2,trans_rhs2,gms_lhs); 00633 } 00634 00635 void DenseLinAlgPack::M_StMtM(DMatrix* gm_lhs, value_type alpha, const DMatrixSlice& gms_rhs1 00636 , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceTri& tri_rhs2 00637 , BLAS_Cpp::Transp trans_rhs2) 00638 { 00639 MtM_assert_sizes( gms_rhs1.rows(), gms_rhs1.cols(), trans_rhs1 00640 , tri_rhs2.gms().rows(), tri_rhs2.gms().cols(), trans_rhs2 ); 00641 gm_lhs->resize( rows(gms_rhs1.rows(), gms_rhs1.cols(), trans_rhs1) 00642 , cols(tri_rhs2.gms().rows(), tri_rhs2.gms().cols(), trans_rhs2) ); 00643 i_trmm_alt(BLAS_Cpp::right,alpha,tri_rhs2,trans_rhs2,gms_rhs1,trans_rhs1,&(*gm_lhs)()); 00644 } 00645 00646 void DenseLinAlgPack::M_StMtM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSlice& gms_rhs1 00647 , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceTri& tri_rhs2 00648 , BLAS_Cpp::Transp trans_rhs2) 00649 { 00650 Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans 00651 , gms_rhs1.rows(), gms_rhs1.cols(), trans_rhs1 00652 , tri_rhs2.gms().rows(), tri_rhs2.gms().cols(), trans_rhs2 ); 00653 i_trmm_alt(BLAS_Cpp::right,alpha,tri_rhs2,trans_rhs2,gms_rhs1,trans_rhs1,gms_lhs); 00654 } 00655 00656 void DenseLinAlgPack::Mp_StMtM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceTri& tri_rhs1 00657 , BLAS_Cpp::Transp trans_rhs1, const DMatrixSlice& gms_rhs2 00658 , BLAS_Cpp::Transp trans_rhs2, value_type beta) 00659 { 00660 // If op(gms_rhs2) == gms_lhs and beta = 0.0 then this is a direct call to the BLAS. 00661 if( gms_lhs->overlap(gms_rhs2) == SAME_MEM && trans_rhs2 == BLAS_Cpp::no_trans 00662 && beta == 0.0 ) 00663 { 00664 i_trmm(BLAS_Cpp::left,trans_rhs1,alpha,tri_rhs1,gms_lhs); 00665 } 00666 else { 00667 // This is something else so the alias problem is not handled. 00668 if(beta != 1.0) Mt_S(gms_lhs,beta); 00669 DMatrix tmp; 00670 M_StMtM(&tmp,alpha,tri_rhs1,trans_rhs1,gms_rhs2,trans_rhs2); 00671 Mp_StM(gms_lhs,1.0,tmp(),BLAS_Cpp::no_trans); 00672 } 00673 } 00674 00675 void DenseLinAlgPack::Mp_StMtM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSlice& gms_rhs1 00676 , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceTri& tri_rhs2 00677 , BLAS_Cpp::Transp trans_rhs2, value_type beta) 00678 { 00679 // If op(gms_rhs1) == gms_lhs and beta = 0.0 then this is a direct call to the BLAS. 00680 if( gms_lhs->overlap(gms_rhs1) == SAME_MEM && trans_rhs1 == BLAS_Cpp::no_trans 00681 && beta == 0.0 ) 00682 { 00683 i_trmm(BLAS_Cpp::right,trans_rhs2,alpha,tri_rhs2,gms_lhs); 00684 } 00685 else { 00686 // This is something else so the alias problem is not handled. 00687 if(beta != 1.0) Mt_S(gms_lhs,beta); 00688 DMatrix tmp; 00689 M_StMtM(&tmp,alpha,gms_rhs1,trans_rhs1,tri_rhs2,trans_rhs2); 00690 Mp_StM(gms_lhs,1.0,tmp(),BLAS_Cpp::no_trans); 00691 } 00692 } 00693 00694 void DenseLinAlgPack::M_StInvMtM(DMatrix* gm_lhs, value_type alpha, const DMatrixSliceTri& tri_rhs1 00695 , BLAS_Cpp::Transp trans_rhs1, const DMatrixSlice& gms_rhs2 00696 , BLAS_Cpp::Transp trans_rhs2) 00697 { 00698 MtM_assert_sizes( tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1 00699 , gms_rhs2.rows(), gms_rhs2.cols(), trans_rhs2 ); 00700 gm_lhs->resize( rows(tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1) 00701 , cols(gms_rhs2.rows(), gms_rhs2.cols(), trans_rhs2) ); 00702 i_trsm_alt(BLAS_Cpp::left,alpha,tri_rhs1,trans_rhs1,gms_rhs2,trans_rhs2,&(*gm_lhs)()); 00703 } 00704 00705 void DenseLinAlgPack::M_StInvMtM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceTri& tri_rhs1 00706 , BLAS_Cpp::Transp trans_rhs1, const DMatrixSlice& gms_rhs2 00707 , BLAS_Cpp::Transp trans_rhs2) 00708 { 00709 Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans 00710 , tri_rhs1.gms().rows(), tri_rhs1.gms().cols(), trans_rhs1 00711 , gms_rhs2.rows(), gms_rhs2.cols(), trans_rhs2 ); 00712 i_trsm_alt(BLAS_Cpp::left,alpha,tri_rhs1,trans_rhs1,gms_rhs2,trans_rhs2,gms_lhs); 00713 } 00714 00715 void DenseLinAlgPack::M_StMtInvM(DMatrix* gm_lhs, value_type alpha, const DMatrixSlice& gms_rhs1 00716 , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceTri& tri_rhs2 00717 , BLAS_Cpp::Transp trans_rhs2) 00718 { 00719 MtM_assert_sizes( gms_rhs1.rows(), gms_rhs1.cols(), trans_rhs1 00720 , tri_rhs2.gms().rows(), tri_rhs2.gms().cols(), trans_rhs2 ); 00721 gm_lhs->resize( rows(gms_rhs1.rows(), gms_rhs1.cols(), trans_rhs1) 00722 , cols(tri_rhs2.gms().rows(), tri_rhs2.gms().cols(), trans_rhs2) ); 00723 i_trsm_alt(BLAS_Cpp::right,alpha,tri_rhs2,trans_rhs2,gms_rhs1,trans_rhs1,&(*gm_lhs)()); 00724 } 00725 00726 void DenseLinAlgPack::M_StMtInvM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSlice& gms_rhs1 00727 , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceTri& tri_rhs2 00728 , BLAS_Cpp::Transp trans_rhs2) 00729 { 00730 Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans 00731 , gms_rhs1.rows(), gms_rhs1.cols(), trans_rhs1 00732 , tri_rhs2.gms().rows(), tri_rhs2.gms().cols(), trans_rhs2 ); 00733 i_trsm_alt(BLAS_Cpp::right,alpha,tri_rhs2,trans_rhs2,gms_rhs1,trans_rhs1,gms_lhs); 00734 }
1.7.4