|
AbstractLinAlgPack: C++ Interfaces For Vectors, Matrices And Related Linear Algebra Objects Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Moocho: Multi-functional Object-Oriented arCHitecture for Optimization 00005 // Copyright (2003) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 // @HEADER 00028 00029 #ifndef COO_MATRIX_TMPL_OP_DEF_H 00030 #define COO_MATRIX_TMPL_OP_DEF_H 00031 00032 #include "AbstractLinAlgPack_COOMatrixTmplOpDecl.hpp" 00033 00034 #include "DenseLinAlgPack_DMatrixClass.hpp" 00035 #include "DenseLinAlgPack_DVectorOp.hpp" 00036 #include "DenseLinAlgPack_AssertOp.hpp" 00037 00038 namespace AbstractLinAlgPack { 00039 00040 using BLAS_Cpp::trans_not; 00041 00042 using DenseLinAlgPack::Mp_M_assert_sizes; 00043 using DenseLinAlgPack::Vp_MtV_assert_sizes; 00044 using DenseLinAlgPack::Mp_MtM_assert_sizes; 00045 00046 // gms_lhs += alpha * coom_rhs (time = O(coom_rhs.nz()), space = O(1)) 00047 template<class T_COOM> 00048 void Mp_StCOOM(DMatrixSlice* gms_lhs, value_type alpha, const T_COOM& coom_rhs 00049 , BLAS_Cpp::Transp trans_rhs) 00050 { 00051 Mp_M_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans 00052 , coom_rhs.rows(), coom_rhs.cols(), trans_rhs ); 00053 typename T_COOM::difference_type 00054 i_o = coom_rhs.row_offset(), 00055 j_o = coom_rhs.col_offset(); 00056 if(trans_rhs == BLAS_Cpp::no_trans) 00057 for(typename T_COOM::const_iterator itr = coom_rhs.begin(); itr != coom_rhs.end(); ++itr) 00058 (*gms_lhs)(itr->row_i()+i_o,itr->col_j()+j_o) += alpha * itr->value(); 00059 else 00060 for(typename T_COOM::const_iterator itr = coom_rhs.begin(); itr != coom_rhs.end(); ++itr) 00061 (*gms_lhs)(itr->col_j()+j_o,itr->row_i()+i_o) += alpha * itr->value(); 00062 } 00063 00064 // vs_lhs += alpha * coom_rhs1 * vs_rhs2 (BLAS xGEMV) (time = O(coom_rhs.nz()), space = O(1)) 00065 template<class T_COOM> 00066 void Vp_StCOOMtV(DVectorSlice* vs_lhs, value_type alpha, const T_COOM& coom_rhs1 00067 , BLAS_Cpp::Transp trans_rhs1, const DVectorSlice& vs_rhs2) 00068 { 00069 Vp_MtV_assert_sizes( vs_lhs->dim(), coom_rhs1.rows(), coom_rhs1.cols(), trans_rhs1, vs_rhs2.dim() ); 00070 typename T_COOM::difference_type 00071 i_o = coom_rhs1.row_offset(), 00072 j_o = coom_rhs1.col_offset(); 00073 if(trans_rhs1 == BLAS_Cpp::no_trans) 00074 for(typename T_COOM::const_iterator itr = coom_rhs1.begin(); itr != coom_rhs1.end(); ++itr) 00075 (*vs_lhs)(itr->row_i()+i_o) += alpha * itr->value() * vs_rhs2(itr->col_j()+j_o); 00076 else 00077 for(typename T_COOM::const_iterator itr = coom_rhs1.begin(); itr != coom_rhs1.end(); ++itr) 00078 (*vs_lhs)(itr->col_j()+j_o) += alpha * itr->value() * vs_rhs2(itr->row_i()+i_o); 00079 } 00080 00081 namespace UtilityPack { 00082 00083 // op(gms_lhs) += alpha * op(gms_rhs1) * op(coom_rhs2) (BLAS xGEMM) 00084 template<class T_COOM> 00085 void imp_Mp_StMtCOOM(DMatrixSlice& gms_lhs, BLAS_Cpp::Transp trans_lhs, value_type alpha 00086 , const DMatrixSlice& gms_rhs1, BLAS_Cpp::Transp trans_rhs1 00087 , const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2 ); 00088 00089 } // end namespace UtilityPack 00090 00091 00092 // gms_lhs += alpha * op(coom_rhs1) * op(gms_rhs2) (right) (BLAS xGEMM) 00093 template<class T_COOM> 00094 void Mp_StCOOMtM(DMatrixSlice* gms_lhs, value_type alpha, const T_COOM& coom_rhs1 00095 , BLAS_Cpp::Transp trans_rhs1, const DMatrixSlice& gms_rhs2, BLAS_Cpp::Transp trans_rhs2) 00096 { 00097 Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans 00098 , coom_rhs1.rows(), coom_rhs1.cols(), trans_rhs1 00099 , gms_rhs2.rows() ,gms_rhs2.cols(), trans_rhs2 ); 00100 UtilityPack::imp_Mp_StMtCOOM( gms_lhs, BLAS_Cpp::trans, alpha, gms_rhs2 00101 , trans_not(trans_rhs2), coom_rhs1, trans_not(trans_rhs1) ); 00102 } 00103 00104 // gms_lhs += alpha * op(gms_rhs1) * op(coom_rhs2) (left) (BLAS xGEMM) 00105 template<class T_COOM> 00106 void Mp_StMtCOOM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSlice& gms_rhs1 00107 , BLAS_Cpp::Transp trans_rhs1, const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2) 00108 { 00109 Mp_MtM_assert_sizes( gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans 00110 , gms_rhs1.rows() ,gms_rhs1.cols(), trans_rhs1 00111 , coom_rhs2.rows(), coom_rhs2.cols(), trans_rhs2 ); 00112 UtilityPack::imp_Mp_StMtCOOM( gms_lhs, BLAS_Cpp::no_trans, alpha, gms_rhs1 00113 , trans_rhs1, coom_rhs2, trans_rhs2 ); 00114 } 00115 00116 // ToDo: implement the level-3 BLAS operations below 00117 00118 // gms_lhs = alpha * op(coom_rhs1) * op(sym_rhs2) (right) (BLAS xSYMM) 00119 //template<class T_COOM> 00120 //void Mp_StCOOMtSM(DMatrixSlice* gms_lhs, value_type alpha, const T_COOM& coom_rhs1 00121 // , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceSym& sym_rhs2, BLAS_Cpp::Transp trans_rhs2); 00122 00123 // gms_lhs = alpha * op(sym_rhs1) * op(coom_rhs2) (left) (BLAS xSYMM) 00124 //template<class T_COOM> 00125 //void Mp_StSMtCOOM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceSym& sym_rhs1 00126 // , BLAS_Cpp::Transp trans_rhs1, const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2); 00127 00128 // gms_lhs = alpha * op(coom_rhs1) * op(tri_rhs2) (right) (BLAS xTRMM) 00129 //template<class T_COOM> 00130 //void Mp_StCOOMtSM(DMatrixSlice* gms_lhs, value_type alpha, const T_COOM& coom_rhs1 00131 // , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceTri& tri_rhs2, BLAS_Cpp::Transp trans_rhs2); 00132 00133 // gms_lhs = alpha * op(tri_rhs1) * op(coom_rhs2) (left) (BLAS xTRMM) 00134 //template<class T_COOM> 00135 //void Mp_StSMtCOOM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceTri& tri_rhs1 00136 // , BLAS_Cpp::Transp trans_rhs1, const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2); 00137 00138 namespace UtilityPack { 00139 00140 // op(gms_lhs) += alpha * op(gms_rhs1) * op(coom_rhs2) (BLAS xGEMM) 00141 // 00142 // This function perform this operation by looping through the nonzero elements of 00143 // coom_rhs2 and for each nonzero element (val,i,j) it performs the following operation. 00144 // 00145 // op(gms_lhs).col(j) += (alpha * val) * op(gms_rhs1).col(i) 00146 // 00147 // 00148 // jth ith jth 00149 // [ # ] [ # ] [ ] 00150 // [ # ] [ # ] [ ] 00151 // m [ # ] += [ # ] * [ ] n 00152 // [ # ] [ # ] ith [ val ] 00153 // n p [ ] 00154 // p 00155 // op(gms_lhs) op(gms_rhs1) op(coom_rhs2) 00156 // 00157 // The number of arithmetic operations performed are: 00158 // floats = (2*m + 1) * nz 00159 // 00160 // Strictly speeking the number of memory references is: 00161 // mem_refs = (2*m + 1) * nz 00162 // but this does not take into account that elements are accessed by columns 00163 // and this has some ramifications on cache effects and paging. If op(gms_lhs) 00164 // == gms_lhs' or op(gms_rhs1) == gms_rhs1' then elements in a column are 00165 // not adjacent to each other and if m is large enough each element may 00166 // even reside on a seperate page of memory. On Win32 0x86 systems a page is 00167 // 4 K so 4,000 (bytes/page) / 8 (bytes/double) = 500 doubles / page. If 00168 // the working set of pages is small this could cause some serious page thrashing 00169 // for large m. 00170 // 00171 // Another concideration is to sorting order of the elements in the COO matrix. 00172 // If op(coom_rhs2) is sorted by row then columns of op(gms_lhs) will be accessed 00173 // consecutivly and will result in better performance. The same goes for op(gms_rhs1) 00174 // if op(coom_rhs2) is sorted by column. 00175 // 00176 // There is opertunity for some vectorization and it is handled by calling 00177 // DenseLinAlgPack::Vp_StV(...). 00178 // 00179 template<class T_COOM> 00180 void imp_Mp_StMtCOOM(DMatrixSlice* gms_lhs, BLAS_Cpp::Transp trans_lhs, value_type alpha 00181 , const DMatrixSlice& gms_rhs1, BLAS_Cpp::Transp trans_rhs1 00182 , const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2) 00183 { 00184 using BLAS_Cpp::rows; 00185 using BLAS_Cpp::cols; 00186 using DenseLinAlgPack::col; 00187 00188 typename T_COOM::difference_type 00189 i_o = coom_rhs2.row_offset(), 00190 j_o = coom_rhs2.col_offset(); 00191 for(typename T_COOM::const_iterator itr = coom_rhs2.begin(); itr != coom_rhs2.end(); ++itr) { 00192 size_type i = rows( itr->row_i() + i_o , itr->col_j() + j_o , trans_rhs2 ), 00193 j = cols( itr->row_i() + i_o , itr->col_j() + j_o , trans_rhs2 ); 00194 // op(gms_lhs).col(j) += (alpha * val) * op(gms_rhs1).col(i) 00195 DenseLinAlgPack::Vp_StV( &col(*gms_lhs,trans_lhs,j), alpha * itr->value() 00196 , col(gms_rhs1,trans_rhs1,i) ); 00197 } 00198 } 00199 00200 } // end namespace UtilityPack 00201 00202 } // end namespace AbstractLinAlgPack 00203 00204 #endif // COO_MATRIX_TMPL_OP_DEF_H
1.7.4