|
EpetraExt Development
|
00001 //@HEADER 00002 // *********************************************************************** 00003 // 00004 // EpetraExt: Epetra Extended - Linear Algebra Services Package 00005 // Copyright (2001) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 //@HEADER 00028 00029 #include <EpetraExt_ConfigDefs.h> 00030 #include <EpetraExt_MatrixMatrix.h> 00031 00032 #include <EpetraExt_MMHelpers.h> 00033 00034 #include <EpetraExt_Transpose_RowMatrix.h> 00035 00036 #include <Epetra_Export.h> 00037 #include <Epetra_Import.h> 00038 #include <Epetra_Util.h> 00039 #include <Epetra_Map.h> 00040 #include <Epetra_Comm.h> 00041 #include <Epetra_CrsMatrix.h> 00042 #include <Epetra_Directory.h> 00043 #include <Epetra_HashTable.h> 00044 #include <Epetra_Distributor.h> 00045 00046 #ifdef HAVE_VECTOR 00047 #include <vector> 00048 #endif 00049 00050 namespace EpetraExt { 00051 00052 // 00053 //Method for internal use... sparsedot forms a dot-product between two 00054 //sparsely-populated 'vectors'. 00055 //Important assumption: assumes the indices in u_ind and v_ind are sorted. 00056 // 00057 double sparsedot(double* u, int* u_ind, int u_len, 00058 double* v, int* v_ind, int v_len) 00059 { 00060 double result = 0.0; 00061 00062 int v_idx = 0; 00063 int u_idx = 0; 00064 00065 while(v_idx < v_len && u_idx < u_len) { 00066 int ui = u_ind[u_idx]; 00067 int vi = v_ind[v_idx]; 00068 00069 if (ui < vi) { 00070 ++u_idx; 00071 } 00072 else if (ui > vi) { 00073 ++v_idx; 00074 } 00075 else { 00076 result += u[u_idx++]*v[v_idx++]; 00077 } 00078 } 00079 00080 return(result); 00081 } 00082 00083 //kernel method for computing the local portion of C = A*B 00084 int mult_A_B(CrsMatrixStruct& Aview, 00085 CrsMatrixStruct& Bview, 00086 CrsWrapper& C) 00087 { 00088 int C_firstCol = Bview.colMap->MinLID(); 00089 int C_lastCol = Bview.colMap->MaxLID(); 00090 00091 int C_firstCol_import = 0; 00092 int C_lastCol_import = -1; 00093 00094 int* bcols = Bview.colMap->MyGlobalElements(); 00095 int* bcols_import = NULL; 00096 if (Bview.importColMap != NULL) { 00097 C_firstCol_import = Bview.importColMap->MinLID(); 00098 C_lastCol_import = Bview.importColMap->MaxLID(); 00099 00100 bcols_import = Bview.importColMap->MyGlobalElements(); 00101 } 00102 00103 int C_numCols = C_lastCol - C_firstCol + 1; 00104 int C_numCols_import = C_lastCol_import - C_firstCol_import + 1; 00105 00106 if (C_numCols_import > C_numCols) C_numCols = C_numCols_import; 00107 double* dwork = new double[C_numCols]; 00108 int* iwork = new int[C_numCols]; 00109 00110 double* C_row_i = dwork; 00111 int* C_cols = iwork; 00112 00113 int C_row_i_length, i, j, k; 00114 00115 //To form C = A*B we're going to execute this expression: 00116 // 00117 // C(i,j) = sum_k( A(i,k)*B(k,j) ) 00118 // 00119 //Our goal, of course, is to navigate the data in A and B once, without 00120 //performing searches for column-indices, etc. 00121 00122 bool C_filled = C.Filled(); 00123 00124 //loop over the rows of A. 00125 for(i=0; i<Aview.numRows; ++i) { 00126 00127 //only navigate the local portion of Aview... (It's probable that we 00128 //imported more of A than we need for A*B, because other cases like A^T*B 00129 //need the extra rows.) 00130 if (Aview.remote[i]) { 00131 continue; 00132 } 00133 00134 int* Aindices_i = Aview.indices[i]; 00135 double* Aval_i = Aview.values[i]; 00136 00137 int global_row = Aview.rowMap->GID(i); 00138 00139 //loop across the i-th row of A and for each corresponding row 00140 //in B, loop across colums and accumulate product 00141 //A(i,k)*B(k,j) into our partial sum quantities C_row_i. In other words, 00142 //as we stride across B(k,:) we're calculating updates for row i of the 00143 //result matrix C. 00144 00145 for(k=0; k<Aview.numEntriesPerRow[i]; ++k) { 00146 int Ak = Bview.rowMap->LID(Aview.colMap->GID(Aindices_i[k])); 00147 double Aval = Aval_i[k]; 00148 00149 int* Bcol_inds = Bview.indices[Ak]; 00150 double* Bvals_k = Bview.values[Ak]; 00151 00152 C_row_i_length = 0; 00153 00154 if (Bview.remote[Ak]) { 00155 for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) { 00156 C_row_i[C_row_i_length] = Aval*Bvals_k[j]; 00157 C_cols[C_row_i_length++] = bcols_import[Bcol_inds[j]]; 00158 } 00159 } 00160 else { 00161 for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) { 00162 C_row_i[C_row_i_length] = Aval*Bvals_k[j]; 00163 C_cols[C_row_i_length++] = bcols[Bcol_inds[j]]; 00164 } 00165 } 00166 00167 // 00168 //Now put the C_row_i values into C. 00169 // 00170 00171 int err = C_filled ? 00172 C.SumIntoGlobalValues(global_row, C_row_i_length, C_row_i, C_cols) 00173 : 00174 C.InsertGlobalValues(global_row, C_row_i_length, C_row_i, C_cols); 00175 00176 if (err < 0) { 00177 return(err); 00178 } 00179 if (err > 0) { 00180 if (C_filled) { 00181 //C is Filled, and doesn't 00182 //have all the necessary nonzero locations. 00183 return(err); 00184 } 00185 } 00186 } 00187 } 00188 00189 delete [] dwork; 00190 delete [] iwork; 00191 00192 return(0); 00193 } 00194 00195 //kernel method for computing the local portion of C = A*B^T 00196 int mult_A_Btrans(CrsMatrixStruct& Aview, 00197 CrsMatrixStruct& Bview, 00198 CrsWrapper& C) 00199 { 00200 int i, j, k; 00201 int returnValue = 0; 00202 00203 int maxlen = 0; 00204 for(i=0; i<Aview.numRows; ++i) { 00205 if (Aview.numEntriesPerRow[i] > maxlen) maxlen = Aview.numEntriesPerRow[i]; 00206 } 00207 for(i=0; i<Bview.numRows; ++i) { 00208 if (Bview.numEntriesPerRow[i] > maxlen) maxlen = Bview.numEntriesPerRow[i]; 00209 } 00210 00211 //cout << "Aview: " << endl; 00212 //dumpCrsMatrixStruct(Aview); 00213 00214 //cout << "Bview: " << endl; 00215 //dumpCrsMatrixStruct(Bview); 00216 00217 int numBcols = Bview.colMap->NumMyElements(); 00218 int numBrows = Bview.numRows; 00219 00220 int iworklen = maxlen*2 + numBcols; 00221 int* iwork = new int[iworklen]; 00222 00223 int* bcols = iwork+maxlen*2; 00224 int* bgids = Bview.colMap->MyGlobalElements(); 00225 double* bvals = new double[maxlen*2]; 00226 double* avals = bvals+maxlen; 00227 00228 int max_all_b = Bview.colMap->MaxAllGID(); 00229 int min_all_b = Bview.colMap->MinAllGID(); 00230 00231 //bcols will hold the GIDs from B's column-map for fast access 00232 //during the computations below 00233 for(i=0; i<numBcols; ++i) { 00234 int blid = Bview.colMap->LID(bgids[i]); 00235 bcols[blid] = bgids[i]; 00236 } 00237 00238 //next create arrays indicating the first and last column-index in 00239 //each row of B, so that we can know when to skip certain rows below. 00240 //This will provide a large performance gain for banded matrices, and 00241 //a somewhat smaller gain for *most* other matrices. 00242 int* b_firstcol = new int[2*numBrows]; 00243 int* b_lastcol = b_firstcol+numBrows; 00244 int temp; 00245 for(i=0; i<numBrows; ++i) { 00246 b_firstcol[i] = max_all_b; 00247 b_lastcol[i] = min_all_b; 00248 00249 int Blen_i = Bview.numEntriesPerRow[i]; 00250 if (Blen_i < 1) continue; 00251 int* Bindices_i = Bview.indices[i]; 00252 00253 if (Bview.remote[i]) { 00254 for(k=0; k<Blen_i; ++k) { 00255 temp = Bview.importColMap->GID(Bindices_i[k]); 00256 if (temp < b_firstcol[i]) b_firstcol[i] = temp; 00257 if (temp > b_lastcol[i]) b_lastcol[i] = temp; 00258 } 00259 } 00260 else { 00261 for(k=0; k<Blen_i; ++k) { 00262 temp = bcols[Bindices_i[k]]; 00263 if (temp < b_firstcol[i]) b_firstcol[i] = temp; 00264 if (temp > b_lastcol[i]) b_lastcol[i] = temp; 00265 } 00266 } 00267 } 00268 00269 Epetra_Util util; 00270 00271 int* Aind = iwork; 00272 int* Bind = iwork+maxlen; 00273 00274 bool C_filled = C.Filled(); 00275 00276 //To form C = A*B^T, we're going to execute this expression: 00277 // 00278 // C(i,j) = sum_k( A(i,k)*B(j,k) ) 00279 // 00280 //This is the easiest case of all to code (easier than A*B, A^T*B, A^T*B^T). 00281 //But it requires the use of a 'sparsedot' function (we're simply forming 00282 //dot-products with row A_i and row B_j for all i and j). 00283 00284 //loop over the rows of A. 00285 for(i=0; i<Aview.numRows; ++i) { 00286 if (Aview.remote[i]) { 00287 continue; 00288 } 00289 00290 int* Aindices_i = Aview.indices[i]; 00291 double* Aval_i = Aview.values[i]; 00292 int A_len_i = Aview.numEntriesPerRow[i]; 00293 if (A_len_i < 1) { 00294 continue; 00295 } 00296 00297 for(k=0; k<A_len_i; ++k) { 00298 Aind[k] = Aview.colMap->GID(Aindices_i[k]); 00299 avals[k] = Aval_i[k]; 00300 } 00301 00302 util.Sort(true, A_len_i, Aind, 1, &avals, 0, NULL); 00303 00304 int mina = Aind[0]; 00305 int maxa = Aind[A_len_i-1]; 00306 00307 if (mina > max_all_b || maxa < min_all_b) { 00308 continue; 00309 } 00310 00311 int global_row = Aview.rowMap->GID(i); 00312 00313 //loop over the rows of B and form results C_ij = dot(A(i,:),B(j,:)) 00314 for(j=0; j<Bview.numRows; ++j) { 00315 if (b_firstcol[j] > maxa || b_lastcol[j] < mina) { 00316 continue; 00317 } 00318 00319 int* Bindices_j = Bview.indices[j]; 00320 int B_len_j = Bview.numEntriesPerRow[j]; 00321 if (B_len_j < 1) { 00322 continue; 00323 } 00324 00325 int tmp, Blen = 0; 00326 00327 if (Bview.remote[j]) { 00328 for(k=0; k<B_len_j; ++k) { 00329 tmp = Bview.importColMap->GID(Bindices_j[k]); 00330 if (tmp < mina || tmp > maxa) { 00331 continue; 00332 } 00333 00334 bvals[Blen] = Bview.values[j][k]; 00335 Bind[Blen++] = tmp; 00336 } 00337 } 00338 else { 00339 for(k=0; k<B_len_j; ++k) { 00340 tmp = bcols[Bindices_j[k]]; 00341 if (tmp < mina || tmp > maxa) { 00342 continue; 00343 } 00344 00345 bvals[Blen] = Bview.values[j][k]; 00346 Bind[Blen++] = tmp; 00347 } 00348 } 00349 00350 if (Blen < 1) { 00351 continue; 00352 } 00353 00354 util.Sort(true, Blen, Bind, 1, &bvals, 0, NULL); 00355 00356 double C_ij = sparsedot(avals, Aind, A_len_i, 00357 bvals, Bind, Blen); 00358 00359 if (C_ij == 0.0) { 00360 continue; 00361 } 00362 int global_col = Bview.rowMap->GID(j); 00363 00364 int err = C_filled ? 00365 C.SumIntoGlobalValues(global_row, 1, &C_ij, &global_col) 00366 : 00367 C.InsertGlobalValues(global_row, 1, &C_ij, &global_col); 00368 00369 if (err < 0) { 00370 return(err); 00371 } 00372 if (err > 0) { 00373 if (C_filled) { 00374 //C.Filled()==true, and C doesn't have all the necessary nonzero 00375 //locations, or global_row or global_col is out of range (less 00376 //than 0 or non local). 00377 std::cerr << "EpetraExt::MatrixMatrix::Multiply Warning: failed " 00378 << "to insert value in result matrix at position "<<global_row 00379 <<"," <<global_col<<", possibly because result matrix has a " 00380 << "column-map that doesn't include column "<<global_col 00381 <<" on this proc." <<std::endl; 00382 return(err); 00383 } 00384 } 00385 } 00386 } 00387 00388 delete [] iwork; 00389 delete [] bvals; 00390 delete [] b_firstcol; 00391 00392 return(returnValue); 00393 } 00394 00395 //kernel method for computing the local portion of C = A^T*B 00396 int mult_Atrans_B(CrsMatrixStruct& Aview, 00397 CrsMatrixStruct& Bview, 00398 CrsWrapper& C) 00399 { 00400 int C_firstCol = Bview.colMap->MinLID(); 00401 int C_lastCol = Bview.colMap->MaxLID(); 00402 00403 int C_firstCol_import = 0; 00404 int C_lastCol_import = -1; 00405 00406 if (Bview.importColMap != NULL) { 00407 C_firstCol_import = Bview.importColMap->MinLID(); 00408 C_lastCol_import = Bview.importColMap->MaxLID(); 00409 } 00410 00411 int C_numCols = C_lastCol - C_firstCol + 1; 00412 int C_numCols_import = C_lastCol_import - C_firstCol_import + 1; 00413 00414 if (C_numCols_import > C_numCols) C_numCols = C_numCols_import; 00415 00416 double* C_row_i = new double[C_numCols]; 00417 int* C_colInds = new int[C_numCols]; 00418 00419 int i, j, k; 00420 00421 for(j=0; j<C_numCols; ++j) { 00422 C_row_i[j] = 0.0; 00423 C_colInds[j] = 0; 00424 } 00425 00426 //To form C = A^T*B, compute a series of outer-product updates. 00427 // 00428 // for (ith column of A^T) { 00429 // C_i = outer product of A^T(:,i) and B(i,:) 00430 // Where C_i is the ith matrix update, 00431 // A^T(:,i) is the ith column of A^T, and 00432 // B(i,:) is the ith row of B. 00433 // 00434 00435 //dumpCrsMatrixStruct(Aview); 00436 //dumpCrsMatrixStruct(Bview); 00437 int localProc = Bview.colMap->Comm().MyPID(); 00438 00439 int* Arows = Aview.rowMap->MyGlobalElements(); 00440 00441 bool C_filled = C.Filled(); 00442 00443 //loop over the rows of A (which are the columns of A^T). 00444 for(i=0; i<Aview.numRows; ++i) { 00445 00446 int* Aindices_i = Aview.indices[i]; 00447 double* Aval_i = Aview.values[i]; 00448 00449 //we'll need to get the row of B corresponding to Arows[i], 00450 //where Arows[i] is the GID of A's ith row. 00451 int Bi = Bview.rowMap->LID(Arows[i]); 00452 if (Bi<0) { 00453 cout << "mult_Atrans_B ERROR, proc "<<localProc<<" needs row " 00454 <<Arows[i]<<" of matrix B, but doesn't have it."<<endl; 00455 return(-1); 00456 } 00457 00458 int* Bcol_inds = Bview.indices[Bi]; 00459 double* Bvals_i = Bview.values[Bi]; 00460 00461 //for each column-index Aj in the i-th row of A, we'll update 00462 //global-row GID(Aj) of the result matrix C. In that row of C, 00463 //we'll update column-indices given by the column-indices in the 00464 //ith row of B that we're now holding (Bcol_inds). 00465 00466 //First create a list of GIDs for the column-indices 00467 //that we'll be updating. 00468 00469 int Blen = Bview.numEntriesPerRow[Bi]; 00470 if (Bview.remote[Bi]) { 00471 for(j=0; j<Blen; ++j) { 00472 C_colInds[j] = Bview.importColMap->GID(Bcol_inds[j]); 00473 } 00474 } 00475 else { 00476 for(j=0; j<Blen; ++j) { 00477 C_colInds[j] = Bview.colMap->GID(Bcol_inds[j]); 00478 } 00479 } 00480 00481 //loop across the i-th row of A (column of A^T) 00482 for(j=0; j<Aview.numEntriesPerRow[i]; ++j) { 00483 00484 int Aj = Aindices_i[j]; 00485 double Aval = Aval_i[j]; 00486 00487 int global_row; 00488 if (Aview.remote[i]) { 00489 global_row = Aview.importColMap->GID(Aj); 00490 } 00491 else { 00492 global_row = Aview.colMap->GID(Aj); 00493 } 00494 00495 if (!C.RowMap().MyGID(global_row)) { 00496 continue; 00497 } 00498 00499 for(k=0; k<Blen; ++k) { 00500 C_row_i[k] = Aval*Bvals_i[k]; 00501 } 00502 00503 // 00504 //Now add this row-update to C. 00505 // 00506 00507 int err = C_filled ? 00508 C.SumIntoGlobalValues(global_row, Blen, C_row_i, C_colInds) 00509 : 00510 C.InsertGlobalValues(global_row, Blen, C_row_i, C_colInds); 00511 00512 if (err < 0) { 00513 return(err); 00514 } 00515 if (err > 0) { 00516 if (C_filled) { 00517 //C is Filled, and doesn't have all the necessary nonzero locations. 00518 return(err); 00519 } 00520 } 00521 } 00522 } 00523 00524 delete [] C_row_i; 00525 delete [] C_colInds; 00526 00527 return(0); 00528 } 00529 00530 //kernel method for computing the local portion of C = A^T*B^T 00531 int mult_Atrans_Btrans(CrsMatrixStruct& Aview, 00532 CrsMatrixStruct& Bview, 00533 CrsWrapper& C) 00534 { 00535 int C_firstCol = Aview.rowMap->MinLID(); 00536 int C_lastCol = Aview.rowMap->MaxLID(); 00537 00538 int C_firstCol_import = 0; 00539 int C_lastCol_import = -1; 00540 00541 if (Aview.importColMap != NULL) { 00542 C_firstCol_import = Aview.importColMap->MinLID(); 00543 C_lastCol_import = Aview.importColMap->MaxLID(); 00544 } 00545 00546 int C_numCols = C_lastCol - C_firstCol + 1; 00547 int C_numCols_import = C_lastCol_import - C_firstCol_import + 1; 00548 00549 if (C_numCols_import > C_numCols) C_numCols = C_numCols_import; 00550 00551 double* dwork = new double[C_numCols]; 00552 int* iwork = new int[C_numCols]; 00553 00554 double* C_col_j = dwork; 00555 int* C_inds = iwork; 00556 00557 //cout << "Aview: " << endl; 00558 //dumpCrsMatrixStruct(Aview); 00559 00560 //cout << "Bview: " << endl; 00561 //dumpCrsMatrixStruct(Bview); 00562 00563 00564 int i, j, k; 00565 00566 for(j=0; j<C_numCols; ++j) { 00567 C_col_j[j] = 0.0; 00568 C_inds[j] = -1; 00569 } 00570 00571 int* A_col_inds = Aview.colMap->MyGlobalElements(); 00572 int* A_col_inds_import = Aview.importColMap ? 00573 Aview.importColMap->MyGlobalElements() : 0; 00574 00575 const Epetra_Map* Crowmap = &(C.RowMap()); 00576 00577 //To form C = A^T*B^T, we're going to execute this expression: 00578 // 00579 // C(i,j) = sum_k( A(k,i)*B(j,k) ) 00580 // 00581 //Our goal, of course, is to navigate the data in A and B once, without 00582 //performing searches for column-indices, etc. In other words, we avoid 00583 //column-wise operations like the plague... 00584 00585 int* Brows = Bview.rowMap->MyGlobalElements(); 00586 00587 //loop over the rows of B 00588 for(j=0; j<Bview.numRows; ++j) { 00589 int* Bindices_j = Bview.indices[j]; 00590 double* Bvals_j = Bview.values[j]; 00591 00592 int global_col = Brows[j]; 00593 00594 //loop across columns in the j-th row of B and for each corresponding 00595 //row in A, loop across columns and accumulate product 00596 //A(k,i)*B(j,k) into our partial sum quantities in C_col_j. In other 00597 //words, as we stride across B(j,:), we use selected rows in A to 00598 //calculate updates for column j of the result matrix C. 00599 00600 for(k=0; k<Bview.numEntriesPerRow[j]; ++k) { 00601 00602 int bk = Bindices_j[k]; 00603 double Bval = Bvals_j[k]; 00604 00605 int global_k; 00606 if (Bview.remote[j]) { 00607 global_k = Bview.importColMap->GID(bk); 00608 } 00609 else { 00610 global_k = Bview.colMap->GID(bk); 00611 } 00612 00613 //get the corresponding row in A 00614 int ak = Aview.rowMap->LID(global_k); 00615 if (ak<0) { 00616 continue; 00617 } 00618 00619 int* Aindices_k = Aview.indices[ak]; 00620 double* Avals_k = Aview.values[ak]; 00621 00622 int C_len = 0; 00623 00624 if (Aview.remote[ak]) { 00625 for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) { 00626 C_col_j[C_len] = Avals_k[i]*Bval; 00627 C_inds[C_len++] = A_col_inds_import[Aindices_k[i]]; 00628 } 00629 } 00630 else { 00631 for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) { 00632 C_col_j[C_len] = Avals_k[i]*Bval; 00633 C_inds[C_len++] = A_col_inds[Aindices_k[i]]; 00634 } 00635 } 00636 00637 //Now loop across the C_col_j values and put non-zeros into C. 00638 00639 for(i=0; i < C_len ; ++i) { 00640 if (C_col_j[i] == 0.0) continue; 00641 00642 int global_row = C_inds[i]; 00643 if (!Crowmap->MyGID(global_row)) { 00644 continue; 00645 } 00646 00647 int err = C.SumIntoGlobalValues(global_row, 1, &(C_col_j[i]), &global_col); 00648 00649 if (err < 0) { 00650 return(err); 00651 } 00652 else { 00653 if (err > 0) { 00654 err = C.InsertGlobalValues(global_row, 1, &(C_col_j[i]), &global_col); 00655 if (err < 0) { 00656 return(err); 00657 } 00658 } 00659 } 00660 } 00661 00662 } 00663 } 00664 00665 delete [] dwork; 00666 delete [] iwork; 00667 00668 return(0); 00669 } 00670 00671 int import_and_extract_views(const Epetra_CrsMatrix& M, 00672 const Epetra_Map& targetMap, 00673 CrsMatrixStruct& Mview) 00674 { 00675 //The goal of this method is to populate the 'Mview' struct with views of the 00676 //rows of M, including all rows that correspond to elements in 'targetMap'. 00677 // 00678 //If targetMap includes local elements that correspond to remotely-owned rows 00679 //of M, then those remotely-owned rows will be imported into 00680 //'Mview.importMatrix', and views of them will be included in 'Mview'. 00681 00682 Mview.deleteContents(); 00683 00684 const Epetra_Map& Mrowmap = M.RowMap(); 00685 00686 int numProcs = Mrowmap.Comm().NumProc(); 00687 00688 Mview.numRows = targetMap.NumMyElements(); 00689 00690 int* Mrows = targetMap.MyGlobalElements(); 00691 00692 if (Mview.numRows > 0) { 00693 Mview.numEntriesPerRow = new int[Mview.numRows]; 00694 Mview.indices = new int*[Mview.numRows]; 00695 Mview.values = new double*[Mview.numRows]; 00696 Mview.remote = new bool[Mview.numRows]; 00697 } 00698 00699 Mview.numRemote = 0; 00700 00701 int i; 00702 for(i=0; i<Mview.numRows; ++i) { 00703 int mlid = Mrowmap.LID(Mrows[i]); 00704 if (mlid < 0) { 00705 Mview.remote[i] = true; 00706 ++Mview.numRemote; 00707 } 00708 else { 00709 EPETRA_CHK_ERR( M.ExtractMyRowView(mlid, Mview.numEntriesPerRow[i], 00710 Mview.values[i], Mview.indices[i]) ); 00711 Mview.remote[i] = false; 00712 } 00713 } 00714 00715 Mview.origRowMap = &(M.RowMap()); 00716 Mview.rowMap = &targetMap; 00717 Mview.colMap = &(M.ColMap()); 00718 Mview.domainMap = &(M.DomainMap()); 00719 Mview.importColMap = NULL; 00720 00721 if (numProcs < 2) { 00722 if (Mview.numRemote > 0) { 00723 cerr << "EpetraExt::MatrixMatrix::Multiply ERROR, numProcs < 2 but " 00724 << "attempting to import remote matrix rows."<<endl; 00725 return(-1); 00726 } 00727 00728 //If only one processor we don't need to import any remote rows, so return. 00729 return(0); 00730 } 00731 00732 // 00733 //Now we will import the needed remote rows of M, if the global maximum 00734 //value of numRemote is greater than 0. 00735 // 00736 00737 int globalMaxNumRemote = 0; 00738 Mrowmap.Comm().MaxAll(&Mview.numRemote, &globalMaxNumRemote, 1); 00739 00740 if (globalMaxNumRemote > 0) { 00741 //Create a map that describes the remote rows of M that we need. 00742 00743 int* MremoteRows = Mview.numRemote>0 ? new int[Mview.numRemote] : NULL; 00744 int offset = 0; 00745 for(i=0; i<Mview.numRows; ++i) { 00746 if (Mview.remote[i]) { 00747 MremoteRows[offset++] = Mrows[i]; 00748 } 00749 } 00750 00751 Epetra_Map MremoteRowMap(-1, Mview.numRemote, MremoteRows, 00752 Mrowmap.IndexBase(), Mrowmap.Comm()); 00753 00754 //Create an importer with target-map MremoteRowMap and 00755 //source-map Mrowmap. 00756 Epetra_Import importer(MremoteRowMap, Mrowmap); 00757 00758 //Now create a new matrix into which we can import the remote rows of M 00759 //that we need. 00760 Mview.importMatrix = new Epetra_CrsMatrix(Copy, MremoteRowMap, 1); 00761 00762 EPETRA_CHK_ERR( Mview.importMatrix->Import(M, importer, Insert) ); 00763 00764 EPETRA_CHK_ERR( Mview.importMatrix->FillComplete(M.DomainMap(), M.RangeMap()) ); 00765 00766 //Finally, use the freshly imported data to fill in the gaps in our views 00767 //of rows of M. 00768 for(i=0; i<Mview.numRows; ++i) { 00769 if (Mview.remote[i]) { 00770 int importLID = MremoteRowMap.LID(Mrows[i]); 00771 EPETRA_CHK_ERR( Mview.importMatrix->ExtractMyRowView(importLID, 00772 Mview.numEntriesPerRow[i], 00773 Mview.values[i], 00774 Mview.indices[i]) ); 00775 } 00776 } 00777 00778 Mview.importColMap = &(Mview.importMatrix->ColMap()); 00779 00780 delete [] MremoteRows; 00781 } 00782 00783 return(0); 00784 } 00785 00786 int distribute_list(const Epetra_Comm& Comm, 00787 int lenSendList, 00788 const int* sendList, 00789 int& maxSendLen, 00790 int*& recvList) 00791 { 00792 maxSendLen = 0; 00793 Comm.MaxAll(&lenSendList, &maxSendLen, 1); 00794 int numProcs = Comm.NumProc(); 00795 recvList = new int[numProcs*maxSendLen]; 00796 int* send = new int[maxSendLen]; 00797 for(int i=0; i<lenSendList; ++i) { 00798 send[i] = sendList[i]; 00799 } 00800 00801 Comm.GatherAll(send, recvList, maxSendLen); 00802 delete [] send; 00803 00804 return(0); 00805 } 00806 00807 Epetra_Map* create_map_from_imported_rows(const Epetra_Map* map, 00808 int totalNumSend, 00809 int* sendRows, 00810 int numProcs, 00811 int* numSendPerProc) 00812 { 00813 //Perform sparse all-to-all communication to send the row-GIDs 00814 //in sendRows to appropriate processors according to offset 00815 //information in numSendPerProc. 00816 //Then create and return a map containing the rows that we 00817 //received on the local processor. 00818 00819 Epetra_Distributor* distributor = map->Comm().CreateDistributor(); 00820 00821 int* sendPIDs = totalNumSend>0 ? new int[totalNumSend] : NULL; 00822 int offset = 0; 00823 for(int i=0; i<numProcs; ++i) { 00824 for(int j=0; j<numSendPerProc[i]; ++j) { 00825 sendPIDs[offset++] = i; 00826 } 00827 } 00828 00829 int numRecv = 0; 00830 int err = distributor->CreateFromSends(totalNumSend, sendPIDs, 00831 true, numRecv); 00832 assert( err == 0 ); 00833 00834 char* c_recv_objs = numRecv>0 ? new char[numRecv*sizeof(int)] : NULL; 00835 int num_c_recv = numRecv*(int)sizeof(int); 00836 00837 err = distributor->Do(reinterpret_cast<char*>(sendRows), 00838 (int)sizeof(int), num_c_recv, c_recv_objs); 00839 assert( err == 0 ); 00840 00841 int* recvRows = reinterpret_cast<int*>(c_recv_objs); 00842 00843 //Now create a map with the rows we've received from other processors. 00844 Epetra_Map* import_rows = new Epetra_Map(-1, numRecv, recvRows, 00845 map->IndexBase(), map->Comm()); 00846 00847 delete [] c_recv_objs; 00848 delete [] sendPIDs; 00849 00850 delete distributor; 00851 00852 return( import_rows ); 00853 } 00854 00855 int form_map_union(const Epetra_Map* map1, 00856 const Epetra_Map* map2, 00857 const Epetra_Map*& mapunion) 00858 { 00859 //form the union of two maps 00860 00861 if (map1 == NULL) { 00862 mapunion = new Epetra_Map(*map2); 00863 return(0); 00864 } 00865 00866 if (map2 == NULL) { 00867 mapunion = new Epetra_Map(*map1); 00868 return(0); 00869 } 00870 00871 int map1_len = map1->NumMyElements(); 00872 int* map1_elements = map1->MyGlobalElements(); 00873 int map2_len = map2->NumMyElements(); 00874 int* map2_elements = map2->MyGlobalElements(); 00875 00876 int* union_elements = new int[map1_len+map2_len]; 00877 00878 int map1_offset = 0, map2_offset = 0, union_offset = 0; 00879 00880 while(map1_offset < map1_len && map2_offset < map2_len) { 00881 int map1_elem = map1_elements[map1_offset]; 00882 int map2_elem = map2_elements[map2_offset]; 00883 00884 if (map1_elem < map2_elem) { 00885 union_elements[union_offset++] = map1_elem; 00886 ++map1_offset; 00887 } 00888 else if (map1_elem > map2_elem) { 00889 union_elements[union_offset++] = map2_elem; 00890 ++map2_offset; 00891 } 00892 else { 00893 union_elements[union_offset++] = map1_elem; 00894 ++map1_offset; 00895 ++map2_offset; 00896 } 00897 } 00898 00899 int i; 00900 for(i=map1_offset; i<map1_len; ++i) { 00901 union_elements[union_offset++] = map1_elements[i]; 00902 } 00903 00904 for(i=map2_offset; i<map2_len; ++i) { 00905 union_elements[union_offset++] = map2_elements[i]; 00906 } 00907 00908 mapunion = new Epetra_Map(-1, union_offset, union_elements, 00909 map1->IndexBase(), map1->Comm()); 00910 00911 delete [] union_elements; 00912 00913 return(0); 00914 } 00915 00916 Epetra_Map* find_rows_containing_cols(const Epetra_CrsMatrix& M, 00917 const Epetra_Map* colmap) 00918 { 00919 //The goal of this function is to find all rows in the matrix M that contain 00920 //column-indices which are in 'colmap'. A map containing those rows is 00921 //returned. 00922 00923 int numProcs = colmap->Comm().NumProc(); 00924 int localProc = colmap->Comm().MyPID(); 00925 00926 if (numProcs < 2) { 00927 Epetra_Map* result_map = NULL; 00928 00929 int err = form_map_union(&(M.RowMap()), NULL, 00930 (const Epetra_Map*&)result_map); 00931 if (err != 0) { 00932 return(NULL); 00933 } 00934 return(result_map); 00935 } 00936 00937 int MnumRows = M.NumMyRows(); 00938 int numCols = colmap->NumMyElements(); 00939 00940 int* iwork = new int[numCols+2*numProcs+numProcs*MnumRows]; 00941 int iworkOffset = 0; 00942 00943 int* cols = &(iwork[iworkOffset]); iworkOffset += numCols; 00944 00945 cols[0] = numCols; 00946 colmap->MyGlobalElements( &(cols[1]) ); 00947 00948 //cols are not necessarily sorted at this point, so we'll make sure 00949 //they are sorted. 00950 Epetra_Util util; 00951 util.Sort(true, numCols, &(cols[1]), 0, NULL, 0, NULL); 00952 00953 int* all_proc_cols = NULL; 00954 00955 int max_num_cols; 00956 distribute_list(colmap->Comm(), numCols+1, cols, max_num_cols, all_proc_cols); 00957 00958 const Epetra_CrsGraph& Mgraph = M.Graph(); 00959 const Epetra_Map& Mrowmap = M.RowMap(); 00960 const Epetra_Map& Mcolmap = M.ColMap(); 00961 int MminMyLID = Mrowmap.MinLID(); 00962 00963 int* procNumCols = &(iwork[iworkOffset]); iworkOffset += numProcs; 00964 int* procNumRows = &(iwork[iworkOffset]); iworkOffset += numProcs; 00965 int* procRows_1D = &(iwork[iworkOffset]); 00966 int** procCols = new int*[numProcs]; 00967 int** procRows = new int*[numProcs]; 00968 int i, err; 00969 int offset = 0; 00970 for(i=0; i<numProcs; ++i) { 00971 procNumCols[i] = all_proc_cols[offset]; 00972 procCols[i] = &(all_proc_cols[offset+1]); 00973 offset += max_num_cols; 00974 00975 procNumRows[i] = 0; 00976 procRows[i] = &(procRows_1D[i*MnumRows]); 00977 } 00978 00979 int* Mindices; 00980 00981 for(int row=0; row<MnumRows; ++row) { 00982 int localRow = MminMyLID+row; 00983 int globalRow = Mrowmap.GID(localRow); 00984 int MnumCols; 00985 err = Mgraph.ExtractMyRowView(localRow, MnumCols, Mindices); 00986 if (err != 0) { 00987 cerr << "proc "<<localProc<<", error in Mgraph.ExtractMyRowView, row " 00988 <<localRow<<endl; 00989 return(NULL); 00990 } 00991 00992 for(int j=0; j<MnumCols; ++j) { 00993 int colGID = Mcolmap.GID(Mindices[j]); 00994 00995 for(int p=0; p<numProcs; ++p) { 00996 if (p==localProc) continue; 00997 00998 int insertPoint; 00999 int foundOffset = Epetra_Util_binary_search(colGID, procCols[p], 01000 procNumCols[p], insertPoint); 01001 if (foundOffset > -1) { 01002 int numRowsP = procNumRows[p]; 01003 int* prows = procRows[p]; 01004 if (numRowsP < 1 || prows[numRowsP-1] < globalRow) { 01005 prows[numRowsP] = globalRow; 01006 procNumRows[p]++; 01007 } 01008 } 01009 } 01010 } 01011 } 01012 01013 //Now make the contents of procRows occupy a contiguous section 01014 //of procRows_1D. 01015 offset = procNumRows[0]; 01016 for(i=1; i<numProcs; ++i) { 01017 for(int j=0; j<procNumRows[i]; ++j) { 01018 procRows_1D[offset++] = procRows[i][j]; 01019 } 01020 } 01021 01022 int totalNumSend = offset; 01023 //Next we will do a sparse all-to-all communication to send the lists of rows 01024 //to the appropriate processors, and create a map with the rows we've received 01025 //from other processors. 01026 Epetra_Map* recvd_rows = 01027 create_map_from_imported_rows(&Mrowmap, totalNumSend, 01028 procRows_1D, numProcs, procNumRows); 01029 01030 Epetra_Map* result_map = NULL; 01031 01032 err = form_map_union(&(M.RowMap()), recvd_rows, (const Epetra_Map*&)result_map); 01033 if (err != 0) { 01034 return(NULL); 01035 } 01036 01037 delete [] iwork; 01038 delete [] procCols; 01039 delete [] procRows; 01040 delete [] all_proc_cols; 01041 delete recvd_rows; 01042 01043 return(result_map); 01044 } 01045 01046 int MatrixMatrix::Multiply(const Epetra_CrsMatrix& A, 01047 bool transposeA, 01048 const Epetra_CrsMatrix& B, 01049 bool transposeB, 01050 Epetra_CrsMatrix& C, 01051 bool call_FillComplete_on_result) 01052 { 01053 // 01054 //This method forms the matrix-matrix product C = op(A) * op(B), where 01055 //op(A) == A if transposeA is false, 01056 //op(A) == A^T if transposeA is true, 01057 //and similarly for op(B). 01058 // 01059 01060 //A and B should already be Filled. 01061 //(Should we go ahead and call FillComplete() on them if necessary? 01062 // or error out? For now, we choose to error out.) 01063 if (!A.Filled() || !B.Filled()) { 01064 EPETRA_CHK_ERR(-1); 01065 } 01066 01067 //We're going to refer to the different combinations of op(A) and op(B) 01068 //as scenario 1 through 4. 01069 01070 int scenario = 1;//A*B 01071 if (transposeB && !transposeA) scenario = 2;//A*B^T 01072 if (transposeA && !transposeB) scenario = 3;//A^T*B 01073 if (transposeA && transposeB) scenario = 4;//A^T*B^T 01074 01075 //now check size compatibility 01076 int Aouter = transposeA ? A.NumGlobalCols() : A.NumGlobalRows(); 01077 int Bouter = transposeB ? B.NumGlobalRows() : B.NumGlobalCols(); 01078 int Ainner = transposeA ? A.NumGlobalRows() : A.NumGlobalCols(); 01079 int Binner = transposeB ? B.NumGlobalCols() : B.NumGlobalRows(); 01080 if (Ainner != Binner) { 01081 cerr << "MatrixMatrix::Multiply: ERROR, inner dimensions of op(A) and op(B) " 01082 << "must match for matrix-matrix product. op(A) is " 01083 <<Aouter<<"x"<<Ainner << ", op(B) is "<<Binner<<"x"<<Bouter<<endl; 01084 return(-1); 01085 } 01086 01087 //The result matrix C must at least have a row-map that reflects the 01088 //correct row-size. Don't check the number of columns because rectangular 01089 //matrices which were constructed with only one map can still end up 01090 //having the correct capacity and dimensions when filled. 01091 if (Aouter > C.NumGlobalRows()) { 01092 cerr << "MatrixMatrix::Multiply: ERROR, dimensions of result C must " 01093 << "match dimensions of op(A) * op(B). C has "<<C.NumGlobalRows() 01094 << " rows, should have at least "<<Aouter << endl; 01095 return(-1); 01096 } 01097 01098 //It doesn't matter whether C is already Filled or not. If it is already 01099 //Filled, it must have space allocated for the positions that will be 01100 //referenced in forming C = op(A)*op(B). If it doesn't have enough space, 01101 //we'll error out later when trying to store result values. 01102 01103 //We're going to need to import remotely-owned sections of A and/or B 01104 //if more than 1 processor is performing this run, depending on the scenario. 01105 int numProcs = A.Comm().NumProc(); 01106 01107 //If we are to use the transpose of A and/or B, we'll need to be able to 01108 //access, on the local processor, all rows that contain column-indices in 01109 //the domain-map. 01110 const Epetra_Map* domainMap_A = &(A.DomainMap()); 01111 const Epetra_Map* domainMap_B = &(B.DomainMap()); 01112 01113 const Epetra_Map* rowmap_A = &(A.RowMap()); 01114 const Epetra_Map* rowmap_B = &(B.RowMap()); 01115 01116 //Declare some 'work-space' maps which may be created depending on 01117 //the scenario, and which will be deleted before exiting this function. 01118 const Epetra_Map* workmap1 = NULL; 01119 const Epetra_Map* workmap2 = NULL; 01120 const Epetra_Map* mapunion1 = NULL; 01121 01122 //Declare a couple of structs that will be used to hold views of the data 01123 //of A and B, to be used for fast access during the matrix-multiplication. 01124 CrsMatrixStruct Aview; 01125 CrsMatrixStruct Bview; 01126 01127 const Epetra_Map* targetMap_A = rowmap_A; 01128 const Epetra_Map* targetMap_B = rowmap_B; 01129 01130 if (numProcs > 1) { 01131 //If op(A) = A^T, find all rows of A that contain column-indices in the 01132 //local portion of the domain-map. (We'll import any remote rows 01133 //that fit this criteria onto the local processor.) 01134 if (transposeA) { 01135 workmap1 = find_rows_containing_cols(A, domainMap_A); 01136 targetMap_A = workmap1; 01137 } 01138 } 01139 01140 //Now import any needed remote rows and populate the Aview struct. 01141 EPETRA_CHK_ERR( import_and_extract_views(A, *targetMap_A, Aview) ); 01142 01143 //We will also need local access to all rows of B that correspond to the 01144 //column-map of op(A). 01145 if (numProcs > 1) { 01146 const Epetra_Map* colmap_op_A = NULL; 01147 if (transposeA) { 01148 colmap_op_A = targetMap_A; 01149 } 01150 else { 01151 colmap_op_A = &(A.ColMap()); 01152 } 01153 01154 targetMap_B = colmap_op_A; 01155 01156 //If op(B) = B^T, find all rows of B that contain column-indices in the 01157 //local-portion of the domain-map, or in the column-map of op(A). 01158 //We'll import any remote rows that fit this criteria onto the 01159 //local processor. 01160 if (transposeB) { 01161 EPETRA_CHK_ERR( form_map_union(colmap_op_A, domainMap_B, mapunion1) ); 01162 workmap2 = find_rows_containing_cols(B, mapunion1); 01163 targetMap_B = workmap2; 01164 } 01165 } 01166 01167 //Now import any needed remote rows and populate the Bview struct. 01168 EPETRA_CHK_ERR( import_and_extract_views(B, *targetMap_B, Bview) ); 01169 01170 //If the result matrix C is not already FillComplete'd, we will do a 01171 //preprocessing step to create the nonzero structure, then call FillComplete, 01172 if (!C.Filled()) { 01173 CrsWrapper_GraphBuilder crsgraphbuilder(C.RowMap()); 01174 01175 //pass the graph-builder object to the multiplication kernel to fill in all 01176 //the nonzero positions that will be used in the result matrix. 01177 switch(scenario) { 01178 case 1: EPETRA_CHK_ERR( mult_A_B(Aview, Bview, crsgraphbuilder) ); 01179 break; 01180 case 2: EPETRA_CHK_ERR( mult_A_Btrans(Aview, Bview, crsgraphbuilder) ); 01181 break; 01182 case 3: EPETRA_CHK_ERR( mult_Atrans_B(Aview, Bview, crsgraphbuilder) ); 01183 break; 01184 case 4: EPETRA_CHK_ERR( mult_Atrans_Btrans(Aview, Bview, crsgraphbuilder) ); 01185 break; 01186 } 01187 01188 //now insert all of the nonzero positions into the result matrix. 01189 insert_matrix_locations(crsgraphbuilder, C); 01190 01191 if (call_FillComplete_on_result) { 01192 const Epetra_Map* domainmap = 01193 transposeB ? &(B.RangeMap()) : &(B.DomainMap()); 01194 01195 const Epetra_Map* rangemap = 01196 transposeA ? &(A.DomainMap()) : &(A.RangeMap()); 01197 01198 EPETRA_CHK_ERR( C.FillComplete(*domainmap, *rangemap) ); 01199 call_FillComplete_on_result = false; 01200 } 01201 } 01202 01203 //Pre-zero the result matrix: 01204 C.PutScalar(0.0); 01205 01206 //Now call the appropriate method to perform the actual multiplication. 01207 01208 CrsWrapper_Epetra_CrsMatrix ecrsmat(C); 01209 01210 switch(scenario) { 01211 case 1: EPETRA_CHK_ERR( mult_A_B(Aview, Bview, ecrsmat) ); 01212 break; 01213 case 2: EPETRA_CHK_ERR( mult_A_Btrans(Aview, Bview, ecrsmat) ); 01214 break; 01215 case 3: EPETRA_CHK_ERR( mult_Atrans_B(Aview, Bview, ecrsmat) ); 01216 break; 01217 case 4: EPETRA_CHK_ERR( mult_Atrans_Btrans(Aview, Bview, ecrsmat) ); 01218 break; 01219 } 01220 01221 if (call_FillComplete_on_result) { 01222 //We'll call FillComplete on the C matrix before we exit, and give 01223 //it a domain-map and a range-map. 01224 //The domain-map will be the domain-map of B, unless 01225 //op(B)==transpose(B), in which case the range-map of B will be used. 01226 //The range-map will be the range-map of A, unless 01227 //op(A)==transpose(A), in which case the domain-map of A will be used. 01228 01229 const Epetra_Map* domainmap = 01230 transposeB ? &(B.RangeMap()) : &(B.DomainMap()); 01231 01232 const Epetra_Map* rangemap = 01233 transposeA ? &(A.DomainMap()) : &(A.RangeMap()); 01234 01235 if (!C.Filled()) { 01236 EPETRA_CHK_ERR( C.FillComplete(*domainmap, *rangemap) ); 01237 } 01238 } 01239 01240 //Finally, delete the objects that were potentially created 01241 //during the course of importing remote sections of A and B. 01242 01243 delete mapunion1; mapunion1 = NULL; 01244 delete workmap1; workmap1 = NULL; 01245 delete workmap2; workmap2 = NULL; 01246 01247 return(0); 01248 } 01249 01250 int MatrixMatrix::Add(const Epetra_CrsMatrix& A, 01251 bool transposeA, 01252 double scalarA, 01253 Epetra_CrsMatrix& B, 01254 double scalarB ) 01255 { 01256 // 01257 //This method forms the matrix-matrix sum B = scalarA * op(A) + scalarB * B, where 01258 01259 //A should already be Filled. It doesn't matter whether B is 01260 //already Filled, but if it is, then its graph must already contain 01261 //all nonzero locations that will be referenced in forming the 01262 //sum. 01263 01264 if (!A.Filled() ) { 01265 std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() is false, it is required to be true. (Result matrix B is not required to be Filled())."<<std::endl; 01266 EPETRA_CHK_ERR(-1); 01267 } 01268 01269 //explicit tranpose A formed as necessary 01270 Epetra_CrsMatrix * Aprime = 0; 01271 EpetraExt::RowMatrix_Transpose * Atrans = 0; 01272 if( transposeA ) 01273 { 01274 Atrans = new EpetraExt::RowMatrix_Transpose(); 01275 Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A))))); 01276 } 01277 else 01278 Aprime = const_cast<Epetra_CrsMatrix*>(&A); 01279 01280 int MaxNumEntries = EPETRA_MAX( A.MaxNumEntries(), B.MaxNumEntries() ); 01281 int A_NumEntries, B_NumEntries; 01282 int * A_Indices = new int[MaxNumEntries]; 01283 double * A_Values = new double[MaxNumEntries]; 01284 int* B_Indices; 01285 double* B_Values; 01286 01287 int NumMyRows = B.NumMyRows(); 01288 int Row, err; 01289 int ierr = 0; 01290 01291 if( scalarA ) 01292 { 01293 //Loop over B's rows and sum into 01294 for( int i = 0; i < NumMyRows; ++i ) 01295 { 01296 Row = B.GRID(i); 01297 EPETRA_CHK_ERR( Aprime->ExtractGlobalRowCopy( Row, MaxNumEntries, A_NumEntries, A_Values, A_Indices ) ); 01298 01299 if (scalarB != 1.0) { 01300 if (!B.Filled()) { 01301 EPETRA_CHK_ERR( B.ExtractGlobalRowView( Row, B_NumEntries, 01302 B_Values, B_Indices)); 01303 } 01304 else { 01305 EPETRA_CHK_ERR( B.ExtractMyRowView( i, B_NumEntries, 01306 B_Values, B_Indices)); 01307 } 01308 01309 for(int jj=0; jj<B_NumEntries; ++jj) { 01310 B_Values[jj] = scalarB*B_Values[jj]; 01311 } 01312 } 01313 01314 if( scalarA != 1.0 ) { 01315 for( int j = 0; j < A_NumEntries; ++j ) A_Values[j] *= scalarA; 01316 } 01317 01318 if( B.Filled() ) {//Sum In Values 01319 err = B.SumIntoGlobalValues( Row, A_NumEntries, A_Values, A_Indices ); 01320 assert( err >= 0 ); 01321 if (err < 0) ierr = err; 01322 } 01323 else { 01324 err = B.InsertGlobalValues( Row, A_NumEntries, A_Values, A_Indices ); 01325 assert( err == 0 || err == 1 || err == 3 ); 01326 if (err < 0) ierr = err; 01327 } 01328 } 01329 } 01330 else { 01331 EPETRA_CHK_ERR( B.Scale(scalarB) ); 01332 } 01333 01334 delete [] A_Indices; 01335 delete [] A_Values; 01336 01337 if( Atrans ) delete Atrans; 01338 01339 return(ierr); 01340 } 01341 01342 int MatrixMatrix::Add(const Epetra_CrsMatrix& A, 01343 bool transposeA, 01344 double scalarA, 01345 const Epetra_CrsMatrix & B, 01346 bool transposeB, 01347 double scalarB, 01348 Epetra_CrsMatrix * & C) 01349 { 01350 // 01351 //This method forms the matrix-matrix sum C = scalarA * op(A) + scalarB * op(B), where 01352 01353 //A and B should already be Filled. C should be an empty pointer. 01354 01355 if (!A.Filled() || !B.Filled() ) { 01356 std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() or B.Filled() is false," 01357 << "they are required to be true. (Result matrix C should be an empty pointer)" << std::endl; 01358 EPETRA_CHK_ERR(-1); 01359 } 01360 01361 Epetra_CrsMatrix * Aprime = 0, * Bprime=0; 01362 EpetraExt::RowMatrix_Transpose * Atrans = 0,* Btrans = 0; 01363 01364 //explicit tranpose A formed as necessary 01365 if( transposeA ) { 01366 Atrans = new EpetraExt::RowMatrix_Transpose(); 01367 Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A))))); 01368 } 01369 else 01370 Aprime = const_cast<Epetra_CrsMatrix*>(&A); 01371 01372 //explicit tranpose B formed as necessary 01373 if( transposeB ) { 01374 Btrans = new EpetraExt::RowMatrix_Transpose(); 01375 Bprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Btrans)(const_cast<Epetra_CrsMatrix&>(B))))); 01376 } 01377 else 01378 Bprime = const_cast<Epetra_CrsMatrix*>(&B); 01379 01380 // allocate or zero the new matrix 01381 if(C!=0) 01382 C->PutScalar(0.0); 01383 else 01384 C = new Epetra_CrsMatrix(Copy,Aprime->RowMap(),0); 01385 01386 // build arrays for easy resuse 01387 int ierr = 0; 01388 Epetra_CrsMatrix * Mat[] = { Aprime,Bprime}; 01389 double scalar[] = { scalarA, scalarB}; 01390 01391 // do a loop over each matrix to add: A reordering might be more efficient 01392 for(int k=0;k<2;k++) { 01393 int MaxNumEntries = Mat[k]->MaxNumEntries(); 01394 int NumEntries; 01395 int * Indices = new int[MaxNumEntries]; 01396 double * Values = new double[MaxNumEntries]; 01397 01398 int NumMyRows = Mat[k]->NumMyRows(); 01399 int Row, err; 01400 int ierr = 0; 01401 01402 //Loop over rows and sum into C 01403 for( int i = 0; i < NumMyRows; ++i ) { 01404 Row = Mat[k]->GRID(i); 01405 EPETRA_CHK_ERR( Mat[k]->ExtractGlobalRowCopy( Row, MaxNumEntries, NumEntries, Values, Indices)); 01406 01407 if( scalar[k] != 1.0 ) 01408 for( int j = 0; j < NumEntries; ++j ) Values[j] *= scalar[k]; 01409 01410 if(C->Filled()) { // Sum in values 01411 err = C->SumIntoGlobalValues( Row, NumEntries, Values, Indices ); 01412 if (err < 0) ierr = err; 01413 } else { // just add it to the unfilled CRS Matrix 01414 err = C->InsertGlobalValues( Row, NumEntries, Values, Indices ); 01415 if (err < 0) ierr = err; 01416 } 01417 } 01418 01419 delete [] Indices; 01420 delete [] Values; 01421 } 01422 01423 if( Atrans ) delete Atrans; 01424 if( Btrans ) delete Btrans; 01425 01426 return(ierr); 01427 } 01428 01429 } // namespace EpetraExt 01430
1.7.4