|
IFPACK Development
|
00001 /*@HEADER 00002 // *********************************************************************** 00003 // 00004 // Ifpack: Object-Oriented Algebraic Preconditioner Package 00005 // Copyright (2009) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 //@HEADER 00028 */ 00029 00030 #include "Mat_dh.h" 00031 #include "getRow_dh.h" 00032 #include "SubdomainGraph_dh.h" 00033 #include "TimeLog_dh.h" 00034 #include "Mem_dh.h" 00035 #include "Numbering_dh.h" 00036 #include "Parser_dh.h" 00037 #include "mat_dh_private.h" 00038 #include "io_dh.h" 00039 #include "Hash_i_dh.h" 00040 00041 static void setup_matvec_sends_private (Mat_dh mat, int *inlist); 00042 static void setup_matvec_receives_private (Mat_dh mat, int *beg_rows, 00043 int *end_rows, int reqlen, 00044 int *reqind, int *outlist); 00045 00046 #if 0 00047 00048 partial (?) 00049 implementation below; 00050 not used anyplace, I think; 00051 for future 00052 expansion ?[mar 21, 2 K + 1] 00053 static void Mat_dhAllocate_getRow_private (Mat_dh A); 00054 #endif 00055 00056 static bool commsOnly = false; /* experimental, for matvec functions */ 00057 00058 #undef __FUNC__ 00059 #define __FUNC__ "Mat_dhCreate" 00060 void Mat_dhCreate (Mat_dh * mat) 00061 { 00062 START_FUNC_DH 00063 struct _mat_dh *tmp = 00064 (struct _mat_dh *) MALLOC_DH (sizeof (struct _mat_dh)); 00065 CHECK_V_ERROR; 00066 *mat = tmp; 00067 00068 commsOnly = Parser_dhHasSwitch (parser_dh, "-commsOnly"); 00069 if (myid_dh == 0 && commsOnly == true) 00070 { 00071 /* printf("\n@@@ commsOnly == true for matvecs! @@@\n"); */ 00072 fflush (stdout); 00073 } 00074 00075 tmp->m = 0; 00076 tmp->n = 0; 00077 tmp->beg_row = 0; 00078 tmp->bs = 1; 00079 00080 tmp->rp = NULL; 00081 tmp->len = NULL; 00082 tmp->cval = NULL; 00083 tmp->aval = NULL; 00084 tmp->diag = NULL; 00085 tmp->fill = NULL; 00086 tmp->owner = true; 00087 00088 tmp->len_private = 0; 00089 tmp->rowCheckedOut = -1; 00090 tmp->cval_private = NULL; 00091 tmp->aval_private = NULL; 00092 00093 tmp->row_perm = NULL; 00094 00095 tmp->num_recv = 0; 00096 tmp->num_send = 0; 00097 tmp->recv_req = NULL; 00098 tmp->send_req = NULL; 00099 tmp->status = NULL; 00100 tmp->recvbuf = NULL; 00101 tmp->sendbuf = NULL; 00102 tmp->sendind = NULL; 00103 tmp->sendlen = 0; 00104 tmp->recvlen = 0; 00105 tmp->numb = NULL; 00106 tmp->matvecIsSetup = false; 00107 00108 Mat_dhZeroTiming (tmp); 00109 CHECK_V_ERROR; 00110 tmp->matvec_timing = true; 00111 00112 tmp->debug = Parser_dhHasSwitch (parser_dh, "-debug_Mat"); 00113 END_FUNC_DH} 00114 00115 #undef __FUNC__ 00116 #define __FUNC__ "Mat_dhDestroy" 00117 void 00118 Mat_dhDestroy (Mat_dh mat) 00119 { 00120 START_FUNC_DH int i; 00121 00122 if (mat->owner) 00123 { 00124 if (mat->rp != NULL) 00125 { 00126 FREE_DH (mat->rp); 00127 CHECK_V_ERROR; 00128 } 00129 if (mat->len != NULL) 00130 { 00131 FREE_DH (mat->len); 00132 CHECK_V_ERROR; 00133 } 00134 if (mat->cval != NULL) 00135 { 00136 FREE_DH (mat->cval); 00137 CHECK_V_ERROR; 00138 } 00139 if (mat->aval != NULL) 00140 { 00141 FREE_DH (mat->aval); 00142 CHECK_V_ERROR; 00143 } 00144 if (mat->diag != NULL) 00145 { 00146 FREE_DH (mat->diag); 00147 CHECK_V_ERROR; 00148 } 00149 if (mat->fill != NULL) 00150 { 00151 FREE_DH (mat->fill); 00152 CHECK_V_ERROR; 00153 } 00154 if (mat->cval_private != NULL) 00155 { 00156 FREE_DH (mat->cval_private); 00157 CHECK_V_ERROR; 00158 } 00159 if (mat->aval_private != NULL) 00160 { 00161 FREE_DH (mat->aval_private); 00162 CHECK_V_ERROR; 00163 } 00164 if (mat->row_perm != NULL) 00165 { 00166 FREE_DH (mat->row_perm); 00167 CHECK_V_ERROR; 00168 } 00169 } 00170 00171 for (i = 0; i < mat->num_recv; i++) 00172 MPI_Request_free (&mat->recv_req[i]); 00173 for (i = 0; i < mat->num_send; i++) 00174 MPI_Request_free (&mat->send_req[i]); 00175 if (mat->recv_req != NULL) 00176 { 00177 FREE_DH (mat->recv_req); 00178 CHECK_V_ERROR; 00179 } 00180 if (mat->send_req != NULL) 00181 { 00182 FREE_DH (mat->send_req); 00183 CHECK_V_ERROR; 00184 } 00185 if (mat->status != NULL) 00186 { 00187 FREE_DH (mat->status); 00188 CHECK_V_ERROR; 00189 } 00190 if (mat->recvbuf != NULL) 00191 { 00192 FREE_DH (mat->recvbuf); 00193 CHECK_V_ERROR; 00194 } 00195 if (mat->sendbuf != NULL) 00196 { 00197 FREE_DH (mat->sendbuf); 00198 CHECK_V_ERROR; 00199 } 00200 if (mat->sendind != NULL) 00201 { 00202 FREE_DH (mat->sendind); 00203 CHECK_V_ERROR; 00204 } 00205 00206 if (mat->matvecIsSetup) 00207 { 00208 Mat_dhMatVecSetdown (mat); 00209 CHECK_V_ERROR; 00210 } 00211 if (mat->numb != NULL) 00212 { 00213 Numbering_dhDestroy (mat->numb); 00214 CHECK_V_ERROR; 00215 } 00216 FREE_DH (mat); 00217 CHECK_V_ERROR; 00218 END_FUNC_DH} 00219 00220 00221 /* this should put the cval array back the way it was! */ 00222 #undef __FUNC__ 00223 #define __FUNC__ "Mat_dhMatVecSetDown" 00224 void 00225 Mat_dhMatVecSetdown (Mat_dh mat) 00226 { 00227 START_FUNC_DH if (ignoreMe) 00228 SET_V_ERROR ("not implemented"); 00229 END_FUNC_DH} 00230 00231 00232 /* adopted from Edmond Chow's ParaSails */ 00233 #undef __FUNC__ 00234 #define __FUNC__ "Mat_dhMatVecSetup" 00235 void 00236 Mat_dhMatVecSetup (Mat_dh mat) 00237 { 00238 START_FUNC_DH if (np_dh == 1) 00239 { 00240 goto DO_NOTHING; 00241 } 00242 00243 else 00244 { 00245 int *outlist, *inlist; 00246 int ierr, i, row, *rp = mat->rp, *cval = mat->cval; 00247 Numbering_dh numb; 00248 int m = mat->m; 00249 int firstLocal = mat->beg_row; 00250 int lastLocal = firstLocal + m; 00251 int *beg_rows, *end_rows; 00252 00253 mat->recv_req = 00254 (MPI_Request *) MALLOC_DH (np_dh * sizeof (MPI_Request)); 00255 CHECK_V_ERROR; 00256 mat->send_req = 00257 (MPI_Request *) MALLOC_DH (np_dh * sizeof (MPI_Request)); 00258 CHECK_V_ERROR; 00259 mat->status = (MPI_Status *) MALLOC_DH (np_dh * sizeof (MPI_Status)); 00260 CHECK_V_ERROR; 00261 beg_rows = (int *) MALLOC_DH (np_dh * sizeof (int)); 00262 CHECK_V_ERROR; 00263 end_rows = (int *) MALLOC_DH (np_dh * sizeof (int)); 00264 CHECK_V_ERROR; 00265 00266 if (np_dh == 1) 00267 { /* this is for debugging purposes in some of the drivers */ 00268 beg_rows[0] = 0; 00269 end_rows[0] = m; 00270 } 00271 else 00272 { 00273 ierr = 00274 MPI_Allgather (&firstLocal, 1, MPI_INT, beg_rows, 1, MPI_INT, 00275 comm_dh); 00276 00277 CHECK_MPI_V_ERROR (ierr); 00278 00279 ierr = 00280 MPI_Allgather (&lastLocal, 1, MPI_INT, end_rows, 1, MPI_INT, 00281 comm_dh); 00282 CHECK_MPI_V_ERROR (ierr); 00283 } 00284 00285 outlist = (int *) MALLOC_DH (np_dh * sizeof (int)); 00286 CHECK_V_ERROR; 00287 inlist = (int *) MALLOC_DH (np_dh * sizeof (int)); 00288 CHECK_V_ERROR; 00289 for (i = 0; i < np_dh; ++i) 00290 { 00291 outlist[i] = 0; 00292 inlist[i] = 0; 00293 } 00294 00295 /* Create Numbering object */ 00296 Numbering_dhCreate (&(mat->numb)); 00297 CHECK_V_ERROR; 00298 numb = mat->numb; 00299 Numbering_dhSetup (numb, mat); 00300 CHECK_V_ERROR; 00301 00302 setup_matvec_receives_private (mat, beg_rows, end_rows, numb->num_ext, 00303 numb->idx_ext, outlist); 00304 CHECK_V_ERROR; 00305 00306 if (np_dh == 1) 00307 { /* this is for debugging purposes in some of the drivers */ 00308 inlist[0] = outlist[0]; 00309 } 00310 else 00311 { 00312 ierr = 00313 MPI_Alltoall (outlist, 1, MPI_INT, inlist, 1, MPI_INT, comm_dh); 00314 CHECK_MPI_V_ERROR (ierr); 00315 } 00316 00317 setup_matvec_sends_private (mat, inlist); 00318 CHECK_V_ERROR; 00319 00320 /* Convert to local indices */ 00321 for (row = 0; row < m; row++) 00322 { 00323 int len = rp[row + 1] - rp[row]; 00324 int *ind = cval + rp[row]; 00325 Numbering_dhGlobalToLocal (numb, len, ind, ind); 00326 CHECK_V_ERROR; 00327 } 00328 00329 FREE_DH (outlist); 00330 CHECK_V_ERROR; 00331 FREE_DH (inlist); 00332 CHECK_V_ERROR; 00333 FREE_DH (beg_rows); 00334 CHECK_V_ERROR; 00335 FREE_DH (end_rows); 00336 CHECK_V_ERROR; 00337 } 00338 00339 DO_NOTHING:; 00340 00341 END_FUNC_DH} 00342 00343 /* adopted from Edmond Chow's ParaSails */ 00344 #undef __FUNC__ 00345 #define __FUNC__ "setup_matvec_receives_private" 00346 void 00347 setup_matvec_receives_private (Mat_dh mat, int *beg_rows, int *end_rows, 00348 int reqlen, int *reqind, int *outlist) 00349 { 00350 START_FUNC_DH int ierr, i, j, this_pe; 00351 MPI_Request request; 00352 int m = mat->m; 00353 00354 mat->num_recv = 0; 00355 00356 /* Allocate recvbuf */ 00357 /* recvbuf has numlocal entries saved for local part of x, used in matvec */ 00358 mat->recvbuf = (double *) MALLOC_DH ((reqlen + m) * sizeof (double)); 00359 00360 for (i = 0; i < reqlen; i = j) 00361 { /* j is set below */ 00362 /* The processor that owns the row with index reqind[i] */ 00363 this_pe = mat_find_owner (beg_rows, end_rows, reqind[i]); 00364 CHECK_V_ERROR; 00365 00366 /* Figure out other rows we need from this_pe */ 00367 for (j = i + 1; j < reqlen; j++) 00368 { 00369 /* if row is on different pe */ 00370 if (reqind[j] < beg_rows[this_pe] || reqind[j] > end_rows[this_pe]) 00371 break; 00372 } 00373 00374 /* Request rows in reqind[i..j-1] */ 00375 ierr = 00376 MPI_Isend (&reqind[i], j - i, MPI_INT, this_pe, 444, comm_dh, 00377 &request); 00378 CHECK_MPI_V_ERROR (ierr); 00379 ierr = MPI_Request_free (&request); 00380 CHECK_MPI_V_ERROR (ierr); 00381 00382 /* Count of number of number of indices needed from this_pe */ 00383 outlist[this_pe] = j - i; 00384 00385 ierr = 00386 MPI_Recv_init (&mat->recvbuf[i + m], j - i, MPI_DOUBLE, this_pe, 555, 00387 comm_dh, &mat->recv_req[mat->num_recv]); 00388 CHECK_MPI_V_ERROR (ierr); 00389 00390 mat->num_recv++; 00391 mat->recvlen += j - i; /* only used for statistical reporting */ 00392 } 00393 END_FUNC_DH} 00394 00395 00396 /* adopted from Edmond Chow's ParaSails */ 00397 #undef __FUNC__ 00398 #define __FUNC__ "setup_matvec_sends_private" 00399 void 00400 setup_matvec_sends_private (Mat_dh mat, int *inlist) 00401 { 00402 START_FUNC_DH int ierr, i, j, sendlen, first = mat->beg_row; 00403 MPI_Request *requests; 00404 MPI_Status *statuses; 00405 00406 requests = (MPI_Request *) MALLOC_DH (np_dh * sizeof (MPI_Request)); 00407 CHECK_V_ERROR; 00408 statuses = (MPI_Status *) MALLOC_DH (np_dh * sizeof (MPI_Status)); 00409 CHECK_V_ERROR; 00410 00411 /* Determine size of and allocate sendbuf and sendind */ 00412 sendlen = 0; 00413 for (i = 0; i < np_dh; i++) 00414 sendlen += inlist[i]; 00415 mat->sendlen = sendlen; 00416 mat->sendbuf = (double *) MALLOC_DH (sendlen * sizeof (double)); 00417 CHECK_V_ERROR; 00418 mat->sendind = (int *) MALLOC_DH (sendlen * sizeof (int)); 00419 CHECK_V_ERROR; 00420 00421 j = 0; 00422 mat->num_send = 0; 00423 for (i = 0; i < np_dh; i++) 00424 { 00425 if (inlist[i] != 0) 00426 { 00427 /* Post receive for the actual indices */ 00428 ierr = 00429 MPI_Irecv (&mat->sendind[j], inlist[i], MPI_INT, i, 444, comm_dh, 00430 &requests[mat->num_send]); 00431 CHECK_MPI_V_ERROR (ierr); 00432 /* Set up the send */ 00433 ierr = 00434 MPI_Send_init (&mat->sendbuf[j], inlist[i], MPI_DOUBLE, i, 555, 00435 comm_dh, &mat->send_req[mat->num_send]); 00436 CHECK_MPI_V_ERROR (ierr); 00437 00438 mat->num_send++; 00439 j += inlist[i]; 00440 } 00441 } 00442 00443 /* total bytes to be sent during matvec */ 00444 mat->time[MATVEC_WORDS] = j; 00445 00446 00447 ierr = MPI_Waitall (mat->num_send, requests, statuses); 00448 CHECK_MPI_V_ERROR (ierr); 00449 /* convert global indices to local indices */ 00450 /* these are all indices on this processor */ 00451 for (i = 0; i < mat->sendlen; i++) 00452 mat->sendind[i] -= first; 00453 00454 FREE_DH (requests); 00455 FREE_DH (statuses); 00456 END_FUNC_DH} 00457 00458 00459 /* unthreaded MPI version */ 00460 #undef __FUNC__ 00461 #define __FUNC__ "Mat_dhMatVec" 00462 void 00463 Mat_dhMatVec (Mat_dh mat, double *x, double *b) 00464 { 00465 START_FUNC_DH if (np_dh == 1) 00466 { 00467 Mat_dhMatVec_uni (mat, x, b); 00468 CHECK_V_ERROR; 00469 } 00470 00471 else 00472 { 00473 int ierr, i, row, m = mat->m; 00474 int *rp = mat->rp, *cval = mat->cval; 00475 double *aval = mat->aval; 00476 int *sendind = mat->sendind; 00477 int sendlen = mat->sendlen; 00478 double *sendbuf = mat->sendbuf; 00479 double *recvbuf = mat->recvbuf; 00480 double t1 = 0, t2 = 0, t3 = 0, t4 = 0; 00481 bool timeFlag = mat->matvec_timing; 00482 00483 00484 if (timeFlag) 00485 t1 = MPI_Wtime (); 00486 00487 /* Put components of x into the right outgoing buffers */ 00488 if (!commsOnly) 00489 { 00490 for (i = 0; i < sendlen; i++) 00491 sendbuf[i] = x[sendind[i]]; 00492 } 00493 00494 if (timeFlag) 00495 { 00496 t2 = MPI_Wtime (); 00497 mat->time[MATVEC_TIME] += (t2 - t1); 00498 00499 } 00500 00501 ierr = MPI_Startall (mat->num_recv, mat->recv_req); 00502 CHECK_MPI_V_ERROR (ierr); 00503 ierr = MPI_Startall (mat->num_send, mat->send_req); 00504 CHECK_MPI_V_ERROR (ierr); 00505 ierr = MPI_Waitall (mat->num_recv, mat->recv_req, mat->status); 00506 CHECK_MPI_V_ERROR (ierr); 00507 ierr = MPI_Waitall (mat->num_send, mat->send_req, mat->status); 00508 CHECK_MPI_V_ERROR (ierr); 00509 00510 00511 if (timeFlag) 00512 { 00513 t3 = MPI_Wtime (); 00514 mat->time[MATVEC_MPI_TIME] += (t3 - t2); 00515 } 00516 00517 /* Copy local part of x into top part of recvbuf */ 00518 if (!commsOnly) 00519 { 00520 for (i = 0; i < m; i++) 00521 recvbuf[i] = x[i]; 00522 00523 /* do the multiply */ 00524 for (row = 0; row < m; row++) 00525 { 00526 int len = rp[row + 1] - rp[row]; 00527 int *ind = cval + rp[row]; 00528 double *val = aval + rp[row]; 00529 double temp = 0.0; 00530 for (i = 0; i < len; i++) 00531 { 00532 temp += (val[i] * recvbuf[ind[i]]); 00533 } 00534 b[row] = temp; 00535 } 00536 } /* if (! commsOnly) */ 00537 00538 if (timeFlag) 00539 { 00540 t4 = MPI_Wtime (); 00541 mat->time[MATVEC_TOTAL_TIME] += (t4 - t1); 00542 mat->time[MATVEC_TIME] += (t4 - t3); 00543 } 00544 } 00545 END_FUNC_DH} 00546 00547 /* OpenMP/MPI version */ 00548 #undef __FUNC__ 00549 #define __FUNC__ "Mat_dhMatVec_omp" 00550 void 00551 Mat_dhMatVec_omp (Mat_dh mat, double *x, double *b) 00552 { 00553 START_FUNC_DH int ierr, i, row, m = mat->m; 00554 int *rp = mat->rp, *cval = mat->cval; 00555 double *aval = mat->aval; 00556 int *sendind = mat->sendind; 00557 int sendlen = mat->sendlen; 00558 double *sendbuf = mat->sendbuf; 00559 double *recvbuf = mat->recvbuf; 00560 double t1 = 0, t2 = 0, t3 = 0, t4 = 0, tx = 0; 00561 double *val, temp; 00562 int len, *ind; 00563 bool timeFlag = mat->matvec_timing; 00564 00565 if (timeFlag) 00566 t1 = MPI_Wtime (); 00567 00568 /* Put components of x into the right outgoing buffers */ 00569 for (i = 0; i < sendlen; i++) 00570 sendbuf[i] = x[sendind[i]]; 00571 00572 if (timeFlag) 00573 { 00574 t2 = MPI_Wtime (); 00575 mat->time[MATVEC_TIME] += (t2 - t1); 00576 } 00577 00578 ierr = MPI_Startall (mat->num_recv, mat->recv_req); 00579 CHECK_MPI_V_ERROR (ierr); 00580 ierr = MPI_Startall (mat->num_send, mat->send_req); 00581 CHECK_MPI_V_ERROR (ierr); 00582 ierr = MPI_Waitall (mat->num_recv, mat->recv_req, mat->status); 00583 CHECK_MPI_V_ERROR (ierr); 00584 ierr = MPI_Waitall (mat->num_send, mat->send_req, mat->status); 00585 CHECK_MPI_V_ERROR (ierr); 00586 00587 if (timeFlag) 00588 { 00589 t3 = MPI_Wtime (); 00590 mat->time[MATVEC_MPI_TIME] += (t3 - t2); 00591 } 00592 00593 /* Copy local part of x into top part of recvbuf */ 00594 for (i = 0; i < m; i++) 00595 recvbuf[i] = x[i]; 00596 00597 if (timeFlag) 00598 { 00599 tx = MPI_Wtime (); 00600 mat->time[MATVEC_MPI_TIME2] += (tx - t1); 00601 } 00602 00603 00604 /* do the multiply */ 00605 for (row = 0; row < m; row++) 00606 { 00607 len = rp[row + 1] - rp[row]; 00608 ind = cval + rp[row]; 00609 val = aval + rp[row]; 00610 temp = 0.0; 00611 for (i = 0; i < len; i++) 00612 { 00613 temp += (val[i] * recvbuf[ind[i]]); 00614 } 00615 b[row] = temp; 00616 } 00617 00618 if (timeFlag) 00619 { 00620 t4 = MPI_Wtime (); 00621 mat->time[MATVEC_TOTAL_TIME] += (t4 - t1); 00622 mat->time[MATVEC_TIME] += (t4 - t3); 00623 } 00624 00625 END_FUNC_DH} 00626 00627 00628 /* OpenMP/single primary task version */ 00629 #undef __FUNC__ 00630 #define __FUNC__ "Mat_dhMatVec_uni_omp" 00631 void 00632 Mat_dhMatVec_uni_omp (Mat_dh mat, double *x, double *b) 00633 { 00634 START_FUNC_DH int i, row, m = mat->m; 00635 int *rp = mat->rp, *cval = mat->cval; 00636 double *aval = mat->aval; 00637 double t1 = 0, t2 = 0; 00638 bool timeFlag = mat->matvec_timing; 00639 00640 if (timeFlag) 00641 { 00642 t1 = MPI_Wtime (); 00643 } 00644 00645 /* do the multiply */ 00646 for (row = 0; row < m; row++) 00647 { 00648 int len = rp[row + 1] - rp[row]; 00649 int *ind = cval + rp[row]; 00650 double *val = aval + rp[row]; 00651 double temp = 0.0; 00652 for (i = 0; i < len; i++) 00653 { 00654 temp += (val[i] * x[ind[i]]); 00655 } 00656 b[row] = temp; 00657 } 00658 00659 if (timeFlag) 00660 { 00661 t2 = MPI_Wtime (); 00662 mat->time[MATVEC_TIME] += (t2 - t1); 00663 mat->time[MATVEC_TOTAL_TIME] += (t2 - t1); 00664 } 00665 00666 END_FUNC_DH} 00667 00668 00669 /* unthreaded, single-task version */ 00670 #undef __FUNC__ 00671 #define __FUNC__ "Mat_dhMatVec_uni" 00672 void 00673 Mat_dhMatVec_uni (Mat_dh mat, double *x, double *b) 00674 { 00675 START_FUNC_DH int i, row, m = mat->m; 00676 int *rp = mat->rp, *cval = mat->cval; 00677 double *aval = mat->aval; 00678 double t1 = 0, t2 = 0; 00679 bool timeFlag = mat->matvec_timing; 00680 00681 if (timeFlag) 00682 t1 = MPI_Wtime (); 00683 00684 for (row = 0; row < m; row++) 00685 { 00686 int len = rp[row + 1] - rp[row]; 00687 int *ind = cval + rp[row]; 00688 double *val = aval + rp[row]; 00689 double temp = 0.0; 00690 for (i = 0; i < len; i++) 00691 { 00692 temp += (val[i] * x[ind[i]]); 00693 } 00694 b[row] = temp; 00695 } 00696 00697 if (timeFlag) 00698 { 00699 t2 = MPI_Wtime (); 00700 mat->time[MATVEC_TIME] += (t2 - t1); 00701 mat->time[MATVEC_TOTAL_TIME] += (t2 - t1); 00702 } 00703 00704 END_FUNC_DH} 00705 00706 00707 #undef __FUNC__ 00708 #define __FUNC__ "Mat_dhReadNz" 00709 int 00710 Mat_dhReadNz (Mat_dh mat) 00711 { 00712 START_FUNC_DH int ierr, retval = mat->rp[mat->m]; 00713 int nz = retval; 00714 ierr = MPI_Allreduce (&nz, &retval, 1, MPI_INT, MPI_SUM, comm_dh); 00715 CHECK_MPI_ERROR (ierr); 00716 END_FUNC_VAL (retval)} 00717 00718 00719 00720 #if 0 00721 00722 #undef __FUNC__ 00723 #define __FUNC__ "Mat_dhAllocate_getRow_private" 00724 void 00725 Mat_dhAllocate_getRow_private (Mat_dh A) 00726 { 00727 START_FUNC_DH int i, *rp = A->rp, len = 0; 00728 int m = A->m; 00729 00730 /* find longest row in matrix */ 00731 for (i = 0; i < m; ++i) 00732 len = MAX (len, rp[i + 1] - rp[i]); 00733 len *= A->bs; 00734 00735 /* free any previously allocated private storage */ 00736 if (len > A->len_private) 00737 { 00738 if (A->cval_private != NULL) 00739 { 00740 FREE_DH (A->cval_private); 00741 CHECK_V_ERROR; 00742 } 00743 if (A->aval_private != NULL) 00744 { 00745 FREE_DH (A->aval_private); 00746 CHECK_V_ERROR; 00747 } 00748 } 00749 00750 /* allocate private storage */ 00751 A->cval_private = (int *) MALLOC_DH (len * sizeof (int)); 00752 CHECK_V_ERROR; 00753 A->aval_private = (double *) MALLOC_DH (len * sizeof (double)); 00754 CHECK_V_ERROR; 00755 A->len_private = len; 00756 END_FUNC_DH} 00757 00758 #endif 00759 00760 #undef __FUNC__ 00761 #define __FUNC__ "Mat_dhZeroTiming" 00762 void 00763 Mat_dhZeroTiming (Mat_dh mat) 00764 { 00765 START_FUNC_DH int i; 00766 00767 for (i = 0; i < MAT_DH_BINS; ++i) 00768 { 00769 mat->time[i] = 0; 00770 mat->time_max[i] = 0; 00771 mat->time_min[i] = 0; 00772 } 00773 END_FUNC_DH} 00774 00775 #undef __FUNC__ 00776 #define __FUNC__ "Mat_dhReduceTiming" 00777 void 00778 Mat_dhReduceTiming (Mat_dh mat) 00779 { 00780 START_FUNC_DH if (mat->time[MATVEC_MPI_TIME]) 00781 { 00782 mat->time[MATVEC_RATIO] = 00783 mat->time[MATVEC_TIME] / mat->time[MATVEC_MPI_TIME]; 00784 } 00785 MPI_Allreduce (mat->time, mat->time_min, MAT_DH_BINS, MPI_DOUBLE, MPI_MIN, 00786 comm_dh); 00787 MPI_Allreduce (mat->time, mat->time_max, MAT_DH_BINS, MPI_DOUBLE, MPI_MAX, 00788 comm_dh); 00789 END_FUNC_DH} 00790 00791 #undef __FUNC__ 00792 #define __FUNC__ "Mat_dhPermute" 00793 void 00794 Mat_dhPermute (Mat_dh A, int *n2o, Mat_dh * Bout) 00795 { 00796 START_FUNC_DH Mat_dh B; 00797 int i, j, *RP = A->rp, *CVAL = A->cval; 00798 int *o2n, *rp, *cval, m = A->m, nz = RP[m]; 00799 double *aval, *AVAL = A->aval; 00800 00801 Mat_dhCreate (&B); 00802 CHECK_V_ERROR; 00803 B->m = B->n = m; 00804 *Bout = B; 00805 00806 /* form inverse permutation */ 00807 o2n = (int *) MALLOC_DH (m * sizeof (int)); 00808 CHECK_V_ERROR; 00809 for (i = 0; i < m; ++i) 00810 o2n[n2o[i]] = i; 00811 00812 /* allocate storage for permuted matrix */ 00813 rp = B->rp = (int *) MALLOC_DH ((m + 1) * sizeof (int)); 00814 CHECK_V_ERROR; 00815 cval = B->cval = (int *) MALLOC_DH (nz * sizeof (int)); 00816 CHECK_V_ERROR; 00817 aval = B->aval = (double *) MALLOC_DH (nz * sizeof (double)); 00818 CHECK_V_ERROR; 00819 00820 /* form new rp array */ 00821 rp[0] = 0; 00822 for (i = 0; i < m; ++i) 00823 { 00824 int oldRow = n2o[i]; 00825 rp[i + 1] = RP[oldRow + 1] - RP[oldRow]; 00826 } 00827 for (i = 1; i <= m; ++i) 00828 rp[i] = rp[i] + rp[i - 1]; 00829 00830 for (i = 0; i < m; ++i) 00831 { 00832 int oldRow = n2o[i]; 00833 int idx = rp[i]; 00834 for (j = RP[oldRow]; j < RP[oldRow + 1]; ++j) 00835 { 00836 cval[idx] = o2n[CVAL[j]]; 00837 aval[idx] = AVAL[j]; 00838 ++idx; 00839 } 00840 } 00841 00842 FREE_DH (o2n); 00843 CHECK_V_ERROR; 00844 END_FUNC_DH} 00845 00846 00847 /*---------------------------------------------------------------------- 00848 * Print methods 00849 *----------------------------------------------------------------------*/ 00850 00851 /* seq or mpi */ 00852 #undef __FUNC__ 00853 #define __FUNC__ "Mat_dhPrintGraph" 00854 void 00855 Mat_dhPrintGraph (Mat_dh A, SubdomainGraph_dh sg, FILE * fp) 00856 { 00857 START_FUNC_DH int pe, id = myid_dh; 00858 int ierr; 00859 00860 if (sg != NULL) 00861 { 00862 id = sg->o2n_sub[id]; 00863 } 00864 00865 for (pe = 0; pe < np_dh; ++pe) 00866 { 00867 ierr = MPI_Barrier (comm_dh); 00868 CHECK_MPI_V_ERROR (ierr); 00869 if (id == pe) 00870 { 00871 if (sg == NULL) 00872 { 00873 mat_dh_print_graph_private (A->m, A->beg_row, A->rp, A->cval, 00874 A->aval, NULL, NULL, NULL, fp); 00875 CHECK_V_ERROR; 00876 } 00877 else 00878 { 00879 int beg_row = sg->beg_rowP[myid_dh]; 00880 mat_dh_print_graph_private (A->m, beg_row, A->rp, A->cval, 00881 A->aval, sg->n2o_row, sg->o2n_col, 00882 sg->o2n_ext, fp); 00883 CHECK_V_ERROR; 00884 } 00885 } 00886 } 00887 END_FUNC_DH} 00888 00889 00890 #undef __FUNC__ 00891 #define __FUNC__ "Mat_dhPrintRows" 00892 void 00893 Mat_dhPrintRows (Mat_dh A, SubdomainGraph_dh sg, FILE * fp) 00894 { 00895 START_FUNC_DH bool noValues; 00896 int m = A->m, *rp = A->rp, *cval = A->cval; 00897 double *aval = A->aval; 00898 00899 noValues = (Parser_dhHasSwitch (parser_dh, "-noValues")); 00900 if (noValues) 00901 aval = NULL; 00902 00903 /*---------------------------------------------------------------- 00904 * case 1: print local portion of unpermuted matrix 00905 *----------------------------------------------------------------*/ 00906 if (sg == NULL) 00907 { 00908 int i, j; 00909 int beg_row = A->beg_row; 00910 00911 fprintf (fp, 00912 "\n----- A, unpermuted ------------------------------------\n"); 00913 for (i = 0; i < m; ++i) 00914 { 00915 fprintf (fp, "%i :: ", 1 + i + beg_row); 00916 for (j = rp[i]; j < rp[i + 1]; ++j) 00917 { 00918 if (noValues) 00919 { 00920 fprintf (fp, "%i ", 1 + cval[j]); 00921 } 00922 else 00923 { 00924 fprintf (fp, "%i,%g ; ", 1 + cval[j], aval[j]); 00925 } 00926 } 00927 fprintf (fp, "\n"); 00928 } 00929 } 00930 00931 /*---------------------------------------------------------------- 00932 * case 2: single mpi task, with multiple subdomains 00933 *----------------------------------------------------------------*/ 00934 else if (np_dh == 1) 00935 { 00936 int i, k, idx = 1; 00937 int oldRow; 00938 00939 for (i = 0; i < sg->blocks; ++i) 00940 { 00941 int oldBlock = sg->n2o_sub[i]; 00942 00943 /* here, 'beg_row' and 'end_row' refer to rows in the 00944 original ordering of A. 00945 */ 00946 int beg_row = sg->beg_row[oldBlock]; 00947 int end_row = beg_row + sg->row_count[oldBlock]; 00948 00949 fprintf (fp, "\n"); 00950 fprintf (fp, 00951 "\n----- A, permuted, single mpi task ------------------\n"); 00952 fprintf (fp, "---- new subdomain: %i; old subdomain: %i\n", i, 00953 oldBlock); 00954 fprintf (fp, " old beg_row: %i; new beg_row: %i\n", 00955 sg->beg_row[oldBlock], sg->beg_rowP[oldBlock]); 00956 fprintf (fp, " local rows in this block: %i\n", 00957 sg->row_count[oldBlock]); 00958 fprintf (fp, " bdry rows in this block: %i\n", 00959 sg->bdry_count[oldBlock]); 00960 fprintf (fp, " 1st bdry row= %i \n", 00961 1 + end_row - sg->bdry_count[oldBlock]); 00962 00963 for (oldRow = beg_row; oldRow < end_row; ++oldRow) 00964 { 00965 int len = 0, *cval; 00966 double *aval; 00967 00968 fprintf (fp, "%3i (old= %3i) :: ", idx, 1 + oldRow); 00969 ++idx; 00970 Mat_dhGetRow (A, oldRow, &len, &cval, &aval); 00971 CHECK_V_ERROR; 00972 00973 for (k = 0; k < len; ++k) 00974 { 00975 if (noValues) 00976 { 00977 fprintf (fp, "%i ", 1 + sg->o2n_col[cval[k]]); 00978 } 00979 else 00980 { 00981 fprintf (fp, "%i,%g ; ", 1 + sg->o2n_col[cval[k]], 00982 aval[k]); 00983 } 00984 } 00985 00986 fprintf (fp, "\n"); 00987 Mat_dhRestoreRow (A, oldRow, &len, &cval, &aval); 00988 CHECK_V_ERROR; 00989 } 00990 } 00991 } 00992 00993 /*---------------------------------------------------------------- 00994 * case 3: multiple mpi tasks, one subdomain per task 00995 *----------------------------------------------------------------*/ 00996 else 00997 { 00998 Hash_i_dh hash = sg->o2n_ext; 00999 int *o2n_col = sg->o2n_col, *n2o_row = sg->n2o_row; 01000 int beg_row = sg->beg_row[myid_dh]; 01001 int beg_rowP = sg->beg_rowP[myid_dh]; 01002 int i, j; 01003 01004 for (i = 0; i < m; ++i) 01005 { 01006 int row = n2o_row[i]; 01007 fprintf (fp, "%3i (old= %3i) :: ", 1 + i + beg_rowP, 01008 1 + row + beg_row); 01009 for (j = rp[row]; j < rp[row + 1]; ++j) 01010 { 01011 int col = cval[j]; 01012 01013 /* find permuted (old-to-new) value for the column */ 01014 /* case i: column is locally owned */ 01015 if (col >= beg_row && col < beg_row + m) 01016 { 01017 col = o2n_col[col - beg_row] + beg_rowP; 01018 } 01019 01020 /* case ii: column is external */ 01021 else 01022 { 01023 int tmp = col; 01024 tmp = Hash_i_dhLookup (hash, col); 01025 CHECK_V_ERROR; 01026 if (tmp == -1) 01027 { 01028 sprintf (msgBuf_dh, 01029 "nonlocal column= %i not in hash table", 01030 1 + col); 01031 SET_V_ERROR (msgBuf_dh); 01032 } 01033 else 01034 { 01035 col = tmp; 01036 } 01037 } 01038 01039 if (noValues) 01040 { 01041 fprintf (fp, "%i ", 1 + col); 01042 } 01043 else 01044 { 01045 fprintf (fp, "%i,%g ; ", 1 + col, aval[j]); 01046 } 01047 } 01048 fprintf (fp, "\n"); 01049 } 01050 } 01051 END_FUNC_DH} 01052 01053 01054 01055 #undef __FUNC__ 01056 #define __FUNC__ "Mat_dhPrintTriples" 01057 void 01058 Mat_dhPrintTriples (Mat_dh A, SubdomainGraph_dh sg, char *filename) 01059 { 01060 START_FUNC_DH int m = A->m, *rp = A->rp, *cval = A->cval; 01061 double *aval = A->aval; 01062 bool noValues; 01063 bool matlab; 01064 FILE *fp; 01065 01066 noValues = (Parser_dhHasSwitch (parser_dh, "-noValues")); 01067 if (noValues) 01068 aval = NULL; 01069 matlab = (Parser_dhHasSwitch (parser_dh, "-matlab")); 01070 01071 /*---------------------------------------------------------------- 01072 * case 1: unpermuted matrix, single or multiple mpi tasks 01073 *----------------------------------------------------------------*/ 01074 if (sg == NULL) 01075 { 01076 int i, j, pe; 01077 int beg_row = A->beg_row; 01078 double val; 01079 01080 for (pe = 0; pe < np_dh; ++pe) 01081 { 01082 MPI_Barrier (comm_dh); 01083 if (pe == myid_dh) 01084 { 01085 if (pe == 0) 01086 { 01087 fp = openFile_dh (filename, "w"); 01088 CHECK_V_ERROR; 01089 } 01090 else 01091 { 01092 fp = openFile_dh (filename, "a"); 01093 CHECK_V_ERROR; 01094 } 01095 01096 for (i = 0; i < m; ++i) 01097 { 01098 for (j = rp[i]; j < rp[i + 1]; ++j) 01099 { 01100 if (noValues) 01101 { 01102 fprintf (fp, "%i %i\n", 1 + i + beg_row, 01103 1 + cval[j]); 01104 } 01105 else 01106 { 01107 val = aval[j]; 01108 if (val == 0.0 && matlab) 01109 val = _MATLAB_ZERO_; 01110 fprintf (fp, TRIPLES_FORMAT, 1 + i + beg_row, 01111 1 + cval[j], val); 01112 } 01113 } 01114 } 01115 closeFile_dh (fp); 01116 CHECK_V_ERROR; 01117 } 01118 } 01119 } 01120 01121 /*---------------------------------------------------------------- 01122 * case 2: single mpi task, with multiple subdomains 01123 *----------------------------------------------------------------*/ 01124 else if (np_dh == 1) 01125 { 01126 int i, j, k, idx = 1; 01127 01128 fp = openFile_dh (filename, "w"); 01129 CHECK_V_ERROR; 01130 01131 for (i = 0; i < sg->blocks; ++i) 01132 { 01133 int oldBlock = sg->n2o_sub[i]; 01134 int beg_row = sg->beg_rowP[oldBlock]; 01135 int end_row = beg_row + sg->row_count[oldBlock]; 01136 01137 for (j = beg_row; j < end_row; ++j) 01138 { 01139 int len = 0, *cval; 01140 double *aval; 01141 int oldRow = sg->n2o_row[j]; 01142 01143 Mat_dhGetRow (A, oldRow, &len, &cval, &aval); 01144 CHECK_V_ERROR; 01145 01146 if (noValues) 01147 { 01148 for (k = 0; k < len; ++k) 01149 { 01150 fprintf (fp, "%i %i\n", idx, 1 + sg->o2n_col[cval[k]]); 01151 } 01152 ++idx; 01153 } 01154 01155 else 01156 { 01157 for (k = 0; k < len; ++k) 01158 { 01159 double val = aval[k]; 01160 if (val == 0.0 && matlab) 01161 val = _MATLAB_ZERO_; 01162 fprintf (fp, TRIPLES_FORMAT, idx, 01163 1 + sg->o2n_col[cval[k]], val); 01164 } 01165 ++idx; 01166 } 01167 Mat_dhRestoreRow (A, oldRow, &len, &cval, &aval); 01168 CHECK_V_ERROR; 01169 } 01170 } 01171 } 01172 01173 /*---------------------------------------------------------------- 01174 * case 3: multiple mpi tasks, one subdomain per task 01175 *----------------------------------------------------------------*/ 01176 else 01177 { 01178 Hash_i_dh hash = sg->o2n_ext; 01179 int *o2n_col = sg->o2n_col, *n2o_row = sg->n2o_row; 01180 int beg_row = sg->beg_row[myid_dh]; 01181 int beg_rowP = sg->beg_rowP[myid_dh]; 01182 int i, j, pe; 01183 int id = sg->o2n_sub[myid_dh]; 01184 01185 for (pe = 0; pe < np_dh; ++pe) 01186 { 01187 MPI_Barrier (comm_dh); 01188 if (id == pe) 01189 { 01190 if (pe == 0) 01191 { 01192 fp = openFile_dh (filename, "w"); 01193 CHECK_V_ERROR; 01194 } 01195 else 01196 { 01197 fp = openFile_dh (filename, "a"); 01198 CHECK_V_ERROR; 01199 } 01200 01201 for (i = 0; i < m; ++i) 01202 { 01203 int row = n2o_row[i]; 01204 for (j = rp[row]; j < rp[row + 1]; ++j) 01205 { 01206 int col = cval[j]; 01207 double val = 0.0; 01208 01209 if (aval != NULL) 01210 val = aval[j]; 01211 if (val == 0.0 && matlab) 01212 val = _MATLAB_ZERO_; 01213 01214 /* find permuted (old-to-new) value for the column */ 01215 /* case i: column is locally owned */ 01216 if (col >= beg_row && col < beg_row + m) 01217 { 01218 col = o2n_col[col - beg_row] + beg_rowP; 01219 } 01220 01221 /* case ii: column is external */ 01222 else 01223 { 01224 int tmp = col; 01225 tmp = Hash_i_dhLookup (hash, col); 01226 CHECK_V_ERROR; 01227 if (tmp == -1) 01228 { 01229 sprintf (msgBuf_dh, 01230 "nonlocal column= %i not in hash table", 01231 1 + col); 01232 SET_V_ERROR (msgBuf_dh); 01233 } 01234 else 01235 { 01236 col = tmp; 01237 } 01238 } 01239 01240 if (noValues) 01241 { 01242 fprintf (fp, "%i %i\n", 1 + i + beg_rowP, 1 + col); 01243 } 01244 else 01245 { 01246 fprintf (fp, TRIPLES_FORMAT, 1 + i + beg_rowP, 01247 1 + col, val); 01248 } 01249 } 01250 } 01251 closeFile_dh (fp); 01252 CHECK_V_ERROR; 01253 } 01254 } 01255 } 01256 END_FUNC_DH} 01257 01258 01259 /* seq only */ 01260 #undef __FUNC__ 01261 #define __FUNC__ "Mat_dhPrintCSR" 01262 void 01263 Mat_dhPrintCSR (Mat_dh A, SubdomainGraph_dh sg, char *filename) 01264 { 01265 START_FUNC_DH FILE * fp; 01266 01267 if (np_dh > 1) 01268 { 01269 SET_V_ERROR ("only implemented for a single mpi task"); 01270 } 01271 if (sg != NULL) 01272 { 01273 SET_V_ERROR 01274 ("not implemented for reordered matrix (SubdomainGraph_dh should be NULL)"); 01275 } 01276 01277 fp = openFile_dh (filename, "w"); 01278 CHECK_V_ERROR; 01279 01280 if (sg == NULL) 01281 { 01282 mat_dh_print_csr_private (A->m, A->rp, A->cval, A->aval, fp); 01283 CHECK_V_ERROR; 01284 } 01285 else 01286 { 01287 mat_dh_print_csr_private (A->m, A->rp, A->cval, A->aval, fp); 01288 CHECK_V_ERROR; 01289 } 01290 closeFile_dh (fp); 01291 CHECK_V_ERROR; 01292 END_FUNC_DH} 01293 01294 /* seq */ 01295 /* no reordering */ 01296 #undef __FUNC__ 01297 #define __FUNC__ "Mat_dhPrintBIN" 01298 void 01299 Mat_dhPrintBIN (Mat_dh A, SubdomainGraph_dh sg, char *filename) 01300 { 01301 START_FUNC_DH if (np_dh > 1) 01302 { 01303 SET_V_ERROR ("only implemented for a single MPI task"); 01304 } 01305 /* if (n2o != NULL || o2n != NULL || hash != NULL) { 01306 */ 01307 if (sg != NULL) 01308 { 01309 SET_V_ERROR ("not implemented for reordering; ensure sg=NULL"); 01310 } 01311 01312 io_dh_print_ebin_mat_private (A->m, A->beg_row, A->rp, A->cval, A->aval, 01313 NULL, NULL, NULL, filename); 01314 CHECK_V_ERROR; 01315 END_FUNC_DH} 01316 01317 01318 /*---------------------------------------------------------------------- 01319 * Read methods 01320 *----------------------------------------------------------------------*/ 01321 /* seq only */ 01322 #undef __FUNC__ 01323 #define __FUNC__ "Mat_dhReadCSR" 01324 void 01325 Mat_dhReadCSR (Mat_dh * mat, char *filename) 01326 { 01327 START_FUNC_DH Mat_dh A; 01328 FILE *fp; 01329 01330 if (np_dh > 1) 01331 { 01332 SET_V_ERROR ("only implemented for a single MPI task"); 01333 } 01334 01335 fp = openFile_dh (filename, "r"); 01336 CHECK_V_ERROR; 01337 01338 Mat_dhCreate (&A); 01339 CHECK_V_ERROR; 01340 mat_dh_read_csr_private (&A->m, &A->rp, &A->cval, &A->aval, fp); 01341 CHECK_V_ERROR; 01342 A->n = A->m; 01343 *mat = A; 01344 01345 closeFile_dh (fp); 01346 CHECK_V_ERROR; 01347 END_FUNC_DH} 01348 01349 /* seq only */ 01350 #undef __FUNC__ 01351 #define __FUNC__ "Mat_dhReadTriples" 01352 void 01353 Mat_dhReadTriples (Mat_dh * mat, int ignore, char *filename) 01354 { 01355 START_FUNC_DH FILE * fp = NULL; 01356 Mat_dh A = NULL; 01357 01358 if (np_dh > 1) 01359 { 01360 SET_V_ERROR ("only implemented for a single MPI task"); 01361 } 01362 01363 fp = openFile_dh (filename, "r"); 01364 CHECK_V_ERROR; 01365 01366 Mat_dhCreate (&A); 01367 CHECK_V_ERROR; 01368 mat_dh_read_triples_private (ignore, &A->m, &A->rp, &A->cval, &A->aval, fp); 01369 CHECK_V_ERROR; 01370 A->n = A->m; 01371 *mat = A; 01372 01373 closeFile_dh (fp); 01374 CHECK_V_ERROR; 01375 END_FUNC_DH} 01376 01377 /* here we pass the private function a filename, instead of an open file, 01378 the reason being that Euclid's binary format is more complicated, 01379 i.e, the other "Read" methods are only for a single mpi task. 01380 */ 01381 #undef __FUNC__ 01382 #define __FUNC__ "Mat_dhReadBIN" 01383 void 01384 Mat_dhReadBIN (Mat_dh * mat, char *filename) 01385 { 01386 START_FUNC_DH Mat_dh A; 01387 01388 if (np_dh > 1) 01389 { 01390 SET_V_ERROR ("only implemented for a single MPI task"); 01391 } 01392 01393 Mat_dhCreate (&A); 01394 CHECK_V_ERROR; 01395 io_dh_read_ebin_mat_private (&A->m, &A->rp, &A->cval, &A->aval, filename); 01396 CHECK_V_ERROR; 01397 A->n = A->m; 01398 *mat = A; 01399 END_FUNC_DH} 01400 01401 #undef __FUNC__ 01402 #define __FUNC__ "Mat_dhTranspose" 01403 void 01404 Mat_dhTranspose (Mat_dh A, Mat_dh * Bout) 01405 { 01406 START_FUNC_DH Mat_dh B; 01407 01408 if (np_dh > 1) 01409 { 01410 SET_V_ERROR ("only for sequential"); 01411 } 01412 01413 Mat_dhCreate (&B); 01414 CHECK_V_ERROR; 01415 *Bout = B; 01416 B->m = B->n = A->m; 01417 mat_dh_transpose_private (A->m, A->rp, &B->rp, A->cval, &B->cval, 01418 A->aval, &B->aval); 01419 CHECK_V_ERROR; 01420 END_FUNC_DH} 01421 01422 #undef __FUNC__ 01423 #define __FUNC__ "Mat_dhMakeStructurallySymmetric" 01424 void 01425 Mat_dhMakeStructurallySymmetric (Mat_dh A) 01426 { 01427 START_FUNC_DH if (np_dh > 1) 01428 { 01429 SET_V_ERROR ("only for sequential"); 01430 } 01431 make_symmetric_private (A->m, &A->rp, &A->cval, &A->aval); 01432 CHECK_V_ERROR; 01433 END_FUNC_DH} 01434 01435 void insert_diags_private (Mat_dh A, int ct); 01436 01437 /* inserts diagonal if not explicitly present; 01438 sets diagonal value in row i to sum of absolute 01439 values of all elts in row i. 01440 */ 01441 #undef __FUNC__ 01442 #define __FUNC__ "Mat_dhFixDiags" 01443 void 01444 Mat_dhFixDiags (Mat_dh A) 01445 { 01446 START_FUNC_DH int i, j; 01447 int *rp = A->rp, *cval = A->cval, m = A->m; 01448 bool ct = 0; /* number of missing diagonals */ 01449 double *aval = A->aval; 01450 01451 /* determine if any diagonals are missing */ 01452 for (i = 0; i < m; ++i) 01453 { 01454 bool flag = true; 01455 for (j = rp[i]; j < rp[i + 1]; ++j) 01456 { 01457 int col = cval[j]; 01458 if (col == i) 01459 { 01460 flag = false; 01461 break; 01462 } 01463 } 01464 if (flag) 01465 ++ct; 01466 } 01467 01468 /* insert any missing diagonal elements */ 01469 if (ct) 01470 { 01471 printf 01472 ("\nMat_dhFixDiags:: %i diags not explicitly present; inserting!\n", 01473 ct); 01474 insert_diags_private (A, ct); 01475 CHECK_V_ERROR; 01476 rp = A->rp; 01477 cval = A->cval; 01478 aval = A->aval; 01479 } 01480 01481 /* set the value of all diagonal elements */ 01482 for (i = 0; i < m; ++i) 01483 { 01484 double sum = 0.0; 01485 for (j = rp[i]; j < rp[i + 1]; ++j) 01486 { 01487 sum += fabs (aval[j]); 01488 } 01489 for (j = rp[i]; j < rp[i + 1]; ++j) 01490 { 01491 if (cval[j] == i) 01492 { 01493 aval[j] = sum; 01494 } 01495 } 01496 } 01497 END_FUNC_DH} 01498 01499 01500 #undef __FUNC__ 01501 #define __FUNC__ "insert_diags_private" 01502 void 01503 insert_diags_private (Mat_dh A, int ct) 01504 { 01505 START_FUNC_DH int *RP = A->rp, *CVAL = A->cval; 01506 int *rp, *cval, m = A->m; 01507 double *aval, *AVAL = A->aval; 01508 int nz = RP[m] + ct; 01509 int i, j, idx = 0; 01510 01511 rp = A->rp = (int *) MALLOC_DH ((m + 1) * sizeof (int)); 01512 CHECK_V_ERROR; 01513 cval = A->cval = (int *) MALLOC_DH (nz * sizeof (int)); 01514 CHECK_V_ERROR; 01515 aval = A->aval = (double *) MALLOC_DH (nz * sizeof (double)); 01516 CHECK_V_ERROR; 01517 rp[0] = 0; 01518 01519 for (i = 0; i < m; ++i) 01520 { 01521 bool flag = true; 01522 for (j = RP[i]; j < RP[i + 1]; ++j) 01523 { 01524 cval[idx] = CVAL[j]; 01525 aval[idx] = AVAL[j]; 01526 ++idx; 01527 if (CVAL[j] == i) 01528 flag = false; 01529 } 01530 01531 if (flag) 01532 { 01533 cval[idx] = i; 01534 aval[idx] = 0.0; 01535 ++idx; 01536 } 01537 rp[i + 1] = idx; 01538 } 01539 01540 FREE_DH (RP); 01541 CHECK_V_ERROR; 01542 FREE_DH (CVAL); 01543 CHECK_V_ERROR; 01544 FREE_DH (AVAL); 01545 CHECK_V_ERROR; 01546 01547 END_FUNC_DH} 01548 01549 #undef __FUNC__ 01550 #define __FUNC__ "Mat_dhPrintDiags" 01551 void 01552 Mat_dhPrintDiags (Mat_dh A, FILE * fp) 01553 { 01554 START_FUNC_DH int i, j, m = A->m; 01555 int *rp = A->rp, *cval = A->cval; 01556 double *aval = A->aval; 01557 01558 fprintf (fp, 01559 "=================== diagonal elements ====================\n"); 01560 for (i = 0; i < m; ++i) 01561 { 01562 bool flag = true; 01563 for (j = rp[i]; j < rp[i + 1]; ++j) 01564 { 01565 if (cval[j] == i) 01566 { 01567 fprintf (fp, "%i %g\n", i + 1, aval[j]); 01568 flag = false; 01569 break; 01570 } 01571 } 01572 if (flag) 01573 { 01574 fprintf (fp, "%i ---------- missing\n", i + 1); 01575 } 01576 } 01577 END_FUNC_DH} 01578 01579 01580 #undef __FUNC__ 01581 #define __FUNC__ "Mat_dhGetRow" 01582 void 01583 Mat_dhGetRow (Mat_dh B, int globalRow, int *len, int **ind, double **val) 01584 { 01585 START_FUNC_DH int row = globalRow - B->beg_row; 01586 if (row > B->m) 01587 { 01588 sprintf (msgBuf_dh, 01589 "requested globalRow= %i, which is local row= %i, but only have %i rows!", 01590 globalRow, row, B->m); 01591 SET_V_ERROR (msgBuf_dh); 01592 } 01593 *len = B->rp[row + 1] - B->rp[row]; 01594 if (ind != NULL) 01595 *ind = B->cval + B->rp[row]; 01596 if (val != NULL) 01597 *val = B->aval + B->rp[row]; 01598 END_FUNC_DH} 01599 01600 #undef __FUNC__ 01601 #define __FUNC__ "Mat_dhRestoreRow" 01602 void 01603 Mat_dhRestoreRow (Mat_dh B, int row, int *len, int **ind, double **val) 01604 { 01605 START_FUNC_DH END_FUNC_DH} 01606 01607 #undef __FUNC__ 01608 #define __FUNC__ "Mat_dhRowPermute" 01609 void 01610 Mat_dhRowPermute (Mat_dh mat) 01611 { 01612 START_FUNC_DH if (ignoreMe) 01613 SET_V_ERROR ("turned off; compilation problem on blue"); 01614 01615 #if 0 01616 int i, j, m = mat->m, nz = mat->rp[m]; 01617 int *o2n, *cval; 01618 int algo = 1; 01619 double *r1, *c1; 01620 bool debug = mat->debug; 01621 bool isNatural; 01622 Mat_dh B; 01623 01624 #if 0 01625 * = 1:Compute a row permutation of the matrix so that the 01626 * permuted matrix has as many entries on its diagonal as 01627 * possible.The values on the diagonal are of arbitrary size. 01628 * HSL subroutine MC21A / AD is used for this 01629 . * = 2: Compute a row permutation of the matrix so that the smallest * value on the diagonal of the permuted matrix is maximized. * = 3:Compute a row permutation of the matrix so that the smallest 01630 * value on the diagonal of the permuted matrix is maximized. 01631 * The algorithm differs from the one used for JOB 01632 = 2 and may * have quite a different performance. * = 4: Compute a row permutation of the matrix so that the sum * of the diagonal entries of the permuted matrix is maximized. * = 5:Compute a row permutation of the matrix so that the product 01633 * of the diagonal entries of the permuted matrix is maximized 01634 * and vectors to scale the matrix so that the nonzero diagonal 01635 * entries of the permuted matrix are one in absolute value and 01636 * all the off - diagonal entries are less than or equal to one in 01637 * absolute value. 01638 #endif 01639 Parser_dhReadInt (parser_dh, "-rowPermute", &algo); 01640 CHECK_V_ERROR; 01641 if (algo < 1) 01642 algo = 1; 01643 if (algo > 5) 01644 algo = 1; 01645 sprintf (msgBuf_dh, "calling row permutation with algo= %i", algo); 01646 SET_INFO (msgBuf_dh); 01647 01648 r1 = (double *) MALLOC_DH (m * sizeof (double)); 01649 CHECK_V_ERROR; 01650 c1 = (double *) MALLOC_DH (m * sizeof (double)); 01651 CHECK_V_ERROR; 01652 if (mat->row_perm == NULL) 01653 { 01654 mat->row_perm = o2n = (int *) MALLOC_DH (m * sizeof (int)); 01655 CHECK_V_ERROR; 01656 } 01657 else 01658 { 01659 o2n = mat->row_perm; 01660 } 01661 01662 Mat_dhTranspose (mat, &B); 01663 CHECK_V_ERROR; 01664 01665 /* get row permutation and scaling vectors */ 01666 dldperm (algo, m, nz, B->rp, B->cval, B->aval, o2n, r1, c1); 01667 01668 /* permute column indices, then turn the matrix rightside up */ 01669 cval = B->cval; 01670 for (i = 0; i < nz; ++i) 01671 cval[i] = o2n[cval[i]]; 01672 01673 /* debug block */ 01674 if (debug && logFile != NULL) 01675 { 01676 fprintf (logFile, "\n-------- row permutation vector --------\n"); 01677 for (i = 0; i < m; ++i) 01678 fprintf (logFile, "%i ", 1 + o2n[i]); 01679 fprintf (logFile, "\n"); 01680 01681 if (myid_dh == 0) 01682 { 01683 printf ("\n-------- row permutation vector --------\n"); 01684 for (i = 0; i < m; ++i) 01685 printf ("%i ", 1 + o2n[i]); 01686 printf ("\n"); 01687 } 01688 } 01689 01690 /* check to see if permutation is non-natural */ 01691 isNatural = true; 01692 for (i = 0; i < m; ++i) 01693 { 01694 if (o2n[i] != i) 01695 { 01696 isNatural = false; 01697 break; 01698 } 01699 } 01700 01701 if (isNatural) 01702 { 01703 printf ("@@@ [%i] Mat_dhRowPermute :: got natural ordering!\n", 01704 myid_dh); 01705 } 01706 else 01707 { 01708 int *rp = B->rp, *cval = B->cval; 01709 double *aval = B->aval; 01710 01711 if (algo == 5) 01712 { 01713 printf 01714 ("@@@ [%i] Mat_dhRowPermute :: scaling matrix rows and columns!\n", 01715 myid_dh); 01716 01717 /* scale matrix */ 01718 for (i = 0; i < m; i++) 01719 { 01720 r1[i] = exp (r1[i]); 01721 c1[i] = exp (c1[i]); 01722 } 01723 for (i = 0; i < m; i++) 01724 for (j = rp[i]; j < rp[i + 1]; j++) 01725 aval[j] *= r1[cval[j]] * c1[i]; 01726 } 01727 01728 mat_dh_transpose_reuse_private (B->m, B->rp, B->cval, B->aval, 01729 mat->rp, mat->cval, mat->aval); 01730 CHECK_V_ERROR; 01731 } 01732 01733 01734 Mat_dhDestroy (B); 01735 CHECK_V_ERROR; 01736 FREE_DH (r1); 01737 CHECK_V_ERROR; 01738 FREE_DH (c1); 01739 CHECK_V_ERROR; 01740 01741 #endif 01742 END_FUNC_DH} 01743 01744 01745 /*==============================================================================*/ 01746 #undef __FUNC__ 01747 #define __FUNC__ "Mat_dhPartition" 01748 void 01749 build_adj_lists_private (Mat_dh mat, int **rpOUT, int **cvalOUT) 01750 { 01751 START_FUNC_DH int m = mat->m; 01752 int *RP = mat->rp, *CVAL = mat->cval; 01753 int nz = RP[m]; 01754 int i, j, *rp, *cval, idx = 0; 01755 01756 rp = *rpOUT = (int *) MALLOC_DH ((m + 1) * sizeof (int)); 01757 CHECK_V_ERROR; 01758 cval = *cvalOUT = (int *) MALLOC_DH (nz * sizeof (int)); 01759 CHECK_V_ERROR; 01760 rp[0] = 0; 01761 01762 /* assume symmetry for now! */ 01763 for (i = 0; i < m; ++i) 01764 { 01765 for (j = RP[i]; j < RP[i + 1]; ++j) 01766 { 01767 int col = CVAL[j]; 01768 if (col != i) 01769 { 01770 cval[idx++] = col; 01771 } 01772 } 01773 rp[i + 1] = idx; 01774 } 01775 END_FUNC_DH} 01776 01777 01778 #undef __FUNC__ 01779 #define __FUNC__ "Mat_dhPartition" 01780 void 01781 Mat_dhPartition (Mat_dh mat, int blocks, 01782 int **beg_rowOUT, int **row_countOUT, int **n2oOUT, 01783 int **o2nOUT) 01784 { 01785 START_FUNC_DH 01786 #ifndef HAVE_METIS_DH 01787 if (ignoreMe) 01788 SET_V_ERROR ("not compiled for metis!"); 01789 01790 #else 01791 int *beg_row, *row_count, *n2o, *o2n, bk, new, *part; 01792 int m = mat->m; 01793 int i, cutEdgeCount; 01794 double zero = 0.0; 01795 int metisOpts[5] = { 0, 0, 0, 0, 0 }; 01796 int *rp, *cval; 01797 01798 /* allocate storage for returned arrays */ 01799 beg_row = *beg_rowOUT = (int *) MALLOC_DH (blocks * sizeof (int)); 01800 CHECK_V_ERROR; 01801 row_count = *row_countOUT = (int *) MALLOC_DH (blocks * sizeof (int)); 01802 CHECK_V_ERROR; 01803 *n2oOUT = n2o = (int *) MALLOC_DH (m * sizeof (int)); 01804 CHECK_V_ERROR; 01805 *o2nOUT = o2n = (int *) MALLOC_DH (m * sizeof (int)); 01806 CHECK_V_ERROR; 01807 01808 #if 0 01809 == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == = Metis arguments: 01810 01811 n - number of nodes rp[], cval[]NULL, NULL, 0 /*no edge or vertex weights */ 01812 0 /*use zero-based numbering */ 01813 blocksIN, options[5] = 0::0 / 1 use defauls; 01814 use uptions 1. .4 01815 1:: 01816 edgecutOUT, 01817 part[] 01818 == == == == == == == == == == == == == == == == == == == == == == == == == 01819 == == == == == = 01820 #endif 01821 /* form the graph representation that metis wants */ 01822 build_adj_lists_private (mat, &rp, &cval); 01823 CHECK_V_ERROR; 01824 part = (int *) MALLOC_DH (m * sizeof (int)); 01825 CHECK_V_ERROR; 01826 01827 /* get parition vector from metis */ 01828 METIS_PartGraphKway (&m, rp, cval, NULL, NULL, 01829 &zero, &zero, &blocks, metisOpts, &cutEdgeCount, part); 01830 01831 FREE_DH (rp); 01832 CHECK_V_ERROR; 01833 FREE_DH (cval); 01834 CHECK_V_ERROR; 01835 01836 if (mat->debug) 01837 { 01838 printf_dh ("\nmetis partitioning vector; blocks= %i\n", blocks); 01839 for (i = 0; i < m; ++i) 01840 printf_dh (" %i %i\n", i + 1, part[i]); 01841 } 01842 01843 /* compute beg_row, row_count arrays from partition vector */ 01844 for (i = 0; i < blocks; ++i) 01845 row_count[i] = 0; 01846 for (i = 0; i < m; ++i) 01847 { 01848 bk = part[i]; /* block to which row i belongs */ 01849 row_count[bk] += 1; 01850 } 01851 beg_row[0] = 0; 01852 for (i = 1; i < blocks; ++i) 01853 beg_row[i] = beg_row[i - 1] + row_count[i - 1]; 01854 01855 if (mat->debug) 01856 { 01857 printf_dh ("\nrow_counts: "); 01858 for (i = 0; i < blocks; ++i) 01859 printf_dh (" %i", row_count[i]); 01860 printf_dh ("\nbeg_row: "); 01861 for (i = 0; i < blocks; ++i) 01862 printf_dh (" %i", beg_row[i] + 1); 01863 printf_dh ("\n"); 01864 } 01865 01866 /* compute permutation vector */ 01867 { 01868 int *tmp = (int *) MALLOC_DH (blocks * sizeof (int)); 01869 CHECK_V_ERROR; 01870 memcpy (tmp, beg_row, blocks * sizeof (int)); 01871 for (i = 0; i < m; ++i) 01872 { 01873 bk = part[i]; /* block to which row i belongs */ 01874 new = tmp[bk]; 01875 tmp[bk] += 1; 01876 o2n[i] = new; 01877 n2o[new] = i; 01878 } 01879 FREE_DH (tmp); 01880 } 01881 01882 FREE_DH (part); 01883 CHECK_V_ERROR; 01884 01885 #endif 01886 01887 END_FUNC_DH}
1.7.4