|
IFPACK Development
|
00001 /*@HEADER 00002 // *********************************************************************** 00003 // 00004 // Ifpack: Object-Oriented Algebraic Preconditioner Package 00005 // Copyright (2009) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 //@HEADER 00028 */ 00029 00030 #include "Factor_dh.h" 00031 #include "Vec_dh.h" 00032 #include "Mat_dh.h" 00033 #include "SubdomainGraph_dh.h" 00034 #include "TimeLog_dh.h" 00035 #include "Mem_dh.h" 00036 #include "Numbering_dh.h" 00037 #include "Hash_i_dh.h" 00038 #include "Parser_dh.h" 00039 #include "mat_dh_private.h" 00040 #include "getRow_dh.h" 00041 #include "Euclid_dh.h" 00042 #include "io_dh.h" 00043 00044 /* suppress compiler complaints */ 00045 void 00046 Factor_dh_junk () 00047 { 00048 } 00049 00050 static void adjust_bj_private (Factor_dh mat); 00051 static void unadjust_bj_private (Factor_dh mat); 00052 00053 00054 #undef __FUNC__ 00055 #define __FUNC__ "Factor_dhCreate" 00056 void 00057 Factor_dhCreate (Factor_dh * mat) 00058 { 00059 START_FUNC_DH struct _factor_dh *tmp; 00060 00061 if (np_dh > MAX_MPI_TASKS) 00062 { 00063 SET_V_ERROR ("you must change MAX_MPI_TASKS and recompile!"); 00064 } 00065 00066 tmp = (struct _factor_dh *) MALLOC_DH (sizeof (struct _factor_dh)); 00067 CHECK_V_ERROR; 00068 *mat = tmp; 00069 00070 tmp->m = 0; 00071 tmp->n = 0; 00072 tmp->id = myid_dh; 00073 tmp->beg_row = 0; 00074 tmp->first_bdry = 0; 00075 tmp->bdry_count = 0; 00076 tmp->blockJacobi = false; 00077 00078 tmp->rp = NULL; 00079 tmp->cval = NULL; 00080 tmp->aval = NULL; 00081 tmp->fill = NULL; 00082 tmp->diag = NULL; 00083 tmp->alloc = 0; 00084 00085 tmp->work_y_lo = tmp->work_x_hi = NULL; 00086 tmp->sendbufLo = tmp->sendbufHi = NULL; 00087 tmp->sendindLo = tmp->sendindHi = NULL; 00088 tmp->num_recvLo = tmp->num_recvHi = 0; 00089 tmp->num_sendLo = tmp->num_sendHi = 0; 00090 tmp->sendlenLo = tmp->sendlenHi = 0; 00091 00092 tmp->solveIsSetup = false; 00093 tmp->numbSolve = NULL; 00094 00095 tmp->debug = Parser_dhHasSwitch (parser_dh, "-debug_Factor"); 00096 00097 /* Factor_dhZeroTiming(tmp); CHECK_V_ERROR; */ 00098 END_FUNC_DH} 00099 00100 #undef __FUNC__ 00101 #define __FUNC__ "Factor_dhDestroy" 00102 void 00103 Factor_dhDestroy (Factor_dh mat) 00104 { 00105 START_FUNC_DH if (mat->rp != NULL) 00106 { 00107 FREE_DH (mat->rp); 00108 CHECK_V_ERROR; 00109 } 00110 if (mat->cval != NULL) 00111 { 00112 FREE_DH (mat->cval); 00113 CHECK_V_ERROR; 00114 } 00115 if (mat->aval != NULL) 00116 { 00117 FREE_DH (mat->aval); 00118 CHECK_V_ERROR; 00119 } 00120 if (mat->diag != NULL) 00121 { 00122 FREE_DH (mat->diag); 00123 CHECK_V_ERROR; 00124 } 00125 if (mat->fill != NULL) 00126 { 00127 FREE_DH (mat->fill); 00128 CHECK_V_ERROR; 00129 } 00130 00131 if (mat->work_y_lo != NULL) 00132 { 00133 FREE_DH (mat->work_y_lo); 00134 CHECK_V_ERROR; 00135 } 00136 if (mat->work_x_hi != NULL) 00137 { 00138 FREE_DH (mat->work_x_hi); 00139 CHECK_V_ERROR; 00140 } 00141 if (mat->sendbufLo != NULL) 00142 { 00143 FREE_DH (mat->sendbufLo); 00144 CHECK_V_ERROR; 00145 } 00146 if (mat->sendbufHi != NULL) 00147 { 00148 FREE_DH (mat->sendbufHi); 00149 CHECK_V_ERROR; 00150 } 00151 if (mat->sendindLo != NULL) 00152 { 00153 FREE_DH (mat->sendindLo); 00154 CHECK_V_ERROR; 00155 } 00156 if (mat->sendindHi != NULL) 00157 { 00158 FREE_DH (mat->sendindHi); 00159 CHECK_V_ERROR; 00160 } 00161 00162 if (mat->numbSolve != NULL) 00163 { 00164 Numbering_dhDestroy (mat->numbSolve); 00165 CHECK_V_ERROR; 00166 } 00167 FREE_DH (mat); 00168 CHECK_V_ERROR; 00169 END_FUNC_DH} 00170 00171 00172 #undef __FUNC__ 00173 #define __FUNC__ "create_fake_mat_private" 00174 static void 00175 create_fake_mat_private (Factor_dh mat, Mat_dh * matFakeIN) 00176 { 00177 START_FUNC_DH Mat_dh matFake; 00178 Mat_dhCreate (matFakeIN); 00179 CHECK_V_ERROR; 00180 matFake = *matFakeIN; 00181 matFake->m = mat->m; 00182 matFake->n = mat->n; 00183 matFake->rp = mat->rp; 00184 matFake->cval = mat->cval; 00185 matFake->aval = mat->aval; 00186 matFake->beg_row = mat->beg_row; 00187 END_FUNC_DH} 00188 00189 #undef __FUNC__ 00190 #define __FUNC__ "destroy_fake_mat_private" 00191 static void 00192 destroy_fake_mat_private (Mat_dh matFake) 00193 { 00194 START_FUNC_DH matFake->rp = NULL; 00195 matFake->cval = NULL; 00196 matFake->aval = NULL; 00197 Mat_dhDestroy (matFake); 00198 CHECK_V_ERROR; 00199 END_FUNC_DH} 00200 00201 00202 00203 #undef __FUNC__ 00204 #define __FUNC__ "Factor_dhReadNz" 00205 int 00206 Factor_dhReadNz (Factor_dh mat) 00207 { 00208 START_FUNC_DH int ierr, retval = mat->rp[mat->m]; 00209 int nz = retval; 00210 ierr = MPI_Allreduce (&nz, &retval, 1, MPI_INT, MPI_SUM, comm_dh); 00211 CHECK_MPI_ERROR (ierr); 00212 END_FUNC_VAL (retval)} 00213 00214 00215 00216 #undef __FUNC__ 00217 #define __FUNC__ "Factor_dhPrintRows" 00218 void 00219 Factor_dhPrintRows (Factor_dh mat, FILE * fp) 00220 { 00221 START_FUNC_DH int beg_row = mat->beg_row; 00222 int m = mat->m, i, j; 00223 bool noValues; 00224 00225 noValues = (Parser_dhHasSwitch (parser_dh, "-noValues")); 00226 if (mat->aval == NULL) 00227 noValues = true; 00228 00229 if (mat->blockJacobi) 00230 { 00231 adjust_bj_private (mat); 00232 CHECK_V_ERROR; 00233 } 00234 00235 fprintf (fp, 00236 "\n----------------------- Factor_dhPrintRows ------------------\n"); 00237 if (mat->blockJacobi) 00238 { 00239 fprintf (fp, 00240 "@@@ Block Jacobi ILU; adjusted values from zero-based @@@\n"); 00241 } 00242 00243 for (i = 0; i < m; ++i) 00244 { 00245 fprintf (fp, "%i :: ", 1 + i + beg_row); 00246 for (j = mat->rp[i]; j < mat->rp[i + 1]; ++j) 00247 { 00248 if (noValues) 00249 { 00250 fprintf (fp, "%i ", 1 + mat->cval[j]); 00251 } 00252 else 00253 { 00254 fprintf (fp, "%i,%g ; ", 1 + mat->cval[j], mat->aval[j]); 00255 } 00256 } 00257 fprintf (fp, "\n"); 00258 } 00259 00260 if (mat->blockJacobi) 00261 { 00262 unadjust_bj_private (mat); 00263 CHECK_V_ERROR; 00264 } 00265 END_FUNC_DH} 00266 00267 00268 #undef __FUNC__ 00269 #define __FUNC__ "Factor_dhPrintDiags" 00270 void 00271 Factor_dhPrintDiags (Factor_dh mat, FILE * fp) 00272 { 00273 START_FUNC_DH int beg_row = mat->beg_row; 00274 int m = mat->m, i, pe, *diag = mat->diag; 00275 REAL_DH *aval = mat->aval; 00276 00277 00278 fprintf_dh (fp, 00279 "\n----------------------- Factor_dhPrintDiags ------------------\n"); 00280 fprintf_dh (fp, "(grep for 'ZERO')\n"); 00281 00282 for (pe = 0; pe < np_dh; ++pe) 00283 { 00284 MPI_Barrier (comm_dh); 00285 if (mat->id == pe) 00286 { 00287 fprintf (fp, "----- subdomain: %i processor: %i\n", pe, myid_dh); 00288 for (i = 0; i < m; ++i) 00289 { 00290 REAL_DH val = aval[diag[i]]; 00291 if (val) 00292 { 00293 fprintf (fp, "%i %g\n", i + 1 + beg_row, aval[diag[i]]); 00294 } 00295 else 00296 { 00297 fprintf (fp, "%i %g ZERO\n", i + 1 + beg_row, 00298 aval[diag[i]]); 00299 } 00300 } 00301 } 00302 } 00303 END_FUNC_DH} 00304 00305 00306 #undef __FUNC__ 00307 #define __FUNC__ "Factor_dhPrintGraph" 00308 void 00309 Factor_dhPrintGraph (Factor_dh mat, char *filename) 00310 { 00311 START_FUNC_DH FILE * fp; 00312 int i, j, m = mat->m, *work, *rp = mat->rp, *cval = mat->cval; 00313 00314 if (np_dh > 1) 00315 SET_V_ERROR ("only implemented for single mpi task"); 00316 00317 work = (int *) MALLOC_DH (m * sizeof (int)); 00318 CHECK_V_ERROR; 00319 00320 fp = openFile_dh (filename, "w"); 00321 CHECK_V_ERROR; 00322 00323 for (i = 0; i < m; ++i) 00324 { 00325 for (j = 0; j < m; ++j) 00326 work[j] = 0; 00327 for (j = rp[i]; j < rp[i]; ++j) 00328 work[cval[j]] = 1; 00329 00330 for (j = 0; j < m; ++j) 00331 { 00332 if (work[j]) 00333 { 00334 fprintf (fp, " x "); 00335 } 00336 else 00337 { 00338 fprintf (fp, " "); 00339 } 00340 } 00341 fprintf (fp, "\n"); 00342 } 00343 00344 closeFile_dh (fp); 00345 CHECK_V_ERROR; 00346 00347 FREE_DH (work); 00348 END_FUNC_DH} 00349 00350 00351 #undef __FUNC__ 00352 #define __FUNC__ "Factor_dhPrintTriples" 00353 void 00354 Factor_dhPrintTriples (Factor_dh mat, char *filename) 00355 { 00356 START_FUNC_DH int pe, i, j; 00357 int m = mat->m, *rp = mat->rp; 00358 int beg_row = mat->beg_row; 00359 REAL_DH *aval = mat->aval; 00360 bool noValues; 00361 FILE *fp; 00362 00363 if (mat->blockJacobi) 00364 { 00365 adjust_bj_private (mat); 00366 CHECK_V_ERROR; 00367 } 00368 00369 noValues = (Parser_dhHasSwitch (parser_dh, "-noValues")); 00370 if (noValues) 00371 aval = NULL; 00372 00373 for (pe = 0; pe < np_dh; ++pe) 00374 { 00375 MPI_Barrier (comm_dh); 00376 if (mat->id == pe) 00377 { 00378 if (pe == 0) 00379 { 00380 fp = openFile_dh (filename, "w"); 00381 CHECK_V_ERROR; 00382 } 00383 else 00384 { 00385 fp = openFile_dh (filename, "a"); 00386 CHECK_V_ERROR; 00387 } 00388 00389 for (i = 0; i < m; ++i) 00390 { 00391 for (j = rp[i]; j < rp[i + 1]; ++j) 00392 { 00393 if (noValues) 00394 { 00395 fprintf (fp, "%i %i\n", 1 + i + beg_row, 00396 1 + mat->cval[j]); 00397 } 00398 else 00399 { 00400 fprintf (fp, TRIPLES_FORMAT, 00401 1 + i + beg_row, 1 + mat->cval[j], aval[j]); 00402 } 00403 } 00404 } 00405 closeFile_dh (fp); 00406 CHECK_V_ERROR; 00407 } 00408 } 00409 00410 if (mat->blockJacobi) 00411 { 00412 unadjust_bj_private (mat); 00413 CHECK_V_ERROR; 00414 } 00415 END_FUNC_DH} 00416 00417 /*-------------------------------------------------------------------------------- 00418 * Functions to setup the matrix for triangular solves. These are similar to 00419 * MatVecSetup(), except that there are two cases: subdomains ordered lower than 00420 * ourselves, and subdomains ordered higher than ourselves. This SolveSetup 00421 * is used for Parallel ILU (PILU). The following are adopted/modified from 00422 * Edmond Chow's ParaSails 00423 *--------------------------------------------------------------------------------*/ 00424 00425 /* adopted from Edmond Chow's ParaSails */ 00426 00427 /* 1. start receives of node data to be received from other processors; 00428 2. send to other processors the list of nodes this processor needs 00429 to receive from them. 00430 Returns: the number of processors from whom nodes will be received. 00431 */ 00432 #undef __FUNC__ 00433 #define __FUNC__ "setup_receives_private" 00434 static int 00435 setup_receives_private (Factor_dh mat, int *beg_rows, int *end_rows, 00436 double *recvBuf, MPI_Request * req, 00437 int *reqind, int reqlen, int *outlist, bool debug) 00438 { 00439 START_FUNC_DH int i, j, this_pe, num_recv = 0; 00440 MPI_Request request; 00441 00442 if (debug) 00443 { 00444 fprintf (logFile, 00445 "\nFACT ========================================================\n"); 00446 fprintf (logFile, "FACT STARTING: setup_receives_private\n"); 00447 } 00448 00449 for (i = 0; i < reqlen; i = j) 00450 { /* j is set below */ 00451 /* determine the processor that owns the row with index reqind[i] */ 00452 this_pe = mat_find_owner (beg_rows, end_rows, reqind[i]); 00453 CHECK_ERROR (-1); 00454 00455 /* Figure out other rows we need from this_pe */ 00456 for (j = i + 1; j < reqlen; j++) 00457 { 00458 int idx = reqind[j]; 00459 if (idx < beg_rows[this_pe] || idx >= end_rows[this_pe]) 00460 { 00461 break; 00462 } 00463 } 00464 00465 if (debug) 00466 { 00467 int k; 00468 fprintf (logFile, "FACT need nodes from P_%i: ", this_pe); 00469 for (k = i; k < j; ++k) 00470 fprintf (logFile, "%i ", 1 + reqind[k]); 00471 fprintf (logFile, "\n"); 00472 } 00473 00474 /* Record the number of number of indices needed from this_pe */ 00475 outlist[this_pe] = j - i; 00476 00477 /* Request rows in reqind[i..j-1] */ 00478 /* Note: the receiving processor, this_pe, doesn't yet know 00479 about the incoming request, hence, can't set up a matching 00480 receive; this matching receive will be started later, 00481 in setup_sends_private. 00482 */ 00483 MPI_Isend (reqind + i, j - i, MPI_INT, this_pe, 444, comm_dh, &request); 00484 MPI_Request_free (&request); 00485 00486 /* set up persistent comms for receiving the values from this_pe */ 00487 MPI_Recv_init (recvBuf + i, j - i, MPI_DOUBLE, this_pe, 555, 00488 comm_dh, req + num_recv); 00489 ++num_recv; 00490 } 00491 00492 END_FUNC_VAL (num_recv); 00493 } 00494 00495 /* 00496 1. start receive to get list of nodes that this processor 00497 needs to send to other processors 00498 2. start persistent comms to send the data 00499 */ 00500 #undef __FUNC__ 00501 #define __FUNC__ "setup_sends_private" 00502 static void 00503 setup_sends_private (Factor_dh mat, int *inlist, 00504 int *o2n_subdomain, bool debug) 00505 { 00506 START_FUNC_DH int i, jLo, jHi, sendlenLo, sendlenHi, first = mat->beg_row; 00507 MPI_Request *requests = mat->requests, *sendReq; 00508 MPI_Status *statuses = mat->status; 00509 bool isHigher; 00510 int *rcvBuf; 00511 double *sendBuf; 00512 int myidNEW = o2n_subdomain[myid_dh]; 00513 int count; 00514 00515 if (debug) 00516 { 00517 fprintf (logFile, "FACT \nSTARTING: setup_sends_private\n"); 00518 } 00519 00520 /* Determine size of and allocate sendbuf and sendind */ 00521 sendlenLo = sendlenHi = 0; 00522 for (i = 0; i < np_dh; i++) 00523 { 00524 if (inlist[i]) 00525 { 00526 if (o2n_subdomain[i] < myidNEW) 00527 { 00528 sendlenLo += inlist[i]; 00529 } 00530 else 00531 { 00532 sendlenHi += inlist[i]; 00533 } 00534 } 00535 } 00536 00537 mat->sendlenLo = sendlenLo; 00538 mat->sendlenHi = sendlenHi; 00539 mat->sendbufLo = (double *) MALLOC_DH (sendlenLo * sizeof (double)); 00540 CHECK_V_ERROR; 00541 mat->sendbufHi = (double *) MALLOC_DH (sendlenHi * sizeof (double)); 00542 CHECK_V_ERROR; 00543 mat->sendindLo = (int *) MALLOC_DH (sendlenLo * sizeof (int)); 00544 CHECK_V_ERROR; 00545 mat->sendindHi = (int *) MALLOC_DH (sendlenHi * sizeof (int)); 00546 CHECK_V_ERROR; 00547 00548 count = 0; /* number of calls to MPI_Irecv() */ 00549 jLo = jHi = 0; 00550 mat->num_sendLo = 0; 00551 mat->num_sendHi = 0; 00552 for (i = 0; i < np_dh; i++) 00553 { 00554 if (inlist[i]) 00555 { 00556 isHigher = (o2n_subdomain[i] < myidNEW) ? false : true; 00557 00558 /* Post receive for the actual indices */ 00559 if (isHigher) 00560 { 00561 rcvBuf = &mat->sendindHi[jHi]; 00562 sendBuf = &mat->sendbufHi[jHi]; 00563 sendReq = &mat->send_reqHi[mat->num_sendHi]; 00564 mat->num_sendHi++; 00565 jHi += inlist[i]; 00566 } 00567 else 00568 { 00569 rcvBuf = &mat->sendindLo[jLo]; 00570 sendBuf = &mat->sendbufLo[jLo]; 00571 sendReq = &mat->send_reqLo[mat->num_sendLo]; 00572 mat->num_sendLo++; 00573 jLo += inlist[i]; 00574 } 00575 00576 /* matching receive, for list of unknowns that will be sent, 00577 during the triangular solves, from ourselves to P_i 00578 */ 00579 MPI_Irecv (rcvBuf, inlist[i], MPI_INT, i, 444, comm_dh, 00580 requests + count); 00581 ++count; 00582 00583 /* Set up the send */ 00584 MPI_Send_init (sendBuf, inlist[i], MPI_DOUBLE, i, 555, comm_dh, 00585 sendReq); 00586 } 00587 } 00588 00589 /* note: count = mat->num_sendLo = mat->num_sendHi */ 00590 MPI_Waitall (count, requests, statuses); 00591 00592 if (debug) 00593 { 00594 int j; 00595 jLo = jHi = 0; 00596 00597 fprintf (logFile, 00598 "\nFACT columns that I must send to other subdomains:\n"); 00599 for (i = 0; i < np_dh; i++) 00600 { 00601 if (inlist[i]) 00602 { 00603 isHigher = (o2n_subdomain[i] < myidNEW) ? false : true; 00604 if (isHigher) 00605 { 00606 rcvBuf = &mat->sendindHi[jHi]; 00607 jHi += inlist[i]; 00608 } 00609 else 00610 { 00611 rcvBuf = &mat->sendindLo[jLo]; 00612 jLo += inlist[i]; 00613 } 00614 00615 fprintf (logFile, "FACT send to P_%i: ", i); 00616 for (j = 0; j < inlist[i]; ++j) 00617 fprintf (logFile, "%i ", rcvBuf[j] + 1); 00618 fprintf (logFile, "\n"); 00619 } 00620 } 00621 } 00622 00623 /* convert global indices to local indices */ 00624 /* these are all indices on this processor */ 00625 for (i = 0; i < mat->sendlenLo; i++) 00626 mat->sendindLo[i] -= first; 00627 for (i = 0; i < mat->sendlenHi; i++) 00628 mat->sendindHi[i] -= first; 00629 END_FUNC_DH} 00630 00631 00632 00633 #undef __FUNC__ 00634 #define __FUNC__ "Factor_dhSolveSetup" 00635 void 00636 Factor_dhSolveSetup (Factor_dh mat, SubdomainGraph_dh sg) 00637 { 00638 START_FUNC_DH int *outlist, *inlist; 00639 int i, row, *rp = mat->rp, *cval = mat->cval; 00640 Numbering_dh numb; 00641 int m = mat->m; 00642 /* int firstLocalRow = mat->beg_row; */ 00643 int *beg_rows = sg->beg_rowP, *row_count = sg->row_count, *end_rows; 00644 Mat_dh matFake; 00645 bool debug = false; 00646 double *recvBuf; 00647 00648 if (mat->debug && logFile != NULL) 00649 debug = true; 00650 00651 end_rows = (int *) MALLOC_DH (np_dh * sizeof (int)); 00652 CHECK_V_ERROR; 00653 outlist = (int *) MALLOC_DH (np_dh * sizeof (int)); 00654 CHECK_V_ERROR; 00655 inlist = (int *) MALLOC_DH (np_dh * sizeof (int)); 00656 CHECK_V_ERROR; 00657 for (i = 0; i < np_dh; ++i) 00658 { 00659 inlist[i] = 0; 00660 outlist[i] = 0; 00661 end_rows[i] = beg_rows[i] + row_count[i]; 00662 } 00663 00664 /* Create Numbering object */ 00665 create_fake_mat_private (mat, &matFake); 00666 CHECK_V_ERROR; 00667 Numbering_dhCreate (&(mat->numbSolve)); 00668 CHECK_V_ERROR; 00669 numb = mat->numbSolve; 00670 Numbering_dhSetup (numb, matFake); 00671 CHECK_V_ERROR; 00672 destroy_fake_mat_private (matFake); 00673 CHECK_V_ERROR; 00674 00675 if (debug) 00676 { 00677 fprintf (stderr, "Numbering_dhSetup completed\n"); 00678 } 00679 00680 /* Allocate recvbuf; recvbuf has numlocal entries saved for local part of x */ 00681 i = m + numb->num_ext; 00682 mat->work_y_lo = (double *) MALLOC_DH (i * sizeof (double)); 00683 CHECK_V_ERROR; 00684 mat->work_x_hi = (double *) MALLOC_DH (i * sizeof (double)); 00685 CHECK_V_ERROR; 00686 if (debug) 00687 { 00688 fprintf (logFile, "FACT num_extLo= %i num_extHi= %i\n", 00689 numb->num_extLo, numb->num_extHi); 00690 } 00691 00692 mat->num_recvLo = 0; 00693 mat->num_recvHi = 0; 00694 if (numb->num_extLo) 00695 { 00696 recvBuf = mat->work_y_lo + m; 00697 mat->num_recvLo = setup_receives_private (mat, beg_rows, end_rows, 00698 recvBuf, mat->recv_reqLo, 00699 numb->idx_extLo, 00700 numb->num_extLo, outlist, 00701 debug); 00702 CHECK_V_ERROR; 00703 00704 } 00705 00706 if (numb->num_extHi) 00707 { 00708 recvBuf = mat->work_x_hi + m + numb->num_extLo; 00709 mat->num_recvHi = setup_receives_private (mat, beg_rows, end_rows, 00710 recvBuf, mat->recv_reqHi, 00711 numb->idx_extHi, 00712 numb->num_extHi, outlist, 00713 debug); 00714 CHECK_V_ERROR; 00715 } 00716 00717 MPI_Alltoall (outlist, 1, MPI_INT, inlist, 1, MPI_INT, comm_dh); 00718 /* At this point, inlist[j] contains the number of indices 00719 that this processor must send to P_j. Processors next need 00720 to exchange the actual lists of required indices; this is done 00721 in setup_sends_private() 00722 */ 00723 00724 setup_sends_private (mat, inlist, sg->o2n_sub, debug); 00725 CHECK_V_ERROR; 00726 00727 /* Convert column indices in each row to local indices */ 00728 for (row = 0; row < m; row++) 00729 { 00730 int len = rp[row + 1] - rp[row]; 00731 int *ind = cval + rp[row]; 00732 Numbering_dhGlobalToLocal (numb, len, ind, ind); 00733 CHECK_V_ERROR; 00734 } 00735 00736 FREE_DH (outlist); 00737 CHECK_V_ERROR; 00738 FREE_DH (inlist); 00739 CHECK_V_ERROR; 00740 FREE_DH (end_rows); 00741 CHECK_V_ERROR; 00742 00743 if (debug) 00744 { 00745 int ii, jj; 00746 00747 fprintf (logFile, 00748 "\n--------- row/col structure, after global to local renumbering\n"); 00749 for (ii = 0; ii < mat->m; ++ii) 00750 { 00751 fprintf (logFile, "local row %i :: ", ii + 1); 00752 for (jj = mat->rp[ii]; jj < mat->rp[ii + 1]; ++jj) 00753 { 00754 fprintf (logFile, "%i ", 1 + mat->cval[jj]); 00755 } 00756 fprintf (logFile, "\n"); 00757 } 00758 fprintf (logFile, "\n"); 00759 fflush (logFile); 00760 } 00761 END_FUNC_DH} 00762 00763 /* solve for MPI implementation of PILU. This function is 00764 so similar to MatVec, that I put it here, instead of with 00765 the other solves located in Euclid_apply.c. 00766 */ 00767 static void forward_solve_private (int m, int from, int to, 00768 int *rp, int *cval, int *diag, 00769 double *aval, double *rhs, double *work_y, 00770 bool debug); 00771 00772 static void backward_solve_private (int m, int from, int to, 00773 int *rp, int *cval, int *diag, 00774 double *aval, double *work_y, 00775 double *work_x, bool debug); 00776 00777 static int beg_rowG; 00778 00779 00780 #undef __FUNC__ 00781 #define __FUNC__ "Factor_dhSolve" 00782 void 00783 Factor_dhSolve (double *rhs, double *lhs, Euclid_dh ctx) 00784 { 00785 START_FUNC_DH Factor_dh mat = ctx->F; 00786 int from, to; 00787 int ierr, i, m = mat->m, first_bdry = mat->first_bdry; 00788 int offsetLo = mat->numbSolve->num_extLo; 00789 int offsetHi = mat->numbSolve->num_extHi; 00790 int *rp = mat->rp, *cval = mat->cval, *diag = mat->diag; 00791 double *aval = mat->aval; 00792 int *sendindLo = mat->sendindLo, *sendindHi = mat->sendindHi; 00793 int sendlenLo = mat->sendlenLo, sendlenHi = mat->sendlenHi; 00794 double *sendbufLo = mat->sendbufLo, *sendbufHi = mat->sendbufHi; 00795 double *work_y = mat->work_y_lo; 00796 double *work_x = mat->work_x_hi; 00797 bool debug = false; 00798 00799 if (mat->debug && logFile != NULL) 00800 debug = true; 00801 if (debug) 00802 beg_rowG = ctx->F->beg_row; 00803 00804 /* 00805 for (i=0; i<m+offsetLo+offsetHi; ++i) { 00806 work_y[i] = -99; 00807 work_x[i] = -99; 00808 } 00809 */ 00810 00811 if (debug) 00812 { 00813 fprintf (logFile, 00814 "\n=====================================================\n"); 00815 fprintf (logFile, 00816 "FACT Factor_dhSolve: num_recvLo= %i num_recvHi = %i\n", 00817 mat->num_recvLo, mat->num_recvHi); 00818 } 00819 00820 /* start receives from higher and lower ordered subdomains */ 00821 if (mat->num_recvLo) 00822 { 00823 MPI_Startall (mat->num_recvLo, mat->recv_reqLo); 00824 } 00825 if (mat->num_recvHi) 00826 { 00827 MPI_Startall (mat->num_recvHi, mat->recv_reqHi); 00828 } 00829 00830 /*------------------------------------------------------------- 00831 * PART 1: Forward Solve Ly = rhs for y ('y' is called 'work') 00832 *-------------------------------------------------------------*/ 00833 /* forward triangular solve on interior nodes */ 00834 from = 0; 00835 to = first_bdry; 00836 if (from != to) 00837 { 00838 forward_solve_private (m, from, to, rp, cval, diag, aval, 00839 rhs, work_y, debug); 00840 CHECK_V_ERROR; 00841 } 00842 00843 /* wait for receives from lower ordered subdomains, then 00844 complete forward solve on boundary nodes. 00845 */ 00846 if (mat->num_recvLo) 00847 { 00848 MPI_Waitall (mat->num_recvLo, mat->recv_reqLo, mat->status); 00849 00850 /* debug block */ 00851 if (debug) 00852 { 00853 fprintf (logFile, 00854 "FACT got 'y' values from lower neighbors; work buffer:\n "); 00855 for (i = 0; i < offsetLo; ++i) 00856 { 00857 fprintf (logFile, "%g ", work_y[m + i]); 00858 } 00859 } 00860 } 00861 00862 /* forward triangular solve on boundary nodes */ 00863 from = first_bdry; 00864 to = m; 00865 if (from != to) 00866 { 00867 forward_solve_private (m, from, to, rp, cval, diag, aval, 00868 rhs, work_y, debug); 00869 CHECK_V_ERROR; 00870 } 00871 00872 /* send boundary elements from work vector 'y' to higher ordered subdomains */ 00873 if (mat->num_sendHi) 00874 { 00875 00876 /* copy elements to send buffer */ 00877 for (i = 0; i < sendlenHi; i++) 00878 { 00879 sendbufHi[i] = work_y[sendindHi[i]]; 00880 } 00881 00882 /* start the sends */ 00883 MPI_Startall (mat->num_sendHi, mat->send_reqHi); 00884 00885 /* debug block */ 00886 if (debug) 00887 { 00888 fprintf (logFile, 00889 "\nFACT sending 'y' values to higher neighbor:\nFACT "); 00890 for (i = 0; i < sendlenHi; i++) 00891 { 00892 fprintf (logFile, "%g ", sendbufHi[i]); 00893 } 00894 fprintf (logFile, "\n"); 00895 } 00896 } 00897 00898 /*---------------------------------------------------------- 00899 * PART 2: Backward Solve 00900 *----------------------------------------------------------*/ 00901 /* wait for bdry nodes 'x' from higher-ordered processsors */ 00902 if (mat->num_recvHi) 00903 { 00904 ierr = MPI_Waitall (mat->num_recvHi, mat->recv_reqHi, mat->status); 00905 CHECK_MPI_V_ERROR (ierr); 00906 00907 /* debug block */ 00908 if (debug) 00909 { 00910 fprintf (logFile, "FACT got 'x' values from higher neighbors:\n "); 00911 for (i = m + offsetLo; i < m + offsetLo + offsetHi; ++i) 00912 { 00913 fprintf (logFile, "%g ", work_x[i]); 00914 } 00915 fprintf (logFile, "\n"); 00916 } 00917 } 00918 00919 /* backward solve boundary nodes */ 00920 from = m; 00921 to = first_bdry; 00922 if (from != to) 00923 { 00924 backward_solve_private (m, from, to, rp, cval, diag, aval, 00925 work_y, work_x, debug); 00926 CHECK_V_ERROR; 00927 } 00928 00929 /* send boundary node elements to lower ordered subdomains */ 00930 if (mat->num_sendLo) 00931 { 00932 00933 /* copy elements to send buffer */ 00934 for (i = 0; i < sendlenLo; i++) 00935 { 00936 sendbufLo[i] = work_x[sendindLo[i]]; 00937 } 00938 00939 /* start the sends */ 00940 ierr = MPI_Startall (mat->num_sendLo, mat->send_reqLo); 00941 CHECK_MPI_V_ERROR (ierr); 00942 00943 /* debug block */ 00944 if (debug) 00945 { 00946 fprintf (logFile, 00947 "\nFACT sending 'x' values to lower neighbor:\nFACT "); 00948 for (i = 0; i < sendlenLo; i++) 00949 { 00950 fprintf (logFile, "%g ", sendbufLo[i]); 00951 } 00952 fprintf (logFile, "\n"); 00953 } 00954 } 00955 00956 /* backward solve interior nodes */ 00957 from = first_bdry; 00958 to = 0; 00959 if (from != to) 00960 { 00961 backward_solve_private (m, from, to, rp, cval, diag, aval, 00962 work_y, work_x, debug); 00963 CHECK_V_ERROR; 00964 } 00965 00966 /* copy solution from work vector lhs vector */ 00967 memcpy (lhs, work_x, m * sizeof (double)); 00968 00969 if (debug) 00970 { 00971 fprintf (logFile, "\nFACT solution: "); 00972 for (i = 0; i < m; ++i) 00973 { 00974 fprintf (logFile, "%g ", lhs[i]); 00975 } 00976 fprintf (logFile, "\n"); 00977 } 00978 00979 /* wait for sends to go through */ 00980 if (mat->num_sendLo) 00981 { 00982 ierr = MPI_Waitall (mat->num_sendLo, mat->send_reqLo, mat->status); 00983 CHECK_MPI_V_ERROR (ierr); 00984 } 00985 00986 if (mat->num_sendHi) 00987 { 00988 ierr = MPI_Waitall (mat->num_sendHi, mat->send_reqHi, mat->status); 00989 CHECK_MPI_V_ERROR (ierr); 00990 } 00991 END_FUNC_DH} 00992 00993 00994 00995 #undef __FUNC__ 00996 #define __FUNC__ "forward_solve_private" 00997 void 00998 forward_solve_private (int m, int from, int to, int *rp, 00999 int *cval, int *diag, double *aval, 01000 double *rhs, double *work_y, bool debug) 01001 { 01002 START_FUNC_DH int i, j, idx; 01003 01004 if (debug) 01005 { 01006 fprintf (logFile, 01007 "\nFACT starting forward_solve_private; from= %i; to= %i, m= %i\n", 01008 1 + from, 1 + to, m); 01009 } 01010 01011 /* 01012 if (from == 0) { 01013 work_y[0] = rhs[0]; 01014 if (debug) { 01015 fprintf(logFile, "FACT work_y[%i] = %g\n------------\n", 1+beg_rowG, work_y[0]); 01016 } 01017 } else { 01018 --from; 01019 } 01020 */ 01021 01022 if (debug) 01023 { 01024 for (i = from; i < to; ++i) 01025 { 01026 int len = diag[i] - rp[i]; 01027 int *col = cval + rp[i]; 01028 double *val = aval + rp[i]; 01029 double sum = rhs[i]; 01030 01031 fprintf (logFile, "FACT solving for work_y[%i] (global)\n", 01032 i + 1 + beg_rowG); 01033 fprintf (logFile, "FACT sum = %g\n", sum); 01034 for (j = 0; j < len; ++j) 01035 { 01036 idx = col[j]; 01037 sum -= (val[j] * work_y[idx]); 01038 fprintf (logFile, 01039 "FACT sum(%g) -= val[j] (%g) * work_y[%i] (%g)\n", 01040 sum, val[j], 1 + idx, work_y[idx]); 01041 } 01042 work_y[i] = sum; 01043 fprintf (logFile, "FACT work_y[%i] = %g\n", 1 + i + beg_rowG, 01044 work_y[i]); 01045 fprintf (logFile, "-----------\n"); 01046 } 01047 01048 fprintf (logFile, "\nFACT work vector at end of forward solve:\n"); 01049 for (i = 0; i < to; i++) 01050 fprintf (logFile, " %i %g\n", i + 1 + beg_rowG, work_y[i]); 01051 01052 } 01053 else 01054 { 01055 for (i = from; i < to; ++i) 01056 { 01057 int len = diag[i] - rp[i]; 01058 int *col = cval + rp[i]; 01059 double *val = aval + rp[i]; 01060 double sum = rhs[i]; 01061 01062 for (j = 0; j < len; ++j) 01063 { 01064 idx = col[j]; 01065 sum -= (val[j] * work_y[idx]); 01066 } 01067 work_y[i] = sum; 01068 } 01069 } 01070 END_FUNC_DH} 01071 01072 #undef __FUNC__ 01073 #define __FUNC__ "backward_solve_private" 01074 void 01075 backward_solve_private (int m, int from, int to, int *rp, 01076 int *cval, int *diag, double *aval, 01077 double *work_y, double *work_x, bool debug) 01078 { 01079 START_FUNC_DH int i, j, idx; 01080 01081 if (debug) 01082 { 01083 fprintf (logFile, 01084 "\nFACT starting backward_solve_private; from= %i; to= %i, m= %i\n", 01085 1 + from, 1 + to, m); 01086 for (i = from - 1; i >= to; --i) 01087 { 01088 int len = rp[i + 1] - diag[i] - 1; 01089 int *col = cval + diag[i] + 1; 01090 double *val = aval + diag[i] + 1; 01091 double sum = work_y[i]; 01092 fprintf (logFile, "FACT solving for work_x[%i]\n", 01093 i + 1 + beg_rowG); 01094 01095 for (j = 0; j < len; ++j) 01096 { 01097 idx = col[j]; 01098 sum -= (val[j] * work_x[idx]); 01099 fprintf (logFile, 01100 "FACT sum(%g) -= val[j] (%g) * work_x[idx] (%g)\n", 01101 sum, val[j], work_x[idx]); 01102 } 01103 work_x[i] = sum * aval[diag[i]]; 01104 fprintf (logFile, "FACT work_x[%i] = %g\n", 1 + i, work_x[i]); 01105 fprintf (logFile, "----------\n"); 01106 } 01107 01108 } 01109 else 01110 { 01111 for (i = from - 1; i >= to; --i) 01112 { 01113 int len = rp[i + 1] - diag[i] - 1; 01114 int *col = cval + diag[i] + 1; 01115 double *val = aval + diag[i] + 1; 01116 double sum = work_y[i]; 01117 01118 for (j = 0; j < len; ++j) 01119 { 01120 idx = col[j]; 01121 sum -= (val[j] * work_x[idx]); 01122 } 01123 work_x[i] = sum * aval[diag[i]]; 01124 } 01125 } 01126 END_FUNC_DH} 01127 01128 #undef __FUNC__ 01129 #define __FUNC__ "Factor_dhInit" 01130 void 01131 Factor_dhInit (void *A, bool fillFlag, bool avalFlag, 01132 double rho, int id, int beg_rowP, Factor_dh * Fout) 01133 { 01134 START_FUNC_DH int m, n, beg_row, alloc; 01135 Factor_dh F; 01136 01137 EuclidGetDimensions (A, &beg_row, &m, &n); 01138 CHECK_V_ERROR; 01139 alloc = rho * m; 01140 Factor_dhCreate (&F); 01141 CHECK_V_ERROR; 01142 01143 *Fout = F; 01144 F->m = m; 01145 F->n = n; 01146 F->beg_row = beg_rowP; 01147 F->id = id; 01148 F->alloc = alloc; 01149 01150 F->rp = (int *) MALLOC_DH ((m + 1) * sizeof (int)); 01151 CHECK_V_ERROR; 01152 F->rp[0] = 0; 01153 F->cval = (int *) MALLOC_DH (alloc * sizeof (int)); 01154 CHECK_V_ERROR; 01155 F->diag = (int *) MALLOC_DH (m * sizeof (int)); 01156 CHECK_V_ERROR; 01157 if (fillFlag) 01158 { 01159 F->fill = (int *) MALLOC_DH (alloc * sizeof (int)); 01160 CHECK_V_ERROR; 01161 } 01162 if (avalFlag) 01163 { 01164 F->aval = (REAL_DH *) MALLOC_DH (alloc * sizeof (REAL_DH)); 01165 CHECK_V_ERROR; 01166 } 01167 END_FUNC_DH} 01168 01169 #undef __FUNC__ 01170 #define __FUNC__ "Factor_dhReallocate" 01171 void 01172 Factor_dhReallocate (Factor_dh F, int used, int additional) 01173 { 01174 START_FUNC_DH int alloc = F->alloc; 01175 01176 if (used + additional > F->alloc) 01177 { 01178 int *tmpI; 01179 while (alloc < used + additional) 01180 alloc *= 2.0; 01181 F->alloc = alloc; 01182 tmpI = F->cval; 01183 F->cval = (int *) MALLOC_DH (alloc * sizeof (int)); 01184 CHECK_V_ERROR; 01185 memcpy (F->cval, tmpI, used * sizeof (int)); 01186 FREE_DH (tmpI); 01187 CHECK_V_ERROR; 01188 if (F->fill != NULL) 01189 { 01190 tmpI = F->fill; 01191 F->fill = (int *) MALLOC_DH (alloc * sizeof (int)); 01192 CHECK_V_ERROR; 01193 memcpy (F->fill, tmpI, used * sizeof (int)); 01194 FREE_DH (tmpI); 01195 CHECK_V_ERROR; 01196 } 01197 if (F->aval != NULL) 01198 { 01199 REAL_DH *tmpF = F->aval; 01200 F->aval = (REAL_DH *) MALLOC_DH (alloc * sizeof (REAL_DH)); 01201 CHECK_V_ERROR; 01202 memcpy (F->aval, tmpF, used * sizeof (REAL_DH)); 01203 FREE_DH (tmpF); 01204 CHECK_V_ERROR; 01205 } 01206 } 01207 END_FUNC_DH} 01208 01209 #undef __FUNC__ 01210 #define __FUNC__ "Factor_dhTranspose" 01211 void 01212 Factor_dhTranspose (Factor_dh A, Factor_dh * Bout) 01213 { 01214 START_FUNC_DH Factor_dh B; 01215 01216 if (np_dh > 1) 01217 { 01218 SET_V_ERROR ("only for sequential"); 01219 } 01220 01221 Factor_dhCreate (&B); 01222 CHECK_V_ERROR; 01223 *Bout = B; 01224 B->m = B->n = A->m; 01225 if (B->aval == NULL) 01226 { 01227 mat_dh_transpose_private (A->m, A->rp, &B->rp, A->cval, &B->cval, 01228 A->aval, NULL); 01229 CHECK_V_ERROR; 01230 } 01231 else 01232 { 01233 mat_dh_transpose_private (A->m, A->rp, &B->rp, A->cval, &B->cval, 01234 A->aval, &B->aval); 01235 CHECK_V_ERROR; 01236 } 01237 END_FUNC_DH} 01238 01239 01240 /* this could be done using OpenMP, but I took it out for now */ 01241 #undef __FUNC__ 01242 #define __FUNC__ "Factor_dhSolveSeq" 01243 void 01244 Factor_dhSolveSeq (double *rhs, double *lhs, Euclid_dh ctx) 01245 { 01246 START_FUNC_DH Factor_dh F = ctx->F; 01247 if (F == NULL) 01248 { 01249 printf ("F is null.\n"); 01250 } 01251 else 01252 { 01253 printf ("F isn't null.\n"); 01254 } 01255 int *rp, *cval, *diag; 01256 int i, j, *vi, nz, m = F->m; 01257 REAL_DH *aval, *work; 01258 /* REAL_DH *scale; */ 01259 REAL_DH *v, sum; 01260 bool debug = false; 01261 01262 if (ctx->F->debug && logFile != NULL) 01263 debug = true; 01264 01265 rp = F->rp; 01266 cval = F->cval; 01267 aval = F->aval; 01268 diag = F->diag; 01269 /* scale = ctx->scale; */ 01270 work = ctx->work; 01271 01272 if (debug) 01273 { 01274 fprintf (logFile, 01275 "\nFACT ============================================================\n"); 01276 fprintf (logFile, "FACT starting Factor_dhSolveSeq\n"); 01277 01278 /* forward solve lower triangle */ 01279 fprintf (logFile, "\nFACT STARTING FORWARD SOLVE\n------------\n"); 01280 work[0] = rhs[0]; 01281 fprintf (logFile, "FACT work[0] = %g\n------------\n", work[0]); 01282 for (i = 1; i < m; i++) 01283 { 01284 v = aval + rp[i]; 01285 vi = cval + rp[i]; 01286 nz = diag[i] - rp[i]; 01287 fprintf (logFile, "FACT solving for work[%i]\n", i + 1); 01288 sum = rhs[i]; 01289 for (j = 0; j < nz; ++j) 01290 { 01291 sum -= (v[j] * work[vi[j]]); 01292 fprintf (logFile, 01293 "FACT sum (%g) -= v[j] (%g) * work[vi[j]] (%g)\n", 01294 sum, v[j], work[vi[j]]); 01295 } 01296 work[i] = sum; 01297 fprintf (logFile, "FACT work[%i] = %g\n------------\n", 1 + i, 01298 work[i]); 01299 } 01300 01301 01302 fprintf (logFile, "\nFACT work vector at end of forward solve:\n"); 01303 for (i = 0; i < m; i++) 01304 fprintf (logFile, " %i %g\n", i + 1, work[i]); 01305 01306 01307 /* backward solve upper triangular boundaries (sequential) */ 01308 fprintf (logFile, "\nFACT STARTING BACKWARD SOLVE\n--------------\n"); 01309 for (i = m - 1; i >= 0; i--) 01310 { 01311 v = aval + diag[i] + 1; 01312 vi = cval + diag[i] + 1; 01313 nz = rp[i + 1] - diag[i] - 1; 01314 fprintf (logFile, "FACT solving for lhs[%i]\n", i + 1); 01315 sum = work[i]; 01316 for (j = 0; j < nz; ++j) 01317 { 01318 sum -= (v[j] * work[vi[j]]); 01319 fprintf (logFile, 01320 "FACT sum (%g) -= v[j] (%g) * work[vi[j]] (%g)\n", 01321 sum, v[j], work[vi[j]]); 01322 } 01323 lhs[i] = work[i] = sum * aval[diag[i]]; 01324 fprintf (logFile, "FACT lhs[%i] = %g\n------------\n", 1 + i, 01325 lhs[i]); 01326 fprintf (logFile, "FACT solving for lhs[%i]\n", i + 1); 01327 } 01328 01329 fprintf (logFile, "\nFACT solution: "); 01330 for (i = 0; i < m; ++i) 01331 fprintf (logFile, "%g ", lhs[i]); 01332 fprintf (logFile, "\n"); 01333 01334 01335 } 01336 else 01337 { 01338 /* forward solve lower triangle */ 01339 work[0] = rhs[0]; 01340 for (i = 1; i < m; i++) 01341 { 01342 v = aval + rp[i]; 01343 vi = cval + rp[i]; 01344 nz = diag[i] - rp[i]; 01345 sum = rhs[i]; 01346 while (nz--) 01347 sum -= (*v++ * work[*vi++]); 01348 work[i] = sum; 01349 } 01350 01351 /* backward solve upper triangular boundaries (sequential) */ 01352 for (i = m - 1; i >= 0; i--) 01353 { 01354 v = aval + diag[i] + 1; 01355 vi = cval + diag[i] + 1; 01356 nz = rp[i + 1] - diag[i] - 1; 01357 sum = work[i]; 01358 while (nz--) 01359 sum -= (*v++ * work[*vi++]); 01360 lhs[i] = work[i] = sum * aval[diag[i]]; 01361 } 01362 } 01363 END_FUNC_DH} 01364 01365 /*--------------------------------------------------------------- 01366 * next two are used by Factor_dhPrintXXX methods 01367 *---------------------------------------------------------------*/ 01368 01369 #undef __FUNC__ 01370 #define __FUNC__ "adjust_bj_private" 01371 void 01372 adjust_bj_private (Factor_dh mat) 01373 { 01374 START_FUNC_DH int i; 01375 int nz = mat->rp[mat->m]; 01376 int beg_row = mat->beg_row; 01377 for (i = 0; i < nz; ++i) 01378 mat->cval[i] += beg_row; 01379 END_FUNC_DH} 01380 01381 #undef __FUNC__ 01382 #define __FUNC__ "unadjust_bj_private" 01383 void 01384 unadjust_bj_private (Factor_dh mat) 01385 { 01386 START_FUNC_DH int i; 01387 int nz = mat->rp[mat->m]; 01388 int beg_row = mat->beg_row; 01389 for (i = 0; i < nz; ++i) 01390 mat->cval[i] -= beg_row; 01391 END_FUNC_DH} 01392 01393 #undef __FUNC__ 01394 #define __FUNC__ "Factor_dhMaxPivotInverse" 01395 double 01396 Factor_dhMaxPivotInverse (Factor_dh mat) 01397 { 01398 START_FUNC_DH int i, m = mat->m, *diags = mat->diag; 01399 REAL_DH *aval = mat->aval; 01400 double minGlobal = 0.0, min = aval[diags[0]]; 01401 double retval; 01402 01403 for (i = 0; i < m; ++i) 01404 min = MIN (min, fabs (aval[diags[i]])); 01405 if (np_dh == 1) 01406 { 01407 minGlobal = min; 01408 } 01409 else 01410 { 01411 MPI_Reduce (&min, &minGlobal, 1, MPI_DOUBLE, MPI_MIN, 0, comm_dh); 01412 } 01413 01414 if (minGlobal == 0) 01415 { 01416 retval = 0; 01417 } 01418 else 01419 { 01420 retval = 1.0 / minGlobal; 01421 } 01422 END_FUNC_VAL (retval)} 01423 01424 #undef __FUNC__ 01425 #define __FUNC__ "Factor_dhMaxValue" 01426 double 01427 Factor_dhMaxValue (Factor_dh mat) 01428 { 01429 START_FUNC_DH double maxGlobal = 0.0, max = 0.0; 01430 int i, nz = mat->rp[mat->m]; 01431 REAL_DH *aval = mat->aval; 01432 01433 for (i = 0; i < nz; ++i) 01434 { 01435 max = MAX (max, fabs (aval[i])); 01436 } 01437 01438 if (np_dh == 1) 01439 { 01440 maxGlobal = max; 01441 } 01442 else 01443 { 01444 MPI_Reduce (&max, &maxGlobal, 1, MPI_DOUBLE, MPI_MAX, 0, comm_dh); 01445 } 01446 END_FUNC_VAL (maxGlobal)} 01447 01448 01449 #undef __FUNC__ 01450 #define __FUNC__ "Factor_dhCondEst" 01451 double 01452 Factor_dhCondEst (Factor_dh mat, Euclid_dh ctx) 01453 { 01454 START_FUNC_DH double max = 0.0, maxGlobal = 0.0; 01455 double *x; 01456 int i, m = mat->m; 01457 Vec_dh lhs, rhs; 01458 01459 Vec_dhCreate (&lhs); 01460 CHECK_ERROR (-1); 01461 Vec_dhInit (lhs, m); 01462 CHECK_ERROR (-1); 01463 Vec_dhDuplicate (lhs, &rhs); 01464 CHECK_ERROR (-1); 01465 Vec_dhSet (rhs, 1.0); 01466 CHECK_ERROR (-1); 01467 Euclid_dhApply (ctx, rhs->vals, lhs->vals); 01468 CHECK_ERROR (-1); 01469 01470 x = lhs->vals; 01471 for (i = 0; i < m; ++i) 01472 { 01473 max = MAX (max, fabs (x[i])); 01474 } 01475 01476 if (np_dh == 1) 01477 { 01478 maxGlobal = max; 01479 } 01480 else 01481 { 01482 MPI_Reduce (&max, &maxGlobal, 1, MPI_DOUBLE, MPI_MAX, 0, comm_dh); 01483 } 01484 END_FUNC_VAL (maxGlobal)}
1.7.4