|
IFPACK Development
|
00001 /*@HEADER 00002 // *********************************************************************** 00003 // 00004 // Ifpack: Object-Oriented Algebraic Preconditioner Package 00005 // Copyright (2009) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 //@HEADER 00028 */ 00029 00030 #include "Euclid_dh.h" 00031 #include "Mem_dh.h" 00032 #include "Mat_dh.h" 00033 #include "Vec_dh.h" 00034 #include "Factor_dh.h" 00035 #include "getRow_dh.h" 00036 #include "ilu_dh.h" 00037 #include "Parser_dh.h" 00038 #include "SortedList_dh.h" 00039 #include "SubdomainGraph_dh.h" 00040 #include "ExternalRows_dh.h" 00041 #include "krylov_dh.h" 00042 00043 static void get_runtime_params_private (Euclid_dh ctx); 00044 static void invert_diagonals_private (Euclid_dh ctx); 00045 static void compute_rho_private (Euclid_dh ctx); 00046 static void factor_private (Euclid_dh ctx); 00047 /* static void discard_indices_private(Euclid_dh ctx); */ 00048 static void reduce_timings_private (Euclid_dh ctx); 00049 00050 #undef __FUNC__ 00051 #define __FUNC__ "Euclid_dhCreate" 00052 void 00053 Euclid_dhCreate (Euclid_dh * ctxOUT) 00054 { 00055 START_FUNC_DH 00056 struct _mpi_interface_dh *ctx = 00057 (struct _mpi_interface_dh *) 00058 MALLOC_DH (sizeof (struct _mpi_interface_dh)); 00059 CHECK_V_ERROR; 00060 *ctxOUT = ctx; 00061 00062 ctx->isSetup = false; 00063 00064 ctx->rho_init = 2.0; 00065 ctx->rho_final = 0.0; 00066 00067 ctx->m = 0; 00068 ctx->n = 0; 00069 ctx->rhs = NULL; 00070 ctx->A = NULL; 00071 ctx->F = NULL; 00072 ctx->sg = NULL; 00073 00074 ctx->scale = NULL; 00075 ctx->isScaled = false; 00076 ctx->work = NULL; 00077 ctx->work2 = NULL; 00078 ctx->from = 0; 00079 ctx->to = 0; 00080 00081 strcpy (ctx->algo_par, "pilu"); 00082 strcpy (ctx->algo_ilu, "iluk"); 00083 ctx->level = 1; 00084 ctx->droptol = DEFAULT_DROP_TOL; 00085 ctx->sparseTolA = 0.0; 00086 ctx->sparseTolF = 0.0; 00087 ctx->pivotMin = 0.0; 00088 ctx->pivotFix = PIVOT_FIX_DEFAULT; 00089 ctx->maxVal = 0.0; 00090 00091 ctx->slist = NULL; 00092 ctx->extRows = NULL; 00093 00094 strcpy (ctx->krylovMethod, "bicgstab"); 00095 ctx->maxIts = 200; 00096 ctx->rtol = 1e-5; 00097 ctx->atol = 1e-50; 00098 ctx->its = 0; 00099 ctx->itsTotal = 0; 00100 ctx->setupCount = 0; 00101 ctx->logging = 0; 00102 ctx->printStats = (Parser_dhHasSwitch (parser_dh, "-printStats")); 00103 00104 { 00105 int i; 00106 for (i = 0; i < TIMING_BINS; ++i) 00107 ctx->timing[i] = 0.0; 00108 for (i = 0; i < STATS_BINS; ++i) 00109 ctx->stats[i] = 0.0; 00110 } 00111 ctx->timingsWereReduced = false; 00112 00113 ++ref_counter; 00114 END_FUNC_DH} 00115 00116 00117 #undef __FUNC__ 00118 #define __FUNC__ "Euclid_dhDestroy" 00119 void 00120 Euclid_dhDestroy (Euclid_dh ctx) 00121 { 00122 START_FUNC_DH 00123 if (Parser_dhHasSwitch (parser_dh, "-eu_stats") || ctx->logging) 00124 { 00125 /* insert switch so memory report will also be printed */ 00126 Parser_dhInsert (parser_dh, "-eu_mem", "1"); 00127 CHECK_V_ERROR; 00128 Euclid_dhPrintHypreReport (ctx, stdout); 00129 CHECK_V_ERROR; 00130 } 00131 00132 if (ctx->setupCount > 1 && ctx->printStats) 00133 { 00134 Euclid_dhPrintStatsShorter (ctx, stdout); 00135 CHECK_V_ERROR; 00136 } 00137 00138 if (ctx->F != NULL) 00139 { 00140 Factor_dhDestroy (ctx->F); 00141 CHECK_V_ERROR; 00142 } 00143 if (ctx->sg != NULL) 00144 { 00145 SubdomainGraph_dhDestroy (ctx->sg); 00146 CHECK_V_ERROR; 00147 } 00148 if (ctx->scale != NULL) 00149 { 00150 FREE_DH (ctx->scale); 00151 CHECK_V_ERROR; 00152 } 00153 if (ctx->work != NULL) 00154 { 00155 FREE_DH (ctx->work); 00156 CHECK_V_ERROR; 00157 } 00158 if (ctx->work2 != NULL) 00159 { 00160 FREE_DH (ctx->work2); 00161 CHECK_V_ERROR; 00162 } 00163 if (ctx->slist != NULL) 00164 { 00165 SortedList_dhDestroy (ctx->slist); 00166 CHECK_V_ERROR; 00167 } 00168 if (ctx->extRows != NULL) 00169 { 00170 ExternalRows_dhDestroy (ctx->extRows); 00171 CHECK_V_ERROR; 00172 } 00173 FREE_DH (ctx); 00174 CHECK_V_ERROR; 00175 00176 --ref_counter; 00177 END_FUNC_DH} 00178 00179 00180 /* on entry, "A" must have been set. If this context is being 00181 reused, user must ensure ??? 00182 */ 00183 #undef __FUNC__ 00184 #define __FUNC__ "Euclid_dhSetup" 00185 void 00186 Euclid_dhSetup (Euclid_dh ctx) 00187 { 00188 START_FUNC_DH int m, n, beg_row; 00189 double t1; 00190 bool isSetup = ctx->isSetup; 00191 bool bj = false; 00192 00193 /*---------------------------------------------------- 00194 * If Euclid was previously setup, print summary of 00195 * what happened during previous setup/solve 00196 *----------------------------------------------------*/ 00197 if (ctx->setupCount && ctx->printStats) 00198 { 00199 Euclid_dhPrintStatsShorter (ctx, stdout); 00200 CHECK_V_ERROR; 00201 ctx->its = 0; 00202 } 00203 00204 /*---------------------------------------------------- 00205 * zero array for statistical reporting 00206 *----------------------------------------------------*/ 00207 { 00208 int i; 00209 for (i = 0; i < STATS_BINS; ++i) 00210 ctx->stats[i] = 0.0; 00211 } 00212 00213 /*---------------------------------------------------- 00214 * internal timing 00215 *----------------------------------------------------*/ 00216 ctx->timing[SOLVE_START_T] = MPI_Wtime (); 00217 /* sum timing from last linear solve cycle, if any */ 00218 ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T]; 00219 ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0; 00220 00221 if (ctx->F != NULL) 00222 { 00223 Factor_dhDestroy (ctx->F); 00224 CHECK_V_ERROR; 00225 ctx->F = NULL; 00226 } 00227 00228 if (ctx->A == NULL) 00229 { 00230 SET_V_ERROR ("must set ctx->A before calling init"); 00231 } 00232 EuclidGetDimensions (ctx->A, &beg_row, &m, &n); 00233 CHECK_V_ERROR; 00234 ctx->m = m; 00235 ctx->n = n; 00236 00237 if (Parser_dhHasSwitch (parser_dh, "-print_size")) 00238 { 00239 printf_dh 00240 ("setting up linear system; global rows: %i local rows: %i (on P_0)\n", 00241 n, m); 00242 } 00243 00244 sprintf (msgBuf_dh, "localRow= %i; globalRows= %i; beg_row= %i", m, n, 00245 beg_row); 00246 SET_INFO (msgBuf_dh); 00247 00248 bj = Parser_dhHasSwitch (parser_dh, "-bj"); 00249 00250 /*------------------------------------------------------------------------ 00251 * Setup the SubdomainGraph, which contains connectivity and 00252 * and permutation information. If this context is being reused, 00253 * this may already have been done; if being resused, the underlying 00254 * subdomain graph cannot change (user's responsibility?) 00255 *------------------------------------------------------------------------*/ 00256 if (ctx->sg == NULL) 00257 { 00258 int blocks = np_dh; 00259 t1 = MPI_Wtime (); 00260 if (np_dh == 1) 00261 { 00262 Parser_dhReadInt (parser_dh, "-blocks", &blocks); 00263 CHECK_V_ERROR; 00264 SubdomainGraph_dhCreate (&(ctx->sg)); 00265 CHECK_V_ERROR; 00266 SubdomainGraph_dhInit (ctx->sg, blocks, bj, ctx->A); 00267 CHECK_V_ERROR; 00268 } 00269 else 00270 { 00271 SubdomainGraph_dhCreate (&(ctx->sg)); 00272 CHECK_V_ERROR; 00273 SubdomainGraph_dhInit (ctx->sg, -1, bj, ctx->A); 00274 CHECK_V_ERROR; 00275 } 00276 ctx->timing[SUB_GRAPH_T] += (MPI_Wtime () - t1); 00277 } 00278 00279 00280 /* SubdomainGraph_dhDump(ctx->sg, "SG.dump"); CHECK_V_ERROR; */ 00281 00282 /*---------------------------------------------------- 00283 * for debugging 00284 *----------------------------------------------------*/ 00285 if (Parser_dhHasSwitch (parser_dh, "-doNotFactor")) 00286 { 00287 goto END_OF_FUNCTION; 00288 } 00289 00290 00291 /*---------------------------------------------------- 00292 * query parser for runtime parameters 00293 *----------------------------------------------------*/ 00294 if (!isSetup) 00295 { 00296 get_runtime_params_private (ctx); 00297 CHECK_V_ERROR; 00298 } 00299 if (!strcmp (ctx->algo_par, "bj")) 00300 bj = false; 00301 00302 /*--------------------------------------------------------- 00303 * allocate and initialize storage for row-scaling 00304 * (ctx->isScaled is set in get_runtime_params_private(); ) 00305 *---------------------------------------------------------*/ 00306 if (ctx->scale == NULL) 00307 { 00308 ctx->scale = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH)); 00309 CHECK_V_ERROR; 00310 } 00311 { 00312 int i; 00313 for (i = 0; i < m; ++i) 00314 ctx->scale[i] = 1.0; 00315 } 00316 00317 /*------------------------------------------------------------------ 00318 * allocate work vectors; used in factorization and triangular solves; 00319 *------------------------------------------------------------------*/ 00320 if (ctx->work == NULL) 00321 { 00322 ctx->work = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH)); 00323 CHECK_V_ERROR; 00324 } 00325 if (ctx->work2 == NULL) 00326 { 00327 ctx->work2 = (REAL_DH *) MALLOC_DH (m * sizeof (REAL_DH)); 00328 CHECK_V_ERROR; 00329 } 00330 00331 /*----------------------------------------------------------------- 00332 * perform the incomplete factorization (this should be, at least 00333 * for higher level ILUK, the most time-intensive portion of setup) 00334 *-----------------------------------------------------------------*/ 00335 t1 = MPI_Wtime (); 00336 factor_private (ctx); 00337 CHECK_V_ERROR; 00338 ctx->timing[FACTOR_T] += (MPI_Wtime () - t1); 00339 00340 /*-------------------------------------------------------------- 00341 * invert diagonals, for faster triangular solves 00342 *--------------------------------------------------------------*/ 00343 if (strcmp (ctx->algo_par, "none")) 00344 { 00345 invert_diagonals_private (ctx); 00346 CHECK_V_ERROR; 00347 } 00348 00349 /*-------------------------------------------------------------- 00350 * compute rho_final: global ratio of nzF/nzA 00351 * also, if -sparseA > 0, compute ratio of nzA 00352 * used in factorization 00353 *--------------------------------------------------------------*/ 00354 /* for some reason compute_rho_private() was expensive, so now it's 00355 an option, unless there's only one mpi task. 00356 */ 00357 if (Parser_dhHasSwitch (parser_dh, "-computeRho") || np_dh == 1) 00358 { 00359 if (strcmp (ctx->algo_par, "none")) 00360 { 00361 t1 = MPI_Wtime (); 00362 compute_rho_private (ctx); 00363 CHECK_V_ERROR; 00364 ctx->timing[COMPUTE_RHO_T] += (MPI_Wtime () - t1); 00365 } 00366 } 00367 00368 /*-------------------------------------------------------------- 00369 * if using PILU, set up persistent comms and global-to-local 00370 * number scheme, for efficient triangular solves. 00371 * (Thanks to Edmond Chow for these algorithmic ideas.) 00372 *--------------------------------------------------------------*/ 00373 00374 if (!strcmp (ctx->algo_par, "pilu") && np_dh > 1) 00375 { 00376 t1 = MPI_Wtime (); 00377 Factor_dhSolveSetup (ctx->F, ctx->sg); 00378 CHECK_V_ERROR; 00379 ctx->timing[SOLVE_SETUP_T] += (MPI_Wtime () - t1); 00380 } 00381 00382 END_OF_FUNCTION:; 00383 00384 /*------------------------------------------------------- 00385 * internal timing 00386 *-------------------------------------------------------*/ 00387 ctx->timing[SETUP_T] += (MPI_Wtime () - ctx->timing[SOLVE_START_T]); 00388 ctx->setupCount += 1; 00389 00390 ctx->isSetup = true; 00391 00392 END_FUNC_DH} 00393 00394 00395 #undef __FUNC__ 00396 #define __FUNC__ "get_runtime_params_private" 00397 void 00398 get_runtime_params_private (Euclid_dh ctx) 00399 { 00400 START_FUNC_DH char *tmp; 00401 00402 /* params for use of internal solvers */ 00403 Parser_dhReadInt (parser_dh, "-maxIts", &(ctx->maxIts)); 00404 Parser_dhReadDouble (parser_dh, "-rtol", &(ctx->rtol)); 00405 Parser_dhReadDouble (parser_dh, "-atol", &(ctx->atol)); 00406 00407 /* parallelization strategy (bj, pilu, none) */ 00408 tmp = NULL; 00409 Parser_dhReadString (parser_dh, "-par", &tmp); 00410 if (tmp != NULL) 00411 { 00412 strcpy (ctx->algo_par, tmp); 00413 } 00414 if (Parser_dhHasSwitch (parser_dh, "-bj")) 00415 { 00416 strcpy (ctx->algo_par, "bj"); 00417 } 00418 00419 00420 /* factorization parameters */ 00421 Parser_dhReadDouble (parser_dh, "-rho", &(ctx->rho_init)); 00422 /* inital storage allocation for factor */ 00423 Parser_dhReadInt (parser_dh, "-level", &ctx->level); 00424 Parser_dhReadInt (parser_dh, "-pc_ilu_levels", &ctx->level); 00425 00426 if (Parser_dhHasSwitch (parser_dh, "-ilut")) 00427 { 00428 Parser_dhReadDouble (parser_dh, "-ilut", &ctx->droptol); 00429 ctx->isScaled = true; 00430 strcpy (ctx->algo_ilu, "ilut"); 00431 } 00432 00433 /* make sure both algo_par and algo_ilu are set to "none," 00434 if at least one is. 00435 */ 00436 if (!strcmp (ctx->algo_par, "none")) 00437 { 00438 strcpy (ctx->algo_ilu, "none"); 00439 } 00440 else if (!strcmp (ctx->algo_ilu, "none")) 00441 { 00442 strcpy (ctx->algo_par, "none"); 00443 } 00444 00445 00446 Parser_dhReadDouble (parser_dh, "-sparseA", &(ctx->sparseTolA)); 00447 /* sparsify A before factoring */ 00448 Parser_dhReadDouble (parser_dh, "-sparseF", &(ctx->sparseTolF)); 00449 /* sparsify after factoring */ 00450 Parser_dhReadDouble (parser_dh, "-pivotMin", &(ctx->pivotMin)); 00451 /* adjust pivots if smaller than this */ 00452 Parser_dhReadDouble (parser_dh, "-pivotFix", &(ctx->pivotFix)); 00453 /* how to adjust pivots */ 00454 00455 /* set row scaling for mandatory cases */ 00456 if (ctx->sparseTolA || !strcmp (ctx->algo_ilu, "ilut")) 00457 { 00458 ctx->isScaled = true; 00459 } 00460 00461 /* solve method */ 00462 tmp = NULL; 00463 Parser_dhReadString (parser_dh, "-ksp_type", &tmp); 00464 if (tmp != NULL) 00465 { 00466 strcpy (ctx->krylovMethod, tmp); 00467 00468 if (!strcmp (ctx->krylovMethod, "bcgs")) 00469 { 00470 strcpy (ctx->krylovMethod, "bicgstab"); 00471 } 00472 } 00473 END_FUNC_DH} 00474 00475 #undef __FUNC__ 00476 #define __FUNC__ "invert_diagonals_private" 00477 void 00478 invert_diagonals_private (Euclid_dh ctx) 00479 { 00480 START_FUNC_DH REAL_DH * aval = ctx->F->aval; 00481 int *diag = ctx->F->diag; 00482 if (aval == NULL || diag == NULL) 00483 { 00484 SET_INFO ("can't invert diags; either F->aval or F->diag is NULL"); 00485 } 00486 else 00487 { 00488 int i, m = ctx->F->m; 00489 for (i = 0; i < m; ++i) 00490 { 00491 aval[diag[i]] = 1.0 / aval[diag[i]]; 00492 } 00493 } 00494 END_FUNC_DH} 00495 00496 00497 /* records rho_final (max for all processors) */ 00498 #undef __FUNC__ 00499 #define __FUNC__ "compute_rho_private" 00500 void 00501 compute_rho_private (Euclid_dh ctx) 00502 { 00503 START_FUNC_DH if (ctx->F != NULL) 00504 { 00505 double bufLocal[3], bufGlobal[3]; 00506 int m = ctx->m; 00507 00508 ctx->stats[NZF_STATS] = (double) ctx->F->rp[m]; 00509 bufLocal[0] = ctx->stats[NZA_STATS]; /* nzA */ 00510 bufLocal[1] = ctx->stats[NZF_STATS]; /* nzF */ 00511 bufLocal[2] = ctx->stats[NZA_USED_STATS]; /* nzA used */ 00512 00513 if (np_dh == 1) 00514 { 00515 bufGlobal[0] = bufLocal[0]; 00516 bufGlobal[1] = bufLocal[1]; 00517 bufGlobal[2] = bufLocal[2]; 00518 } 00519 else 00520 { 00521 MPI_Reduce (bufLocal, bufGlobal, 3, MPI_DOUBLE, MPI_SUM, 0, 00522 comm_dh); 00523 } 00524 00525 if (myid_dh == 0) 00526 { 00527 00528 /* compute rho */ 00529 if (bufGlobal[0] && bufGlobal[1]) 00530 { 00531 ctx->rho_final = bufGlobal[1] / bufGlobal[0]; 00532 } 00533 else 00534 { 00535 ctx->rho_final = -1; 00536 } 00537 00538 /* compute ratio of nonzeros in A that were used */ 00539 if (bufGlobal[0] && bufGlobal[2]) 00540 { 00541 ctx->stats[NZA_RATIO_STATS] = 00542 100.0 * bufGlobal[2] / bufGlobal[0]; 00543 } 00544 else 00545 { 00546 ctx->stats[NZA_RATIO_STATS] = 100.0; 00547 } 00548 } 00549 } 00550 END_FUNC_DH} 00551 00552 #undef __FUNC__ 00553 #define __FUNC__ "factor_private" 00554 void 00555 factor_private (Euclid_dh ctx) 00556 { 00557 START_FUNC_DH 00558 /*------------------------------------------------------------- 00559 * special case, for testing/debugging: no preconditioning 00560 *-------------------------------------------------------------*/ 00561 if (!strcmp (ctx->algo_par, "none")) 00562 { 00563 goto DO_NOTHING; 00564 } 00565 00566 /*------------------------------------------------------------- 00567 * Initialize object to hold factor. 00568 *-------------------------------------------------------------*/ 00569 { 00570 int br = 0; 00571 int id = np_dh; 00572 if (ctx->sg != NULL) 00573 { 00574 br = ctx->sg->beg_rowP[myid_dh]; 00575 id = ctx->sg->o2n_sub[myid_dh]; 00576 } 00577 Factor_dhInit (ctx->A, true, true, ctx->rho_init, id, br, &(ctx->F)); 00578 CHECK_V_ERROR; 00579 ctx->F->bdry_count = ctx->sg->bdry_count[myid_dh]; 00580 ctx->F->first_bdry = ctx->F->m - ctx->F->bdry_count; 00581 if (!strcmp (ctx->algo_par, "bj")) 00582 ctx->F->blockJacobi = true; 00583 if (Parser_dhHasSwitch (parser_dh, "-bj")) 00584 ctx->F->blockJacobi = true; 00585 } 00586 00587 /*------------------------------------------------------------- 00588 * single mpi task with single or multiple subdomains 00589 *-------------------------------------------------------------*/ 00590 if (np_dh == 1) 00591 { 00592 00593 /* ILU(k) factorization */ 00594 if (!strcmp (ctx->algo_ilu, "iluk")) 00595 { 00596 ctx->from = 0; 00597 ctx->to = ctx->m; 00598 00599 /* only for debugging: use ilu_mpi_pilu */ 00600 if (Parser_dhHasSwitch (parser_dh, "-mpi")) 00601 { 00602 if (ctx->sg != NULL && ctx->sg->blocks > 1) 00603 { 00604 SET_V_ERROR 00605 ("only use -mpi, which invokes ilu_mpi_pilu(), for np = 1 and -blocks 1"); 00606 } 00607 iluk_mpi_pilu (ctx); 00608 CHECK_V_ERROR; 00609 } 00610 00611 /* "normal" operation */ 00612 else 00613 { 00614 iluk_seq_block (ctx); 00615 CHECK_V_ERROR; 00616 /* note: iluk_seq_block() performs block jacobi iluk if ctx->algo_par == bj. */ 00617 } 00618 } 00619 00620 /* ILUT factorization */ 00621 else if (!strcmp (ctx->algo_ilu, "ilut")) 00622 { 00623 ctx->from = 0; 00624 ctx->to = ctx->m; 00625 ilut_seq (ctx); 00626 CHECK_V_ERROR; 00627 } 00628 00629 /* all other factorization methods */ 00630 else 00631 { 00632 sprintf (msgBuf_dh, "factorization method: %s is not implemented", 00633 ctx->algo_ilu); 00634 SET_V_ERROR (msgBuf_dh); 00635 } 00636 } 00637 00638 /*------------------------------------------------------------- 00639 * multiple mpi tasks with multiple subdomains 00640 *-------------------------------------------------------------*/ 00641 else 00642 { 00643 /* block jacobi */ 00644 if (!strcmp (ctx->algo_par, "bj")) 00645 { 00646 ctx->from = 0; 00647 ctx->to = ctx->m; 00648 iluk_mpi_bj (ctx); 00649 CHECK_V_ERROR; 00650 } 00651 00652 /* iluk */ 00653 else if (!strcmp (ctx->algo_ilu, "iluk")) 00654 { 00655 bool bj = ctx->F->blockJacobi; /* for debugging */ 00656 00657 /* printf_dh("\n@@@ starting ilu_mpi_pilu @@@\n"); */ 00658 00659 SortedList_dhCreate (&(ctx->slist)); 00660 CHECK_V_ERROR; 00661 SortedList_dhInit (ctx->slist, ctx->sg); 00662 CHECK_V_ERROR; 00663 ExternalRows_dhCreate (&(ctx->extRows)); 00664 CHECK_V_ERROR; 00665 ExternalRows_dhInit (ctx->extRows, ctx); 00666 CHECK_V_ERROR; 00667 00668 /* factor interior rows */ 00669 ctx->from = 0; 00670 ctx->to = ctx->F->first_bdry; 00671 00672 /* 00673 if (Parser_dhHasSwitch(parser_dh, "-test")) { 00674 printf("[%i] Euclid_dh :: TESTING ilu_seq\n", myid_dh); 00675 iluk_seq(ctx); CHECK_V_ERROR; 00676 } else { 00677 iluk_mpi_pilu(ctx); CHECK_V_ERROR; 00678 } 00679 */ 00680 00681 iluk_seq (ctx); 00682 CHECK_V_ERROR; 00683 00684 /* get external rows from lower ordered neighbors in the 00685 subdomain graph; these rows are needed for factoring 00686 this subdomain's boundary rows. 00687 */ 00688 if (!bj) 00689 { 00690 ExternalRows_dhRecvRows (ctx->extRows); 00691 CHECK_V_ERROR; 00692 } 00693 00694 /* factor boundary rows */ 00695 ctx->from = ctx->F->first_bdry; 00696 ctx->to = ctx->F->m; 00697 iluk_mpi_pilu (ctx); 00698 CHECK_V_ERROR; 00699 00700 /* send this processor's boundary rows to higher ordered 00701 neighbors in the subdomain graph. 00702 */ 00703 if (!bj) 00704 { 00705 ExternalRows_dhSendRows (ctx->extRows); 00706 CHECK_V_ERROR; 00707 } 00708 00709 /* discard column indices in factor if they would alter 00710 the subdomain graph (any such elements are in upper 00711 triangular portion of the row) 00712 */ 00713 00714 00715 SortedList_dhDestroy (ctx->slist); 00716 CHECK_V_ERROR; 00717 ctx->slist = NULL; 00718 ExternalRows_dhDestroy (ctx->extRows); 00719 CHECK_V_ERROR; 00720 ctx->extRows = NULL; 00721 } 00722 00723 /* all other factorization methods */ 00724 else 00725 { 00726 sprintf (msgBuf_dh, "factorization method: %s is not implemented", 00727 ctx->algo_ilu); 00728 SET_V_ERROR (msgBuf_dh); 00729 } 00730 } 00731 00732 DO_NOTHING:; 00733 00734 END_FUNC_DH} 00735 00736 #if 0 00737 00738 #undef __FUNC__ 00739 #define __FUNC__ "discard_indices_private" 00740 void 00741 discard_indices_private (Euclid_dh ctx) 00742 { 00743 START_FUNC_DH 00744 #if 0 00745 int *rp = ctx->F->rp, *cval = ctx->F->cval; 00746 double *aval = ctx->F->aval; 00747 int m = F->m, *nabors = ctx->nabors, nc = ctx->naborCount; 00748 int i, j, k, idx, count = 0, start_of_row; 00749 int beg_row = ctx->beg_row, end_row = beg_row + m; 00750 int *diag = ctx->F->diag; 00751 00752 /* if col is not locally owned, and doesn't belong to a 00753 * nabor in the (original) subdomain graph, we need to discard 00754 * the column index and associated value. First, we'll flag all 00755 * such indices for deletion. 00756 */ 00757 for (i = 0; i < m; ++i) 00758 { 00759 for (j = rp[i]; j < rp[i + 1]; ++j) 00760 { 00761 int col = cval[j]; 00762 if (col < beg_row || col >= end_row) 00763 { 00764 bool flag = true; 00765 int owner = find_owner_private_mpi (ctx, col); 00766 CHECK_V_ERROR; 00767 00768 for (k = 0; k < nc; ++k) 00769 { 00770 if (nabors[k] == owner) 00771 { 00772 flag = false; 00773 break; 00774 } 00775 } 00776 00777 if (flag) 00778 { 00779 cval[j] = -1; 00780 ++count; 00781 } 00782 } 00783 } 00784 } 00785 00786 sprintf (msgBuf_dh, 00787 "deleting %i indices that would alter the subdomain graph", count); 00788 SET_INFO (msgBuf_dh); 00789 00790 /* Second, perform the actual deletion */ 00791 idx = 0; 00792 start_of_row = 0; 00793 for (i = 0; i < m; ++i) 00794 { 00795 for (j = start_of_row; j < rp[i + 1]; ++j) 00796 { 00797 int col = cval[j]; 00798 double val = aval[j]; 00799 if (col != -1) 00800 { 00801 cval[idx] = col; 00802 aval[idx] = val; 00803 ++idx; 00804 } 00805 } 00806 start_of_row = rp[i + 1]; 00807 rp[i + 1] = idx; 00808 } 00809 00810 /* rebuild diagonal pointers */ 00811 for (i = 0; i < m; ++i) 00812 { 00813 for (j = rp[i]; j < rp[i + 1]; ++j) 00814 { 00815 if (cval[j] == i + beg_row) 00816 { 00817 diag[i] = j; 00818 break; 00819 } 00820 } 00821 } 00822 #endif 00823 END_FUNC_DH} 00824 #endif 00825 00826 #undef __FUNC__ 00827 #define __FUNC__ "Euclid_dhSolve" 00828 void 00829 Euclid_dhSolve (Euclid_dh ctx, Vec_dh x, Vec_dh b, int *its) 00830 { 00831 START_FUNC_DH int itsOUT; 00832 Mat_dh A = (Mat_dh) ctx->A; 00833 00834 if (!strcmp (ctx->krylovMethod, "cg")) 00835 { 00836 cg_euclid (A, ctx, x->vals, b->vals, &itsOUT); 00837 ERRCHKA; 00838 } 00839 else if (!strcmp (ctx->krylovMethod, "bicgstab")) 00840 { 00841 bicgstab_euclid (A, ctx, x->vals, b->vals, &itsOUT); 00842 ERRCHKA; 00843 } 00844 else 00845 { 00846 sprintf (msgBuf_dh, "unknown krylov solver: %s", ctx->krylovMethod); 00847 SET_V_ERROR (msgBuf_dh); 00848 } 00849 *its = itsOUT; 00850 END_FUNC_DH} 00851 00852 #undef __FUNC__ 00853 #define __FUNC__ "Euclid_dhPrintStats" 00854 void 00855 Euclid_dhPrintStats (Euclid_dh ctx, FILE * fp) 00856 { 00857 START_FUNC_DH double *timing; 00858 int nz; 00859 00860 nz = Factor_dhReadNz (ctx->F); 00861 CHECK_V_ERROR; 00862 timing = ctx->timing; 00863 00864 /* add in timing from lasst setup (if any) */ 00865 ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T]; 00866 ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0; 00867 00868 reduce_timings_private (ctx); 00869 CHECK_V_ERROR; 00870 00871 fprintf_dh (fp, 00872 "\n==================== Euclid report (start) ====================\n"); 00873 fprintf_dh (fp, "\nruntime parameters\n"); 00874 fprintf_dh (fp, "------------------\n"); 00875 fprintf_dh (fp, " setups: %i\n", ctx->setupCount); 00876 fprintf_dh (fp, " tri solves: %i\n", ctx->itsTotal); 00877 fprintf_dh (fp, " parallelization method: %s\n", ctx->algo_par); 00878 fprintf_dh (fp, " factorization method: %s\n", ctx->algo_ilu); 00879 fprintf_dh (fp, " matrix was row scaled: %i\n", ctx->isScaled); 00880 00881 fprintf_dh (fp, " matrix row count: %i\n", ctx->n); 00882 fprintf_dh (fp, " nzF: %i\n", nz); 00883 fprintf_dh (fp, " rho: %g\n", ctx->rho_final); 00884 fprintf_dh (fp, " level: %i\n", ctx->level); 00885 fprintf_dh (fp, " sparseA: %g\n", ctx->sparseTolA); 00886 00887 fprintf_dh (fp, "\nEuclid timing report\n"); 00888 fprintf_dh (fp, "--------------------\n"); 00889 fprintf_dh (fp, " solves total: %0.2f (see docs)\n", 00890 timing[TOTAL_SOLVE_T]); 00891 fprintf_dh (fp, " tri solves: %0.2f\n", timing[TRI_SOLVE_T]); 00892 fprintf_dh (fp, " setups: %0.2f\n", timing[SETUP_T]); 00893 fprintf_dh (fp, " subdomain graph setup: %0.2f\n", 00894 timing[SUB_GRAPH_T]); 00895 fprintf_dh (fp, " factorization: %0.2f\n", timing[FACTOR_T]); 00896 fprintf_dh (fp, " solve setup: %0.2f\n", 00897 timing[SOLVE_SETUP_T]); 00898 fprintf_dh (fp, " rho: %0.2f\n", 00899 ctx->timing[COMPUTE_RHO_T]); 00900 fprintf_dh (fp, " misc (should be small): %0.2f\n", 00901 timing[SETUP_T] - (timing[SUB_GRAPH_T] + timing[FACTOR_T] + 00902 timing[SOLVE_SETUP_T] + 00903 timing[COMPUTE_RHO_T])); 00904 00905 if (ctx->sg != NULL) 00906 { 00907 SubdomainGraph_dhPrintStats (ctx->sg, fp); 00908 CHECK_V_ERROR; 00909 SubdomainGraph_dhPrintRatios (ctx->sg, fp); 00910 CHECK_V_ERROR; 00911 } 00912 00913 00914 fprintf_dh (fp, "\nApplicable if Euclid's internal solvers were used:\n"); 00915 fprintf_dh (fp, "---------------------------------------------------\n"); 00916 fprintf_dh (fp, " solve method: %s\n", ctx->krylovMethod); 00917 fprintf_dh (fp, " maxIts: %i\n", ctx->maxIts); 00918 fprintf_dh (fp, " rtol: %g\n", ctx->rtol); 00919 fprintf_dh (fp, " atol: %g\n", ctx->atol); 00920 fprintf_dh (fp, 00921 "\n==================== Euclid report (end) ======================\n"); 00922 END_FUNC_DH} 00923 00924 00925 /* nzA ratio and rho refer to most recent solve, if more than 00926 one solve (call to Setup) was performed. Other stats 00927 are cumulative. 00928 */ 00929 #undef __FUNC__ 00930 #define __FUNC__ "Euclid_dhPrintStatsShort" 00931 void 00932 Euclid_dhPrintStatsShort (Euclid_dh ctx, double setup, double solve, 00933 FILE * fp) 00934 { 00935 START_FUNC_DH double *timing = ctx->timing; 00936 /* double *stats = ctx->stats; */ 00937 /* double setup_factor; */ 00938 /* double setup_other; */ 00939 double apply_total; 00940 double apply_per_it; 00941 /* double nzUsedRatio; */ 00942 double perIt; 00943 int blocks = np_dh; 00944 00945 if (np_dh == 1) 00946 blocks = ctx->sg->blocks; 00947 00948 reduce_timings_private (ctx); 00949 CHECK_V_ERROR; 00950 00951 /* setup_factor = timing[FACTOR_T]; */ 00952 /* setup_other = timing[SETUP_T] - setup_factor; */ 00953 apply_total = timing[TRI_SOLVE_T]; 00954 apply_per_it = apply_total / (double) ctx->its; 00955 /* nzUsedRatio = stats[NZA_RATIO_STATS]; */ 00956 perIt = solve / (double) ctx->its; 00957 00958 fprintf_dh (fp, "\n"); 00959 fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n", 00960 "method", "subdms", "level", "its", "setup", "solve", "total", 00961 "perIt", "perIt", "rows"); 00962 fprintf_dh (fp, 00963 "------ ----- ----- ----- ----- ----- ----- ----- ----- ----- XX\n"); 00964 fprintf_dh (fp, "%6s %6i %6i %6i %6.2f %6.2f %6.2f %6.4f %6.5f %6g XXX\n", ctx->algo_par, /* parallelization strategy [pilu, bj] */ 00965 blocks, /* number of subdomains */ 00966 ctx->level, /* level, for ILU(k) */ 00967 ctx->its, /* iterations */ 00968 setup, /* total setup time, from caller */ 00969 solve, /* total setup time, from caller */ 00970 setup + solve, /* total time, from caller */ 00971 perIt, /* time per iteration, solver+precond. */ 00972 apply_per_it, /* time per iteration, solver+precond. */ 00973 (double) ctx->n /* global unknnowns */ 00974 ); 00975 00976 00977 00978 00979 #if 0 00980 fprintf_dh (fp, "\n"); 00981 fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n", 00982 "", "", "", "", "", "setup", "setup", "", "", "", "", "", ""); 00983 00984 fprintf_dh (fp, "%6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s XX\n", 00985 "method", "subdms", "level", "its", "total", "factor", 00986 "other", "apply", "perIt", "rho", "A_tol", "A_%", "rows"); 00987 fprintf_dh (fp, 00988 "------ ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- XX\n"); 00989 00990 00991 fprintf_dh (fp, "%6s %6i %6i %6i %6.2f %6.2f %6.2f %6.2f %6.4f %6.1f %6g %6.2f %6g XXX\n", ctx->algo_par, /* parallelization strategy [pilu, bj] */ 00992 blocks, /* number of subdomains */ 00993 ctx->level, /* level, for ILU(k) */ 00994 ctx->its, /* iterations */ 00995 setup, /* total setup time, from caller */ 00996 solve, /* total setup time, from caller */ 00997 setup_factor, /* pc solve: factorization */ 00998 setup_other, /* pc setup: other */ 00999 apply_total, /* triangular solve time */ 01000 apply_per_it, /* time for one triangular solve */ 01001 ctx->rho_final, /* rho */ 01002 ctx->sparseTolA, /* sparseA tolerance */ 01003 nzUsedRatio, /* percent of A that was used */ 01004 (double) ctx->n /* global unknnowns */ 01005 ); 01006 #endif 01007 01008 #if 0 01009 /* special: for scalability studies */ 01010 fprintf_dh (fp, "\n%6s %6s %6s %6s %6s %6s WW\n", "method", "level", 01011 "subGph", "factor", "solveS", "perIt"); 01012 fprintf_dh (fp, "------ ----- ----- ----- ----- ----- WW\n"); 01013 fprintf_dh (fp, "%6s %6i %6.2f %6.2f %6.2f %6.4f WWW\n", 01014 ctx->algo_par, 01015 ctx->level, 01016 timing[SUB_GRAPH_T], 01017 timing[FACTOR_T], timing[SOLVE_SETUP_T], apply_per_it); 01018 #endif 01019 END_FUNC_DH} 01020 01021 01022 /* its during last solve; rho; nzaUsed */ 01023 #undef __FUNC__ 01024 #define __FUNC__ "Euclid_dhPrintStatsShorter" 01025 void 01026 Euclid_dhPrintStatsShorter (Euclid_dh ctx, FILE * fp) 01027 { 01028 START_FUNC_DH double *stats = ctx->stats; 01029 01030 int its = ctx->its; 01031 double rho = ctx->rho_final; 01032 double nzUsedRatio = stats[NZA_RATIO_STATS]; 01033 01034 01035 fprintf_dh (fp, "\nStats from last linear solve: YY\n"); 01036 fprintf_dh (fp, "%6s %6s %6s YY\n", "its", "rho", "A_%"); 01037 fprintf_dh (fp, " ----- ----- ----- YY\n"); 01038 fprintf_dh (fp, "%6i %6.2f %6.2f YYY\n", its, rho, nzUsedRatio); 01039 01040 END_FUNC_DH} 01041 01042 #undef __FUNC__ 01043 #define __FUNC__ "Euclid_dhPrintScaling" 01044 void 01045 Euclid_dhPrintScaling (Euclid_dh ctx, FILE * fp) 01046 { 01047 START_FUNC_DH int i, m = ctx->m; 01048 01049 if (m > 10) 01050 m = 10; 01051 01052 if (ctx->scale == NULL) 01053 { 01054 SET_V_ERROR ("ctx->scale is NULL; was Euclid_dhSetup() called?"); 01055 } 01056 01057 fprintf (fp, "\n---------- 1st %i row scaling values:\n", m); 01058 for (i = 0; i < m; ++i) 01059 { 01060 fprintf (fp, " %i %g \n", i + 1, ctx->scale[i]); 01061 } 01062 END_FUNC_DH} 01063 01064 01065 #undef __FUNC__ 01066 #define __FUNC__ "reduce_timings_private" 01067 void 01068 reduce_timings_private (Euclid_dh ctx) 01069 { 01070 START_FUNC_DH if (np_dh > 1) 01071 { 01072 double bufOUT[TIMING_BINS]; 01073 01074 memcpy (bufOUT, ctx->timing, TIMING_BINS * sizeof (double)); 01075 MPI_Reduce (bufOUT, ctx->timing, TIMING_BINS, MPI_DOUBLE, MPI_MAX, 0, 01076 comm_dh); 01077 } 01078 01079 ctx->timingsWereReduced = true; 01080 END_FUNC_DH} 01081 01082 #undef __FUNC__ 01083 #define __FUNC__ "Euclid_dhPrintHypreReport" 01084 void 01085 Euclid_dhPrintHypreReport (Euclid_dh ctx, FILE * fp) 01086 { 01087 START_FUNC_DH double *timing; 01088 int nz; 01089 01090 nz = Factor_dhReadNz (ctx->F); 01091 CHECK_V_ERROR; 01092 timing = ctx->timing; 01093 01094 /* add in timing from lasst setup (if any) */ 01095 ctx->timing[TOTAL_SOLVE_T] += ctx->timing[TOTAL_SOLVE_TEMP_T]; 01096 ctx->timing[TOTAL_SOLVE_TEMP_T] = 0.0; 01097 01098 reduce_timings_private (ctx); 01099 CHECK_V_ERROR; 01100 01101 if (myid_dh == 0) 01102 { 01103 01104 fprintf (fp, 01105 "@@@@@@@@@@@@@@@@@@@@@@ Euclid statistical report (start)\n"); 01106 fprintf_dh (fp, "\nruntime parameters\n"); 01107 fprintf_dh (fp, "------------------\n"); 01108 fprintf_dh (fp, " setups: %i\n", ctx->setupCount); 01109 fprintf_dh (fp, " tri solves: %i\n", ctx->itsTotal); 01110 fprintf_dh (fp, " parallelization method: %s\n", ctx->algo_par); 01111 fprintf_dh (fp, " factorization method: %s\n", ctx->algo_ilu); 01112 if (!strcmp (ctx->algo_ilu, "iluk")) 01113 { 01114 fprintf_dh (fp, " level: %i\n", ctx->level); 01115 } 01116 01117 if (ctx->isScaled) 01118 { 01119 fprintf_dh (fp, " matrix was row scaled\n"); 01120 } 01121 01122 fprintf_dh (fp, " global matrix row count: %i\n", ctx->n); 01123 fprintf_dh (fp, " nzF: %i\n", nz); 01124 fprintf_dh (fp, " rho: %g\n", ctx->rho_final); 01125 fprintf_dh (fp, " sparseA: %g\n", ctx->sparseTolA); 01126 01127 fprintf_dh (fp, "\nEuclid timing report\n"); 01128 fprintf_dh (fp, "--------------------\n"); 01129 fprintf_dh (fp, " solves total: %0.2f (see docs)\n", 01130 timing[TOTAL_SOLVE_T]); 01131 fprintf_dh (fp, " tri solves: %0.2f\n", timing[TRI_SOLVE_T]); 01132 fprintf_dh (fp, " setups: %0.2f\n", timing[SETUP_T]); 01133 fprintf_dh (fp, " subdomain graph setup: %0.2f\n", 01134 timing[SUB_GRAPH_T]); 01135 fprintf_dh (fp, " factorization: %0.2f\n", 01136 timing[FACTOR_T]); 01137 fprintf_dh (fp, " solve setup: %0.2f\n", 01138 timing[SOLVE_SETUP_T]); 01139 fprintf_dh (fp, " rho: %0.2f\n", 01140 ctx->timing[COMPUTE_RHO_T]); 01141 fprintf_dh (fp, " misc (should be small): %0.2f\n", 01142 timing[SETUP_T] - (timing[SUB_GRAPH_T] + timing[FACTOR_T] + 01143 timing[SOLVE_SETUP_T] + 01144 timing[COMPUTE_RHO_T])); 01145 01146 if (ctx->sg != NULL) 01147 { 01148 SubdomainGraph_dhPrintStats (ctx->sg, fp); 01149 CHECK_V_ERROR; 01150 SubdomainGraph_dhPrintRatios (ctx->sg, fp); 01151 CHECK_V_ERROR; 01152 } 01153 01154 fprintf (fp, 01155 "@@@@@@@@@@@@@@@@@@@@@@ Euclid statistical report (end)\n"); 01156 01157 } 01158 01159 END_FUNC_DH} 01160 01161 #undef __FUNC__ 01162 #define __FUNC__ "Euclid_dhPrintTestData" 01163 void 01164 Euclid_dhPrintTestData (Euclid_dh ctx, FILE * fp) 01165 { 01166 START_FUNC_DH 01167 /* Print data that should remain that will hopefully 01168 remain the same for any platform. 01169 Possibly "tri solves" may change . . . 01170 */ 01171 if (myid_dh == 0) 01172 { 01173 fprintf (fp, " setups: %i\n", ctx->setupCount); 01174 fprintf (fp, " tri solves: %i\n", ctx->its); 01175 fprintf (fp, " parallelization method: %s\n", ctx->algo_par); 01176 fprintf (fp, " factorization method: %s\n", ctx->algo_ilu); 01177 fprintf (fp, " level: %i\n", ctx->level); 01178 fprintf (fp, " row scaling: %i\n", ctx->isScaled); 01179 } 01180 SubdomainGraph_dhPrintRatios (ctx->sg, fp); 01181 CHECK_V_ERROR; 01182 END_FUNC_DH}
1.7.4