|
IFPACK Development
|
00001 /*@HEADER 00002 // *********************************************************************** 00003 // 00004 // Ifpack: Object-Oriented Algebraic Preconditioner Package 00005 // Copyright (2009) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 //@HEADER 00028 */ 00029 00030 #include "mat_dh_private.h" 00031 #include "Parser_dh.h" 00032 #include "Hash_i_dh.h" 00033 #include "Mat_dh.h" 00034 #include "Mem_dh.h" 00035 #include "Vec_dh.h" 00036 00037 #define IS_UPPER_TRI 97 00038 #define IS_LOWER_TRI 98 00039 #define IS_FULL 99 00040 static int isTriangular (int m, int *rp, int *cval); 00041 00042 /* Instantiates Aout; allocates storage for rp, cval, and aval arrays; 00043 uses rowLengths[] and rowToBlock[] data to fill in rp[]. 00044 */ 00045 static void mat_par_read_allocate_private (Mat_dh * Aout, int n, 00046 int *rowLengths, int *rowToBlock); 00047 00048 /* Currently, divides (partitions)matrix by contiguous sections of rows. 00049 For future expansion: use metis. 00050 */ 00051 void mat_partition_private (Mat_dh A, int blocks, int *o2n_row, 00052 int *rowToBlock); 00053 00054 00055 static void convert_triples_to_scr_private (int m, int nz, 00056 int *I, int *J, double *A, 00057 int *rp, int *cval, double *aval); 00058 00059 #if 0 00060 #undef __FUNC__ 00061 #define __FUNC__ "mat_dh_print_graph_private" 00062 void 00063 mat_dh_print_graph_private (int m, int beg_row, int *rp, int *cval, 00064 double *aval, int *n2o, int *o2n, Hash_i_dh hash, 00065 FILE * fp) 00066 { 00067 START_FUNC_DH int i, j, row, col; 00068 double val; 00069 bool private_n2o = false; 00070 bool private_hash = false; 00071 00072 if (n2o == NULL) 00073 { 00074 private_n2o = true; 00075 create_nat_ordering_private (m, &n2o); 00076 CHECK_V_ERROR; 00077 create_nat_ordering_private (m, &o2n); 00078 CHECK_V_ERROR; 00079 } 00080 00081 if (hash == NULL) 00082 { 00083 private_hash = true; 00084 Hash_i_dhCreate (&hash, -1); 00085 CHECK_V_ERROR; 00086 } 00087 00088 for (i = 0; i < m; ++i) 00089 { 00090 row = n2o[i]; 00091 for (j = rp[row]; j < rp[row + 1]; ++j) 00092 { 00093 col = cval[j]; 00094 if (col < beg_row || col >= beg_row + m) 00095 { 00096 int tmp = col; 00097 00098 /* nonlocal column: get permutation from hash table */ 00099 tmp = Hash_i_dhLookup (hash, col); 00100 CHECK_V_ERROR; 00101 if (tmp == -1) 00102 { 00103 sprintf (msgBuf_dh, 00104 "beg_row= %i m= %i; nonlocal column= %i not in hash table", 00105 beg_row, m, col); 00106 SET_V_ERROR (msgBuf_dh); 00107 } 00108 else 00109 { 00110 col = tmp; 00111 } 00112 } 00113 else 00114 { 00115 col = o2n[col]; 00116 } 00117 00118 if (aval == NULL) 00119 { 00120 val = _MATLAB_ZERO_; 00121 } 00122 else 00123 { 00124 val = aval[j]; 00125 } 00126 fprintf (fp, "%i %i %g\n", 1 + row + beg_row, 1 + col, val); 00127 } 00128 } 00129 00130 if (private_n2o) 00131 { 00132 destroy_nat_ordering_private (n2o); 00133 CHECK_V_ERROR; 00134 destroy_nat_ordering_private (o2n); 00135 CHECK_V_ERROR; 00136 } 00137 00138 if (private_hash) 00139 { 00140 Hash_i_dhDestroy (hash); 00141 CHECK_V_ERROR; 00142 } 00143 END_FUNC_DH} 00144 00145 #endif 00146 00147 00148 /* currently only for unpermuted */ 00149 #undef __FUNC__ 00150 #define __FUNC__ "mat_dh_print_graph_private" 00151 void 00152 mat_dh_print_graph_private (int m, int beg_row, int *rp, int *cval, 00153 double *aval, int *n2o, int *o2n, Hash_i_dh hash, 00154 FILE * fp) 00155 { 00156 START_FUNC_DH int i, j, row, col; 00157 bool private_n2o = false; 00158 bool private_hash = false; 00159 int *work = NULL; 00160 00161 work = (int *) MALLOC_DH (m * sizeof (int)); 00162 CHECK_V_ERROR; 00163 00164 if (n2o == NULL) 00165 { 00166 private_n2o = true; 00167 create_nat_ordering_private (m, &n2o); 00168 CHECK_V_ERROR; 00169 create_nat_ordering_private (m, &o2n); 00170 CHECK_V_ERROR; 00171 } 00172 00173 if (hash == NULL) 00174 { 00175 private_hash = true; 00176 Hash_i_dhCreate (&hash, -1); 00177 CHECK_V_ERROR; 00178 } 00179 00180 for (i = 0; i < m; ++i) 00181 { 00182 for (j = 0; j < m; ++j) 00183 work[j] = 0; 00184 row = n2o[i]; 00185 for (j = rp[row]; j < rp[row + 1]; ++j) 00186 { 00187 col = cval[j]; 00188 00189 /* local column */ 00190 if (col >= beg_row || col < beg_row + m) 00191 { 00192 col = o2n[col]; 00193 } 00194 00195 /* nonlocal column: get permutation from hash table */ 00196 else 00197 { 00198 int tmp = col; 00199 00200 tmp = Hash_i_dhLookup (hash, col); 00201 CHECK_V_ERROR; 00202 if (tmp == -1) 00203 { 00204 sprintf (msgBuf_dh, 00205 "beg_row= %i m= %i; nonlocal column= %i not in hash table", 00206 beg_row, m, col); 00207 SET_V_ERROR (msgBuf_dh); 00208 } 00209 else 00210 { 00211 col = tmp; 00212 } 00213 } 00214 00215 work[col] = 1; 00216 } 00217 00218 for (j = 0; j < m; ++j) 00219 { 00220 if (work[j]) 00221 { 00222 fprintf (fp, " x "); 00223 } 00224 else 00225 { 00226 fprintf (fp, " "); 00227 } 00228 } 00229 fprintf (fp, "\n"); 00230 } 00231 00232 if (private_n2o) 00233 { 00234 destroy_nat_ordering_private (n2o); 00235 CHECK_V_ERROR; 00236 destroy_nat_ordering_private (o2n); 00237 CHECK_V_ERROR; 00238 } 00239 00240 if (private_hash) 00241 { 00242 Hash_i_dhDestroy (hash); 00243 CHECK_V_ERROR; 00244 } 00245 00246 if (work != NULL) 00247 { 00248 FREE_DH (work); 00249 CHECK_V_ERROR; 00250 } 00251 END_FUNC_DH} 00252 00253 #undef __FUNC__ 00254 #define __FUNC__ "create_nat_ordering_private" 00255 void 00256 create_nat_ordering_private (int m, int **p) 00257 { 00258 START_FUNC_DH int *tmp, i; 00259 00260 tmp = *p = (int *) MALLOC_DH (m * sizeof (int)); 00261 CHECK_V_ERROR; 00262 for (i = 0; i < m; ++i) 00263 { 00264 tmp[i] = i; 00265 } 00266 END_FUNC_DH} 00267 00268 #undef __FUNC__ 00269 #define __FUNC__ "destroy_nat_ordering_private" 00270 void 00271 destroy_nat_ordering_private (int *p) 00272 { 00273 START_FUNC_DH FREE_DH (p); 00274 CHECK_V_ERROR; 00275 END_FUNC_DH} 00276 00277 00278 #undef __FUNC__ 00279 #define __FUNC__ "invert_perm" 00280 void 00281 invert_perm (int m, int *pIN, int *pOUT) 00282 { 00283 START_FUNC_DH int i; 00284 00285 for (i = 0; i < m; ++i) 00286 pOUT[pIN[i]] = i; 00287 END_FUNC_DH} 00288 00289 00290 00291 /* only implemented for a single cpu! */ 00292 #undef __FUNC__ 00293 #define __FUNC__ "mat_dh_print_csr_private" 00294 void 00295 mat_dh_print_csr_private (int m, int *rp, int *cval, double *aval, FILE * fp) 00296 { 00297 START_FUNC_DH int i, nz = rp[m]; 00298 00299 /* print header line */ 00300 fprintf (fp, "%i %i\n", m, rp[m]); 00301 00302 /* print rp[] */ 00303 for (i = 0; i <= m; ++i) 00304 fprintf (fp, "%i ", rp[i]); 00305 fprintf (fp, "\n"); 00306 00307 /* print cval[] */ 00308 for (i = 0; i < nz; ++i) 00309 fprintf (fp, "%i ", cval[i]); 00310 fprintf (fp, "\n"); 00311 00312 /* print aval[] */ 00313 for (i = 0; i < nz; ++i) 00314 fprintf (fp, "%1.19e ", aval[i]); 00315 fprintf (fp, "\n"); 00316 00317 END_FUNC_DH} 00318 00319 00320 /* only implemented for a single cpu! */ 00321 #undef __FUNC__ 00322 #define __FUNC__ "mat_dh_read_csr_private" 00323 void 00324 mat_dh_read_csr_private (int *mOUT, int **rpOUT, int **cvalOUT, 00325 double **avalOUT, FILE * fp) 00326 { 00327 START_FUNC_DH int i, m, nz, items; 00328 int *rp, *cval; 00329 double *aval; 00330 00331 /* read header line */ 00332 items = fscanf (fp, "%d %d", &m, &nz); 00333 if (items != 2) 00334 { 00335 SET_V_ERROR ("failed to read header"); 00336 } 00337 else 00338 { 00339 printf ("mat_dh_read_csr_private:: m= %i nz= %i\n", m, nz); 00340 } 00341 00342 *mOUT = m; 00343 rp = *rpOUT = (int *) MALLOC_DH ((m + 1) * sizeof (int)); 00344 CHECK_V_ERROR; 00345 cval = *cvalOUT = (int *) MALLOC_DH (nz * sizeof (int)); 00346 CHECK_V_ERROR; 00347 aval = *avalOUT = (double *) MALLOC_DH (nz * sizeof (double)); 00348 CHECK_V_ERROR; 00349 00350 /* read rp[] block */ 00351 for (i = 0; i <= m; ++i) 00352 { 00353 items = fscanf (fp, "%d", &(rp[i])); 00354 if (items != 1) 00355 { 00356 sprintf (msgBuf_dh, "failed item %i of %i in rp block", i, m + 1); 00357 SET_V_ERROR (msgBuf_dh); 00358 } 00359 } 00360 00361 /* read cval[] block */ 00362 for (i = 0; i < nz; ++i) 00363 { 00364 items = fscanf (fp, "%d", &(cval[i])); 00365 if (items != 1) 00366 { 00367 sprintf (msgBuf_dh, "failed item %i of %i in cval block", i, m + 1); 00368 SET_V_ERROR (msgBuf_dh); 00369 } 00370 } 00371 00372 /* read aval[] block */ 00373 for (i = 0; i < nz; ++i) 00374 { 00375 items = fscanf (fp, "%lg", &(aval[i])); 00376 if (items != 1) 00377 { 00378 sprintf (msgBuf_dh, "failed item %i of %i in aval block", i, m + 1); 00379 SET_V_ERROR (msgBuf_dh); 00380 } 00381 } 00382 END_FUNC_DH} 00383 00384 /*============================================*/ 00385 #define MAX_JUNK 200 00386 00387 #undef __FUNC__ 00388 #define __FUNC__ "mat_dh_read_triples_private" 00389 void 00390 mat_dh_read_triples_private (int ignore, int *mOUT, int **rpOUT, 00391 int **cvalOUT, double **avalOUT, FILE * fp) 00392 { 00393 START_FUNC_DH int m, n, nz, items, i, j; 00394 int idx = 0; 00395 int *cval, *rp, *I, *J; 00396 double *aval, *A, v; 00397 char junk[MAX_JUNK]; 00398 fpos_t fpos; 00399 00400 /* skip over header */ 00401 if (ignore && myid_dh == 0) 00402 { 00403 printf 00404 ("mat_dh_read_triples_private:: ignoring following header lines:\n"); 00405 printf 00406 ("--------------------------------------------------------------\n"); 00407 for (i = 0; i < ignore; ++i) 00408 { 00409 fgets (junk, MAX_JUNK, fp); 00410 printf ("%s", junk); 00411 } 00412 printf 00413 ("--------------------------------------------------------------\n"); 00414 if (fgetpos (fp, &fpos)) 00415 SET_V_ERROR ("fgetpos failed!"); 00416 printf ("\nmat_dh_read_triples_private::1st two non-ignored lines:\n"); 00417 printf 00418 ("--------------------------------------------------------------\n"); 00419 for (i = 0; i < 2; ++i) 00420 { 00421 fgets (junk, MAX_JUNK, fp); 00422 printf ("%s", junk); 00423 } 00424 printf 00425 ("--------------------------------------------------------------\n"); 00426 if (fsetpos (fp, &fpos)) 00427 SET_V_ERROR ("fsetpos failed!"); 00428 } 00429 00430 00431 if (feof (fp)) 00432 printf ("trouble!"); 00433 00434 /* determine matrix dimensions */ 00435 m = n = nz = 0; 00436 while (!feof (fp)) 00437 { 00438 items = fscanf (fp, "%d %d %lg", &i, &j, &v); 00439 if (items != 3) 00440 { 00441 break; 00442 } 00443 ++nz; 00444 if (i > m) 00445 m = i; 00446 if (j > n) 00447 n = j; 00448 } 00449 00450 if (myid_dh == 0) 00451 { 00452 printf ("mat_dh_read_triples_private: m= %i nz= %i\n", m, nz); 00453 } 00454 00455 00456 /* reset file, and skip over header again */ 00457 rewind (fp); 00458 for (i = 0; i < ignore; ++i) 00459 { 00460 fgets (junk, MAX_JUNK, fp); 00461 } 00462 00463 /* error check for squareness */ 00464 if (m != n) 00465 { 00466 sprintf (msgBuf_dh, "matrix is not square; row= %i, cols= %i", m, n); 00467 SET_V_ERROR (msgBuf_dh); 00468 } 00469 00470 *mOUT = m; 00471 00472 /* allocate storage */ 00473 rp = *rpOUT = (int *) MALLOC_DH ((m + 1) * sizeof (int)); 00474 CHECK_V_ERROR; 00475 cval = *cvalOUT = (int *) MALLOC_DH (nz * sizeof (int)); 00476 CHECK_V_ERROR; 00477 aval = *avalOUT = (double *) MALLOC_DH (nz * sizeof (double)); 00478 CHECK_V_ERROR; 00479 00480 I = (int *) MALLOC_DH (nz * sizeof (int)); 00481 CHECK_V_ERROR; 00482 J = (int *) MALLOC_DH (nz * sizeof (int)); 00483 CHECK_V_ERROR; 00484 A = (double *) MALLOC_DH (nz * sizeof (double)); 00485 CHECK_V_ERROR; 00486 00487 /* read <row, col, value> triples into arrays */ 00488 while (!feof (fp)) 00489 { 00490 items = fscanf (fp, "%d %d %lg", &i, &j, &v); 00491 if (items < 3) 00492 break; 00493 j--; 00494 i--; 00495 I[idx] = i; 00496 J[idx] = j; 00497 A[idx] = v; 00498 ++idx; 00499 } 00500 00501 /* convert from triples to sparse-compressed-row storage */ 00502 convert_triples_to_scr_private (m, nz, I, J, A, rp, cval, aval); 00503 CHECK_V_ERROR; 00504 00505 /* if matrix is triangular */ 00506 { 00507 int type; 00508 type = isTriangular (m, rp, cval); 00509 CHECK_V_ERROR; 00510 if (type == IS_UPPER_TRI) 00511 { 00512 printf ("CAUTION: matrix is upper triangular; converting to full\n"); 00513 } 00514 else if (type == IS_LOWER_TRI) 00515 { 00516 printf ("CAUTION: matrix is lower triangular; converting to full\n"); 00517 } 00518 00519 if (type == IS_UPPER_TRI || type == IS_LOWER_TRI) 00520 { 00521 make_full_private (m, &rp, &cval, &aval); 00522 CHECK_V_ERROR; 00523 } 00524 } 00525 00526 *rpOUT = rp; 00527 *cvalOUT = cval; 00528 *avalOUT = aval; 00529 00530 FREE_DH (I); 00531 CHECK_V_ERROR; 00532 FREE_DH (J); 00533 CHECK_V_ERROR; 00534 FREE_DH (A); 00535 CHECK_V_ERROR; 00536 00537 END_FUNC_DH} 00538 00539 #undef __FUNC__ 00540 #define __FUNC__ "convert_triples_to_scr_private" 00541 void 00542 convert_triples_to_scr_private (int m, int nz, int *I, int *J, double *A, 00543 int *rp, int *cval, double *aval) 00544 { 00545 START_FUNC_DH int i; 00546 int *rowCounts; 00547 00548 rowCounts = (int *) MALLOC_DH ((m + 1) * sizeof (int)); 00549 CHECK_V_ERROR; 00550 for (i = 0; i < m; ++i) 00551 rowCounts[i] = 0; 00552 00553 /* count number of entries in each row */ 00554 for (i = 0; i < nz; ++i) 00555 { 00556 int row = I[i]; 00557 rowCounts[row] += 1; 00558 } 00559 00560 /* prefix-sum to form rp[] */ 00561 rp[0] = 0; 00562 for (i = 1; i <= m; ++i) 00563 { 00564 rp[i] = rp[i - 1] + rowCounts[i - 1]; 00565 } 00566 memcpy (rowCounts, rp, (m + 1) * sizeof (int)); 00567 00568 /* write SCR arrays */ 00569 for (i = 0; i < nz; ++i) 00570 { 00571 int row = I[i]; 00572 int col = J[i]; 00573 double val = A[i]; 00574 int idx = rowCounts[row]; 00575 rowCounts[row] += 1; 00576 00577 cval[idx] = col; 00578 aval[idx] = val; 00579 } 00580 00581 00582 FREE_DH (rowCounts); 00583 CHECK_V_ERROR; 00584 END_FUNC_DH} 00585 00586 00587 /*====================================================================== 00588 * utilities for use in drivers that read, write, convert, and/or 00589 * compare different file types 00590 *======================================================================*/ 00591 00592 void fix_diags_private (Mat_dh A); 00593 void insert_missing_diags_private (Mat_dh A); 00594 00595 #undef __FUNC__ 00596 #define __FUNC__ "readMat" 00597 void 00598 readMat (Mat_dh * Aout, char *ft, char *fn, int ignore) 00599 { 00600 START_FUNC_DH bool makeStructurallySymmetric; 00601 bool fixDiags; 00602 *Aout = NULL; 00603 00604 makeStructurallySymmetric = 00605 Parser_dhHasSwitch (parser_dh, "-makeSymmetric"); 00606 fixDiags = Parser_dhHasSwitch (parser_dh, "-fixDiags"); 00607 00608 if (fn == NULL) 00609 { 00610 SET_V_ERROR ("passed NULL filename; can't open for reading!"); 00611 } 00612 00613 if (!strcmp (ft, "csr")) 00614 { 00615 Mat_dhReadCSR (Aout, fn); 00616 CHECK_V_ERROR; 00617 } 00618 00619 else if (!strcmp (ft, "trip")) 00620 { 00621 Mat_dhReadTriples (Aout, ignore, fn); 00622 CHECK_V_ERROR; 00623 } 00624 00625 else if (!strcmp (ft, "ebin")) 00626 { 00627 Mat_dhReadBIN (Aout, fn); 00628 CHECK_V_ERROR; 00629 } 00630 00631 else if (!strcmp (ft, "petsc")) 00632 { 00633 sprintf (msgBuf_dh, "must recompile Euclid using petsc mode!"); 00634 SET_V_ERROR (msgBuf_dh); 00635 } 00636 00637 else 00638 { 00639 sprintf (msgBuf_dh, "unknown filetype: -ftin %s", ft); 00640 SET_V_ERROR (msgBuf_dh); 00641 } 00642 00643 if (makeStructurallySymmetric) 00644 { 00645 printf ("\npadding with zeros to make structurally symmetric\n"); 00646 Mat_dhMakeStructurallySymmetric (*Aout); 00647 CHECK_V_ERROR; 00648 } 00649 00650 if ((*Aout)->m == 0) 00651 { 00652 SET_V_ERROR ("row count = 0; something's wrong!"); 00653 } 00654 00655 if (fixDiags) 00656 { 00657 fix_diags_private (*Aout); 00658 CHECK_V_ERROR; 00659 } 00660 00661 END_FUNC_DH} 00662 00663 00664 #undef __FUNC__ 00665 #define __FUNC__ "fix_diags_private" 00666 void 00667 fix_diags_private (Mat_dh A) 00668 { 00669 START_FUNC_DH int i, j, m = A->m, *rp = A->rp, *cval = A->cval; 00670 double *aval = A->aval; 00671 bool insertDiags = false; 00672 00673 /* verify that all diagonals are present */ 00674 for (i = 0; i < m; ++i) 00675 { 00676 bool isMissing = true; 00677 for (j = rp[i]; j < rp[i + 1]; ++j) 00678 { 00679 if (cval[j] == i) 00680 { 00681 isMissing = false; 00682 break; 00683 } 00684 } 00685 if (isMissing) 00686 { 00687 insertDiags = true; 00688 break; 00689 } 00690 } 00691 00692 if (insertDiags) 00693 { 00694 insert_missing_diags_private (A); 00695 CHECK_V_ERROR; 00696 rp = A->rp; 00697 cval = A->cval; 00698 aval = A->aval; 00699 } 00700 00701 /* set value of all diags to largest absolute value in each row */ 00702 for (i = 0; i < m; ++i) 00703 { 00704 double sum = 0; 00705 for (j = rp[i]; j < rp[i + 1]; ++j) 00706 { 00707 sum = MAX (sum, fabs (aval[j])); 00708 } 00709 for (j = rp[i]; j < rp[i + 1]; ++j) 00710 { 00711 if (cval[j] == i) 00712 { 00713 aval[j] = sum; 00714 break; 00715 } 00716 } 00717 } 00718 00719 END_FUNC_DH} 00720 00721 #undef __FUNC__ 00722 #define __FUNC__ "insert_missing_diags_private" 00723 void 00724 insert_missing_diags_private (Mat_dh A) 00725 { 00726 START_FUNC_DH int *RP = A->rp, *CVAL = A->cval, m = A->m; 00727 int *rp, *cval; 00728 double *AVAL = A->aval, *aval; 00729 int i, j, nz = RP[m] + m; 00730 int idx = 0; 00731 00732 rp = A->rp = (int *) MALLOC_DH ((1 + m) * sizeof (int)); 00733 CHECK_V_ERROR; 00734 cval = A->cval = (int *) MALLOC_DH (nz * sizeof (int)); 00735 CHECK_V_ERROR; 00736 aval = A->aval = (double *) MALLOC_DH (nz * sizeof (double)); 00737 CHECK_V_ERROR; 00738 rp[0] = 0; 00739 00740 for (i = 0; i < m; ++i) 00741 { 00742 bool isMissing = true; 00743 for (j = RP[i]; j < RP[i + 1]; ++j) 00744 { 00745 cval[idx] = CVAL[j]; 00746 aval[idx] = AVAL[j]; 00747 ++idx; 00748 if (CVAL[j] == i) 00749 isMissing = false; 00750 } 00751 if (isMissing) 00752 { 00753 cval[idx] = i; 00754 aval[idx] = 0.0; 00755 ++idx; 00756 } 00757 rp[i + 1] = idx; 00758 } 00759 00760 FREE_DH (RP); 00761 CHECK_V_ERROR; 00762 FREE_DH (CVAL); 00763 CHECK_V_ERROR; 00764 FREE_DH (AVAL); 00765 CHECK_V_ERROR; 00766 END_FUNC_DH} 00767 00768 #undef __FUNC__ 00769 #define __FUNC__ "readVec" 00770 void 00771 readVec (Vec_dh * bout, char *ft, char *fn, int ignore) 00772 { 00773 START_FUNC_DH * bout = NULL; 00774 00775 if (fn == NULL) 00776 { 00777 SET_V_ERROR ("passed NULL filename; can't open for reading!"); 00778 } 00779 00780 if (!strcmp (ft, "csr") || !strcmp (ft, "trip")) 00781 { 00782 Vec_dhRead (bout, ignore, fn); 00783 CHECK_V_ERROR; 00784 } 00785 00786 else if (!strcmp (ft, "ebin")) 00787 { 00788 Vec_dhReadBIN (bout, fn); 00789 CHECK_V_ERROR; 00790 } 00791 00792 else if (!strcmp (ft, "petsc")) 00793 { 00794 sprintf (msgBuf_dh, "must recompile Euclid using petsc mode!"); 00795 SET_V_ERROR (msgBuf_dh); 00796 } 00797 00798 else 00799 { 00800 sprintf (msgBuf_dh, "unknown filetype: -ftin %s", ft); 00801 SET_V_ERROR (msgBuf_dh); 00802 } 00803 00804 END_FUNC_DH} 00805 00806 00807 #undef __FUNC__ 00808 #define __FUNC__ "writeMat" 00809 void 00810 writeMat (Mat_dh Ain, char *ft, char *fn) 00811 { 00812 START_FUNC_DH if (fn == NULL) 00813 { 00814 SET_V_ERROR ("passed NULL filename; can't open for writing!"); 00815 } 00816 00817 if (!strcmp (ft, "csr")) 00818 { 00819 Mat_dhPrintCSR (Ain, NULL, fn); 00820 CHECK_V_ERROR; 00821 } 00822 00823 else if (!strcmp (ft, "trip")) 00824 { 00825 Mat_dhPrintTriples (Ain, NULL, fn); 00826 CHECK_V_ERROR; 00827 } 00828 00829 else if (!strcmp (ft, "ebin")) 00830 { 00831 Mat_dhPrintBIN (Ain, NULL, fn); 00832 CHECK_V_ERROR; 00833 } 00834 00835 00836 else if (!strcmp (ft, "petsc")) 00837 { 00838 sprintf (msgBuf_dh, "must recompile Euclid using petsc mode!"); 00839 SET_V_ERROR (msgBuf_dh); 00840 } 00841 00842 else 00843 { 00844 sprintf (msgBuf_dh, "unknown filetype: -ftout %s", ft); 00845 SET_V_ERROR (msgBuf_dh); 00846 } 00847 00848 END_FUNC_DH} 00849 00850 #undef __FUNC__ 00851 #define __FUNC__ "writeVec" 00852 void 00853 writeVec (Vec_dh bin, char *ft, char *fn) 00854 { 00855 START_FUNC_DH if (fn == NULL) 00856 { 00857 SET_V_ERROR ("passed NULL filename; can't open for writing!"); 00858 } 00859 00860 if (!strcmp (ft, "csr") || !strcmp (ft, "trip")) 00861 { 00862 Vec_dhPrint (bin, NULL, fn); 00863 CHECK_V_ERROR; 00864 } 00865 00866 else if (!strcmp (ft, "ebin")) 00867 { 00868 Vec_dhPrintBIN (bin, NULL, fn); 00869 CHECK_V_ERROR; 00870 } 00871 00872 else if (!strcmp (ft, "petsc")) 00873 { 00874 sprintf (msgBuf_dh, "must recompile Euclid using petsc mode!"); 00875 SET_V_ERROR (msgBuf_dh); 00876 } 00877 00878 else 00879 { 00880 sprintf (msgBuf_dh, "unknown filetype: -ftout %s", ft); 00881 SET_V_ERROR (msgBuf_dh); 00882 } 00883 00884 END_FUNC_DH} 00885 00886 #undef __FUNC__ 00887 #define __FUNC__ "isTriangular" 00888 int 00889 isTriangular (int m, int *rp, int *cval) 00890 { 00891 START_FUNC_DH int row, j; 00892 int type; 00893 bool type_lower = false, type_upper = false; 00894 00895 if (np_dh > 1) 00896 { 00897 SET_ERROR (-1, "only implemented for a single cpu"); 00898 } 00899 00900 for (row = 0; row < m; ++row) 00901 { 00902 for (j = rp[row]; j < rp[row + 1]; ++j) 00903 { 00904 int col = cval[j]; 00905 if (col < row) 00906 type_lower = true; 00907 if (col > row) 00908 type_upper = true; 00909 } 00910 if (type_lower && type_upper) 00911 break; 00912 } 00913 00914 if (type_lower && type_upper) 00915 { 00916 type = IS_FULL; 00917 } 00918 else if (type_lower) 00919 { 00920 type = IS_LOWER_TRI; 00921 } 00922 else 00923 { 00924 type = IS_UPPER_TRI; 00925 } 00926 END_FUNC_VAL (type)} 00927 00928 /*-----------------------------------------------------------------------------------*/ 00929 00930 static void mat_dh_transpose_reuse_private_private (bool allocateMem, int m, 00931 int *rpIN, int *cvalIN, 00932 double *avalIN, 00933 int **rpOUT, 00934 int **cvalOUT, 00935 double **avalOUT); 00936 00937 00938 #undef __FUNC__ 00939 #define __FUNC__ "mat_dh_transpose_reuse_private" 00940 void 00941 mat_dh_transpose_reuse_private (int m, 00942 int *rpIN, int *cvalIN, double *avalIN, 00943 int *rpOUT, int *cvalOUT, double *avalOUT) 00944 { 00945 START_FUNC_DH 00946 mat_dh_transpose_reuse_private_private (false, m, rpIN, cvalIN, avalIN, 00947 &rpOUT, &cvalOUT, &avalOUT); 00948 CHECK_V_ERROR; 00949 END_FUNC_DH} 00950 00951 00952 #undef __FUNC__ 00953 #define __FUNC__ "mat_dh_transpose_private" 00954 void 00955 mat_dh_transpose_private (int m, int *RP, int **rpOUT, 00956 int *CVAL, int **cvalOUT, 00957 double *AVAL, double **avalOUT) 00958 { 00959 START_FUNC_DH 00960 mat_dh_transpose_reuse_private_private (true, m, RP, CVAL, AVAL, 00961 rpOUT, cvalOUT, avalOUT); 00962 CHECK_V_ERROR; 00963 END_FUNC_DH} 00964 00965 #undef __FUNC__ 00966 #define __FUNC__ "mat_dh_transpose_private_private" 00967 void 00968 mat_dh_transpose_reuse_private_private (bool allocateMem, int m, 00969 int *RP, int *CVAL, double *AVAL, 00970 int **rpOUT, int **cvalOUT, 00971 double **avalOUT) 00972 { 00973 START_FUNC_DH int *rp, *cval, *tmp; 00974 int i, j, nz = RP[m]; 00975 double *aval; 00976 00977 if (allocateMem) 00978 { 00979 rp = *rpOUT = (int *) MALLOC_DH ((1 + m) * sizeof (int)); 00980 CHECK_V_ERROR; 00981 cval = *cvalOUT = (int *) MALLOC_DH (nz * sizeof (int)); 00982 CHECK_V_ERROR; 00983 if (avalOUT != NULL) 00984 { 00985 aval = *avalOUT = (double *) MALLOC_DH (nz * sizeof (double)); 00986 CHECK_V_ERROR; 00987 } 00988 } 00989 else 00990 { 00991 rp = *rpOUT; 00992 cval = *cvalOUT; 00993 if (avalOUT != NULL) 00994 aval = *avalOUT; 00995 } 00996 00997 00998 tmp = (int *) MALLOC_DH ((1 + m) * sizeof (int)); 00999 CHECK_V_ERROR; 01000 for (i = 0; i <= m; ++i) 01001 tmp[i] = 0; 01002 01003 for (i = 0; i < m; ++i) 01004 { 01005 for (j = RP[i]; j < RP[i + 1]; ++j) 01006 { 01007 int col = CVAL[j]; 01008 tmp[col + 1] += 1; 01009 } 01010 } 01011 for (i = 1; i <= m; ++i) 01012 tmp[i] += tmp[i - 1]; 01013 memcpy (rp, tmp, (m + 1) * sizeof (int)); 01014 01015 if (avalOUT != NULL) 01016 { 01017 for (i = 0; i < m; ++i) 01018 { 01019 for (j = RP[i]; j < RP[i + 1]; ++j) 01020 { 01021 int col = CVAL[j]; 01022 int idx = tmp[col]; 01023 cval[idx] = i; 01024 aval[idx] = AVAL[j]; 01025 tmp[col] += 1; 01026 } 01027 } 01028 } 01029 01030 else 01031 { 01032 for (i = 0; i < m; ++i) 01033 { 01034 for (j = RP[i]; j < RP[i + 1]; ++j) 01035 { 01036 int col = CVAL[j]; 01037 int idx = tmp[col]; 01038 cval[idx] = i; 01039 tmp[col] += 1; 01040 } 01041 } 01042 } 01043 01044 FREE_DH (tmp); 01045 CHECK_V_ERROR; 01046 END_FUNC_DH} 01047 01048 /*-----------------------------------------------------------------------------------*/ 01049 01050 #undef __FUNC__ 01051 #define __FUNC__ "mat_find_owner" 01052 int 01053 mat_find_owner (int *beg_rows, int *end_rows, int index) 01054 { 01055 START_FUNC_DH int pe, owner = -1; 01056 01057 for (pe = 0; pe < np_dh; ++pe) 01058 { 01059 if (index >= beg_rows[pe] && index < end_rows[pe]) 01060 { 01061 owner = pe; 01062 break; 01063 } 01064 } 01065 01066 if (owner == -1) 01067 { 01068 sprintf (msgBuf_dh, "failed to find owner for index= %i", index); 01069 SET_ERROR (-1, msgBuf_dh); 01070 } 01071 01072 END_FUNC_VAL (owner)} 01073 01074 01075 #define AVAL_TAG 2 01076 #define CVAL_TAG 3 01077 void partition_and_distribute_private (Mat_dh A, Mat_dh * Bout); 01078 void partition_and_distribute_metis_private (Mat_dh A, Mat_dh * Bout); 01079 01080 #undef __FUNC__ 01081 #define __FUNC__ "readMat_par" 01082 void 01083 readMat_par (Mat_dh * Aout, char *fileType, char *fileName, int ignore) 01084 { 01085 START_FUNC_DH Mat_dh A = NULL; 01086 01087 if (myid_dh == 0) 01088 { 01089 int tmp = np_dh; 01090 np_dh = 1; 01091 readMat (&A, fileType, fileName, ignore); 01092 CHECK_V_ERROR; 01093 np_dh = tmp; 01094 } 01095 01096 if (np_dh == 1) 01097 { 01098 *Aout = A; 01099 } 01100 else 01101 { 01102 if (Parser_dhHasSwitch (parser_dh, "-metis")) 01103 { 01104 partition_and_distribute_metis_private (A, Aout); 01105 CHECK_V_ERROR; 01106 } 01107 else 01108 { 01109 partition_and_distribute_private (A, Aout); 01110 CHECK_V_ERROR; 01111 } 01112 } 01113 01114 if (np_dh > 1 && A != NULL) 01115 { 01116 Mat_dhDestroy (A); 01117 CHECK_V_ERROR; 01118 } 01119 01120 01121 if (Parser_dhHasSwitch (parser_dh, "-printMAT")) 01122 { 01123 char xname[] = "A", *name = xname; 01124 Parser_dhReadString (parser_dh, "-printMat", &name); 01125 Mat_dhPrintTriples (*Aout, NULL, name); 01126 CHECK_V_ERROR; 01127 printf_dh ("\n@@@ readMat_par: printed mat to %s\n\n", xname); 01128 } 01129 01130 01131 END_FUNC_DH} 01132 01133 /* this is bad code! */ 01134 #undef __FUNC__ 01135 #define __FUNC__ "partition_and_distribute_metis_private" 01136 void 01137 partition_and_distribute_metis_private (Mat_dh A, Mat_dh * Bout) 01138 { 01139 START_FUNC_DH Mat_dh B = NULL; 01140 Mat_dh C = NULL; 01141 int i, m; 01142 int *rowLengths = NULL; 01143 int *o2n_row = NULL, *n2o_col = NULL, *rowToBlock = NULL; 01144 int *beg_row = NULL, *row_count = NULL; 01145 MPI_Request *send_req = NULL; 01146 MPI_Request *rcv_req = NULL; 01147 MPI_Status *send_status = NULL; 01148 MPI_Status *rcv_status = NULL; 01149 01150 MPI_Barrier (comm_dh); 01151 printf_dh ("@@@ partitioning with metis\n"); 01152 01153 /* broadcast number of rows to all processors */ 01154 if (myid_dh == 0) 01155 m = A->m; 01156 MPI_Bcast (&m, 1, MPI_INT, 0, MPI_COMM_WORLD); 01157 01158 /* broadcast number of nonzeros in each row to all processors */ 01159 rowLengths = (int *) MALLOC_DH (m * sizeof (int)); 01160 CHECK_V_ERROR; 01161 rowToBlock = (int *) MALLOC_DH (m * sizeof (int)); 01162 CHECK_V_ERROR; 01163 01164 if (myid_dh == 0) 01165 { 01166 int *tmp = A->rp; 01167 for (i = 0; i < m; ++i) 01168 { 01169 rowLengths[i] = tmp[i + 1] - tmp[i]; 01170 } 01171 } 01172 MPI_Bcast (rowLengths, m, MPI_INT, 0, comm_dh); 01173 01174 /* partition matrix */ 01175 if (myid_dh == 0) 01176 { 01177 int idx = 0; 01178 int j; 01179 01180 /* partition and permute matrix */ 01181 Mat_dhPartition (A, np_dh, &beg_row, &row_count, &n2o_col, &o2n_row); 01182 ERRCHKA; 01183 Mat_dhPermute (A, n2o_col, &C); 01184 ERRCHKA; 01185 01186 /* form rowToBlock array */ 01187 for (i = 0; i < np_dh; ++i) 01188 { 01189 for (j = beg_row[i]; j < beg_row[i] + row_count[i]; ++j) 01190 { 01191 rowToBlock[idx++] = i; 01192 } 01193 } 01194 } 01195 01196 /* broadcast partitiioning information to all processors */ 01197 MPI_Bcast (rowToBlock, m, MPI_INT, 0, comm_dh); 01198 01199 /* allocate storage for local portion of matrix */ 01200 mat_par_read_allocate_private (&B, m, rowLengths, rowToBlock); 01201 CHECK_V_ERROR; 01202 01203 /* root sends each processor its portion of the matrix */ 01204 if (myid_dh == 0) 01205 { 01206 int *cval = C->cval, *rp = C->rp; 01207 double *aval = C->aval; 01208 send_req = (MPI_Request *) MALLOC_DH (2 * m * sizeof (MPI_Request)); 01209 CHECK_V_ERROR; 01210 send_status = (MPI_Status *) MALLOC_DH (2 * m * sizeof (MPI_Status)); 01211 CHECK_V_ERROR; 01212 for (i = 0; i < m; ++i) 01213 { 01214 int owner = rowToBlock[i]; 01215 int count = rp[i + 1] - rp[i]; 01216 01217 /* error check for empty row */ 01218 if (!count) 01219 { 01220 sprintf (msgBuf_dh, "row %i of %i is empty!", i + 1, m); 01221 SET_V_ERROR (msgBuf_dh); 01222 } 01223 01224 MPI_Isend (cval + rp[i], count, MPI_INT, owner, CVAL_TAG, comm_dh, 01225 send_req + 2 * i); 01226 MPI_Isend (aval + rp[i], count, MPI_DOUBLE, owner, AVAL_TAG, 01227 comm_dh, send_req + 2 * i + 1); 01228 } 01229 } 01230 01231 /* all processors receive their local rows */ 01232 { 01233 int *cval = B->cval; 01234 int *rp = B->rp; 01235 double *aval = B->aval; 01236 m = B->m; 01237 01238 rcv_req = (MPI_Request *) MALLOC_DH (2 * m * sizeof (MPI_Request)); 01239 CHECK_V_ERROR; 01240 rcv_status = (MPI_Status *) MALLOC_DH (2 * m * sizeof (MPI_Status)); 01241 CHECK_V_ERROR; 01242 01243 for (i = 0; i < m; ++i) 01244 { 01245 01246 /* error check for empty row */ 01247 int count = rp[i + 1] - rp[i]; 01248 if (!count) 01249 { 01250 sprintf (msgBuf_dh, "local row %i of %i is empty!", i + 1, m); 01251 SET_V_ERROR (msgBuf_dh); 01252 } 01253 01254 MPI_Irecv (cval + rp[i], count, MPI_INT, 0, CVAL_TAG, comm_dh, 01255 rcv_req + 2 * i); 01256 MPI_Irecv (aval + rp[i], count, MPI_DOUBLE, 0, AVAL_TAG, comm_dh, 01257 rcv_req + 2 * i + 1); 01258 } 01259 } 01260 01261 /* wait for all sends/receives to finish */ 01262 if (myid_dh == 0) 01263 { 01264 MPI_Waitall (m * 2, send_req, send_status); 01265 } 01266 MPI_Waitall (2 * B->m, rcv_req, rcv_status); 01267 01268 /* clean up */ 01269 if (rowLengths != NULL) 01270 { 01271 FREE_DH (rowLengths); 01272 CHECK_V_ERROR; 01273 } 01274 if (o2n_row != NULL) 01275 { 01276 FREE_DH (o2n_row); 01277 CHECK_V_ERROR; 01278 } 01279 if (n2o_col != NULL) 01280 { 01281 FREE_DH (n2o_col); 01282 CHECK_V_ERROR; 01283 } 01284 if (rowToBlock != NULL) 01285 { 01286 FREE_DH (rowToBlock); 01287 CHECK_V_ERROR; 01288 } 01289 if (send_req != NULL) 01290 { 01291 FREE_DH (send_req); 01292 CHECK_V_ERROR; 01293 } 01294 if (rcv_req != NULL) 01295 { 01296 FREE_DH (rcv_req); 01297 CHECK_V_ERROR; 01298 } 01299 if (send_status != NULL) 01300 { 01301 FREE_DH (send_status); 01302 CHECK_V_ERROR; 01303 } 01304 if (rcv_status != NULL) 01305 { 01306 FREE_DH (rcv_status); 01307 CHECK_V_ERROR; 01308 } 01309 if (beg_row != NULL) 01310 { 01311 FREE_DH (beg_row); 01312 CHECK_V_ERROR; 01313 } 01314 if (row_count != NULL) 01315 { 01316 FREE_DH (row_count); 01317 CHECK_V_ERROR; 01318 } 01319 if (C != NULL) 01320 { 01321 Mat_dhDestroy (C); 01322 ERRCHKA; 01323 } 01324 01325 *Bout = B; 01326 01327 END_FUNC_DH} 01328 01329 01330 #undef __FUNC__ 01331 #define __FUNC__ "partition_and_distribute_private" 01332 void 01333 partition_and_distribute_private (Mat_dh A, Mat_dh * Bout) 01334 { 01335 START_FUNC_DH Mat_dh B = NULL; 01336 int i, m; 01337 int *rowLengths = NULL; 01338 int *o2n_row = NULL, *n2o_col = NULL, *rowToBlock = NULL; 01339 MPI_Request *send_req = NULL; 01340 MPI_Request *rcv_req = NULL; 01341 MPI_Status *send_status = NULL; 01342 MPI_Status *rcv_status = NULL; 01343 01344 MPI_Barrier (comm_dh); 01345 01346 /* broadcast number of rows to all processors */ 01347 if (myid_dh == 0) 01348 m = A->m; 01349 MPI_Bcast (&m, 1, MPI_INT, 0, MPI_COMM_WORLD); 01350 01351 /* broadcast number of nonzeros in each row to all processors */ 01352 rowLengths = (int *) MALLOC_DH (m * sizeof (int)); 01353 CHECK_V_ERROR; 01354 if (myid_dh == 0) 01355 { 01356 int *tmp = A->rp; 01357 for (i = 0; i < m; ++i) 01358 { 01359 rowLengths[i] = tmp[i + 1] - tmp[i]; 01360 } 01361 } 01362 MPI_Bcast (rowLengths, m, MPI_INT, 0, comm_dh); 01363 01364 /* partition matrix */ 01365 rowToBlock = (int *) MALLOC_DH (m * sizeof (int)); 01366 CHECK_V_ERROR; 01367 01368 if (myid_dh == 0) 01369 { 01370 o2n_row = (int *) MALLOC_DH (m * sizeof (int)); 01371 CHECK_V_ERROR; 01372 mat_partition_private (A, np_dh, o2n_row, rowToBlock); 01373 CHECK_V_ERROR; 01374 } 01375 01376 /* broadcast partitiioning information to all processors */ 01377 MPI_Bcast (rowToBlock, m, MPI_INT, 0, comm_dh); 01378 01379 /* allocate storage for local portion of matrix */ 01380 mat_par_read_allocate_private (&B, m, rowLengths, rowToBlock); 01381 CHECK_V_ERROR; 01382 01383 /* root sends each processor its portion of the matrix */ 01384 if (myid_dh == 0) 01385 { 01386 int *cval = A->cval, *rp = A->rp; 01387 double *aval = A->aval; 01388 send_req = (MPI_Request *) MALLOC_DH (2 * m * sizeof (MPI_Request)); 01389 CHECK_V_ERROR; 01390 send_status = (MPI_Status *) MALLOC_DH (2 * m * sizeof (MPI_Status)); 01391 CHECK_V_ERROR; 01392 for (i = 0; i < m; ++i) 01393 { 01394 int owner = rowToBlock[i]; 01395 int count = rp[i + 1] - rp[i]; 01396 01397 /* error check for empty row */ 01398 if (!count) 01399 { 01400 sprintf (msgBuf_dh, "row %i of %i is empty!", i + 1, m); 01401 SET_V_ERROR (msgBuf_dh); 01402 } 01403 01404 MPI_Isend (cval + rp[i], count, MPI_INT, owner, CVAL_TAG, comm_dh, 01405 send_req + 2 * i); 01406 MPI_Isend (aval + rp[i], count, MPI_DOUBLE, owner, AVAL_TAG, 01407 comm_dh, send_req + 2 * i + 1); 01408 } 01409 } 01410 01411 /* all processors receive their local rows */ 01412 { 01413 int *cval = B->cval; 01414 int *rp = B->rp; 01415 double *aval = B->aval; 01416 m = B->m; 01417 01418 rcv_req = (MPI_Request *) MALLOC_DH (2 * m * sizeof (MPI_Request)); 01419 CHECK_V_ERROR; 01420 rcv_status = (MPI_Status *) MALLOC_DH (2 * m * sizeof (MPI_Status)); 01421 CHECK_V_ERROR; 01422 01423 for (i = 0; i < m; ++i) 01424 { 01425 01426 /* error check for empty row */ 01427 int count = rp[i + 1] - rp[i]; 01428 if (!count) 01429 { 01430 sprintf (msgBuf_dh, "local row %i of %i is empty!", i + 1, m); 01431 SET_V_ERROR (msgBuf_dh); 01432 } 01433 01434 MPI_Irecv (cval + rp[i], count, MPI_INT, 0, CVAL_TAG, comm_dh, 01435 rcv_req + 2 * i); 01436 MPI_Irecv (aval + rp[i], count, MPI_DOUBLE, 0, AVAL_TAG, comm_dh, 01437 rcv_req + 2 * i + 1); 01438 } 01439 } 01440 01441 /* wait for all sends/receives to finish */ 01442 if (myid_dh == 0) 01443 { 01444 MPI_Waitall (m * 2, send_req, send_status); 01445 } 01446 MPI_Waitall (2 * B->m, rcv_req, rcv_status); 01447 01448 /* clean up */ 01449 if (rowLengths != NULL) 01450 { 01451 FREE_DH (rowLengths); 01452 CHECK_V_ERROR; 01453 } 01454 if (o2n_row != NULL) 01455 { 01456 FREE_DH (o2n_row); 01457 CHECK_V_ERROR; 01458 } 01459 if (n2o_col != NULL) 01460 { 01461 FREE_DH (n2o_col); 01462 CHECK_V_ERROR; 01463 } 01464 if (rowToBlock != NULL) 01465 { 01466 FREE_DH (rowToBlock); 01467 CHECK_V_ERROR; 01468 } 01469 if (send_req != NULL) 01470 { 01471 FREE_DH (send_req); 01472 CHECK_V_ERROR; 01473 } 01474 if (rcv_req != NULL) 01475 { 01476 FREE_DH (rcv_req); 01477 CHECK_V_ERROR; 01478 } 01479 if (send_status != NULL) 01480 { 01481 FREE_DH (send_status); 01482 CHECK_V_ERROR; 01483 } 01484 if (rcv_status != NULL) 01485 { 01486 FREE_DH (rcv_status); 01487 CHECK_V_ERROR; 01488 } 01489 01490 *Bout = B; 01491 01492 END_FUNC_DH} 01493 01494 #undef __FUNC__ 01495 #define __FUNC__ "mat_par_read_allocate_private" 01496 void 01497 mat_par_read_allocate_private (Mat_dh * Aout, int n, int *rowLengths, 01498 int *rowToBlock) 01499 { 01500 START_FUNC_DH Mat_dh A; 01501 int i, m, nz, beg_row, *rp, idx; 01502 01503 Mat_dhCreate (&A); 01504 CHECK_V_ERROR; 01505 *Aout = A; 01506 A->n = n; 01507 01508 /* count number of rows owned by this processor */ 01509 m = 0; 01510 for (i = 0; i < n; ++i) 01511 { 01512 if (rowToBlock[i] == myid_dh) 01513 ++m; 01514 } 01515 A->m = m; 01516 01517 /* compute global numbering of first locally owned row */ 01518 beg_row = 0; 01519 for (i = 0; i < n; ++i) 01520 { 01521 if (rowToBlock[i] < myid_dh) 01522 ++beg_row; 01523 } 01524 A->beg_row = beg_row; 01525 01526 /* allocate storage for row-pointer array */ 01527 A->rp = rp = (int *) MALLOC_DH ((m + 1) * sizeof (int)); 01528 CHECK_V_ERROR; 01529 rp[0] = 0; 01530 01531 /* count number of nonzeros owned by this processor, and form rp array */ 01532 nz = 0; 01533 idx = 1; 01534 for (i = 0; i < n; ++i) 01535 { 01536 if (rowToBlock[i] == myid_dh) 01537 { 01538 nz += rowLengths[i]; 01539 rp[idx++] = nz; 01540 } 01541 } 01542 01543 /* allocate storage for column indices and values arrays */ 01544 A->cval = (int *) MALLOC_DH (nz * sizeof (int)); 01545 CHECK_V_ERROR; 01546 A->aval = (double *) MALLOC_DH (nz * sizeof (double)); 01547 CHECK_V_ERROR; 01548 END_FUNC_DH} 01549 01550 01551 #undef __FUNC__ 01552 #define __FUNC__ "mat_partition_private" 01553 void 01554 mat_partition_private (Mat_dh A, int blocks, int *o2n_row, int *rowToBlock) 01555 { 01556 START_FUNC_DH int i, j, n = A->n; 01557 int rpb = n / blocks; /* rows per block (except possibly last block) */ 01558 int idx = 0; 01559 01560 while (rpb * blocks < n) 01561 ++rpb; 01562 01563 if (rpb * (blocks - 1) == n) 01564 { 01565 --rpb; 01566 printf_dh ("adjusted rpb to: %i\n", rpb); 01567 } 01568 01569 for (i = 0; i < n; ++i) 01570 o2n_row[i] = i; 01571 01572 /* assign all rows to blocks, except for last block, which may 01573 contain less than "rpb" rows 01574 */ 01575 for (i = 0; i < blocks - 1; ++i) 01576 { 01577 for (j = 0; j < rpb; ++j) 01578 { 01579 rowToBlock[idx++] = i; 01580 } 01581 } 01582 01583 /* now deal with the last block in the partition */ 01584 i = blocks - 1; 01585 while (idx < n) 01586 rowToBlock[idx++] = i; 01587 01588 END_FUNC_DH} 01589 01590 01591 /* may produce incorrect result if input is not triangular! */ 01592 #undef __FUNC__ 01593 #define __FUNC__ "make_full_private" 01594 void 01595 make_full_private (int m, int **rpIN, int **cvalIN, double **avalIN) 01596 { 01597 START_FUNC_DH int i, j, *rpNew, *cvalNew, *rp = *rpIN, *cval = *cvalIN; 01598 double *avalNew, *aval = *avalIN; 01599 int nz, *rowCounts = NULL; 01600 01601 /* count the number of nonzeros in each row */ 01602 rowCounts = (int *) MALLOC_DH ((m + 1) * sizeof (int)); 01603 CHECK_V_ERROR; 01604 for (i = 0; i <= m; ++i) 01605 rowCounts[i] = 0; 01606 01607 for (i = 0; i < m; ++i) 01608 { 01609 for (j = rp[i]; j < rp[i + 1]; ++j) 01610 { 01611 int col = cval[j]; 01612 rowCounts[i + 1] += 1; 01613 if (col != i) 01614 rowCounts[col + 1] += 1; 01615 } 01616 } 01617 01618 /* prefix sum to form row pointers for full representation */ 01619 rpNew = (int *) MALLOC_DH ((m + 1) * sizeof (int)); 01620 CHECK_V_ERROR; 01621 for (i = 1; i <= m; ++i) 01622 rowCounts[i] += rowCounts[i - 1]; 01623 memcpy (rpNew, rowCounts, (m + 1) * sizeof (int)); 01624 01625 /* form full representation */ 01626 nz = rpNew[m]; 01627 01628 cvalNew = (int *) MALLOC_DH (nz * sizeof (int)); 01629 CHECK_V_ERROR; 01630 avalNew = (double *) MALLOC_DH (nz * sizeof (double)); 01631 CHECK_V_ERROR; 01632 for (i = 0; i < m; ++i) 01633 { 01634 for (j = rp[i]; j < rp[i + 1]; ++j) 01635 { 01636 int col = cval[j]; 01637 double val = aval[j]; 01638 01639 cvalNew[rowCounts[i]] = col; 01640 avalNew[rowCounts[i]] = val; 01641 rowCounts[i] += 1; 01642 if (col != i) 01643 { 01644 cvalNew[rowCounts[col]] = i; 01645 avalNew[rowCounts[col]] = val; 01646 rowCounts[col] += 1; 01647 } 01648 } 01649 } 01650 01651 if (rowCounts != NULL) 01652 { 01653 FREE_DH (rowCounts); 01654 CHECK_V_ERROR; 01655 } 01656 FREE_DH (cval); 01657 CHECK_V_ERROR; 01658 FREE_DH (rp); 01659 CHECK_V_ERROR; 01660 FREE_DH (aval); 01661 CHECK_V_ERROR; 01662 *rpIN = rpNew; 01663 *cvalIN = cvalNew; 01664 *avalIN = avalNew; 01665 END_FUNC_DH} 01666 01667 #undef __FUNC__ 01668 #define __FUNC__ "make_symmetric_private" 01669 void 01670 make_symmetric_private (int m, int **rpIN, int **cvalIN, double **avalIN) 01671 { 01672 START_FUNC_DH int i, j, *rpNew, *cvalNew, *rp = *rpIN, *cval = *cvalIN; 01673 double *avalNew, *aval = *avalIN; 01674 int nz, *rowCounts = NULL; 01675 int *rpTrans, *cvalTrans; 01676 int *work; 01677 double *avalTrans; 01678 int nzCount = 0, transCount = 0; 01679 01680 mat_dh_transpose_private (m, rp, &rpTrans, 01681 cval, &cvalTrans, aval, &avalTrans); 01682 CHECK_V_ERROR; 01683 01684 /* count the number of nonzeros in each row */ 01685 work = (int *) MALLOC_DH (m * sizeof (int)); 01686 CHECK_V_ERROR; 01687 for (i = 0; i < m; ++i) 01688 work[i] = -1; 01689 rowCounts = (int *) MALLOC_DH ((m + 1) * sizeof (int)); 01690 CHECK_V_ERROR; 01691 for (i = 0; i <= m; ++i) 01692 rowCounts[i] = 0; 01693 01694 for (i = 0; i < m; ++i) 01695 { 01696 int ct = 0; 01697 for (j = rp[i]; j < rp[i + 1]; ++j) 01698 { 01699 int col = cval[j]; 01700 work[col] = i; 01701 ++ct; 01702 ++nzCount; 01703 } 01704 for (j = rpTrans[i]; j < rpTrans[i + 1]; ++j) 01705 { 01706 int col = cvalTrans[j]; 01707 if (work[col] != i) 01708 { 01709 ++ct; 01710 ++transCount; 01711 } 01712 } 01713 rowCounts[i + 1] = ct; 01714 } 01715 01716 /*--------------------------------------------------------- 01717 * if matrix is already symmetric, do nothing 01718 *---------------------------------------------------------*/ 01719 if (transCount == 0) 01720 { 01721 printf 01722 ("make_symmetric_private: matrix is already structurally symmetric!\n"); 01723 FREE_DH (rpTrans); 01724 CHECK_V_ERROR; 01725 FREE_DH (cvalTrans); 01726 CHECK_V_ERROR; 01727 FREE_DH (avalTrans); 01728 CHECK_V_ERROR; 01729 FREE_DH (work); 01730 CHECK_V_ERROR; 01731 FREE_DH (rowCounts); 01732 CHECK_V_ERROR; 01733 goto END_OF_FUNCTION; 01734 } 01735 01736 /*--------------------------------------------------------- 01737 * otherwise, finish symmetrizing 01738 *---------------------------------------------------------*/ 01739 else 01740 { 01741 printf ("original nz= %i\n", rp[m]); 01742 printf ("zeros added= %i\n", transCount); 01743 printf 01744 ("ratio of added zeros to nonzeros = %0.2f (assumes all original entries were nonzero!)\n", 01745 (double) transCount / (double) (nzCount)); 01746 } 01747 01748 /* prefix sum to form row pointers for full representation */ 01749 rpNew = (int *) MALLOC_DH ((m + 1) * sizeof (int)); 01750 CHECK_V_ERROR; 01751 for (i = 1; i <= m; ++i) 01752 rowCounts[i] += rowCounts[i - 1]; 01753 memcpy (rpNew, rowCounts, (m + 1) * sizeof (int)); 01754 for (i = 0; i < m; ++i) 01755 work[i] = -1; 01756 01757 /* form full representation */ 01758 nz = rpNew[m]; 01759 cvalNew = (int *) MALLOC_DH (nz * sizeof (int)); 01760 CHECK_V_ERROR; 01761 avalNew = (double *) MALLOC_DH (nz * sizeof (double)); 01762 CHECK_V_ERROR; 01763 for (i = 0; i < m; ++i) 01764 work[i] = -1; 01765 01766 for (i = 0; i < m; ++i) 01767 { 01768 for (j = rp[i]; j < rp[i + 1]; ++j) 01769 { 01770 int col = cval[j]; 01771 double val = aval[j]; 01772 work[col] = i; 01773 cvalNew[rowCounts[i]] = col; 01774 avalNew[rowCounts[i]] = val; 01775 rowCounts[i] += 1; 01776 } 01777 for (j = rpTrans[i]; j < rpTrans[i + 1]; ++j) 01778 { 01779 int col = cvalTrans[j]; 01780 if (work[col] != i) 01781 { 01782 cvalNew[rowCounts[i]] = col; 01783 avalNew[rowCounts[i]] = 0.0; 01784 rowCounts[i] += 1; 01785 } 01786 } 01787 } 01788 01789 if (rowCounts != NULL) 01790 { 01791 FREE_DH (rowCounts); 01792 CHECK_V_ERROR; 01793 } 01794 FREE_DH (work); 01795 CHECK_V_ERROR; 01796 FREE_DH (cval); 01797 CHECK_V_ERROR; 01798 FREE_DH (rp); 01799 CHECK_V_ERROR; 01800 FREE_DH (aval); 01801 CHECK_V_ERROR; 01802 FREE_DH (cvalTrans); 01803 CHECK_V_ERROR; 01804 FREE_DH (rpTrans); 01805 CHECK_V_ERROR; 01806 FREE_DH (avalTrans); 01807 CHECK_V_ERROR; 01808 *rpIN = rpNew; 01809 *cvalIN = cvalNew; 01810 *avalIN = avalNew; 01811 01812 END_OF_FUNCTION:; 01813 01814 END_FUNC_DH} 01815 01816 #undef __FUNC__ 01817 #define __FUNC__ "profileMat" 01818 void 01819 profileMat (Mat_dh A) 01820 { 01821 START_FUNC_DH Mat_dh B = NULL; 01822 int type; 01823 int m; 01824 int i, j; 01825 int *work1; 01826 double *work2; 01827 bool isStructurallySymmetric = true; 01828 bool isNumericallySymmetric = true; 01829 bool is_Triangular = false; 01830 int zeroCount = 0, nz; 01831 01832 if (myid_dh > 0) 01833 { 01834 SET_V_ERROR ("only for a single MPI task!"); 01835 } 01836 01837 m = A->m; 01838 01839 printf ("\nYY----------------------------------------------------\n"); 01840 01841 /* count number of explicit zeros */ 01842 nz = A->rp[m]; 01843 for (i = 0; i < nz; ++i) 01844 { 01845 if (A->aval[i] == 0) 01846 ++zeroCount; 01847 } 01848 printf ("YY row count: %i\n", m); 01849 printf ("YY nz count: %i\n", nz); 01850 printf ("YY explicit zeros: %i (entire matrix)\n", zeroCount); 01851 01852 /* count number of missing or zero diagonals */ 01853 { 01854 int m_diag = 0, z_diag = 0; 01855 for (i = 0; i < m; ++i) 01856 { 01857 bool flag = true; 01858 for (j = A->rp[i]; j < A->rp[i + 1]; ++j) 01859 { 01860 int col = A->cval[j]; 01861 01862 /* row has an explicit diagonal element */ 01863 if (col == i) 01864 { 01865 double val = A->aval[j]; 01866 flag = false; 01867 if (val == 0.0) 01868 ++z_diag; 01869 break; 01870 } 01871 } 01872 01873 /* row has an implicit zero diagonal element */ 01874 if (flag) 01875 ++m_diag; 01876 } 01877 printf ("YY missing diagonals: %i\n", m_diag); 01878 printf ("YY explicit zero diags: %i\n", z_diag); 01879 } 01880 01881 /* check to see if matrix is triangular */ 01882 type = isTriangular (m, A->rp, A->cval); 01883 CHECK_V_ERROR; 01884 if (type == IS_UPPER_TRI) 01885 { 01886 printf ("YY matrix is upper triangular\n"); 01887 is_Triangular = true; 01888 goto END_OF_FUNCTION; 01889 } 01890 else if (type == IS_LOWER_TRI) 01891 { 01892 printf ("YY matrix is lower triangular\n"); 01893 is_Triangular = true; 01894 goto END_OF_FUNCTION; 01895 } 01896 01897 /* if not triangular, count nz in each triangle */ 01898 { 01899 int unz = 0, lnz = 0; 01900 for (i = 0; i < m; ++i) 01901 { 01902 for (j = A->rp[i]; j < A->rp[i + 1]; ++j) 01903 { 01904 int col = A->cval[j]; 01905 if (col < i) 01906 ++lnz; 01907 if (col > i) 01908 ++unz; 01909 } 01910 } 01911 printf ("YY strict upper triangular nonzeros: %i\n", unz); 01912 printf ("YY strict lower triangular nonzeros: %i\n", lnz); 01913 } 01914 01915 01916 01917 01918 Mat_dhTranspose (A, &B); 01919 CHECK_V_ERROR; 01920 01921 /* check for structural and numerical symmetry */ 01922 01923 work1 = (int *) MALLOC_DH (m * sizeof (int)); 01924 CHECK_V_ERROR; 01925 work2 = (double *) MALLOC_DH (m * sizeof (double)); 01926 CHECK_V_ERROR; 01927 for (i = 0; i < m; ++i) 01928 work1[i] = -1; 01929 for (i = 0; i < m; ++i) 01930 work2[i] = 0.0; 01931 01932 for (i = 0; i < m; ++i) 01933 { 01934 for (j = A->rp[i]; j < A->rp[i + 1]; ++j) 01935 { 01936 int col = A->cval[j]; 01937 double val = A->aval[j]; 01938 work1[col] = i; 01939 work2[col] = val; 01940 } 01941 for (j = B->rp[i]; j < B->rp[i + 1]; ++j) 01942 { 01943 int col = B->cval[j]; 01944 double val = B->aval[j]; 01945 01946 if (work1[col] != i) 01947 { 01948 isStructurallySymmetric = false; 01949 isNumericallySymmetric = false; 01950 goto END_OF_FUNCTION; 01951 } 01952 if (work2[col] != val) 01953 { 01954 isNumericallySymmetric = false; 01955 work2[col] = 0.0; 01956 } 01957 } 01958 } 01959 01960 01961 END_OF_FUNCTION:; 01962 01963 if (!is_Triangular) 01964 { 01965 printf ("YY matrix is NOT triangular\n"); 01966 if (isStructurallySymmetric) 01967 { 01968 printf ("YY matrix IS structurally symmetric\n"); 01969 } 01970 else 01971 { 01972 printf ("YY matrix is NOT structurally symmetric\n"); 01973 } 01974 if (isNumericallySymmetric) 01975 { 01976 printf ("YY matrix IS numerically symmetric\n"); 01977 } 01978 else 01979 { 01980 printf ("YY matrix is NOT numerically symmetric\n"); 01981 } 01982 } 01983 01984 if (work1 != NULL) 01985 { 01986 FREE_DH (work1); 01987 CHECK_V_ERROR; 01988 } 01989 if (work2 != NULL) 01990 { 01991 FREE_DH (work2); 01992 CHECK_V_ERROR; 01993 } 01994 if (B != NULL) 01995 { 01996 Mat_dhDestroy (B); 01997 CHECK_V_ERROR; 01998 } 01999 02000 printf ("YY----------------------------------------------------\n"); 02001 02002 END_FUNC_DH}
1.7.4