|
IFPACK Development
|
00001 /*@HEADER 00002 // *********************************************************************** 00003 // 00004 // Ifpack: Object-Oriented Algebraic Preconditioner Package 00005 // Copyright (2009) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 //@HEADER 00028 */ 00029 00030 #include "SubdomainGraph_dh.h" 00031 #include "getRow_dh.h" 00032 #include "Mem_dh.h" 00033 #include "Parser_dh.h" 00034 #include "Hash_i_dh.h" 00035 #include "mat_dh_private.h" 00036 #include "io_dh.h" 00037 #include "SortedSet_dh.h" 00038 #include "shellSort_dh.h" 00039 00040 00041 /* for debugging only! */ 00042 #include <unistd.h> 00043 00044 static void init_seq_private (SubdomainGraph_dh s, int blocks, bool bj, 00045 void *A); 00046 static void init_mpi_private (SubdomainGraph_dh s, int blocks, bool bj, 00047 void *A); 00048 /* 00049 static void partition_metis_private(SubdomainGraph_dh s, void *A); 00050 00051 grep for same below! 00052 */ 00053 static void allocate_storage_private (SubdomainGraph_dh s, int blocks, int m, 00054 bool bj); 00055 static void form_subdomaingraph_mpi_private (SubdomainGraph_dh s); 00056 static void form_subdomaingraph_seq_private (SubdomainGraph_dh s, int m, 00057 void *A); 00058 static void find_all_neighbors_sym_private (SubdomainGraph_dh s, int m, 00059 void *A); 00060 static void find_all_neighbors_unsym_private (SubdomainGraph_dh s, int m, 00061 void *A); 00062 static void find_bdry_nodes_sym_private (SubdomainGraph_dh s, int m, void *A, 00063 int *interiorNodes, int *bdryNodes, 00064 int *interiorCount, int *bdryCount); 00065 static void find_bdry_nodes_unsym_private (SubdomainGraph_dh s, int m, 00066 void *A, int *interiorNodes, 00067 int *bdryNodes, int *interiorCount, 00068 int *bdryCount); 00069 00070 static void find_bdry_nodes_seq_private (SubdomainGraph_dh s, int m, void *A); 00071 /* above also forms n2o[] and o2n[] */ 00072 00073 static void find_ordered_neighbors_private (SubdomainGraph_dh s); 00074 static void color_subdomain_graph_private (SubdomainGraph_dh s); 00075 static void adjust_matrix_perms_private (SubdomainGraph_dh s, int m); 00076 00077 #undef __FUNC__ 00078 #define __FUNC__ "SubdomainGraph_dhCreate" 00079 void 00080 SubdomainGraph_dhCreate (SubdomainGraph_dh * s) 00081 { 00082 START_FUNC_DH 00083 struct _subdomain_dh *tmp = 00084 (struct _subdomain_dh *) MALLOC_DH (sizeof (struct _subdomain_dh)); 00085 CHECK_V_ERROR; 00086 *s = tmp; 00087 00088 tmp->blocks = 1; 00089 tmp->ptrs = tmp->adj = NULL; 00090 tmp->colors = 1; 00091 tmp->colorVec = NULL; 00092 tmp->o2n_sub = tmp->n2o_sub = NULL; 00093 tmp->beg_row = tmp->beg_rowP = NULL; 00094 tmp->bdry_count = tmp->row_count = NULL; 00095 tmp->loNabors = tmp->hiNabors = tmp->allNabors = NULL; 00096 tmp->loCount = tmp->hiCount = tmp->allCount = 0; 00097 00098 tmp->m = 0; 00099 tmp->n2o_row = tmp->o2n_col = NULL; 00100 tmp->o2n_ext = tmp->n2o_ext = NULL; 00101 00102 tmp->doNotColor = Parser_dhHasSwitch (parser_dh, "-doNotColor"); 00103 tmp->debug = Parser_dhHasSwitch (parser_dh, "-debug_SubGraph"); 00104 { 00105 int i; 00106 for (i = 0; i < TIMING_BINS_SG; ++i) 00107 tmp->timing[i] = 0.0; 00108 } 00109 END_FUNC_DH} 00110 00111 #undef __FUNC__ 00112 #define __FUNC__ "SubdomainGraph_dhDestroy" 00113 void 00114 SubdomainGraph_dhDestroy (SubdomainGraph_dh s) 00115 { 00116 START_FUNC_DH if (s->ptrs != NULL) 00117 { 00118 FREE_DH (s->ptrs); 00119 CHECK_V_ERROR; 00120 } 00121 if (s->adj != NULL) 00122 { 00123 FREE_DH (s->adj); 00124 CHECK_V_ERROR; 00125 } 00126 if (s->colorVec != NULL) 00127 { 00128 FREE_DH (s->colorVec); 00129 CHECK_V_ERROR; 00130 } 00131 if (s->o2n_sub != NULL) 00132 { 00133 FREE_DH (s->o2n_sub); 00134 CHECK_V_ERROR; 00135 } 00136 if (s->n2o_sub != NULL) 00137 { 00138 FREE_DH (s->n2o_sub); 00139 CHECK_V_ERROR; 00140 } 00141 00142 if (s->beg_row != NULL) 00143 { 00144 FREE_DH (s->beg_row); 00145 CHECK_V_ERROR; 00146 } 00147 if (s->beg_rowP != NULL) 00148 { 00149 FREE_DH (s->beg_rowP); 00150 CHECK_V_ERROR; 00151 } 00152 if (s->row_count != NULL) 00153 { 00154 FREE_DH (s->row_count); 00155 CHECK_V_ERROR; 00156 } 00157 if (s->bdry_count != NULL) 00158 { 00159 FREE_DH (s->bdry_count); 00160 CHECK_V_ERROR; 00161 } 00162 if (s->loNabors != NULL) 00163 { 00164 FREE_DH (s->loNabors); 00165 CHECK_V_ERROR; 00166 } 00167 if (s->hiNabors != NULL) 00168 { 00169 FREE_DH (s->hiNabors); 00170 CHECK_V_ERROR; 00171 } 00172 if (s->allNabors != NULL) 00173 { 00174 FREE_DH (s->allNabors); 00175 CHECK_V_ERROR; 00176 } 00177 00178 if (s->n2o_row != NULL) 00179 { 00180 FREE_DH (s->n2o_row); 00181 CHECK_V_ERROR; 00182 } 00183 if (s->o2n_col != NULL) 00184 { 00185 FREE_DH (s->o2n_col); 00186 CHECK_V_ERROR; 00187 } 00188 if (s->o2n_ext != NULL) 00189 { 00190 Hash_i_dhDestroy (s->o2n_ext); 00191 CHECK_V_ERROR; 00192 } 00193 if (s->n2o_ext != NULL) 00194 { 00195 Hash_i_dhDestroy (s->n2o_ext); 00196 CHECK_V_ERROR; 00197 } 00198 FREE_DH (s); 00199 CHECK_V_ERROR; 00200 END_FUNC_DH} 00201 00202 #undef __FUNC__ 00203 #define __FUNC__ "SubdomainGraph_dhInit" 00204 void 00205 SubdomainGraph_dhInit (SubdomainGraph_dh s, int blocks, bool bj, void *A) 00206 { 00207 START_FUNC_DH double t1 = MPI_Wtime (); 00208 00209 if (blocks < 1) 00210 blocks = 1; 00211 00212 if (np_dh == 1 || blocks > 1) 00213 { 00214 s->blocks = blocks; 00215 init_seq_private (s, blocks, bj, A); 00216 CHECK_V_ERROR; 00217 } 00218 else 00219 { 00220 s->blocks = np_dh; 00221 init_mpi_private (s, np_dh, bj, A); 00222 CHECK_V_ERROR; 00223 } 00224 00225 s->timing[TOTAL_SGT] += (MPI_Wtime () - t1); 00226 END_FUNC_DH} 00227 00228 00229 #undef __FUNC__ 00230 #define __FUNC__ "SubdomainGraph_dhFindOwner" 00231 int 00232 SubdomainGraph_dhFindOwner (SubdomainGraph_dh s, int idx, bool permuted) 00233 { 00234 START_FUNC_DH int sd; 00235 int *beg_row = s->beg_row; 00236 int *row_count = s->row_count; 00237 int owner = -1, blocks = s->blocks; 00238 00239 if (permuted) 00240 beg_row = s->beg_rowP; 00241 00242 /* determine the subdomain that contains "idx" */ 00243 for (sd = 0; sd < blocks; ++sd) 00244 { 00245 if (idx >= beg_row[sd] && idx < beg_row[sd] + row_count[sd]) 00246 { 00247 owner = sd; 00248 break; 00249 } 00250 } 00251 00252 if (owner == -1) 00253 { 00254 00255 fprintf (stderr, "@@@ failed to find owner for idx = %i @@@\n", idx); 00256 fprintf (stderr, "blocks= %i\n", blocks); 00257 00258 sprintf (msgBuf_dh, "failed to find owner for idx = %i", idx); 00259 SET_ERROR (-1, msgBuf_dh); 00260 } 00261 00262 END_FUNC_VAL (owner)} 00263 00264 00265 #undef __FUNC__ 00266 #define __FUNC__ "SubdomainGraph_dhPrintStatsLong" 00267 void 00268 SubdomainGraph_dhPrintStatsLong (SubdomainGraph_dh s, FILE * fp) 00269 { 00270 START_FUNC_DH int i, j, k; 00271 double max = 0, min = INT_MAX; 00272 00273 fprintf (fp, 00274 "\n------------- SubdomainGraph_dhPrintStatsLong -----------\n"); 00275 fprintf (fp, "colors used = %i\n", s->colors); 00276 fprintf (fp, "subdomain count = %i\n", s->blocks); 00277 00278 00279 fprintf (fp, "\ninterior/boundary node ratios:\n"); 00280 00281 for (i = 0; i < s->blocks; ++i) 00282 { 00283 int inNodes = s->row_count[i] - s->bdry_count[i]; 00284 int bdNodes = s->bdry_count[i]; 00285 double ratio; 00286 00287 if (bdNodes == 0) 00288 { 00289 ratio = -1; 00290 } 00291 else 00292 { 00293 ratio = (double) inNodes / (double) bdNodes; 00294 } 00295 00296 max = MAX (max, ratio); 00297 min = MIN (min, ratio); 00298 fprintf (fp, 00299 " P_%i: first= %3i rowCount= %3i interior= %3i bdry= %3i ratio= %0.1f\n", 00300 i, 1 + s->beg_row[i], s->row_count[i], inNodes, bdNodes, 00301 ratio); 00302 } 00303 00304 00305 fprintf (fp, "\nmax interior/bdry ratio = %.1f\n", max); 00306 fprintf (fp, "min interior/bdry ratio = %.1f\n", min); 00307 00308 00309 /*----------------------------------------- 00310 * subdomain graph 00311 *-----------------------------------------*/ 00312 if (s->adj != NULL) 00313 { 00314 fprintf (fp, "\nunpermuted subdomain graph: \n"); 00315 for (i = 0; i < s->blocks; ++i) 00316 { 00317 fprintf (fp, "%i :: ", i); 00318 for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j) 00319 { 00320 fprintf (fp, "%i ", s->adj[j]); 00321 } 00322 fprintf (fp, "\n"); 00323 } 00324 } 00325 00326 00327 /*----------------------------------------- 00328 * subdomain permutation 00329 *-----------------------------------------*/ 00330 fprintf (fp, "\no2n subdomain permutation:\n"); 00331 for (i = 0; i < s->blocks; ++i) 00332 { 00333 fprintf (fp, " %i %i\n", i, s->o2n_sub[i]); 00334 } 00335 fprintf (fp, "\n"); 00336 00337 if (np_dh > 1) 00338 { 00339 00340 /*----------------------------------------- 00341 * local n2o_row permutation 00342 *-----------------------------------------*/ 00343 fprintf (fp, "\nlocal n2o_row permutation:\n "); 00344 for (i = 0; i < s->row_count[myid_dh]; ++i) 00345 { 00346 fprintf (fp, "%i ", s->n2o_row[i]); 00347 } 00348 fprintf (fp, "\n"); 00349 00350 /*----------------------------------------- 00351 * local n2o permutation 00352 *-----------------------------------------*/ 00353 fprintf (fp, "\nlocal o2n_col permutation:\n "); 00354 for (i = 0; i < s->row_count[myid_dh]; ++i) 00355 { 00356 fprintf (fp, "%i ", s->o2n_col[i]); 00357 } 00358 fprintf (fp, "\n"); 00359 00360 } 00361 else 00362 { 00363 /*----------------------------------------- 00364 * local n2o_row permutation 00365 *-----------------------------------------*/ 00366 fprintf (fp, "\nlocal n2o_row permutation:\n"); 00367 fprintf (fp, "--------------------------\n"); 00368 for (k = 0; k < s->blocks; ++k) 00369 { 00370 int beg_row = s->beg_row[k]; 00371 int end_row = beg_row + s->row_count[k]; 00372 00373 for (i = beg_row; i < end_row; ++i) 00374 { 00375 fprintf (fp, "%i ", s->n2o_row[i]); 00376 } 00377 fprintf (fp, "\n"); 00378 } 00379 00380 /*----------------------------------------- 00381 * local n2o permutation 00382 *-----------------------------------------*/ 00383 fprintf (fp, "\nlocal o2n_col permutation:\n"); 00384 fprintf (fp, "--------------------------\n"); 00385 for (k = 0; k < s->blocks; ++k) 00386 { 00387 int beg_row = s->beg_row[k]; 00388 int end_row = beg_row + s->row_count[k]; 00389 00390 for (i = beg_row; i < end_row; ++i) 00391 { 00392 fprintf (fp, "%i ", s->o2n_col[i]); 00393 } 00394 fprintf (fp, "\n"); 00395 } 00396 00397 00398 } 00399 00400 END_FUNC_DH} 00401 00402 #undef __FUNC__ 00403 #define __FUNC__ "init_seq_private" 00404 void 00405 init_seq_private (SubdomainGraph_dh s, int blocks, bool bj, void *A) 00406 { 00407 START_FUNC_DH int m, n, beg_row; 00408 double t1; 00409 00410 /*------------------------------------------------------- 00411 * get number of local rows (m), global rows (n), and 00412 * global numbering of first locally owned row 00413 * (for sequential, beg_row=0 and m == n 00414 *-------------------------------------------------------*/ 00415 EuclidGetDimensions (A, &beg_row, &m, &n); 00416 CHECK_V_ERROR; 00417 s->m = n; 00418 00419 /*------------------------------------------------------- 00420 * allocate storage for all data structures 00421 * EXCEPT s->adj and hash tables. 00422 * (but note that hash tables aren't used for sequential) 00423 *-------------------------------------------------------*/ 00424 allocate_storage_private (s, blocks, m, bj); 00425 CHECK_V_ERROR; 00426 00427 /*------------------------------------------------------------- 00428 * Fill in: beg_row[] 00429 * beg_rowP[] 00430 * row_count[] 00431 * At this point, beg_rowP[] is a copy of beg_row[]) 00432 *-------------------------------------------------------------*/ 00433 { 00434 int i; 00435 int rpp = m / blocks; 00436 00437 if (rpp * blocks < m) 00438 ++rpp; 00439 00440 s->beg_row[0] = 0; 00441 for (i = 1; i < blocks; ++i) 00442 s->beg_row[i] = rpp + s->beg_row[i - 1]; 00443 for (i = 0; i < blocks; ++i) 00444 s->row_count[i] = rpp; 00445 s->row_count[blocks - 1] = m - rpp * (blocks - 1); 00446 } 00447 memcpy (s->beg_rowP, s->beg_row, blocks * sizeof (int)); 00448 00449 00450 /*----------------------------------------------------------------- 00451 * Find all neighboring processors in subdomain graph. 00452 * This block fills in: allNabors[] 00453 *-----------------------------------------------------------------*/ 00454 /* NA for sequential! */ 00455 00456 00457 /*------------------------------------------------------- 00458 * Count number of interior nodes for each subdomain; 00459 * also, form permutation vector to order boundary 00460 * nodes last in each subdomain. 00461 * This block fills in: bdry_count[] 00462 * n2o_col[] 00463 * o2n_row[] 00464 *-------------------------------------------------------*/ 00465 t1 = MPI_Wtime (); 00466 if (!bj) 00467 { 00468 find_bdry_nodes_seq_private (s, m, A); 00469 CHECK_V_ERROR; 00470 } 00471 else 00472 { 00473 int i; 00474 for (i = 0; i < m; ++i) 00475 { 00476 s->n2o_row[i] = i; 00477 s->o2n_col[i] = i; 00478 } 00479 } 00480 s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1); 00481 00482 /*------------------------------------------------------- 00483 * Form subdomain graph, 00484 * then color and reorder subdomain graph. 00485 * This block fills in: ptr[] 00486 * adj[] 00487 * o2n_sub[] 00488 * n2o_sub[] 00489 * beg_rowP[] 00490 *-------------------------------------------------------*/ 00491 t1 = MPI_Wtime (); 00492 if (!bj) 00493 { 00494 form_subdomaingraph_seq_private (s, m, A); 00495 CHECK_V_ERROR; 00496 if (s->doNotColor) 00497 { 00498 int i; 00499 printf_dh ("subdomain coloring and reordering is OFF\n"); 00500 for (i = 0; i < blocks; ++i) 00501 { 00502 s->o2n_sub[i] = i; 00503 s->n2o_sub[i] = i; 00504 s->colorVec[i] = 0; 00505 } 00506 } 00507 else 00508 { 00509 SET_INFO ("subdomain coloring and reordering is ON"); 00510 color_subdomain_graph_private (s); 00511 CHECK_V_ERROR; 00512 } 00513 } 00514 00515 /* bj setup */ 00516 else 00517 { 00518 int i; 00519 for (i = 0; i < blocks; ++i) 00520 { 00521 s->o2n_sub[i] = i; 00522 s->n2o_sub[i] = i; 00523 } 00524 } 00525 s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1); 00526 00527 /*------------------------------------------------------- 00528 * Here's a step we DON'T do for the parallel case: 00529 * we need to adjust the matrix row and column perms 00530 * to reflect subdomain reordering (for the parallel 00531 * case, these permutation vectors are purely local and 00532 * zero-based) 00533 *-------------------------------------------------------*/ 00534 if (!bj) 00535 { 00536 adjust_matrix_perms_private (s, m); 00537 CHECK_V_ERROR; 00538 } 00539 00540 /*------------------------------------------------------- 00541 * Build lists of lower and higher ordered neighbors. 00542 * This block fills in: loNabors[] 00543 * hiNabors[] 00544 *-------------------------------------------------------*/ 00545 /* NA for sequential */ 00546 00547 /*------------------------------------------------------- 00548 * Exchange boundary node permutation information with 00549 * neighboring processors in the subdomain graph. 00550 * This block fills in: o2n_ext (hash table) 00551 * n2o_ext (hash table) 00552 *-------------------------------------------------------*/ 00553 /* NA for sequential */ 00554 00555 00556 END_FUNC_DH} 00557 00558 00559 #if 0 00560 #undef __FUNC__ 00561 #define __FUNC__ "partition_metis_private" 00562 void 00563 partition_metis_private (SubdomainGraph_dh s, void *A) 00564 { 00565 START_FUNC_DH if (ignoreMe) 00566 SET_V_ERROR ("not implemented"); 00567 END_FUNC_DH} 00568 #endif 00569 00570 #undef __FUNC__ 00571 #define __FUNC__ "allocate_storage_private" 00572 void 00573 allocate_storage_private (SubdomainGraph_dh s, int blocks, int m, bool bj) 00574 { 00575 START_FUNC_DH if (!bj) 00576 { 00577 s->ptrs = (int *) MALLOC_DH ((blocks + 1) * sizeof (int)); 00578 CHECK_V_ERROR; 00579 s->ptrs[0] = 0; 00580 s->colorVec = (int *) MALLOC_DH (blocks * sizeof (int)); 00581 CHECK_V_ERROR; 00582 s->loNabors = (int *) MALLOC_DH (np_dh * sizeof (int)); 00583 CHECK_V_ERROR; 00584 s->hiNabors = (int *) MALLOC_DH (np_dh * sizeof (int)); 00585 CHECK_V_ERROR; 00586 s->allNabors = (int *) MALLOC_DH (np_dh * sizeof (int)); 00587 CHECK_V_ERROR; 00588 } 00589 00590 s->n2o_row = (int *) MALLOC_DH (m * sizeof (int)); 00591 CHECK_V_ERROR; 00592 s->o2n_col = (int *) MALLOC_DH (m * sizeof (int)); 00593 CHECK_V_ERROR; 00594 00595 /* these are probably only needed for single mpi task -- ?? */ 00596 /* nope; beg_row and row_ct are needed by ilu_mpi_bj; yuck! */ 00597 s->beg_row = (int *) MALLOC_DH ((blocks) * sizeof (int)); 00598 CHECK_V_ERROR; 00599 s->beg_rowP = (int *) MALLOC_DH ((blocks) * sizeof (int)); 00600 CHECK_V_ERROR; 00601 s->row_count = (int *) MALLOC_DH (blocks * sizeof (int)); 00602 CHECK_V_ERROR; 00603 s->bdry_count = (int *) MALLOC_DH (blocks * sizeof (int)); 00604 CHECK_V_ERROR; 00605 s->o2n_sub = (int *) MALLOC_DH (blocks * sizeof (int)); 00606 CHECK_V_ERROR; 00607 s->n2o_sub = (int *) MALLOC_DH (blocks * sizeof (int)); 00608 CHECK_V_ERROR; 00609 00610 END_FUNC_DH} 00611 00612 /*-----------------------------------------------------------------*/ 00613 00614 00615 #undef __FUNC__ 00616 #define __FUNC__ "init_mpi_private" 00617 void 00618 init_mpi_private (SubdomainGraph_dh s, int blocks, bool bj, void *A) 00619 { 00620 START_FUNC_DH int m, n, beg_row; 00621 bool symmetric; 00622 double t1; 00623 00624 symmetric = Parser_dhHasSwitch (parser_dh, "-sym"); 00625 CHECK_V_ERROR; 00626 if (Parser_dhHasSwitch (parser_dh, "-makeSymmetric")) 00627 { 00628 symmetric = true; 00629 } 00630 00631 /*------------------------------------------------------- 00632 * get number of local rows (m), global rows (n), and 00633 * global numbering of first locally owned row 00634 *-------------------------------------------------------*/ 00635 EuclidGetDimensions (A, &beg_row, &m, &n); 00636 CHECK_V_ERROR; 00637 s->m = m; 00638 00639 00640 /*------------------------------------------------------- 00641 * allocate storage for all data structures 00642 * EXCEPT s->adj and hash tables. 00643 *-------------------------------------------------------*/ 00644 allocate_storage_private (s, blocks, m, bj); 00645 CHECK_V_ERROR; 00646 00647 /*------------------------------------------------------------- 00648 * Fill in: beg_row[] 00649 * beg_rowP[] 00650 * row_count[] 00651 * At this point, beg_rowP[] is a copy of beg_row[]) 00652 *-------------------------------------------------------------*/ 00653 if (!bj) 00654 { 00655 MPI_Allgather (&beg_row, 1, MPI_INT, s->beg_row, 1, MPI_INT, comm_dh); 00656 MPI_Allgather (&m, 1, MPI_INT, s->row_count, 1, MPI_INT, comm_dh); 00657 memcpy (s->beg_rowP, s->beg_row, np_dh * sizeof (int)); 00658 } 00659 else 00660 { 00661 s->beg_row[myid_dh] = beg_row; 00662 s->beg_rowP[myid_dh] = beg_row; 00663 s->row_count[myid_dh] = m; 00664 } 00665 00666 /*----------------------------------------------------------------- 00667 * Find all neighboring processors in subdomain graph. 00668 * This block fills in: allNabors[] 00669 *-----------------------------------------------------------------*/ 00670 if (!bj) 00671 { 00672 t1 = MPI_Wtime (); 00673 if (symmetric) 00674 { 00675 find_all_neighbors_sym_private (s, m, A); 00676 CHECK_V_ERROR; 00677 } 00678 else 00679 { 00680 find_all_neighbors_unsym_private (s, m, A); 00681 CHECK_V_ERROR; 00682 } 00683 s->timing[FIND_NABORS_SGT] += (MPI_Wtime () - t1); 00684 } 00685 00686 00687 /*----------------------------------------------------------------- 00688 * determine which rows are boundary rows, and which are interior 00689 * rows; also, form permutation vector to order interior 00690 * nodes first within each subdomain 00691 * This block fills in: bdry_count[] 00692 * n2o_col[] 00693 * o2n_row[] 00694 *-----------------------------------------------------------------*/ 00695 t1 = MPI_Wtime (); 00696 if (!bj) 00697 { 00698 int *interiorNodes, *bdryNodes; 00699 int interiorCount, bdryCount; 00700 int *o2n = s->o2n_col, idx; 00701 int i; 00702 00703 interiorNodes = (int *) MALLOC_DH (m * sizeof (int)); 00704 CHECK_V_ERROR; 00705 bdryNodes = (int *) MALLOC_DH (m * sizeof (int)); 00706 CHECK_V_ERROR; 00707 00708 /* divide this subdomain's rows into interior and boundary rows; 00709 the returned lists are with respect to local numbering. 00710 */ 00711 if (symmetric) 00712 { 00713 find_bdry_nodes_sym_private (s, m, A, 00714 interiorNodes, bdryNodes, 00715 &interiorCount, &bdryCount); 00716 CHECK_V_ERROR; 00717 } 00718 else 00719 { 00720 find_bdry_nodes_unsym_private (s, m, A, 00721 interiorNodes, bdryNodes, 00722 &interiorCount, &bdryCount); 00723 CHECK_V_ERROR; 00724 } 00725 00726 /* exchange number of boundary rows with all neighbors */ 00727 MPI_Allgather (&bdryCount, 1, MPI_INT, s->bdry_count, 1, MPI_INT, 00728 comm_dh); 00729 00730 /* form local permutation */ 00731 idx = 0; 00732 for (i = 0; i < interiorCount; ++i) 00733 { 00734 o2n[interiorNodes[i]] = idx++; 00735 } 00736 for (i = 0; i < bdryCount; ++i) 00737 { 00738 o2n[bdryNodes[i]] = idx++; 00739 } 00740 00741 /* invert permutation */ 00742 invert_perm (m, o2n, s->n2o_row); 00743 CHECK_V_ERROR; 00744 00745 FREE_DH (interiorNodes); 00746 CHECK_V_ERROR; 00747 FREE_DH (bdryNodes); 00748 CHECK_V_ERROR; 00749 } 00750 00751 /* bj setup */ 00752 else 00753 { 00754 int *o2n = s->o2n_col, *n2o = s->n2o_row; 00755 int i, m = s->m; 00756 00757 for (i = 0; i < m; ++i) 00758 { 00759 o2n[i] = i; 00760 n2o[i] = i; 00761 } 00762 } 00763 s->timing[ORDER_BDRY_SGT] += (MPI_Wtime () - t1); 00764 00765 /*------------------------------------------------------- 00766 * Form subdomain graph, 00767 * then color and reorder subdomain graph. 00768 * This block fills in: ptr[] 00769 * adj[] 00770 * o2n_sub[] 00771 * n2o_sub[] 00772 * beg_rowP[] 00773 *-------------------------------------------------------*/ 00774 if (!bj) 00775 { 00776 t1 = MPI_Wtime (); 00777 form_subdomaingraph_mpi_private (s); 00778 CHECK_V_ERROR; 00779 if (s->doNotColor) 00780 { 00781 int i; 00782 printf_dh ("subdomain coloring and reordering is OFF\n"); 00783 for (i = 0; i < blocks; ++i) 00784 { 00785 s->o2n_sub[i] = i; 00786 s->n2o_sub[i] = i; 00787 s->colorVec[i] = 0; 00788 } 00789 } 00790 else 00791 { 00792 SET_INFO ("subdomain coloring and reordering is ON"); 00793 color_subdomain_graph_private (s); 00794 CHECK_V_ERROR; 00795 } 00796 s->timing[FORM_GRAPH_SGT] += (MPI_Wtime () - t1); 00797 } 00798 00799 /*------------------------------------------------------- 00800 * Build lists of lower and higher ordered neighbors. 00801 * This block fills in: loNabors[] 00802 * hiNabors[] 00803 *-------------------------------------------------------*/ 00804 if (!bj) 00805 { 00806 find_ordered_neighbors_private (s); 00807 CHECK_V_ERROR; 00808 } 00809 00810 /*------------------------------------------------------- 00811 * Exchange boundary node permutation information with 00812 * neighboring processors in the subdomain graph. 00813 * This block fills in: o2n_ext (hash table) 00814 * n2o_ext (hash table) 00815 *-------------------------------------------------------*/ 00816 if (!bj) 00817 { 00818 t1 = MPI_Wtime (); 00819 SubdomainGraph_dhExchangePerms (s); 00820 CHECK_V_ERROR; 00821 s->timing[EXCHANGE_PERMS_SGT] += (MPI_Wtime () - t1); 00822 } 00823 00824 END_FUNC_DH} 00825 00826 00827 00828 #undef __FUNC__ 00829 #define __FUNC__ "SubdomainGraph_dhExchangePerms" 00830 void 00831 SubdomainGraph_dhExchangePerms (SubdomainGraph_dh s) 00832 { 00833 START_FUNC_DH MPI_Request * recv_req = NULL, *send_req = NULL; 00834 MPI_Status *status = NULL; 00835 int *nabors = s->allNabors, naborCount = s->allCount; 00836 int i, j, *sendBuf = NULL, *recvBuf = NULL, *naborIdx = NULL, nz; 00837 int m = s->row_count[myid_dh]; 00838 int beg_row = s->beg_row[myid_dh]; 00839 int beg_rowP = s->beg_rowP[myid_dh]; 00840 int *bdryNodeCounts = s->bdry_count; 00841 int myBdryCount = s->bdry_count[myid_dh]; 00842 bool debug = false; 00843 int myFirstBdry = m - myBdryCount; 00844 int *n2o_row = s->n2o_row; 00845 Hash_i_dh n2o_table, o2n_table; 00846 00847 if (logFile != NULL && s->debug) 00848 debug = true; 00849 00850 /* allocate send buffer, and copy permutation info to buffer; 00851 each entry is a <old_value, new_value> pair. 00852 */ 00853 sendBuf = (int *) MALLOC_DH (2 * myBdryCount * sizeof (int)); 00854 CHECK_V_ERROR; 00855 00856 00857 if (debug) 00858 { 00859 fprintf (logFile, 00860 "\nSUBG myFirstBdry= %i myBdryCount= %i m= %i beg_rowP= %i\n", 00861 1 + myFirstBdry, myBdryCount, m, 1 + beg_rowP); 00862 fflush (logFile); 00863 } 00864 00865 for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j) 00866 { 00867 sendBuf[2 * j] = n2o_row[i] + beg_row; 00868 sendBuf[2 * j + 1] = i + beg_rowP; 00869 } 00870 00871 if (debug) 00872 { 00873 fprintf (logFile, "\nSUBG SEND_BUF:\n"); 00874 for (i = myFirstBdry, j = 0; j < myBdryCount; ++i, ++j) 00875 { 00876 fprintf (logFile, "SUBG %i, %i\n", 1 + sendBuf[2 * j], 00877 1 + sendBuf[2 * j + 1]); 00878 } 00879 fflush (logFile); 00880 } 00881 00882 /* allocate a receive buffer for each nabor in the subdomain graph, 00883 and set up index array for locating the beginning of each 00884 nabor's buffers. 00885 */ 00886 naborIdx = (int *) MALLOC_DH ((1 + naborCount) * sizeof (int)); 00887 CHECK_V_ERROR; 00888 naborIdx[0] = 0; 00889 nz = 0; 00890 for (i = 0; i < naborCount; ++i) 00891 { 00892 nz += (2 * bdryNodeCounts[nabors[i]]); 00893 naborIdx[i + 1] = nz; 00894 } 00895 00896 00897 recvBuf = (int *) MALLOC_DH (nz * sizeof (int)); 00898 CHECK_V_ERROR; 00899 00900 00901 /* for (i=0; i<nz; ++i) recvBuf[i] = -10; */ 00902 00903 /* perform sends and receives */ 00904 recv_req = (MPI_Request *) MALLOC_DH (naborCount * sizeof (MPI_Request)); 00905 CHECK_V_ERROR; 00906 send_req = (MPI_Request *) MALLOC_DH (naborCount * sizeof (MPI_Request)); 00907 CHECK_V_ERROR; 00908 status = (MPI_Status *) MALLOC_DH (naborCount * sizeof (MPI_Status)); 00909 CHECK_V_ERROR; 00910 00911 for (i = 0; i < naborCount; ++i) 00912 { 00913 int nabr = nabors[i]; 00914 int *buf = recvBuf + naborIdx[i]; 00915 int ct = 2 * bdryNodeCounts[nabr]; 00916 00917 00918 MPI_Isend (sendBuf, 2 * myBdryCount, MPI_INT, nabr, 444, comm_dh, 00919 &(send_req[i])); 00920 00921 if (debug) 00922 { 00923 fprintf (logFile, "SUBG sending %i elts to %i\n", 2 * myBdryCount, 00924 nabr); 00925 fflush (logFile); 00926 } 00927 00928 MPI_Irecv (buf, ct, MPI_INT, nabr, 444, comm_dh, &(recv_req[i])); 00929 00930 if (debug) 00931 { 00932 fprintf (logFile, "SUBG receiving %i elts from %i\n", ct, nabr); 00933 fflush (logFile); 00934 } 00935 } 00936 00937 MPI_Waitall (naborCount, send_req, status); 00938 MPI_Waitall (naborCount, recv_req, status); 00939 00940 Hash_i_dhCreate (&n2o_table, nz / 2); 00941 CHECK_V_ERROR; 00942 Hash_i_dhCreate (&o2n_table, nz / 2); 00943 CHECK_V_ERROR; 00944 s->n2o_ext = n2o_table; 00945 s->o2n_ext = o2n_table; 00946 00947 /* insert non-local boundary node permutations in lookup tables */ 00948 for (i = 0; i < nz; i += 2) 00949 { 00950 int old = recvBuf[i]; 00951 int new = recvBuf[i + 1]; 00952 00953 if (debug) 00954 { 00955 fprintf (logFile, "SUBG i= %i old= %i new= %i\n", i, old + 1, 00956 new + 1); 00957 fflush (logFile); 00958 } 00959 00960 Hash_i_dhInsert (o2n_table, old, new); 00961 CHECK_V_ERROR; 00962 Hash_i_dhInsert (n2o_table, new, old); 00963 CHECK_V_ERROR; 00964 } 00965 00966 00967 if (recvBuf != NULL) 00968 { 00969 FREE_DH (recvBuf); 00970 CHECK_V_ERROR; 00971 } 00972 if (naborIdx != NULL) 00973 { 00974 FREE_DH (naborIdx); 00975 CHECK_V_ERROR; 00976 } 00977 if (sendBuf != NULL) 00978 { 00979 FREE_DH (sendBuf); 00980 CHECK_V_ERROR; 00981 } 00982 if (recv_req != NULL) 00983 { 00984 FREE_DH (recv_req); 00985 CHECK_V_ERROR; 00986 } 00987 if (send_req != NULL) 00988 { 00989 FREE_DH (send_req); 00990 CHECK_V_ERROR; 00991 } 00992 if (status != NULL) 00993 { 00994 FREE_DH (status); 00995 CHECK_V_ERROR; 00996 } 00997 00998 END_FUNC_DH} 00999 01000 01001 01002 #undef __FUNC__ 01003 #define __FUNC__ "form_subdomaingraph_mpi_private" 01004 void 01005 form_subdomaingraph_mpi_private (SubdomainGraph_dh s) 01006 { 01007 START_FUNC_DH int *nabors = s->allNabors, nct = s->allCount; 01008 int *idxAll = NULL; 01009 int i, j, nz, *adj, *ptrs = s->ptrs; 01010 MPI_Request *recvReqs = NULL, sendReq; 01011 MPI_Status *statuses = NULL, status; 01012 01013 /* all processors tell root how many nabors they have */ 01014 if (myid_dh == 0) 01015 { 01016 idxAll = (int *) MALLOC_DH (np_dh * sizeof (int)); 01017 CHECK_V_ERROR; 01018 } 01019 MPI_Gather (&nct, 1, MPI_INT, idxAll, 1, MPI_INT, 0, comm_dh); 01020 01021 /* root counts edges in graph, and broacasts to all */ 01022 if (myid_dh == 0) 01023 { 01024 nz = 0; 01025 for (i = 0; i < np_dh; ++i) 01026 nz += idxAll[i]; 01027 } 01028 MPI_Bcast (&nz, 1, MPI_INT, 0, comm_dh); 01029 01030 /* allocate space for adjacency lists (memory for the 01031 pointer array was previously allocated) 01032 */ 01033 adj = s->adj = (int *) MALLOC_DH (nz * sizeof (int)); 01034 CHECK_V_ERROR; 01035 01036 /* root receives adjacency lists from all processors */ 01037 if (myid_dh == 0) 01038 { 01039 recvReqs = (MPI_Request *) MALLOC_DH (np_dh * sizeof (MPI_Request)); 01040 CHECK_V_ERROR; 01041 statuses = (MPI_Status *) MALLOC_DH (np_dh * sizeof (MPI_Status)); 01042 CHECK_V_ERROR; 01043 01044 /* first, set up row pointer array */ 01045 ptrs[0] = 0; 01046 for (j = 0; j < np_dh; ++j) 01047 ptrs[j + 1] = ptrs[j] + idxAll[j]; 01048 01049 /* second, start the receives */ 01050 for (j = 0; j < np_dh; ++j) 01051 { 01052 int ct = idxAll[j]; 01053 01054 MPI_Irecv (adj + ptrs[j], ct, MPI_INT, j, 42, comm_dh, 01055 recvReqs + j); 01056 } 01057 } 01058 01059 /* all processors send root their adjacency list */ 01060 MPI_Isend (nabors, nct, MPI_INT, 0, 42, comm_dh, &sendReq); 01061 01062 /* wait for comms to go through */ 01063 if (myid_dh == 0) 01064 { 01065 MPI_Waitall (np_dh, recvReqs, statuses); 01066 } 01067 MPI_Wait (&sendReq, &status); 01068 01069 /* root broadcasts assembled subdomain graph to all processors */ 01070 MPI_Bcast (ptrs, 1 + np_dh, MPI_INT, 0, comm_dh); 01071 MPI_Bcast (adj, nz, MPI_INT, 0, comm_dh); 01072 01073 if (idxAll != NULL) 01074 { 01075 FREE_DH (idxAll); 01076 CHECK_V_ERROR; 01077 } 01078 if (recvReqs != NULL) 01079 { 01080 FREE_DH (recvReqs); 01081 CHECK_V_ERROR; 01082 } 01083 if (statuses != NULL) 01084 { 01085 FREE_DH (statuses); 01086 CHECK_V_ERROR; 01087 } 01088 01089 END_FUNC_DH} 01090 01091 /* this is ugly and inefficient; but seq mode is primarily 01092 for debugging and testing, so there. 01093 */ 01094 #undef __FUNC__ 01095 #define __FUNC__ "form_subdomaingraph_seq_private" 01096 void 01097 form_subdomaingraph_seq_private (SubdomainGraph_dh s, int m, void *A) 01098 { 01099 START_FUNC_DH int *dense, i, j, row, blocks = s->blocks; 01100 int *cval, len, *adj; 01101 int idx = 0, *ptrs = s->ptrs; 01102 01103 /* allocate storage for adj[]; since this function is intended 01104 for debugging/testing, and the number of blocks should be 01105 relatively small, we'll punt and allocate the maximum 01106 possibly needed. 01107 */ 01108 adj = s->adj = (int *) MALLOC_DH (blocks * blocks * sizeof (int)); 01109 CHECK_V_ERROR; 01110 01111 dense = (int *) MALLOC_DH (blocks * blocks * sizeof (int)); 01112 CHECK_V_ERROR; 01113 for (i = 0; i < blocks * blocks; ++i) 01114 dense[i] = 0; 01115 01116 /* loop over each block's rows to identify all boundary nodes */ 01117 for (i = 0; i < blocks; ++i) 01118 { 01119 int beg_row = s->beg_row[i]; 01120 int end_row = beg_row + s->row_count[i]; 01121 01122 for (row = beg_row; row < end_row; ++row) 01123 { 01124 EuclidGetRow (A, row, &len, &cval, NULL); 01125 CHECK_V_ERROR; 01126 for (j = 0; j < len; ++j) 01127 { 01128 int col = cval[j]; 01129 if (col < beg_row || col >= end_row) 01130 { 01131 int owner = SubdomainGraph_dhFindOwner (s, col, false); 01132 CHECK_V_ERROR; 01133 dense[i * blocks + owner] = 1; 01134 dense[owner * blocks + i] = 1; 01135 } 01136 } 01137 EuclidRestoreRow (A, row, &len, &cval, NULL); 01138 CHECK_V_ERROR; 01139 } 01140 } 01141 01142 /* form sparse csr representation of subdomain graph 01143 from dense representation 01144 */ 01145 ptrs[0] = 0; 01146 for (i = 0; i < blocks; ++i) 01147 { 01148 for (j = 0; j < blocks; ++j) 01149 { 01150 if (dense[i * blocks + j]) 01151 { 01152 adj[idx++] = j; 01153 } 01154 } 01155 ptrs[i + 1] = idx; 01156 } 01157 01158 FREE_DH (dense); 01159 CHECK_V_ERROR; 01160 END_FUNC_DH} 01161 01162 01163 #undef __FUNC__ 01164 #define __FUNC__ "find_all_neighbors_sym_private" 01165 void 01166 find_all_neighbors_sym_private (SubdomainGraph_dh s, int m, void *A) 01167 { 01168 START_FUNC_DH int *marker, i, j, beg_row, end_row; 01169 int row, len, *cval, ct = 0; 01170 int *nabors = s->allNabors; 01171 01172 marker = (int *) MALLOC_DH (m * sizeof (int)); 01173 CHECK_V_ERROR; 01174 for (i = 0; i < m; ++i) 01175 marker[i] = 0; 01176 01177 SET_INFO 01178 ("finding nabors in subdomain graph for structurally symmetric matrix"); 01179 SET_INFO ("(if this isn't what you want, use '-sym 0' switch)"); 01180 01181 beg_row = s->beg_row[myid_dh]; 01182 end_row = beg_row + s->row_count[myid_dh]; 01183 01184 for (row = beg_row; row < end_row; ++row) 01185 { 01186 EuclidGetRow (A, row, &len, &cval, NULL); 01187 CHECK_V_ERROR; 01188 for (j = 0; j < len; ++j) 01189 { 01190 int col = cval[j]; 01191 if (col < beg_row || col >= end_row) 01192 { 01193 int owner = SubdomainGraph_dhFindOwner (s, col, false); 01194 CHECK_V_ERROR; 01195 if (!marker[owner]) 01196 { 01197 marker[owner] = 1; 01198 nabors[ct++] = owner; 01199 } 01200 } 01201 } 01202 EuclidRestoreRow (A, row, &len, &cval, NULL); 01203 CHECK_V_ERROR; 01204 } 01205 s->allCount = ct; 01206 01207 /* fprintf(logFile, "@@@@@ allCount= %i\n", ct); */ 01208 01209 if (marker != NULL) 01210 { 01211 FREE_DH (marker); 01212 CHECK_V_ERROR; 01213 } 01214 END_FUNC_DH} 01215 01216 #undef __FUNC__ 01217 #define __FUNC__ "find_all_neighbors_unsym_private" 01218 void 01219 find_all_neighbors_unsym_private (SubdomainGraph_dh s, int m, void *A) 01220 { 01221 START_FUNC_DH int i, j, row, beg_row, end_row; 01222 int *marker; 01223 int *cval, len, idx = 0; 01224 int nz, *nabors = s->allNabors, *myNabors; 01225 01226 myNabors = (int *) MALLOC_DH (np_dh * sizeof (int)); 01227 CHECK_V_ERROR; 01228 marker = (int *) MALLOC_DH (np_dh * sizeof (int)); 01229 CHECK_V_ERROR; 01230 for (i = 0; i < np_dh; ++i) 01231 marker[i] = 0; 01232 01233 SET_INFO 01234 ("finding nabors in subdomain graph for structurally unsymmetric matrix"); 01235 01236 /* loop over this block's boundary rows, finding all nabors in 01237 subdomain graph 01238 */ 01239 beg_row = s->beg_row[myid_dh]; 01240 end_row = beg_row + s->row_count[myid_dh]; 01241 01242 01243 01244 /*for each locally owned row ... */ 01245 for (row = beg_row; row < end_row; ++row) 01246 { 01247 EuclidGetRow (A, row, &len, &cval, NULL); 01248 CHECK_V_ERROR; 01249 for (j = 0; j < len; ++j) 01250 { 01251 int col = cval[j]; 01252 /*for each column that corresponds to a non-locally owned row ... */ 01253 if (col < beg_row || col >= end_row) 01254 { 01255 int owner = SubdomainGraph_dhFindOwner (s, col, false); 01256 CHECK_V_ERROR; 01257 /*if I've not yet done so ... */ 01258 if (!marker[owner]) 01259 { 01260 marker[owner] = 1; 01261 /*append the non-local row's owner in to the list of my nabors 01262 in the subdomain graph */ 01263 myNabors[idx++] = owner; 01264 } 01265 } 01266 } 01267 EuclidRestoreRow (A, row, &len, &cval, NULL); 01268 CHECK_V_ERROR; 01269 } 01270 01271 /* 01272 at this point, idx = the number of my neighbors in the subdomain 01273 graph; equivalently, idx is the number of meaningfull slots in 01274 the myNabors array. -dah 1/31/06 01275 */ 01276 01277 /* 01278 at this point: marker[j] = 0 indicates that processor j is NOT my nabor 01279 marker[j] = 1 indicates that processor j IS my nabor 01280 however, there may be some nabors that can't be discovered in the above loop 01281 "//for each locally owned row;" this can happen if the matrix is 01282 structurally unsymmetric. 01283 -dah 1/31/06 01284 */ 01285 01286 /* fprintf(stderr, "[%i] marker: ", myid_dh); 01287 for (j=0; j<np_dh; j++) { 01288 fprintf(stderr, "[%i] (j=%d) %d\n", myid_dh, j, marker[j]); 01289 } 01290 fprintf(stderr, "\n"); 01291 */ 01292 01293 /* find out who my neighbors are that I cannot discern locally */ 01294 MPI_Alltoall (marker, 1, MPI_INT, nabors, 1, MPI_INT, comm_dh); 01295 CHECK_V_ERROR; 01296 01297 /* add in neighbors that I know about from scanning my adjacency lists */ 01298 for (i = 0; i < idx; ++i) 01299 nabors[myNabors[i]] = 1; 01300 01301 /* remove self from the adjacency list */ 01302 nabors[myid_dh] = 0; 01303 01304 /* 01305 at this point: marker[j] = 0 indicates that processor j is NOT my nabor 01306 marker[j] = 1 indicates that processor j IS my nabor 01307 and this is guaranteed to be complete. 01308 */ 01309 01310 /* form final list of neighboring processors */ 01311 nz = 0; 01312 for (i = 0; i < np_dh; ++i) 01313 { 01314 if (nabors[i]) 01315 myNabors[nz++] = i; 01316 } 01317 s->allCount = nz; 01318 memcpy (nabors, myNabors, nz * sizeof (int)); 01319 01320 if (marker != NULL) 01321 { 01322 FREE_DH (marker); 01323 CHECK_V_ERROR; 01324 } 01325 if (myNabors != NULL) 01326 { 01327 FREE_DH (myNabors); 01328 CHECK_V_ERROR; 01329 } 01330 END_FUNC_DH} 01331 01332 /*================================================================*/ 01333 01334 #undef __FUNC__ 01335 #define __FUNC__ "find_bdry_nodes_sym_private" 01336 void 01337 find_bdry_nodes_sym_private (SubdomainGraph_dh s, int m, void *A, 01338 int *interiorNodes, int *bdryNodes, 01339 int *interiorCount, int *bdryCount) 01340 { 01341 START_FUNC_DH int beg_row = s->beg_row[myid_dh]; 01342 int end_row = beg_row + s->row_count[myid_dh]; 01343 int row, inCt = 0, bdCt = 0; 01344 01345 int j; 01346 int *cval; 01347 01348 /* determine if the row is a boundary row */ 01349 for (row = beg_row; row < end_row; ++row) 01350 { /* for each row in the subdomain */ 01351 bool isBdry = false; 01352 int len; 01353 EuclidGetRow (A, row, &len, &cval, NULL); 01354 CHECK_V_ERROR; 01355 01356 for (j = 0; j < len; ++j) 01357 { /* for each column in the row */ 01358 int col = cval[j]; 01359 if (col < beg_row || col >= end_row) 01360 { 01361 isBdry = true; 01362 break; 01363 } 01364 } 01365 EuclidRestoreRow (A, row, &len, &cval, NULL); 01366 CHECK_V_ERROR; 01367 01368 if (isBdry) 01369 { 01370 bdryNodes[bdCt++] = row - beg_row; 01371 } 01372 else 01373 { 01374 interiorNodes[inCt++] = row - beg_row; 01375 } 01376 } 01377 01378 *interiorCount = inCt; 01379 *bdryCount = bdCt; 01380 01381 END_FUNC_DH} 01382 01383 #define BDRY_NODE_TAG 42 01384 01385 #undef __FUNC__ 01386 #define __FUNC__ "find_bdry_nodes_unsym_private" 01387 void 01388 find_bdry_nodes_unsym_private (SubdomainGraph_dh s, int m, void *A, 01389 int *interiorNodes, int *boundaryNodes, 01390 int *interiorCount, int *bdryCount) 01391 { 01392 START_FUNC_DH int beg_row = s->beg_row[myid_dh]; 01393 int end_row = beg_row + s->row_count[myid_dh]; 01394 int i, j, row, max; 01395 int *cval; 01396 int *list, count; 01397 int *rpIN = NULL, *rpOUT = NULL; 01398 int *sendBuf, *recvBuf; 01399 int *marker, inCt, bdCt; 01400 int *bdryNodes, nz; 01401 int sendCt, recvCt; 01402 MPI_Request *sendReq, *recvReq; 01403 MPI_Status *status; 01404 SortedSet_dh ss; 01405 01406 SortedSet_dhCreate (&ss, m); 01407 CHECK_V_ERROR; 01408 01409 /*----------------------------------------------------- 01410 * identify all boundary nodes possible using locally 01411 * owned adjacency lists 01412 *-----------------------------------------------------*/ 01413 for (row = beg_row; row < end_row; ++row) 01414 { 01415 bool isBdry = false; 01416 int len; 01417 EuclidGetRow (A, row, &len, &cval, NULL); 01418 CHECK_V_ERROR; 01419 01420 for (j = 0; j < len; ++j) 01421 { 01422 int col = cval[j]; 01423 if (col < beg_row || col >= end_row) 01424 { 01425 isBdry = true; /* this row is a boundary node */ 01426 SortedSet_dhInsert (ss, col); 01427 CHECK_V_ERROR; 01428 /* the row "col" is also a boundary node */ 01429 } 01430 } 01431 EuclidRestoreRow (A, row, &len, &cval, NULL); 01432 CHECK_V_ERROR; 01433 01434 if (isBdry) 01435 { 01436 SortedSet_dhInsert (ss, row); 01437 CHECK_V_ERROR; 01438 } 01439 } 01440 01441 /*----------------------------------------------------- 01442 * scan the sorted list to determine what boundary 01443 * node information to send to whom 01444 *-----------------------------------------------------*/ 01445 sendBuf = (int *) MALLOC_DH (np_dh * sizeof (int)); 01446 CHECK_V_ERROR; 01447 recvBuf = (int *) MALLOC_DH (np_dh * sizeof (int)); 01448 CHECK_V_ERROR; 01449 rpOUT = (int *) MALLOC_DH ((np_dh + 1) * sizeof (int)); 01450 CHECK_V_ERROR; 01451 rpOUT[0] = 0; 01452 for (i = 0; i < np_dh; ++i) 01453 sendBuf[i] = 0; 01454 01455 sendCt = 0; /* total number of processor to whom we will send lists */ 01456 SortedSet_dhGetList (ss, &list, &count); 01457 CHECK_V_ERROR; 01458 01459 for (i = 0; i < count; /* i is set below */ ) 01460 { 01461 int node = list[i]; 01462 int owner; 01463 int last; 01464 01465 owner = SubdomainGraph_dhFindOwner (s, node, false); 01466 CHECK_V_ERROR; 01467 last = s->beg_row[owner] + s->row_count[owner]; 01468 01469 /* determine the other boundary nodes that belong to owner */ 01470 while ((i < count) && (list[i] < last)) 01471 ++i; 01472 ++sendCt; 01473 rpOUT[sendCt] = i; 01474 sendBuf[owner] = rpOUT[sendCt] - rpOUT[sendCt - 1]; 01475 01476 } 01477 01478 /*----------------------------------------------------- 01479 * processors tell each other how much information 01480 * each will send to whom 01481 *-----------------------------------------------------*/ 01482 MPI_Alltoall (sendBuf, 1, MPI_INT, recvBuf, 1, MPI_INT, comm_dh); 01483 CHECK_V_ERROR; 01484 01485 /*----------------------------------------------------- 01486 * exchange boundary node information 01487 * (note that we also exchange information with ourself!) 01488 *-----------------------------------------------------*/ 01489 01490 /* first, set up data structures to hold incoming information */ 01491 rpIN = (int *) MALLOC_DH ((np_dh + 1) * sizeof (int)); 01492 CHECK_V_ERROR; 01493 rpIN[0] = 0; 01494 nz = 0; 01495 recvCt = 0; 01496 for (i = 0; i < np_dh; ++i) 01497 { 01498 if (recvBuf[i]) 01499 { 01500 ++recvCt; 01501 nz += recvBuf[i]; 01502 rpIN[recvCt] = nz; 01503 } 01504 } 01505 bdryNodes = (int *) MALLOC_DH (nz * sizeof (int)); 01506 CHECK_V_ERROR; 01507 sendReq = (MPI_Request *) MALLOC_DH (sendCt * sizeof (MPI_Request)); 01508 CHECK_V_ERROR; 01509 recvReq = (MPI_Request *) MALLOC_DH (recvCt * sizeof (MPI_Request)); 01510 CHECK_V_ERROR; 01511 max = MAX (sendCt, recvCt); 01512 status = (MPI_Status *) MALLOC_DH (max * sizeof (MPI_Status)); 01513 CHECK_V_ERROR; 01514 01515 /* second, start receives for incoming data */ 01516 j = 0; 01517 for (i = 0; i < np_dh; ++i) 01518 { 01519 if (recvBuf[i]) 01520 { 01521 MPI_Irecv (bdryNodes + rpIN[j], recvBuf[i], MPI_INT, 01522 i, BDRY_NODE_TAG, comm_dh, recvReq + j); 01523 ++j; 01524 } 01525 } 01526 01527 /* third, start sends for outgoing data */ 01528 j = 0; 01529 for (i = 0; i < np_dh; ++i) 01530 { 01531 if (sendBuf[i]) 01532 { 01533 MPI_Isend (list + rpOUT[j], sendBuf[i], MPI_INT, 01534 i, BDRY_NODE_TAG, comm_dh, sendReq + j); 01535 ++j; 01536 } 01537 } 01538 01539 /* fourth, wait for all comms to finish */ 01540 MPI_Waitall (sendCt, sendReq, status); 01541 MPI_Waitall (recvCt, recvReq, status); 01542 01543 /* fifth, convert from global to local indices */ 01544 for (i = 0; i < nz; ++i) 01545 bdryNodes[i] -= beg_row; 01546 01547 /*----------------------------------------------------- 01548 * consolidate information from all processors to 01549 * identify all local boundary nodes 01550 *-----------------------------------------------------*/ 01551 marker = (int *) MALLOC_DH (m * sizeof (int)); 01552 CHECK_V_ERROR; 01553 for (i = 0; i < m; ++i) 01554 marker[i] = 0; 01555 for (i = 0; i < nz; ++i) 01556 marker[bdryNodes[i]] = 1; 01557 01558 inCt = bdCt = 0; 01559 for (i = 0; i < m; ++i) 01560 { 01561 if (marker[i]) 01562 { 01563 boundaryNodes[bdCt++] = i; 01564 } 01565 else 01566 { 01567 interiorNodes[inCt++] = i; 01568 } 01569 } 01570 *interiorCount = inCt; 01571 *bdryCount = bdCt; 01572 01573 /*----------------------------------------------------- 01574 * clean up 01575 *-----------------------------------------------------*/ 01576 SortedSet_dhDestroy (ss); 01577 CHECK_V_ERROR; 01578 if (rpIN != NULL) 01579 { 01580 FREE_DH (rpIN); 01581 CHECK_V_ERROR; 01582 } 01583 if (rpOUT != NULL) 01584 { 01585 FREE_DH (rpOUT); 01586 CHECK_V_ERROR; 01587 } 01588 if (sendBuf != NULL) 01589 { 01590 FREE_DH (sendBuf); 01591 CHECK_V_ERROR; 01592 } 01593 if (recvBuf != NULL) 01594 { 01595 FREE_DH (recvBuf); 01596 CHECK_V_ERROR; 01597 } 01598 if (bdryNodes != NULL) 01599 { 01600 FREE_DH (bdryNodes); 01601 CHECK_V_ERROR; 01602 } 01603 if (marker != NULL) 01604 { 01605 FREE_DH (marker); 01606 CHECK_V_ERROR; 01607 } 01608 if (sendReq != NULL) 01609 { 01610 FREE_DH (sendReq); 01611 CHECK_V_ERROR; 01612 } 01613 if (recvReq != NULL) 01614 { 01615 FREE_DH (recvReq); 01616 CHECK_V_ERROR; 01617 } 01618 if (status != NULL) 01619 { 01620 FREE_DH (status); 01621 CHECK_V_ERROR; 01622 } 01623 END_FUNC_DH} 01624 01625 01626 #undef __FUNC__ 01627 #define __FUNC__ "find_ordered_neighbors_private" 01628 void 01629 find_ordered_neighbors_private (SubdomainGraph_dh s) 01630 { 01631 START_FUNC_DH int *loNabors = s->loNabors; 01632 int *hiNabors = s->hiNabors; 01633 int *allNabors = s->allNabors, allCount = s->allCount; 01634 int loCt = 0, hiCt = 0; 01635 int *o2n = s->o2n_sub; 01636 int i, myNewId = o2n[myid_dh]; 01637 01638 for (i = 0; i < allCount; ++i) 01639 { 01640 int nabor = allNabors[i]; 01641 if (o2n[nabor] < myNewId) 01642 { 01643 loNabors[loCt++] = nabor; 01644 } 01645 else 01646 { 01647 hiNabors[hiCt++] = nabor; 01648 } 01649 } 01650 01651 s->loCount = loCt; 01652 s->hiCount = hiCt; 01653 END_FUNC_DH} 01654 01655 01656 #undef __FUNC__ 01657 #define __FUNC__ "color_subdomain_graph_private" 01658 void 01659 color_subdomain_graph_private (SubdomainGraph_dh s) 01660 { 01661 START_FUNC_DH int i, n = np_dh; 01662 int *rp = s->ptrs, *cval = s->adj; 01663 int j, *marker, thisNodesColor, *colorCounter; 01664 int *o2n = s->o2n_sub; 01665 int *color = s->colorVec; 01666 01667 if (np_dh == 1) 01668 n = s->blocks; 01669 01670 marker = (int *) MALLOC_DH ((n + 1) * sizeof (int)); 01671 colorCounter = (int *) MALLOC_DH ((n + 1) * sizeof (int)); 01672 for (i = 0; i <= n; ++i) 01673 { 01674 marker[i] = -1; 01675 colorCounter[i] = 0; 01676 } 01677 01678 /*------------------------------------------------------------------ 01679 * color the nodes 01680 *------------------------------------------------------------------*/ 01681 for (i = 0; i < n; ++i) 01682 { /* color node "i" */ 01683 /* mark colors of "i"s nabors as unavailable; 01684 only need to mark nabors that are (per the input ordering) 01685 numbered less than "i." 01686 */ 01687 for (j = rp[i]; j < rp[i + 1]; ++j) 01688 { 01689 int nabor = cval[j]; 01690 if (nabor < i) 01691 { 01692 int naborsColor = color[nabor]; 01693 marker[naborsColor] = i; 01694 } 01695 } 01696 01697 /* assign vertex i the "smallest" possible color */ 01698 thisNodesColor = -1; 01699 for (j = 0; j < n; ++j) 01700 { 01701 if (marker[j] != i) 01702 { 01703 thisNodesColor = j; 01704 break; 01705 } 01706 } 01707 color[i] = thisNodesColor; 01708 colorCounter[1 + thisNodesColor] += 1; 01709 } 01710 01711 /*------------------------------------------------------------------ 01712 * build ordering vector; if two nodes are similarly colored, 01713 * they will have the same relative ordering as before. 01714 *------------------------------------------------------------------*/ 01715 /* prefix-sum to find lowest-numbered node for each color */ 01716 for (i = 1; i < n; ++i) 01717 { 01718 if (colorCounter[i] == 0) 01719 break; 01720 colorCounter[i] += colorCounter[i - 1]; 01721 } 01722 01723 for (i = 0; i < n; ++i) 01724 { 01725 o2n[i] = colorCounter[color[i]]; 01726 colorCounter[color[i]] += 1; 01727 } 01728 01729 /* invert permutation */ 01730 invert_perm (n, s->o2n_sub, s->n2o_sub); 01731 CHECK_V_ERROR; 01732 01733 01734 /*------------------------------------------------------------------ 01735 * count the number of colors used 01736 *------------------------------------------------------------------*/ 01737 { 01738 int ct = 0; 01739 for (j = 0; j < n; ++j) 01740 { 01741 if (marker[j] == -1) 01742 break; 01743 ++ct; 01744 } 01745 s->colors = ct; 01746 } 01747 01748 01749 /*------------------------------------------------------------------ 01750 * (re)build the beg_rowP array 01751 *------------------------------------------------------------------*/ 01752 { 01753 int sum = 0; 01754 for (i = 0; i < n; ++i) 01755 { 01756 int old = s->n2o_sub[i]; 01757 s->beg_rowP[old] = sum; 01758 sum += s->row_count[old]; 01759 } 01760 } 01761 01762 FREE_DH (marker); 01763 CHECK_V_ERROR; 01764 FREE_DH (colorCounter); 01765 CHECK_V_ERROR; 01766 END_FUNC_DH} 01767 01768 01769 #undef __FUNC__ 01770 #define __FUNC__ "SubdomainGraph_dhDump" 01771 void 01772 SubdomainGraph_dhDump (SubdomainGraph_dh s, char *filename) 01773 { 01774 START_FUNC_DH int i; 01775 int sCt = np_dh; 01776 FILE *fp; 01777 01778 if (np_dh == 1) 01779 sCt = s->blocks; 01780 01781 01782 /* --------------------------------------------------------- 01783 * for seq and par runs, 1st processor prints information 01784 * that is common to all processors 01785 * ---------------------------------------------------------*/ 01786 fp = openFile_dh (filename, "w"); 01787 CHECK_V_ERROR; 01788 01789 /* write subdomain ordering permutations */ 01790 fprintf (fp, "----- colors used\n"); 01791 fprintf (fp, "%i\n", s->colors); 01792 if (s->colorVec == NULL) 01793 { 01794 fprintf (fp, "s->colorVec == NULL\n"); 01795 } 01796 else 01797 { 01798 fprintf (fp, "----- colorVec\n"); 01799 for (i = 0; i < sCt; ++i) 01800 { 01801 fprintf (fp, "%i ", s->colorVec[i]); 01802 } 01803 fprintf (fp, "\n"); 01804 } 01805 01806 if (s->o2n_sub == NULL || s->o2n_sub == NULL) 01807 { 01808 fprintf (fp, "s->o2n_sub == NULL || s->o2n_sub == NULL\n"); 01809 } 01810 else 01811 { 01812 fprintf (fp, "----- o2n_sub\n"); 01813 for (i = 0; i < sCt; ++i) 01814 { 01815 fprintf (fp, "%i ", s->o2n_sub[i]); 01816 } 01817 fprintf (fp, "\n"); 01818 fprintf (fp, "----- n2o_sub\n"); 01819 for (i = 0; i < sCt; ++i) 01820 { 01821 fprintf (fp, "%i ", s->n2o_sub[i]); 01822 } 01823 fprintf (fp, "\n"); 01824 } 01825 01826 /* write begin row arrays */ 01827 if (s->beg_row == NULL || s->beg_rowP == NULL) 01828 { 01829 fprintf (fp, "s->beg_row == NULL || s->beg_rowP == NULL\n"); 01830 } 01831 else 01832 { 01833 fprintf (fp, "----- beg_row\n"); 01834 for (i = 0; i < sCt; ++i) 01835 { 01836 fprintf (fp, "%i ", 1 + s->beg_row[i]); 01837 } 01838 fprintf (fp, "\n"); 01839 fprintf (fp, "----- beg_rowP\n"); 01840 for (i = 0; i < sCt; ++i) 01841 { 01842 fprintf (fp, "%i ", 1 + s->beg_rowP[i]); 01843 } 01844 fprintf (fp, "\n"); 01845 } 01846 01847 /* write row count and bdry count arrays */ 01848 if (s->row_count == NULL || s->bdry_count == NULL) 01849 { 01850 fprintf (fp, "s->row_count == NULL || s->bdry_count == NULL\n"); 01851 } 01852 else 01853 { 01854 fprintf (fp, "----- row_count\n"); 01855 for (i = 0; i < sCt; ++i) 01856 { 01857 fprintf (fp, "%i ", s->row_count[i]); 01858 } 01859 fprintf (fp, "\n"); 01860 fprintf (fp, "----- bdry_count\n"); 01861 for (i = 0; i < sCt; ++i) 01862 { 01863 fprintf (fp, "%i ", s->bdry_count[i]); 01864 } 01865 fprintf (fp, "\n"); 01866 01867 } 01868 01869 /* write subdomain graph */ 01870 if (s->ptrs == NULL || s->adj == NULL) 01871 { 01872 fprintf (fp, "s->ptrs == NULL || s->adj == NULL\n"); 01873 } 01874 else 01875 { 01876 int j; 01877 int ct; 01878 fprintf (fp, "----- subdomain graph\n"); 01879 for (i = 0; i < sCt; ++i) 01880 { 01881 fprintf (fp, "%i :: ", i); 01882 ct = s->ptrs[i + 1] - s->ptrs[i]; 01883 if (ct) 01884 { 01885 shellSort_int (ct, s->adj + s->ptrs[i]); 01886 CHECK_V_ERROR; 01887 } 01888 for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j) 01889 { 01890 fprintf (fp, "%i ", s->adj[j]); 01891 } 01892 fprintf (fp, "\n"); 01893 } 01894 } 01895 closeFile_dh (fp); 01896 CHECK_V_ERROR; 01897 01898 /* --------------------------------------------------------- 01899 * next print info that differs across processors for par 01900 * trials. deal with this as two cases: seq and par 01901 * ---------------------------------------------------------*/ 01902 if (s->beg_rowP == NULL) 01903 { 01904 SET_V_ERROR ("s->beg_rowP == NULL; can't continue"); 01905 } 01906 if (s->row_count == NULL) 01907 { 01908 SET_V_ERROR ("s->row_count == NULL; can't continue"); 01909 } 01910 if (s->o2n_sub == NULL) 01911 { 01912 SET_V_ERROR ("s->o2n_sub == NULL; can't continue"); 01913 } 01914 01915 01916 if (np_dh == 1) 01917 { 01918 fp = openFile_dh (filename, "a"); 01919 CHECK_V_ERROR; 01920 01921 /* write n2o_row and o2n_col */ 01922 if (s->n2o_row == NULL || s->o2n_col == NULL) 01923 { 01924 fprintf (fp, "s->n2o_row == NULL|| s->o2n_col == NULL\n"); 01925 } 01926 else 01927 { 01928 fprintf (fp, "----- n2o_row\n"); 01929 for (i = 0; i < s->m; ++i) 01930 { 01931 fprintf (fp, "%i ", 1 + s->n2o_row[i]); 01932 } 01933 fprintf (fp, "\n"); 01934 01935 #if 0 01936 /* 01937 note: this won't match the parallel case, since 01938 parallel permutation vecs are zero-based and purely 01939 local 01940 */ 01941 01942 fprintf (fp, "----- o2n_col\n"); 01943 for (i = 0; i < sCt; ++i) 01944 { 01945 int br = s->beg_row[i]; 01946 int er = br + s->row_count[i]; 01947 01948 for (j = br; j < er; ++j) 01949 { 01950 fprintf (fp, "%i ", 1 + s->o2n_col[j]); 01951 } 01952 fprintf (fp, "\n"); 01953 } 01954 fprintf (fp, "\n"); 01955 01956 #endif 01957 01958 } 01959 closeFile_dh (fp); 01960 CHECK_V_ERROR; 01961 } 01962 01963 /* parallel case */ 01964 else 01965 { 01966 int id = s->n2o_sub[myid_dh]; 01967 int m = s->m; 01968 int pe; 01969 int beg_row = 0; 01970 if (s->beg_row != 0) 01971 beg_row = s->beg_row[myid_dh]; 01972 01973 /* write n2o_row */ 01974 for (pe = 0; pe < np_dh; ++pe) 01975 { 01976 MPI_Barrier (comm_dh); 01977 if (id == pe) 01978 { 01979 fp = openFile_dh (filename, "a"); 01980 CHECK_V_ERROR; 01981 if (id == 0) 01982 fprintf (fp, "----- n2o_row\n"); 01983 01984 for (i = 0; i < m; ++i) 01985 { 01986 fprintf (fp, "%i ", 1 + s->n2o_row[i] + beg_row); 01987 } 01988 if (id == np_dh - 1) 01989 fprintf (fp, "\n"); 01990 closeFile_dh (fp); 01991 CHECK_V_ERROR; 01992 } 01993 } 01994 01995 #if 0 01996 01997 /* write o2n_col */ 01998 for (pe = 0; pe < np_dh; ++pe) 01999 { 02000 MPI_Barrier (comm_dh); 02001 if (myid_dh == pe) 02002 { 02003 fp = openFile_dh (filename, "a"); 02004 CHECK_V_ERROR; 02005 if (myid_dh == 0) 02006 fprintf (fp, "----- o2n_col\n"); 02007 02008 for (i = 0; i < m; ++i) 02009 { 02010 fprintf (fp, "%i ", 1 + s->o2n_col[i] + beg_row); 02011 } 02012 fprintf (fp, "\n"); 02013 02014 if (myid_dh == np_dh - 1) 02015 fprintf (fp, "\n"); 02016 02017 closeFile_dh (fp); 02018 CHECK_V_ERROR; 02019 } 02020 } 02021 02022 #endif 02023 02024 } 02025 02026 END_FUNC_DH} 02027 02028 02029 02030 #undef __FUNC__ 02031 #define __FUNC__ "find_bdry_nodes_seq_private" 02032 void 02033 find_bdry_nodes_seq_private (SubdomainGraph_dh s, int m, void *A) 02034 { 02035 START_FUNC_DH int i, j, row, blocks = s->blocks; 02036 int *cval, *tmp; 02037 02038 tmp = (int *) MALLOC_DH (m * sizeof (int)); 02039 CHECK_V_ERROR; 02040 for (i = 0; i < m; ++i) 02041 tmp[i] = 0; 02042 02043 /*------------------------------------------ 02044 * mark all boundary nodes 02045 *------------------------------------------ */ 02046 for (i = 0; i < blocks; ++i) 02047 { 02048 int beg_row = s->beg_row[i]; 02049 int end_row = beg_row + s->row_count[i]; 02050 02051 for (row = beg_row; row < end_row; ++row) 02052 { 02053 bool isBdry = false; 02054 int len; 02055 EuclidGetRow (A, row, &len, &cval, NULL); 02056 CHECK_V_ERROR; 02057 02058 for (j = 0; j < len; ++j) 02059 { /* for each column in the row */ 02060 int col = cval[j]; 02061 02062 if (col < beg_row || col >= end_row) 02063 { 02064 tmp[col] = 1; 02065 isBdry = true; 02066 } 02067 } 02068 if (isBdry) 02069 tmp[row] = 1; 02070 EuclidRestoreRow (A, row, &len, &cval, NULL); 02071 CHECK_V_ERROR; 02072 } 02073 } 02074 02075 /*------------------------------------------ 02076 * fill in the bdry_count[] array 02077 *------------------------------------------ */ 02078 for (i = 0; i < blocks; ++i) 02079 { 02080 int beg_row = s->beg_row[i]; 02081 int end_row = beg_row + s->row_count[i]; 02082 int ct = 0; 02083 for (row = beg_row; row < end_row; ++row) 02084 { 02085 if (tmp[row]) 02086 ++ct; 02087 } 02088 s->bdry_count[i] = ct; 02089 } 02090 02091 /*------------------------------------------ 02092 * form the o2n_col[] permutation 02093 *------------------------------------------ */ 02094 for (i = 0; i < blocks; ++i) 02095 { 02096 int beg_row = s->beg_row[i]; 02097 int end_row = beg_row + s->row_count[i]; 02098 int interiorIDX = beg_row; 02099 int bdryIDX = end_row - s->bdry_count[i]; 02100 02101 for (row = beg_row; row < end_row; ++row) 02102 { 02103 if (tmp[row]) 02104 { 02105 s->o2n_col[row] = bdryIDX++; 02106 } 02107 else 02108 { 02109 s->o2n_col[row] = interiorIDX++; 02110 } 02111 } 02112 } 02113 02114 /* invert permutation */ 02115 invert_perm (m, s->o2n_col, s->n2o_row); 02116 CHECK_V_ERROR; 02117 FREE_DH (tmp); 02118 CHECK_V_ERROR; 02119 02120 END_FUNC_DH} 02121 02122 #undef __FUNC__ 02123 #define __FUNC__ "SubdomainGraph_dhPrintSubdomainGraph" 02124 void 02125 SubdomainGraph_dhPrintSubdomainGraph (SubdomainGraph_dh s, FILE * fp) 02126 { 02127 START_FUNC_DH if (myid_dh == 0) 02128 { 02129 int i, j; 02130 02131 fprintf (fp, 02132 "\n-----------------------------------------------------\n"); 02133 fprintf (fp, "SubdomainGraph, and coloring and ordering information\n"); 02134 fprintf (fp, "-----------------------------------------------------\n"); 02135 fprintf (fp, "colors used: %i\n", s->colors); 02136 02137 fprintf (fp, "o2n ordering vector: "); 02138 for (i = 0; i < s->blocks; ++i) 02139 fprintf (fp, "%i ", s->o2n_sub[i]); 02140 02141 fprintf (fp, "\ncoloring vector (node, color): \n"); 02142 for (i = 0; i < s->blocks; ++i) 02143 fprintf (fp, " %i, %i\n", i, s->colorVec[i]); 02144 02145 fprintf (fp, "\n"); 02146 fprintf (fp, "Adjacency lists:\n"); 02147 02148 for (i = 0; i < s->blocks; ++i) 02149 { 02150 fprintf (fp, " P_%i :: ", i); 02151 for (j = s->ptrs[i]; j < s->ptrs[i + 1]; ++j) 02152 { 02153 fprintf (fp, "%i ", s->adj[j]); 02154 } 02155 fprintf (fp, "\n"); 02156 } 02157 fprintf (fp, "-----------------------------------------------------\n"); 02158 } 02159 END_FUNC_DH} 02160 02161 02162 #undef __FUNC__ 02163 #define __FUNC__ "adjust_matrix_perms_private" 02164 void 02165 adjust_matrix_perms_private (SubdomainGraph_dh s, int m) 02166 { 02167 START_FUNC_DH int i, j, blocks = s->blocks; 02168 int *o2n = s->o2n_col; 02169 02170 for (i = 0; i < blocks; ++i) 02171 { 02172 int beg_row = s->beg_row[i]; 02173 int end_row = beg_row + s->row_count[i]; 02174 int adjust = s->beg_rowP[i] - s->beg_row[i]; 02175 for (j = beg_row; j < end_row; ++j) 02176 o2n[j] += adjust; 02177 } 02178 02179 invert_perm (m, s->o2n_col, s->n2o_row); 02180 CHECK_V_ERROR; 02181 END_FUNC_DH} 02182 02183 #undef __FUNC__ 02184 #define __FUNC__ "SubdomainGraph_dhPrintRatios" 02185 void 02186 SubdomainGraph_dhPrintRatios (SubdomainGraph_dh s, FILE * fp) 02187 { 02188 START_FUNC_DH int i; 02189 int blocks = np_dh; 02190 double ratio[25]; 02191 02192 if (myid_dh == 0) 02193 { 02194 if (np_dh == 1) 02195 blocks = s->blocks; 02196 if (blocks > 25) 02197 blocks = 25; 02198 02199 fprintf (fp, "\n"); 02200 fprintf (fp, "Subdomain interior/boundary node ratios\n"); 02201 fprintf (fp, "---------------------------------------\n"); 02202 02203 /* compute ratios */ 02204 for (i = 0; i < blocks; ++i) 02205 { 02206 if (s->bdry_count[i] == 0) 02207 { 02208 ratio[i] = -1; 02209 } 02210 else 02211 { 02212 ratio[i] = 02213 (double) (s->row_count[i] - 02214 s->bdry_count[i]) / (double) s->bdry_count[i]; 02215 } 02216 } 02217 02218 /* sort ratios */ 02219 shellSort_float (blocks, ratio); 02220 02221 /* print ratios */ 02222 if (blocks <= 20) 02223 { /* print all ratios */ 02224 int j = 0; 02225 for (i = 0; i < blocks; ++i) 02226 { 02227 fprintf (fp, "%0.2g ", ratio[i]); 02228 ++j; 02229 if (j == 10) 02230 { 02231 fprintf (fp, "\n"); 02232 } 02233 } 02234 fprintf (fp, "\n"); 02235 } 02236 else 02237 { /* print 10 largest and 10 smallest ratios */ 02238 fprintf (fp, "10 smallest ratios: "); 02239 for (i = 0; i < 10; ++i) 02240 { 02241 fprintf (fp, "%0.2g ", ratio[i]); 02242 } 02243 fprintf (fp, "\n"); 02244 fprintf (fp, "10 largest ratios: "); 02245 { 02246 int start = blocks - 6, stop = blocks - 1; 02247 for (i = start; i < stop; ++i) 02248 { 02249 fprintf (fp, "%0.2g ", ratio[i]); 02250 } 02251 fprintf (fp, "\n"); 02252 } 02253 } 02254 } 02255 02256 END_FUNC_DH} 02257 02258 02259 #undef __FUNC__ 02260 #define __FUNC__ "SubdomainGraph_dhPrintStats" 02261 void 02262 SubdomainGraph_dhPrintStats (SubdomainGraph_dh sg, FILE * fp) 02263 { 02264 START_FUNC_DH double *timing = sg->timing; 02265 02266 fprintf_dh (fp, "\nSubdomainGraph timing report\n"); 02267 fprintf_dh (fp, "-----------------------------\n"); 02268 fprintf_dh (fp, "total setup time: %0.2f\n", timing[TOTAL_SGT]); 02269 fprintf_dh (fp, " find neighbors in subdomain graph: %0.2f\n", 02270 timing[FIND_NABORS_SGT]); 02271 fprintf_dh (fp, " locally order interiors and bdry: %0.2f\n", 02272 timing[ORDER_BDRY_SGT]); 02273 fprintf_dh (fp, " form and color subdomain graph: %0.2f\n", 02274 timing[FORM_GRAPH_SGT]); 02275 fprintf_dh (fp, " exchange bdry permutations: %0.2f\n", 02276 timing[EXCHANGE_PERMS_SGT]); 02277 fprintf_dh (fp, " everything else (should be small): %0.2f\n", 02278 timing[TOTAL_SGT] - (timing[FIND_NABORS_SGT] + 02279 timing[ORDER_BDRY_SGT] + 02280 timing[FORM_GRAPH_SGT] + 02281 timing[EXCHANGE_PERMS_SGT])); 02282 END_FUNC_DH}
1.7.4