|
IFPACK Development
|
00001 /*BHEADER********************************************************************** 00002 * (c) 2002 The Regents of the University of California 00003 * 00004 * See the file COPYRIGHT_and_DISCLAIMER for a complete copyright 00005 * notice, contact person, and disclaimer. 00006 * 00007 * $Revision$ 00008 *********************************************************************EHEADER*/ 00009 00010 /* 00011 * Crout-form incomplete factorization 00012 * 00013 * To create a Matlab interface for icrout_cholesky_mex, define the symbol 00014 * MATLAB and compile this file as a Matlab mex program. 00015 * 00016 * Contact Info for this code 00017 * -------------------------- 00018 * Edmond Chow, Lawrence Livermore National Laboratory, echow@llnl.gov 00019 * 00020 * Revision History 00021 * ---------------- 00022 * 06/04/02 Cleaned up for Boeing 00023 * 11/06/02 Added icrout_cholesky_mex 00024 */ 00025 00026 #undef IFPACK 00027 00028 #define SYMSTR 1 /* structurally symmetric version */ 00029 00030 #ifdef MATLAB 00031 00032 #include "matrix.h" 00033 #include "mex.h" 00034 #define malloc mxMalloc 00035 #define calloc mxCalloc 00036 #define realloc mxRealloc 00037 #define free mxFree 00038 #define printf mexPrintf 00039 00040 #else 00041 00042 #include <stdio.h> 00043 00044 #endif 00045 00046 #include <stdlib.h> 00047 #include <math.h> 00048 #include <assert.h> 00049 00050 #define MIN(a,b) ((a)<=(b) ? (a) : (b)) 00051 #define MAX(a,b) ((a)>=(b) ? (a) : (b)) 00052 #define ABS(a) ((a)>=0 ? (a) : -(a)) 00053 00054 /* 00055 * Data structure for sparse matrices is CSR, 0-based indexing. 00056 */ 00057 typedef struct { 00058 double *val; /* also known as A */ 00059 int *col; /* also known as JA; first column is column 0 */ 00060 int *ptr; /* also known as IA; with ptr[0] = 0 */ 00061 } Matrix; 00062 00063 void ifpack_quicksort (int *const pbase, double *const daux, size_t total_elems); 00064 00065 #define SHORTCUT(p, a, ja, ia) \ 00066 (a) = (p)->val; \ 00067 (ja) = (p)->col; \ 00068 (ia) = (p)->ptr; 00069 00070 #define MATNULL(p) \ 00071 (p).val = NULL; \ 00072 (p).col = NULL; \ 00073 (p).ptr = NULL; 00074 00075 void Matrix_alloc(Matrix *a, int n, int nnz) 00076 { 00077 a->val = (double *) malloc(nnz * sizeof(double)); 00078 a->col = (int *) malloc(nnz * sizeof(int)); 00079 a->ptr = (int *) malloc((n+1) * sizeof(int)); 00080 } 00081 00082 void Matrix_dealloc(Matrix *a) 00083 { 00084 free(a->val); 00085 free(a->col); 00086 free(a->ptr); 00087 a->val = NULL; 00088 a->col = NULL; 00089 a->ptr = NULL; 00090 } 00091 00092 static void qsplit(double *a, int *ind, int n, int ncut) 00093 { 00094 double tmp, abskey; 00095 int itmp, first, last, mid; 00096 int j; 00097 00098 ncut--; 00099 first = 0; 00100 last = n-1; 00101 if (ncut < first || ncut > last) 00102 return; 00103 00104 /* outer loop while mid != ncut */ 00105 while (1) 00106 { 00107 mid = first; 00108 abskey = ABS(a[mid]); 00109 for (j=first+1; j<=last; j++) 00110 { 00111 if (ABS(a[j]) > abskey) 00112 { 00113 mid = mid+1; 00114 /* interchange */ 00115 tmp = a[mid]; 00116 itmp = ind[mid]; 00117 a[mid] = a[j]; 00118 ind[mid] = ind[j]; 00119 a[j] = tmp; 00120 ind[j] = itmp; 00121 } 00122 } 00123 00124 /* interchange */ 00125 tmp = a[mid]; 00126 a[mid] = a[first]; 00127 a[first] = tmp; 00128 itmp = ind[mid]; 00129 ind[mid] = ind[first]; 00130 ind[first] = itmp; 00131 00132 /* test for while loop */ 00133 if (mid == ncut) 00134 return; 00135 00136 if (mid > ncut) 00137 last = mid-1; 00138 else 00139 first = mid+1; 00140 } 00141 } 00142 00143 /* update column k using previous columns */ 00144 /* assumes that column of A resides in the work vector */ 00145 /* this function can also be used to update rows */ 00146 00147 static void update_column( 00148 int k, 00149 const int *ia, const int *ja, const double *a, 00150 const int *ifirst, 00151 const int *ifirst2, 00152 const int *list2, 00153 const double *multipliers, /* the val array of the other factor */ 00154 const double *d, /* diagonal of factorization */ 00155 int *marker, 00156 double *ta, 00157 int *itcol, 00158 int *ptalen) 00159 { 00160 int j, i, isj, iej, row; 00161 int talen, pos; 00162 double mult; 00163 00164 talen = *ptalen; 00165 00166 j = list2[k]; 00167 while (j != -1) 00168 { 00169 /* update column k using column j */ 00170 00171 isj = ifirst[j]; 00172 00173 /* skip first nonzero in column, since it does not contribute */ 00174 /* if symmetric structure */ 00175 /* isj++; */ 00176 00177 /* now do the actual update */ 00178 if (isj != -1) 00179 { 00180 /* multiplier */ 00181 mult = multipliers[ifirst2[j]] * d[j]; 00182 00183 /* section of column used for update */ 00184 iej = ia[j+1]-1; 00185 00186 for (i=isj; i<=iej; i++) 00187 { 00188 row = ja[i]; 00189 00190 /* if nonsymmetric structure */ 00191 if (row <= k) 00192 continue; 00193 00194 if ((pos = marker[row]) != -1) 00195 { 00196 /* already in pattern of working row */ 00197 ta[pos] -= mult*a[i]; 00198 } 00199 else 00200 { 00201 /* not yet in pattern of working row */ 00202 itcol[talen] = row; 00203 ta[talen] = -mult*a[i]; 00204 marker[row] = talen++; 00205 } 00206 } 00207 } 00208 00209 j = list2[j]; 00210 } 00211 00212 *ptalen = talen; 00213 } 00214 00215 /* update ifirst and list */ 00216 00217 static void update_lists( 00218 int k, 00219 const int *ia, 00220 const int *ja, 00221 int *ifirst, 00222 int *list) 00223 { 00224 int j, isj, iej, iptr; 00225 00226 j = list[k]; 00227 while (j != -1) 00228 { 00229 isj = ifirst[j]; 00230 iej = ia[j+1]-1; 00231 00232 isj++; 00233 00234 if (isj != 0 && isj <= iej) /* nonsymmetric structure */ 00235 { 00236 /* increment ifirst for column j */ 00237 ifirst[j] = isj; 00238 00239 /* add j to head of list for list[ja[isj]] */ 00240 iptr = j; 00241 j = list[j]; 00242 list[iptr] = list[ja[isj]]; 00243 list[ja[isj]] = iptr; 00244 } 00245 else 00246 { 00247 j = list[j]; 00248 } 00249 } 00250 } 00251 00252 static void update_lists_newcol( 00253 int k, 00254 int isk, 00255 int iptr, 00256 int *ifirst, 00257 int *list) 00258 { 00259 ifirst[k] = isk; 00260 list[k] = list[iptr]; 00261 list[iptr] = k; 00262 } 00263 00264 /* 00265 * crout_ict - Crout version of ICT - Incomplete Cholesky by Threshold 00266 * 00267 * The incomplete factorization L D L^T is computed, 00268 * where L is unit triangular, and D is diagonal 00269 * 00270 * INPUTS 00271 * n = number of rows or columns of the matrices 00272 * AL = unit lower triangular part of A stored by columns 00273 * the unit diagonal is implied (not stored) 00274 * Adiag = diagonal part of A 00275 * droptol = drop tolerance 00276 * lfil = max nonzeros per col in L factor or per row in U factor 00277 * 00278 * OUTPUTS 00279 * L = lower triangular factor stored by columns 00280 * pdiag = diagonal factor stored in an array 00281 * 00282 * NOTE: calling function must free the memory allocated by this 00283 * function, i.e., L, pdiag. 00284 */ 00285 00286 void crout_ict( 00287 int n, 00288 #ifdef IFPACK 00289 void * A, 00290 int maxentries; 00291 int (*getcol)( void * A, int col, int ** nentries, double * val, int * ind), 00292 int (*getdiag)( void *A, double * diag), 00293 #else 00294 const Matrix *AL, 00295 const double *Adiag, 00296 #endif 00297 double droptol, 00298 int lfil, 00299 Matrix *L, 00300 double **pdiag) 00301 { 00302 int k, j, i, index; 00303 int count_l; 00304 double norm_l; 00305 00306 /* work arrays; work_l is list of nonzero values */ 00307 double *work_l = (double *) malloc(n * sizeof(double)); 00308 int *ind_l = (int *) malloc(n * sizeof(int)); 00309 int len_l; 00310 00311 /* list and ifirst data structures */ 00312 int *list_l = (int *) malloc(n * sizeof(int)); 00313 int *ifirst_l = (int *) malloc(n * sizeof(int)); 00314 00315 /* aliases */ 00316 int *marker_l = ifirst_l; 00317 00318 /* matrix arrays */ 00319 double *al; int *jal, *ial; 00320 double *l; int *jl, *il; 00321 00322 int kl = 0; 00323 00324 double *diag = (double *) malloc(n * sizeof(double)); 00325 *pdiag = diag; 00326 00327 Matrix_alloc(L, n, lfil*n); 00328 00329 SHORTCUT(AL, al, jal, ial); 00330 SHORTCUT(L, l, jl, il); 00331 00332 /* initialize work arrays */ 00333 for (i=0; i<n; i++) 00334 { 00335 list_l[i] = -1; 00336 ifirst_l[i] = -1; 00337 marker_l[i] = -1; 00338 } 00339 00340 /* copy the diagonal */ 00341 #ifdef IFPACK 00342 getdiag(A, diag); 00343 #else 00344 for (i=0; i<n; i++) 00345 diag[i] = Adiag[i]; 00346 #endif 00347 00348 /* start off the matrices right */ 00349 il[0] = 0; 00350 00351 /* 00352 * Main loop over columns and rows 00353 */ 00354 00355 for (k=0; k<n; k++) 00356 { 00357 /* 00358 * lower triangular factor update by columns 00359 * (need ifirst for L and lists for U) 00360 */ 00361 00362 /* copy column of A into work vector */ 00363 norm_l = 0.; 00364 #ifdef IFPACK 00365 getcol(A, k, len_l, work_l, ind_l); 00366 for (j=0; j<len_l; j++) 00367 { 00368 norm_l += ABS(work_l[j]); 00369 marker_l[ind_l[j]] = j; 00370 } 00371 #else 00372 len_l = 0; 00373 for (j=ial[k]; j<ial[k+1]; j++) 00374 { 00375 index = jal[j]; 00376 work_l[len_l] = al[j]; 00377 norm_l += ABS(al[j]); 00378 ind_l[len_l] = index; 00379 marker_l[index] = len_l++; /* points into work array */ 00380 } 00381 #endif 00382 norm_l = (len_l == 0) ? 0.0 : norm_l/((double) len_l); 00383 00384 /* update and scale */ 00385 00386 update_column(k, il, jl, l, ifirst_l, ifirst_l, list_l, l, 00387 diag, marker_l, work_l, ind_l, &len_l); 00388 00389 for (j=0; j<len_l; j++) 00390 work_l[j] /= diag[k]; 00391 00392 i = 0; 00393 for (j=0; j<len_l; j++) 00394 { 00395 if (ABS(work_l[j]) < droptol * norm_l) 00396 { 00397 /* zero out marker array for this */ 00398 marker_l[ind_l[j]] = -1; 00399 } 00400 else 00401 { 00402 work_l[i] = work_l[j]; 00403 ind_l[i] = ind_l[j]; 00404 i++; 00405 } 00406 } 00407 len_l = i; 00408 00409 /* 00410 * dropping: for each work vector, perform qsplit, and then 00411 * sort each part by increasing index; then copy each sorted 00412 * part into the factors 00413 */ 00414 00415 count_l = MIN(len_l, lfil); 00416 qsplit(work_l, ind_l, len_l, count_l); 00417 ifpack_quicksort(ind_l, work_l, count_l); 00418 00419 for (j=0; j<count_l; j++) 00420 { 00421 l[kl] = work_l[j]; 00422 jl[kl++] = ind_l[j]; 00423 } 00424 il[k+1] = kl; 00425 00426 /* 00427 * update lists 00428 */ 00429 00430 update_lists(k, il, jl, ifirst_l, list_l); 00431 00432 if (kl - il[k] > SYMSTR) 00433 update_lists_newcol(k, il[k], jl[il[k]], ifirst_l, list_l); 00434 00435 /* zero out the marker arrays */ 00436 for (j=0; j<len_l; j++) 00437 marker_l[ind_l[j]] = -1; 00438 00439 /* 00440 * update the diagonal (after dropping) 00441 */ 00442 00443 for (j=0; j<count_l; j++) 00444 { 00445 index = ind_l[j]; 00446 diag[index] -= work_l[j] * work_l[j] * diag[k]; 00447 } 00448 } 00449 00450 free(work_l); 00451 free(ind_l); 00452 free(list_l); 00453 free(ifirst_l); 00454 } 00455 00456 #ifdef MATLAB 00457 /* 00458 * [l, d] = icrout_cholesky_mex(AL, Adiag, droptol, lfil) 00459 * 00460 * where AL is a sparse matrix (strictly lower triangular part) 00461 * Adiag is a vector 00462 * droptol is a scalar real 00463 * lfil is a scalar integer 00464 * 00465 * d must be converted to a matrix (from a vector) by icrout_cholesky.m 00466 */ 00467 void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 00468 { 00469 int n; 00470 Matrix AL; 00471 double *Adiag; 00472 double droptol; 00473 int lfil; 00474 Matrix L; 00475 double *D; 00476 00477 if (nrhs != 4) 00478 mexErrMsgTxt("mex function called with bad number of arguments"); 00479 00480 n = mxGetN(prhs[0]); 00481 AL.ptr = mxGetJc(prhs[0]); 00482 AL.col = mxGetIr(prhs[0]); 00483 AL.val = mxGetPr(prhs[0]); 00484 Adiag = mxGetPr(prhs[1]); 00485 00486 droptol = (double) *mxGetPr(prhs[2]); 00487 lfil = (int) *mxGetPr(prhs[3]); 00488 00489 crout_ict(n, &AL, Adiag, droptol, lfil, &L, &D); 00490 00491 /* create output matrices */ 00492 00493 /* L */ 00494 plhs[0] = mxCreateSparse(n, n, 1, mxREAL); 00495 mxFree(mxGetJc(plhs[0])); 00496 mxFree(mxGetIr(plhs[0])); 00497 mxFree(mxGetPr(plhs[0])); 00498 mxSetJc(plhs[0], L.ptr); 00499 mxSetIr(plhs[0], L.col); 00500 mxSetPr(plhs[0], L.val); 00501 mxSetNzmax(plhs[0], n*lfil); /* must agree */ 00502 00503 /* D */ 00504 plhs[1] = mxCreateDoubleMatrix(n, 1, mxREAL); 00505 mxFree(mxGetPr(plhs[1])); 00506 mxSetPr(plhs[1], D); 00507 } 00508 #endif
1.7.4