Actual source code: mkl_cpardiso.c
petsc-3.13.0 2020-03-29
2: #include <petscsys.h>
3: #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
4: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
6: #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
7: #define MKL_ILP64
8: #endif
9: #include <mkl.h>
10: #include <mkl_cluster_sparse_solver.h>
12: /*
13: * Possible mkl_cpardiso phases that controls the execution of the solver.
14: * For more information check mkl_cpardiso manual.
15: */
16: #define JOB_ANALYSIS 11
17: #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12
18: #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
19: #define JOB_NUMERICAL_FACTORIZATION 22
20: #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23
21: #define JOB_SOLVE_ITERATIVE_REFINEMENT 33
22: #define JOB_SOLVE_FORWARD_SUBSTITUTION 331
23: #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332
24: #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333
25: #define JOB_RELEASE_OF_LU_MEMORY 0
26: #define JOB_RELEASE_OF_ALL_MEMORY -1
28: #define IPARM_SIZE 64
29: #define INT_TYPE MKL_INT
31: static const char *Err_MSG_CPardiso(int errNo) {
32: switch (errNo) {
33: case -1:
34: return "input inconsistent"; break;
35: case -2:
36: return "not enough memory"; break;
37: case -3:
38: return "reordering problem"; break;
39: case -4:
40: return "zero pivot, numerical factorization or iterative refinement problem"; break;
41: case -5:
42: return "unclassified (internal) error"; break;
43: case -6:
44: return "preordering failed (matrix types 11, 13 only)"; break;
45: case -7:
46: return "diagonal matrix problem"; break;
47: case -8:
48: return "32-bit integer overflow problem"; break;
49: case -9:
50: return "not enough memory for OOC"; break;
51: case -10:
52: return "problems with opening OOC temporary files"; break;
53: case -11:
54: return "read/write problems with the OOC data file"; break;
55: default :
56: return "unknown error";
57: }
58: }
60: /*
61: * Internal data structure.
62: * For more information check mkl_cpardiso manual.
63: */
65: typedef struct {
67: /* Configuration vector */
68: INT_TYPE iparm[IPARM_SIZE];
70: /*
71: * Internal mkl_cpardiso memory location.
72: * After the first call to mkl_cpardiso do not modify pt, as that could cause a serious memory leak.
73: */
74: void *pt[IPARM_SIZE];
76: MPI_Comm comm_mkl_cpardiso;
78: /* Basic mkl_cpardiso info*/
79: INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
81: /* Matrix structure */
82: PetscScalar *a;
84: INT_TYPE *ia, *ja;
86: /* Number of non-zero elements */
87: INT_TYPE nz;
89: /* Row permutaton vector*/
90: INT_TYPE *perm;
92: /* Define is matrix preserve sparce structure. */
93: MatStructure matstruc;
95: PetscErrorCode (*ConvertToTriples)(Mat, MatReuse, PetscInt*, PetscInt**, PetscInt**, PetscScalar**);
97: /* True if mkl_cpardiso function have been used. */
98: PetscBool CleanUp;
99: } Mat_MKL_CPARDISO;
101: /*
102: * Copy the elements of matrix A.
103: * Input:
104: * - Mat A: MATSEQAIJ matrix
105: * - int shift: matrix index.
106: * - 0 for c representation
107: * - 1 for fortran representation
108: * - MatReuse reuse:
109: * - MAT_INITIAL_MATRIX: Create a new aij representation
110: * - MAT_REUSE_MATRIX: Reuse all aij representation and just change values
111: * Output:
112: * - int *nnz: Number of nonzero-elements.
113: * - int **r pointer to i index
114: * - int **c pointer to j elements
115: * - MATRIXTYPE **v: Non-zero elements
116: */
117: PetscErrorCode MatCopy_seqaij_seqaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
118: {
119: Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
122: *v=aa->a;
123: if (reuse == MAT_INITIAL_MATRIX) {
124: *r = (INT_TYPE*)aa->i;
125: *c = (INT_TYPE*)aa->j;
126: *nnz = aa->nz;
127: }
128: return(0);
129: }
131: PetscErrorCode MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
132: {
133: const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
134: PetscErrorCode ierr;
135: PetscInt rstart,nz,i,j,countA,countB;
136: PetscInt *row,*col;
137: const PetscScalar *av, *bv;
138: PetscScalar *val;
139: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
140: Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(mat->A)->data;
141: Mat_SeqAIJ *bb = (Mat_SeqAIJ*)(mat->B)->data;
142: PetscInt colA_start,jB,jcol;
145: ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart=A->rmap->rstart;
146: av=aa->a; bv=bb->a;
148: garray = mat->garray;
150: if (reuse == MAT_INITIAL_MATRIX) {
151: nz = aa->nz + bb->nz;
152: *nnz = nz;
153: PetscMalloc3(m+1,&row,nz,&col,nz,&val);
154: *r = row; *c = col; *v = val;
155: } else {
156: row = *r; col = *c; val = *v;
157: }
159: nz = 0;
160: for (i=0; i<m; i++) {
161: row[i] = nz;
162: countA = ai[i+1] - ai[i];
163: countB = bi[i+1] - bi[i];
164: ajj = aj + ai[i]; /* ptr to the beginning of this row */
165: bjj = bj + bi[i];
167: /* B part, smaller col index */
168: colA_start = rstart + ajj[0]; /* the smallest global col index of A */
169: jB = 0;
170: for (j=0; j<countB; j++) {
171: jcol = garray[bjj[j]];
172: if (jcol > colA_start) break;
173: col[nz] = jcol;
174: val[nz++] = *bv++;
175: }
176: jB = j;
178: /* A part */
179: for (j=0; j<countA; j++) {
180: col[nz] = rstart + ajj[j];
181: val[nz++] = *av++;
182: }
184: /* B part, larger col index */
185: for (j=jB; j<countB; j++) {
186: col[nz] = garray[bjj[j]];
187: val[nz++] = *bv++;
188: }
189: }
190: row[m] = nz;
192: return(0);
193: }
195: PetscErrorCode MatConvertToTriples_mpibaij_mpibaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
196: {
197: const PetscInt *ai, *aj, *bi, *bj,*garray,bs=A->rmap->bs,bs2=bs*bs,m=A->rmap->n/bs,*ajj,*bjj;
198: PetscErrorCode ierr;
199: PetscInt rstart,nz,i,j,countA,countB;
200: PetscInt *row,*col;
201: const PetscScalar *av, *bv;
202: PetscScalar *val;
203: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data;
204: Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data;
205: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data;
206: PetscInt colA_start,jB,jcol;
209: ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart=A->rmap->rstart/bs;
210: av=aa->a; bv=bb->a;
212: garray = mat->garray;
214: if (reuse == MAT_INITIAL_MATRIX) {
215: nz = aa->nz + bb->nz;
216: *nnz = nz;
217: PetscMalloc3(m+1,&row,nz,&col,nz*bs2,&val);
218: *r = row; *c = col; *v = val;
219: } else {
220: row = *r; col = *c; val = *v;
221: }
223: nz = 0;
224: for (i=0; i<m; i++) {
225: row[i] = nz+1;
226: countA = ai[i+1] - ai[i];
227: countB = bi[i+1] - bi[i];
228: ajj = aj + ai[i]; /* ptr to the beginning of this row */
229: bjj = bj + bi[i];
231: /* B part, smaller col index */
232: colA_start = rstart + (countA > 0 ? ajj[0] : 0); /* the smallest global col index of A */
233: jB = 0;
234: for (j=0; j<countB; j++) {
235: jcol = garray[bjj[j]];
236: if (jcol > colA_start) break;
237: col[nz++] = jcol + 1;
238: }
239: jB = j;
240: PetscArraycpy(val,bv,jB*bs2);
241: val += jB*bs2;
242: bv += jB*bs2;
244: /* A part */
245: for (j=0; j<countA; j++) col[nz++] = rstart + ajj[j] + 1;
246: PetscArraycpy(val,av,countA*bs2);
247: val += countA*bs2;
248: av += countA*bs2;
250: /* B part, larger col index */
251: for (j=jB; j<countB; j++) col[nz++] = garray[bjj[j]] + 1;
252: PetscArraycpy(val,bv,(countB-jB)*bs2);
253: val += (countB-jB)*bs2;
254: bv += (countB-jB)*bs2;
255: }
256: row[m] = nz+1;
258: return(0);
259: }
261: PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij_MKL_CPARDISO(Mat A, MatReuse reuse, PetscInt *nnz, PetscInt **r, PetscInt **c, PetscScalar **v)
262: {
263: const PetscInt *ai, *aj, *bi, *bj,*garray,bs=A->rmap->bs,bs2=bs*bs,m=A->rmap->n/bs,*ajj,*bjj;
264: PetscErrorCode ierr;
265: PetscInt rstart,nz,i,j,countA,countB;
266: PetscInt *row,*col;
267: const PetscScalar *av, *bv;
268: PetscScalar *val;
269: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data;
270: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data;
271: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data;
274: ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart=A->rmap->rstart/bs;
275: av=aa->a; bv=bb->a;
277: garray = mat->garray;
279: if (reuse == MAT_INITIAL_MATRIX) {
280: nz = aa->nz + bb->nz;
281: *nnz = nz;
282: PetscMalloc3(m+1,&row,nz,&col,nz*bs2,&val);
283: *r = row; *c = col; *v = val;
284: } else {
285: row = *r; col = *c; val = *v;
286: }
288: nz = 0;
289: for (i=0; i<m; i++) {
290: row[i] = nz+1;
291: countA = ai[i+1] - ai[i];
292: countB = bi[i+1] - bi[i];
293: ajj = aj + ai[i]; /* ptr to the beginning of this row */
294: bjj = bj + bi[i];
296: /* A part */
297: for (j=0; j<countA; j++) col[nz++] = rstart + ajj[j] + 1;
298: PetscArraycpy(val,av,countA*bs2);
299: val += countA*bs2;
300: av += countA*bs2;
302: /* B part, larger col index */
303: for (j=0; j<countB; j++) col[nz++] = garray[bjj[j]] + 1;
304: PetscArraycpy(val,bv,countB*bs2);
305: val += countB*bs2;
306: bv += countB*bs2;
307: }
308: row[m] = nz+1;
310: return(0);
311: }
313: /*
314: * Free memory for Mat_MKL_CPARDISO structure and pointers to objects.
315: */
316: PetscErrorCode MatDestroy_MKL_CPARDISO(Mat A)
317: {
318: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
319: PetscErrorCode ierr;
322: /* Terminate instance, deallocate memories */
323: if (mat_mkl_cpardiso->CleanUp) {
324: mat_mkl_cpardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
326: cluster_sparse_solver (
327: mat_mkl_cpardiso->pt,
328: &mat_mkl_cpardiso->maxfct,
329: &mat_mkl_cpardiso->mnum,
330: &mat_mkl_cpardiso->mtype,
331: &mat_mkl_cpardiso->phase,
332: &mat_mkl_cpardiso->n,
333: NULL,
334: NULL,
335: NULL,
336: mat_mkl_cpardiso->perm,
337: &mat_mkl_cpardiso->nrhs,
338: mat_mkl_cpardiso->iparm,
339: &mat_mkl_cpardiso->msglvl,
340: NULL,
341: NULL,
342: &mat_mkl_cpardiso->comm_mkl_cpardiso,
343: (PetscInt*)&mat_mkl_cpardiso->err);
344: }
346: if (mat_mkl_cpardiso->ConvertToTriples != MatCopy_seqaij_seqaij_MKL_CPARDISO) {
347: PetscFree3(mat_mkl_cpardiso->ia,mat_mkl_cpardiso->ja,mat_mkl_cpardiso->a);
348: }
349: MPI_Comm_free(&(mat_mkl_cpardiso->comm_mkl_cpardiso));
350: PetscFree(A->data);
352: /* clear composed functions */
353: PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
354: PetscObjectComposeFunction((PetscObject)A,"MatMkl_CPardisoSetCntl_C",NULL);
355: return(0);
356: }
358: /*
359: * Computes Ax = b
360: */
361: PetscErrorCode MatSolve_MKL_CPARDISO(Mat A,Vec b,Vec x)
362: {
363: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
364: PetscErrorCode ierr;
365: PetscScalar *xarray;
366: const PetscScalar *barray;
369: mat_mkl_cpardiso->nrhs = 1;
370: VecGetArray(x,&xarray);
371: VecGetArrayRead(b,&barray);
373: /* solve phase */
374: /*-------------*/
375: mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
376: cluster_sparse_solver (
377: mat_mkl_cpardiso->pt,
378: &mat_mkl_cpardiso->maxfct,
379: &mat_mkl_cpardiso->mnum,
380: &mat_mkl_cpardiso->mtype,
381: &mat_mkl_cpardiso->phase,
382: &mat_mkl_cpardiso->n,
383: mat_mkl_cpardiso->a,
384: mat_mkl_cpardiso->ia,
385: mat_mkl_cpardiso->ja,
386: mat_mkl_cpardiso->perm,
387: &mat_mkl_cpardiso->nrhs,
388: mat_mkl_cpardiso->iparm,
389: &mat_mkl_cpardiso->msglvl,
390: (void*)barray,
391: (void*)xarray,
392: &mat_mkl_cpardiso->comm_mkl_cpardiso,
393: (PetscInt*)&mat_mkl_cpardiso->err);
395: if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
397: VecRestoreArray(x,&xarray);
398: VecRestoreArrayRead(b,&barray);
399: mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
400: return(0);
401: }
403: PetscErrorCode MatSolveTranspose_MKL_CPARDISO(Mat A,Vec b,Vec x)
404: {
405: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
406: PetscErrorCode ierr;
409: #if defined(PETSC_USE_COMPLEX)
410: mat_mkl_cpardiso->iparm[12 - 1] = 1;
411: #else
412: mat_mkl_cpardiso->iparm[12 - 1] = 2;
413: #endif
414: MatSolve_MKL_CPARDISO(A,b,x);
415: mat_mkl_cpardiso->iparm[12 - 1] = 0;
416: return(0);
417: }
419: PetscErrorCode MatMatSolve_MKL_CPARDISO(Mat A,Mat B,Mat X)
420: {
421: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(A)->data;
422: PetscErrorCode ierr;
423: PetscScalar *xarray;
424: const PetscScalar *barray;
427: MatGetSize(B,NULL,(PetscInt*)&mat_mkl_cpardiso->nrhs);
429: if (mat_mkl_cpardiso->nrhs > 0) {
430: MatDenseGetArrayRead(B,&barray);
431: MatDenseGetArray(X,&xarray);
433: if (barray == xarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"B and X cannot share the same memory location");
435: /* solve phase */
436: /*-------------*/
437: mat_mkl_cpardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
438: cluster_sparse_solver (
439: mat_mkl_cpardiso->pt,
440: &mat_mkl_cpardiso->maxfct,
441: &mat_mkl_cpardiso->mnum,
442: &mat_mkl_cpardiso->mtype,
443: &mat_mkl_cpardiso->phase,
444: &mat_mkl_cpardiso->n,
445: mat_mkl_cpardiso->a,
446: mat_mkl_cpardiso->ia,
447: mat_mkl_cpardiso->ja,
448: mat_mkl_cpardiso->perm,
449: &mat_mkl_cpardiso->nrhs,
450: mat_mkl_cpardiso->iparm,
451: &mat_mkl_cpardiso->msglvl,
452: (void*)barray,
453: (void*)xarray,
454: &mat_mkl_cpardiso->comm_mkl_cpardiso,
455: (PetscInt*)&mat_mkl_cpardiso->err);
456: if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
457: MatDenseRestoreArrayRead(B,&barray);
458: MatDenseRestoreArray(X,&xarray);
460: }
461: mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
462: return(0);
463: }
465: /*
466: * LU Decomposition
467: */
468: PetscErrorCode MatFactorNumeric_MKL_CPARDISO(Mat F,Mat A,const MatFactorInfo *info)
469: {
470: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)(F)->data;
471: PetscErrorCode ierr;
474: mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
475: (*mat_mkl_cpardiso->ConvertToTriples)(A, MAT_REUSE_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);
477: mat_mkl_cpardiso->phase = JOB_NUMERICAL_FACTORIZATION;
478: cluster_sparse_solver (
479: mat_mkl_cpardiso->pt,
480: &mat_mkl_cpardiso->maxfct,
481: &mat_mkl_cpardiso->mnum,
482: &mat_mkl_cpardiso->mtype,
483: &mat_mkl_cpardiso->phase,
484: &mat_mkl_cpardiso->n,
485: mat_mkl_cpardiso->a,
486: mat_mkl_cpardiso->ia,
487: mat_mkl_cpardiso->ja,
488: mat_mkl_cpardiso->perm,
489: &mat_mkl_cpardiso->nrhs,
490: mat_mkl_cpardiso->iparm,
491: &mat_mkl_cpardiso->msglvl,
492: NULL,
493: NULL,
494: &mat_mkl_cpardiso->comm_mkl_cpardiso,
495: &mat_mkl_cpardiso->err);
496: if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\". Please check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
498: mat_mkl_cpardiso->matstruc = SAME_NONZERO_PATTERN;
499: mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
500: return(0);
501: }
503: /* Sets mkl_cpardiso options from the options database */
504: PetscErrorCode PetscSetMKL_CPARDISOFromOptions(Mat F, Mat A)
505: {
506: Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
507: PetscErrorCode ierr;
508: PetscInt icntl,threads;
509: PetscBool flg;
512: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MKL_CPARDISO Options","Mat");
513: PetscOptionsInt("-mat_mkl_cpardiso_65","Number of threads to use","None",threads,&threads,&flg);
514: if (flg) mkl_set_num_threads((int)threads);
516: PetscOptionsInt("-mat_mkl_cpardiso_66","Maximum number of factors with identical sparsity structure that must be kept in memory at the same time","None",mat_mkl_cpardiso->maxfct,&icntl,&flg);
517: if (flg) mat_mkl_cpardiso->maxfct = icntl;
519: PetscOptionsInt("-mat_mkl_cpardiso_67","Indicates the actual matrix for the solution phase","None",mat_mkl_cpardiso->mnum,&icntl,&flg);
520: if (flg) mat_mkl_cpardiso->mnum = icntl;
522: PetscOptionsInt("-mat_mkl_cpardiso_68","Message level information","None",mat_mkl_cpardiso->msglvl,&icntl,&flg);
523: if (flg) mat_mkl_cpardiso->msglvl = icntl;
525: PetscOptionsInt("-mat_mkl_cpardiso_69","Defines the matrix type","None",mat_mkl_cpardiso->mtype,&icntl,&flg);
526: if (flg) {
527: mat_mkl_cpardiso->mtype = icntl;
528: #if defined(PETSC_USE_REAL_SINGLE)
529: mat_mkl_cpardiso->iparm[27] = 1;
530: #else
531: mat_mkl_cpardiso->iparm[27] = 0;
532: #endif
533: mat_mkl_cpardiso->iparm[34] = 1;
534: }
535: PetscOptionsInt("-mat_mkl_cpardiso_1","Use default values","None",mat_mkl_cpardiso->iparm[0],&icntl,&flg);
537: if (flg && icntl != 0) {
538: PetscOptionsInt("-mat_mkl_cpardiso_2","Fill-in reducing ordering for the input matrix","None",mat_mkl_cpardiso->iparm[1],&icntl,&flg);
539: if (flg) mat_mkl_cpardiso->iparm[1] = icntl;
541: PetscOptionsInt("-mat_mkl_cpardiso_4","Preconditioned CGS/CG","None",mat_mkl_cpardiso->iparm[3],&icntl,&flg);
542: if (flg) mat_mkl_cpardiso->iparm[3] = icntl;
544: PetscOptionsInt("-mat_mkl_cpardiso_5","User permutation","None",mat_mkl_cpardiso->iparm[4],&icntl,&flg);
545: if (flg) mat_mkl_cpardiso->iparm[4] = icntl;
547: PetscOptionsInt("-mat_mkl_cpardiso_6","Write solution on x","None",mat_mkl_cpardiso->iparm[5],&icntl,&flg);
548: if (flg) mat_mkl_cpardiso->iparm[5] = icntl;
550: PetscOptionsInt("-mat_mkl_cpardiso_8","Iterative refinement step","None",mat_mkl_cpardiso->iparm[7],&icntl,&flg);
551: if (flg) mat_mkl_cpardiso->iparm[7] = icntl;
553: PetscOptionsInt("-mat_mkl_cpardiso_10","Pivoting perturbation","None",mat_mkl_cpardiso->iparm[9],&icntl,&flg);
554: if (flg) mat_mkl_cpardiso->iparm[9] = icntl;
556: PetscOptionsInt("-mat_mkl_cpardiso_11","Scaling vectors","None",mat_mkl_cpardiso->iparm[10],&icntl,&flg);
557: if (flg) mat_mkl_cpardiso->iparm[10] = icntl;
559: PetscOptionsInt("-mat_mkl_cpardiso_12","Solve with transposed or conjugate transposed matrix A","None",mat_mkl_cpardiso->iparm[11],&icntl,&flg);
560: if (flg) mat_mkl_cpardiso->iparm[11] = icntl;
562: PetscOptionsInt("-mat_mkl_cpardiso_13","Improved accuracy using (non-) symmetric weighted matching","None",mat_mkl_cpardiso->iparm[12],&icntl,
563: &flg);
564: if (flg) mat_mkl_cpardiso->iparm[12] = icntl;
566: PetscOptionsInt("-mat_mkl_cpardiso_18","Numbers of non-zero elements","None",mat_mkl_cpardiso->iparm[17],&icntl,
567: &flg);
568: if (flg) mat_mkl_cpardiso->iparm[17] = icntl;
570: PetscOptionsInt("-mat_mkl_cpardiso_19","Report number of floating point operations","None",mat_mkl_cpardiso->iparm[18],&icntl,&flg);
571: if (flg) mat_mkl_cpardiso->iparm[18] = icntl;
573: PetscOptionsInt("-mat_mkl_cpardiso_21","Pivoting for symmetric indefinite matrices","None",mat_mkl_cpardiso->iparm[20],&icntl,&flg);
574: if (flg) mat_mkl_cpardiso->iparm[20] = icntl;
576: PetscOptionsInt("-mat_mkl_cpardiso_24","Parallel factorization control","None",mat_mkl_cpardiso->iparm[23],&icntl,&flg);
577: if (flg) mat_mkl_cpardiso->iparm[23] = icntl;
579: PetscOptionsInt("-mat_mkl_cpardiso_25","Parallel forward/backward solve control","None",mat_mkl_cpardiso->iparm[24],&icntl,&flg);
580: if (flg) mat_mkl_cpardiso->iparm[24] = icntl;
582: PetscOptionsInt("-mat_mkl_cpardiso_27","Matrix checker","None",mat_mkl_cpardiso->iparm[26],&icntl,&flg);
583: if (flg) mat_mkl_cpardiso->iparm[26] = icntl;
585: PetscOptionsInt("-mat_mkl_cpardiso_31","Partial solve and computing selected components of the solution vectors","None",mat_mkl_cpardiso->iparm[30],&icntl,&flg);
586: if (flg) mat_mkl_cpardiso->iparm[30] = icntl;
588: PetscOptionsInt("-mat_mkl_cpardiso_34","Optimal number of threads for conditional numerical reproducibility (CNR) mode","None",mat_mkl_cpardiso->iparm[33],&icntl,&flg);
589: if (flg) mat_mkl_cpardiso->iparm[33] = icntl;
591: PetscOptionsInt("-mat_mkl_cpardiso_60","Intel MKL_CPARDISO mode","None",mat_mkl_cpardiso->iparm[59],&icntl,&flg);
592: if (flg) mat_mkl_cpardiso->iparm[59] = icntl;
593: }
595: PetscOptionsEnd();
596: return(0);
597: }
599: PetscErrorCode PetscInitialize_MKL_CPARDISO(Mat A, Mat_MKL_CPARDISO *mat_mkl_cpardiso)
600: {
601: PetscErrorCode ierr;
602: PetscInt bs;
603: PetscBool match;
604: PetscMPIInt size;
608: MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(mat_mkl_cpardiso->comm_mkl_cpardiso));
609: MPI_Comm_size(mat_mkl_cpardiso->comm_mkl_cpardiso, &size);
611: mat_mkl_cpardiso->CleanUp = PETSC_FALSE;
612: mat_mkl_cpardiso->maxfct = 1;
613: mat_mkl_cpardiso->mnum = 1;
614: mat_mkl_cpardiso->n = A->rmap->N;
615: if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36];
616: mat_mkl_cpardiso->msglvl = 0;
617: mat_mkl_cpardiso->nrhs = 1;
618: mat_mkl_cpardiso->err = 0;
619: mat_mkl_cpardiso->phase = -1;
620: #if defined(PETSC_USE_COMPLEX)
621: mat_mkl_cpardiso->mtype = 13;
622: #else
623: mat_mkl_cpardiso->mtype = 11;
624: #endif
626: #if defined(PETSC_USE_REAL_SINGLE)
627: mat_mkl_cpardiso->iparm[27] = 1;
628: #else
629: mat_mkl_cpardiso->iparm[27] = 0;
630: #endif
632: mat_mkl_cpardiso->iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */
633: mat_mkl_cpardiso->iparm[ 1] = 2; /* Use METIS for fill-in reordering */
634: mat_mkl_cpardiso->iparm[ 5] = 0; /* Write solution into x */
635: mat_mkl_cpardiso->iparm[ 7] = 2; /* Max number of iterative refinement steps */
636: mat_mkl_cpardiso->iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */
637: mat_mkl_cpardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */
638: mat_mkl_cpardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
639: mat_mkl_cpardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
640: mat_mkl_cpardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
641: mat_mkl_cpardiso->iparm[26] = 1; /* Check input data for correctness */
643: mat_mkl_cpardiso->iparm[39] = 0;
644: if (size > 1) {
645: mat_mkl_cpardiso->iparm[39] = 2;
646: mat_mkl_cpardiso->iparm[40] = A->rmap->rstart;
647: mat_mkl_cpardiso->iparm[41] = A->rmap->rend-1;
648: }
649: PetscObjectTypeCompareAny((PetscObject)A,&match,MATMPIBAIJ,MATMPISBAIJ,"");
650: if (match) {
651: MatGetBlockSize(A,&bs);
652: mat_mkl_cpardiso->iparm[36] = bs;
653: mat_mkl_cpardiso->iparm[40] /= bs;
654: mat_mkl_cpardiso->iparm[41] /= bs;
655: mat_mkl_cpardiso->iparm[40]++;
656: mat_mkl_cpardiso->iparm[41]++;
657: mat_mkl_cpardiso->iparm[34] = 0; /* Fortran style */
658: } else {
659: mat_mkl_cpardiso->iparm[34] = 1; /* C style */
660: }
662: mat_mkl_cpardiso->perm = 0;
663: return(0);
664: }
666: /*
667: * Symbolic decomposition. Mkl_Pardiso analysis phase.
668: */
669: PetscErrorCode MatLUFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
670: {
671: Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
672: PetscErrorCode ierr;
675: mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
677: /* Set MKL_CPARDISO options from the options database */
678: PetscSetMKL_CPARDISOFromOptions(F,A);
679: (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);
681: mat_mkl_cpardiso->n = A->rmap->N;
682: if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36];
684: /* analysis phase */
685: /*----------------*/
686: mat_mkl_cpardiso->phase = JOB_ANALYSIS;
688: cluster_sparse_solver (
689: mat_mkl_cpardiso->pt,
690: &mat_mkl_cpardiso->maxfct,
691: &mat_mkl_cpardiso->mnum,
692: &mat_mkl_cpardiso->mtype,
693: &mat_mkl_cpardiso->phase,
694: &mat_mkl_cpardiso->n,
695: mat_mkl_cpardiso->a,
696: mat_mkl_cpardiso->ia,
697: mat_mkl_cpardiso->ja,
698: mat_mkl_cpardiso->perm,
699: &mat_mkl_cpardiso->nrhs,
700: mat_mkl_cpardiso->iparm,
701: &mat_mkl_cpardiso->msglvl,
702: NULL,
703: NULL,
704: &mat_mkl_cpardiso->comm_mkl_cpardiso,
705: (PetscInt*)&mat_mkl_cpardiso->err);
707: if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
709: mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
710: F->ops->lufactornumeric = MatFactorNumeric_MKL_CPARDISO;
711: F->ops->solve = MatSolve_MKL_CPARDISO;
712: F->ops->solvetranspose = MatSolveTranspose_MKL_CPARDISO;
713: F->ops->matsolve = MatMatSolve_MKL_CPARDISO;
714: return(0);
715: }
717: PetscErrorCode MatCholeskyFactorSymbolic_AIJMKL_CPARDISO(Mat F,Mat A,IS perm,const MatFactorInfo *info)
718: {
719: Mat_MKL_CPARDISO *mat_mkl_cpardiso = (Mat_MKL_CPARDISO*)F->data;
720: PetscErrorCode ierr;
723: mat_mkl_cpardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
726: /* Set MKL_CPARDISO options from the options database */
727: PetscSetMKL_CPARDISOFromOptions(F,A);
728: (*mat_mkl_cpardiso->ConvertToTriples)(A,MAT_INITIAL_MATRIX,&mat_mkl_cpardiso->nz,&mat_mkl_cpardiso->ia,&mat_mkl_cpardiso->ja,&mat_mkl_cpardiso->a);
730: mat_mkl_cpardiso->n = A->rmap->N;
731: if (mat_mkl_cpardiso->iparm[36]) mat_mkl_cpardiso->n /= mat_mkl_cpardiso->iparm[36];
732: #if defined(PETSC_USE_COMPLEX)
733: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for PARDISO CHOLESKY with complex scalars! Use MAT_FACTOR_LU instead",((PetscObject)A)->type_name);
734: #endif
735: if (A->spd_set && A->spd) mat_mkl_cpardiso->mtype = 2;
736: else mat_mkl_cpardiso->mtype = -2;
738: /* analysis phase */
739: /*----------------*/
740: mat_mkl_cpardiso->phase = JOB_ANALYSIS;
742: cluster_sparse_solver (
743: mat_mkl_cpardiso->pt,
744: &mat_mkl_cpardiso->maxfct,
745: &mat_mkl_cpardiso->mnum,
746: &mat_mkl_cpardiso->mtype,
747: &mat_mkl_cpardiso->phase,
748: &mat_mkl_cpardiso->n,
749: mat_mkl_cpardiso->a,
750: mat_mkl_cpardiso->ia,
751: mat_mkl_cpardiso->ja,
752: mat_mkl_cpardiso->perm,
753: &mat_mkl_cpardiso->nrhs,
754: mat_mkl_cpardiso->iparm,
755: &mat_mkl_cpardiso->msglvl,
756: NULL,
757: NULL,
758: &mat_mkl_cpardiso->comm_mkl_cpardiso,
759: (PetscInt*)&mat_mkl_cpardiso->err);
761: if (mat_mkl_cpardiso->err < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MKL_CPARDISO: err=%d, msg = \"%s\".Check manual\n",mat_mkl_cpardiso->err,Err_MSG_CPardiso(mat_mkl_cpardiso->err));
763: mat_mkl_cpardiso->CleanUp = PETSC_TRUE;
764: F->ops->choleskyfactornumeric = MatFactorNumeric_MKL_CPARDISO;
765: F->ops->solve = MatSolve_MKL_CPARDISO;
766: F->ops->solvetranspose = MatSolveTranspose_MKL_CPARDISO;
767: F->ops->matsolve = MatMatSolve_MKL_CPARDISO;
768: return(0);
769: }
771: PetscErrorCode MatView_MKL_CPARDISO(Mat A, PetscViewer viewer)
772: {
773: PetscErrorCode ierr;
774: PetscBool iascii;
775: PetscViewerFormat format;
776: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
777: PetscInt i;
780: /* check if matrix is mkl_cpardiso type */
781: if (A->ops->solve != MatSolve_MKL_CPARDISO) return(0);
783: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
784: if (iascii) {
785: PetscViewerGetFormat(viewer,&format);
786: if (format == PETSC_VIEWER_ASCII_INFO) {
787: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO run parameters:\n");
788: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO phase: %d \n",mat_mkl_cpardiso->phase);
789: for (i = 1; i <= 64; i++) {
790: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO iparm[%d]: %d \n",i, mat_mkl_cpardiso->iparm[i - 1]);
791: }
792: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO maxfct: %d \n", mat_mkl_cpardiso->maxfct);
793: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mnum: %d \n", mat_mkl_cpardiso->mnum);
794: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO mtype: %d \n", mat_mkl_cpardiso->mtype);
795: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO n: %d \n", mat_mkl_cpardiso->n);
796: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO nrhs: %d \n", mat_mkl_cpardiso->nrhs);
797: PetscViewerASCIIPrintf(viewer,"MKL_CPARDISO msglvl: %d \n", mat_mkl_cpardiso->msglvl);
798: }
799: }
800: return(0);
801: }
803: PetscErrorCode MatGetInfo_MKL_CPARDISO(Mat A, MatInfoType flag, MatInfo *info)
804: {
805: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)A->data;
808: info->block_size = 1.0;
809: info->nz_allocated = mat_mkl_cpardiso->nz + 0.0;
810: info->nz_unneeded = 0.0;
811: info->assemblies = 0.0;
812: info->mallocs = 0.0;
813: info->memory = 0.0;
814: info->fill_ratio_given = 0;
815: info->fill_ratio_needed = 0;
816: info->factor_mallocs = 0;
817: return(0);
818: }
820: PetscErrorCode MatMkl_CPardisoSetCntl_MKL_CPARDISO(Mat F,PetscInt icntl,PetscInt ival)
821: {
822: Mat_MKL_CPARDISO *mat_mkl_cpardiso=(Mat_MKL_CPARDISO*)F->data;
825: if (icntl <= 64) {
826: mat_mkl_cpardiso->iparm[icntl - 1] = ival;
827: } else {
828: if (icntl == 65) mkl_set_num_threads((int)ival);
829: else if (icntl == 66) mat_mkl_cpardiso->maxfct = ival;
830: else if (icntl == 67) mat_mkl_cpardiso->mnum = ival;
831: else if (icntl == 68) mat_mkl_cpardiso->msglvl = ival;
832: else if (icntl == 69) {
833: mat_mkl_cpardiso->mtype = ival;
834: #if defined(PETSC_USE_REAL_SINGLE)
835: mat_mkl_cpardiso->iparm[27] = 1;
836: #else
837: mat_mkl_cpardiso->iparm[27] = 0;
838: #endif
839: mat_mkl_cpardiso->iparm[34] = 1;
840: }
841: }
842: return(0);
843: }
845: /*@
846: MatMkl_CPardisoSetCntl - Set Mkl_Pardiso parameters
848: Logically Collective on Mat
850: Input Parameters:
851: + F - the factored matrix obtained by calling MatGetFactor()
852: . icntl - index of Mkl_Pardiso parameter
853: - ival - value of Mkl_Pardiso parameter
855: Options Database:
856: . -mat_mkl_cpardiso_<icntl> <ival>
858: Level: Intermediate
860: Notes:
861: This routine cannot be used if you are solving the linear system with TS, SNES, or KSP, only if you directly call MatGetFactor() so use the options
862: database approach when working with TS, SNES, or KSP.
864: References:
865: . Mkl_Pardiso Users' Guide
867: .seealso: MatGetFactor()
868: @*/
869: PetscErrorCode MatMkl_CPardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)
870: {
874: PetscTryMethod(F,"MatMkl_CPardisoSetCntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
875: return(0);
876: }
878: static PetscErrorCode MatFactorGetSolverType_mkl_cpardiso(Mat A, MatSolverType *type)
879: {
881: *type = MATSOLVERMKL_CPARDISO;
882: return(0);
883: }
885: /* MatGetFactor for MPI AIJ matrices */
886: static PetscErrorCode MatGetFactor_mpiaij_mkl_cpardiso(Mat A,MatFactorType ftype,Mat *F)
887: {
888: Mat B;
889: PetscErrorCode ierr;
890: Mat_MKL_CPARDISO *mat_mkl_cpardiso;
891: PetscBool isSeqAIJ,isMPIBAIJ,isMPISBAIJ;
894: /* Create the factorization matrix */
896: PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);
897: PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&isMPIBAIJ);
898: PetscObjectTypeCompare((PetscObject)A,MATMPISBAIJ,&isMPISBAIJ);
899: MatCreate(PetscObjectComm((PetscObject)A),&B);
900: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
901: PetscStrallocpy("mkl_cpardiso",&((PetscObject)B)->type_name);
902: MatSetUp(B);
904: PetscNewLog(B,&mat_mkl_cpardiso);
906: if (isSeqAIJ) mat_mkl_cpardiso->ConvertToTriples = MatCopy_seqaij_seqaij_MKL_CPARDISO;
907: else if (isMPIBAIJ) mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpibaij_mpibaij_MKL_CPARDISO;
908: else if (isMPISBAIJ) mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij_MKL_CPARDISO;
909: else mat_mkl_cpardiso->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij_MKL_CPARDISO;
911: if (ftype == MAT_FACTOR_LU) B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_CPARDISO;
912: else B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_AIJMKL_CPARDISO;
913: B->ops->destroy = MatDestroy_MKL_CPARDISO;
915: B->ops->view = MatView_MKL_CPARDISO;
916: B->ops->getinfo = MatGetInfo_MKL_CPARDISO;
918: B->factortype = ftype;
919: B->assembled = PETSC_TRUE; /* required by -ksp_view */
921: B->data = mat_mkl_cpardiso;
923: /* set solvertype */
924: PetscFree(B->solvertype);
925: PetscStrallocpy(MATSOLVERMKL_CPARDISO,&B->solvertype);
927: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mkl_cpardiso);
928: PetscObjectComposeFunction((PetscObject)B,"MatMkl_CPardisoSetCntl_C",MatMkl_CPardisoSetCntl_MKL_CPARDISO);
929: PetscInitialize_MKL_CPARDISO(A, mat_mkl_cpardiso);
931: *F = B;
932: return(0);
933: }
935: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MKL_CPardiso(void)
936: {
938:
940: MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);
941: MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);
942: MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_mpiaij_mkl_cpardiso);
943: MatSolverTypeRegister(MATSOLVERMKL_CPARDISO,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_mpiaij_mkl_cpardiso);
944: return(0);
945: }