OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
cini_defaults.F
Go to the documentation of this file.
1C
2C This file is part of MUMPS 5.5.1, released
3C on Tue Jul 12 13:17:24 UTC 2022
4C
5C
6C Copyright 1991-2022 CERFACS, CNRS, ENS Lyon, INP Toulouse, Inria,
7C Mumps Technologies, University of Bordeaux.
8C
9C This version of MUMPS is provided to you free of charge. It is
10C released under the CeCILL-C license
11C (see doc/CeCILL-C_V1-en.txt, doc/CeCILL-C_V1-fr.txt, and
12C https://cecill.info/licences/Licence_CeCILL-C_V1-en.html)
13C
14C
15C**********************************************************************
16C
17 SUBROUTINE cmumps_set_type_sizes( K34, K35, K16, K10 )
18 IMPLICIT NONE
19C
20C Purpose:
21C =======
22C
23C Set the size in bytes of an "INTEGER" in K34
24C Set the size of the default arithmetic (REAL, DOUBLE PRECISION,
25C COMPLEX or DOUBLE COMPLEX) in K35
26C Set the size of floating-point types that are real or double
27C precision even for complex versions of MUMPS (REAL for S and
28C C versions, DOUBLE PRECISION for D and Z versions)
29C Assuming that the size of an INTEGER(8) is 8, store the ratio
30C nb_bytes(INTEGER(8)) / nb_bytes(INTEGER) = 8 / K34 into K10.
31C
32C In practice, we have:
33C
34C K35: Arithmetic Value Value for T3E
35C S 4 8
36C D 8 16
37C C 8 16
38C Z 16 32
39C
40C K16 = K35 for S and D arithmetics
41C K16 = K35 / 2 for C and Z arithmetics
42C
43C K34= 4 and K10 = 2, except on CRAY machines or when compilation
44C flag -i8 is used, in which case, K34 = 8 and K10 = 1
45C
46C
47 INTEGER, INTENT(OUT) :: K34, K35, K10, K16
48 INTEGER(8) :: SIZE_INT, SIZE_REAL_OR_DOUBLE ! matches MUMPS_INT8
49 INTEGER I(2)
50 REAL R(2) ! Will be DOUBLE PRECISION if 0
51 CALL mumps_size_c(i(1),i(2),size_int)
52 CALL mumps_size_c(r(1),r(2),size_real_or_double)
53 k34 = int(size_int)
54 k10 = 8 / k34
55 k16 = int(size_real_or_double)
56 k35 = k16
57 k35 = k35 * 2
58 RETURN
59 END SUBROUTINE cmumps_set_type_sizes
60C
61C**********************************************************************
62C
63 SUBROUTINE cmumpsid( NSLAVES, LWK_USER, CNTL, ICNTL,
64 & KEEP,KEEP8,
65 & INFO, INFOG, RINFO, RINFOG, SYM, PAR,
66 & DKEEP, MYID )
67!$ USE OMP_LIB
68 IMPLICIT NONE
69C
70C Purpose
71C =======
72C
73C The elements of the arrays CNTL and ICNTL control the action of
74C CMUMPS, CMUMPS_ANA_DRIVER, CMUMPS_FAC_DRIVER, CMUMPS_SOLVE_DRIVER
75C Default values for the elements are set in this routine.
76C
77 REAL DKEEP(230)
78 REAL CNTL(15), RINFO(40), RINFOG(40)
79 INTEGER ICNTL(60), KEEP(500), SYM, PAR, NSLAVES, MYID
80 INTEGER INFO(80), INFOG(80)
81 INTEGER(8) KEEP8(150)
82 INTEGER LWK_USER
83C
84C Parameters
85C ==========
86C===========================================
87C Arrays for control and information
88C===========================================
89C
90C N Matrix order
91C
92C NELT Number of elements for matrix in ELt format
93C
94C
95C SYM = 0 ... initializes the defaults for unsymmetric code
96C = 1,2 ... initializes the defaults for symmetric code
97C
98C
99C
100C PAR = 0 ... instance where host is not working
101C = 1 ... instance where host is working as a normal node.
102C (host uses more memory than other processors in
103C the latter case)
104C
105C CNTL and the elements of the array ICNTL control the action of
106C CMUMPS Default values
107C are set by CMUMPSID. The elements of the arrays RINFO
108C and INFO provide information on the action of CMUMPS.
109C
110C CNTL(1) threshold for partial pivoting
111C has default value 0.0 when SYM=1 and 0.01 otherwise
112C Values and less than zero as treated as zero.
113C Values greater than 1.0 are treated as 1.0 for
114C SYM=1 and as 0.5 for SYM=2
115C In general, a larger value of CNTL(1) leads to
116C greater fill-in but a more accurate factorization.
117C If CNTL(1) is nonzero, numerical pivoting will be performed.
118C If CNTL(1) is zero, no pivoting will be performed and
119C the subroutine will fail if a zero pivot is encountered.
120C If the matrix A is diagonally dominant, then
121C setting CNTL(1) to zero will decrease the factorization
122C time while still providing a stable decomposition.
123C
124C CNTL(2) must be set to the tolerance for convergence of iterative
125C refinement.
126C Default value is sqrt(macheps).
127C Values less than zero are treated as sqrt(macheps).
128C
129C CNTL(3) is used with null pivot row detection (ICNTL(24) .eq. 1)
130C Default value is 0.0.
131C Let A_{preproc} be the preprocessed matrix to be factored (see
132C equation in the user's guide).
133C A pivot is considered to be null if the infinite norm of its
134C row/column is smaller than a threshold. Let MACHEPS be the
135C machine precision and ||.|| be the infinite norm.
136C The absolute value to detect a null pivot row (when ICNTL(24) .EQ.1)
137C is stored in DKEEP(1).
138C IF CNTL(3) > 0 THEN
139C DKEEP(1) = CNTL(3) ||A_{preproc}||
140C ELSE IF CNTL(3) = 0.0 THEN
141C DKEEP(1) = MACHEPS 10^{-5} ||A_{preproc}||
142C ELSE IF CNTL(3) < 0 THEN
143C DKEEP(1) = abs(CNTL(3))! this was added for EDF
144C ! in the context of SOLSTICE project
145C ENDIF
146C
147C CNTL(4) must be set to value for static pivoting.
148C Default value is -1.0
149C Note that static pivoting is enabled only when
150C Rank-Revealing and null pivot detection
151C are off (KEEP(19).EQ.0).AND.(KEEP(110).EQ.0).
152C If negative, static pivoting will be set OFF (KEEP(97)=0)
153C If positive, static pivoting is ON (KEEP(97=1) with
154C threshold CNTL(4)
155C If = 0, static pivoting is ON with threshold MACHEPS^1/2 || A ||
156C
157C CNTL(5) fixation for null pivots
158C Default value is 0.0
159C Only active if ICNTL(24) = 1
160C If > 0 after finding a null pivot, it is set to CNTL(5) x ||A||
161C (This value is stored in DKEEP(2))
162C If <= 0 then
163C SYM=2:
164C the row/column (except the pivot) is set to zero
165C and the pivot is set to 1
166C SYM=0:
167C the fixation is automatically
168C set to a large potitive value and the pivot row of the
169C U factors is set to zero.
170C Default is 0.
171C
172C CNTL(6) not used yet
173C
174C CNTL(7) tolerance for Low Rank approximation of the Blocks (BLR).
175C Dropping parameter expressed with a double precision,
176C real value, controlling
177C compression and used to truncate the RRQR algorithm
178C default value is 0.0. (i.e. no approximation).
179C The truncated RRQR operation is implemented as
180C as variant of the LAPACK GEQP3 and LAQPS routines.
181C 0.0 : full precision approximation.
182C > 0.0 : the dropping parameter is DKEEP(8).
183C
184C Warning: using negative values is an experimental and
185C non recommended setting.
186C < 0.0 : the dropping parameter is |DKEEP(8)|*|Apre|, Apre
187C as defined in user's guide
188C
189C
190C -----------------------------------------
191C
192C ICNTL(1) has default value 6.
193C It is the output stream for error messages.
194C If it is set to zero, these
195C messages will be suppressed.
196C
197C ICNTL(2) has default value 0.
198C It is the output stream for diagnostic printing and
199C for warning messages that are local to each MPI process.
200C If it is set to zero, these messages are suppressed.
201C
202C ICNTL(3) -- Host only
203C It is the output stream for diagnostic printing
204C and for warning messages. Default value is 6.
205C If it is set to zero, these messages are suppressed.
206C
207C ICNTL(4) is used by CMUMPS to control printing of error,
208C warning, and diagnostic messages. It has default value 2.
209C Possible values are:
210C
211C <1 __No messages output.
212C 1 __Only error messages printed.
213C 2 __Errors and warnings printed.
214C 3 __Errors and warnings and terse diagnostics
215C (only first ten entries
216C of arrays printed).
217C 4 __Errors and warnings and all information
218C on input and output parameters printed.
219C
220C
221C ICNTL(5) is the format of the input matrix and rhs
222C 0: assembled matrix, assembled rhs
223C 1: elemental matrix, assembled rhs
224C Default value is 0.
225C
226C ICNTL(6) has default value 7 for unsymmetric and
227C general symmetric matrices, and 0 for SPD matrices.
228C It is only accessed and operational
229C on a call that includes an analysis phase
230C (JOB = 1, 4, or 6).
231C In these cases, if ICNTL(6)=1, 2, 3, 4, 5, 6 or 7,
232C a column permutation based on algorithms described in
233C Duff and Koster, 1997, *SIMAX <20>, 4, 889-901,
234C is applied to the original matrix. Column permutations are
235C then applied to the original matrix to get a zero-free diagonal.
236C Except for ICNTL(6)=1, the numerical values of the
237C original matrix, id%A(NE), need be provided by the user
238C during the analysis phase.
239C If ICNTL(6)=7, based on the structural symmetry of the
240C input matrix the value of ICNTL(6) is automatically chosen.
241C If the ordering is provided by the user
242C (ICNTL(7)=1) then the value of ICNTL(6) is ignored.
243C
244C ICNTL(7) has default value 7 and must be set by the user to
245C 1 if the pivot order in IS is to be used.
246C Effective value of ordering stored in KEEP(256).
247C Possible values are (depending on the softwares installed)
248C 0 AMD: Approximate minimum degree (included in CMUMPS package)
249C 1 Ordering provided by the user
250C 2 Approximate minimum fill (included in CMUMPS package)
251C 3 SCOTCH (see http://gforge.inria.fr/projects/scotch/)
252C should be downloaded/installed separately.
253C 4 PORD from Juergen Schulze (js@juergenschulze.de)
254C PORD package is extracted from the SPACE-1.0 package developed at the
255C University of Paderborn by Juergen Schulze
256C and is provided as a separate package.
257C 5 Metis ordering should be downloaded/installed separately.
258C 6 Approximate minimum degree with automatic quasi
259C dense row detection (included in CMUMPS package).
260C (to be used when ordering time with AMD is abnormally large)
261C 7 Automatic choice done during analysis phase
262C For any other
263C value of ICNTL(7), a suitable pivot order will be
264C chosen automatically.
265C
266C ICNTL(8) is used to describe the scaling strategy.
267C Default value is 77.
268C Note that scaling is performed only when the numerical
269C factorization step is performed (JOB = 2, 4>, 5>, or 6>).
270C If ICNTL(8) is not equal to
271C any of the values listed below then ICNTL(8) is treated
272C as if it had its default value of 0 (no scaling).
273C If the matrix is known to be very badly scaled,
274C our experience has been that option 6 is the most robust but
275C the best scaling is very problem dependent.
276C If ICNTL(8)=0, COLSCA and ROWSCA are dummy arguments
277C of the subroutine that are not accessed.
278C Possible values of ICNTL(8) are:
279C
280C -2 scaling computed during analysis (and applied during the
281C factorization)
282C
283C -1 the user must provide the scaling in arrays
284C COLSCA and ROWSCA
285C
286C 0 no scaling
287C
288C 1 Diagonal scaling
289C
290C 2 not defined
291C
292C 3 Column scaling
293C
294C 4 Row and column scaling
295C
296C 5,6 not defined
297C 7, 8 Scaling based on Daniel Ruiz and Bora Ucar's work done
298C during the ANR-SOLSTICE project.
299C Reference for this work are:
300C The scaling algorithms are based on those discussed in
301C [1] D. Ruiz, "A scaling algorithm to equilibrate both rows and
302C columns norms in matrices", Tech. Rep. Rutherford
303C Appleton Laboratory, Oxon, UK and ENSEEIHT-IRIT,
304C Toulouse, France, RAL-TR-2001-034 and RT/APO/01/4, 2001.
305C [2] D. Ruiz and B. Ucar, "A symmetry preserving algorithm for
306C matrix scaling", in preparation as of Jan'08.
307C This scaling can work on both centralized and distributed
308C assembled input matrix format. (it works for both symmetric
309C and unsymmetric matrices)
310C Option 8 is similar to 7 but more rigourous and expensive to compute.
311C 77 Automatic choice of scaling value done. Proposed algo:
312C if (sym=1) then
313C option = 0
314C else
315C if distributed matrix entry then
316C option = 7
317C else
318C if (maximum transversal is called
319C and makes use of numerical values) then
320C option=-2 and ordering is computed during analysis
321C else
322C option = 7
323C endif
324C endif
325C endif
326C
327C ICNTL(9) has default value 1. If ICNTL(9)=1
328C the system of equations A * x = b is solved. For other
329C values the system A^T * x = b is solved.
330C When ICNTL(30) (compute selected entries in A-1) is activated
331C ICNTL(9) is ignored.
332C
333C ICNTL(10) has default value 0.
334C If ICNTL(10)=0 : iterative refinement is not performed.
335C Values of ICNTL(10) < 0 : a fix number of steps equal
336C to ICNTL(10) of IR is done.
337C Values of ICNTL(10) > 0 : mean a maximum of ICNTL(10) number
338C of steps of IR is done, and a test of
339C convergence is used
340C
341C ICNTL(11) has default value 0.
342C A value equal to 1 will return a backward error estimate in
343C RINFO(4-11).
344C A value equal to 2 will return a backward error estimate in
345C RINFO(4-8). No LCOND 1, 2 and forward error are computed.
346C If ICNTL(11) is negative, zero or greater than 2 no estimate
347C is returned.
348C
349C
350C ICNTL(12) has default value 0 and defines the strategy for
351C LDLT orderings
352C 0 : automatic choice
353C 1 : usual ordering (nothing done)
354C 2 : ordering on the compressed graph, available with all orderings
355C except with AMD
356C 3 : constraint ordering, only available with AMF,
357C -> reset to 2 with other orderings
358C Other values are treated as 1 (nothing done).
359C On output KEEP(95) holds the internal value used and INFOG(24) gives
360C access to KEEP(95) to the user.
361C in LU facto it is always reset to 1
362C
363C - ICNTL(12) = 3 has a lower priority than ICNTL(7)
364C thus if ICNTL(12) = 3 and the ordering required is not AMF
365C then ICNTL(12) is set to 2
366C
367C - ICNTL(12) = 2 has a higher priority than ICNTL(7)
368C thus if ICNTL(12) = 2 and the ordering required is AMD
369C then the ordering used is QAMD
370C
371C - ICNTL(12) has a higher priority than ICNTL(6) and ICNTL(8)
372C thus if ICNTL(12) = 2 then ICNTL(6) is automatically
373C considered as if it was set to a value between 1-6
374C if ICNTL(12) = 3 then ICNTL(6) is considered as if
375C set to 5 and ICNTL(8) as if set to -2 (we need the scaling
376C factors to define free and constrained variables)
377C
378C ICNTL(13) has default value 0 and allows for selecting Type 3 node.
379C IF ICNTL(13).GT. 0 scalapack is forbidden. Otherwise,
380C scalapack will be activated if the root is large enough.
381C Furthermore
382C IF ((ICNTL(13).GT.0) .AND. (NSLAVES.GT.ICNTL(13),
383C or ICNTL(13)=-1 THEN
384C extra splitting of the root will be activated
385C and is controlled by abs(KEEP(82)).
386C The order of the root node is divided by KEEP(82)
387C ENDIF
388C If ICNTL(13) .EQ. -1 then splitting of the root
389C is done whatever the nb of procs is.
390C
391C To summarize:
392C -1 : root splitting and scalapack on
393C 0 or < -1 : root splitting off and sclalapack on
394C > 0 : scalapack off
395C
396C ICNTL(14) has default value 20 (5 if NSLAVES=1 and SYM=1)
397C and is the value for memory relaxation
398C so called "PERLU" in the following.
399C
400C
401C ICNTL(15) : Describes the compression of the graph of the input matrix
402C The analysis step is then performed on the compressed
403C graph
404C Must be set during analysis on the master
405C 0 : OFF
406C 1 : Compression provided by the user:
407C BLKPTR(1:id%NBLK+1) and
408C BLKVAR(1:N or N_LOC if distributed format)
409C (BLKVAR(BLKPTR(iblk):BLKPTR(iblk+1)-1):
410C dof list for iblk)
411C - If BLKVAR is not provided then BLKVAR is
412C treated as the identity
413C (contiguous variables in blocks)
414C - Distributed format if on MASTER N_LOC#N
415C
416C ICNTL(16) : number of OpenMP threads asked by the user.
417C
418C ICNTL(17) not used in this version
419C
420C ICNTL(18) has default value 0 and is only accessed by the host during
421C the analysis phase if the matrix is assembled (ICNTL(5))= 0).
422C ICNTL(18) defines the strategy for the distributed input matrix.
423C Possible values are:
424C 0: input matrix is centralized on the host. This is the default
425C 1: user provides the structure of the matrix on the host at analysis,
426C CMUMPS returns
427C a mapping and user should provide the matrix distributed according
428C to the mapping
429C 2: user provides the structure of the matrix on the host at analysis,
430C and the
431C distributed matrix on all slave processors at factorization.
432C Any distribution is allowed
433C 3: user directly provides the distributed matrix input both
434C for analysis and factorization
435C
436C For flexibility and performance issues, option 3 is recommended.
437C
438C ICNTL(19) has default value 0 and is only accessed by the host
439C during the analysis phase. If ICNTL(19) \neq 0 then Schur matrix will
440C be returned to the user.
441C The user must set on entry on the host node (before analysis):
442C the integer variable SIZE\_SCHUR to the size fo the Schur matrix,
443C the integer array pointer LISTVAR\_SCHUR to the list of indices
444C of the schur matrix.
445C if = 0 : Schur is off and the root node gets factorized
446C if = 1 : Schur is on and the Schur complement is returned entirely
447C on a memory area provided by the user ONLY on the host node
448C if = 2 or 3 : Schur is on and the Schur complement is returned in a
449C distributed fashion according to a 2D block-cyclic
450C distribution. In the case where the matrix is symmetric
451C the lower part is returned if =2 or the complete
452C matrix if =3.
453C
454C ICNTL(20) has default value 0 and is only accessed by the host
455C during the solve phase. If ICNTL(20)=0, the right-hand side must given
456C in dense form in the structure component RHS.
457C If ICNTL(20)=1,2,3, then the right-hand side must be given in sparse form
458C using the structure components IRHS\_SPARSE, RHS\_SPARSE, IRHS\_PTR and
459C NZ\_RHS.
460C When the right-hand side is provided in sparse form then duplicate entries
461C are summed.
462C
463C 0 : dense RHS
464C 1,2,3 : Sparse RHS
465C 1 The decision of exploiting sparsity of the right-hand side to
466C accelerate the solution phase is done automatically.
467C 2 Sparsity of the right-hand sides is NOT exploited
468C to improve solution phase.
469C 3 Sparsity of the right-hand sides is exploited
470C to improve solution phase.
471C Values different from 0,1, 2,3 are treated as 0.
472C For sparse RHS recommended value is 1.
473C
474C ICNTL(21) has default value 0 and is only accessed by the host
475C during the solve phase. If ICNTL(21)=0, the solution vector will be assembled
476C and stored in the structure component RHS, that must have been allocated by
477C the user. If ICNTL(21)=1, the solution vector is kept distributed at the
478C end of the solve phase, and will be available on each slave processor
479C in the structure components ISOL_loc and SOL_loc. ISOL_loc and SOL_loc
480C must then have been allocated by the user and must be of size at least
481C INFO(23), where INFO(23) has been returned by CMUMPS at the end of the
482C factorization phase.
483C Values of ICNTL(21) different from 0 and 1 are currently treated as 0.
484C
485C ICNTL(22) (saved in KEEP(201) controls the OOC setting (0=incore, 1 =OOC)
486C It has default value 0 (incore).Out-of-range values are treated as 1.
487C If set before analysis then special setting and massage of the tree
488C might be done (so far only extra splitting CUTNODES) is performed.
489C It is then accessed by the host
490C during the factorization phase. If ICNTL(22)=0, then no attempt
491C to use the disks is made. If ICNTL(22)=1, then CMUMPS will store
492C the computed factors on disk for later use during the solution
493C phase.
494C
495C ICNTL(23) has default value 0 and is accessed by ALL processors
496C at the beginning of the factorization phase. If positive
497C it corresponds to the maximum size of the working memory
498C in MegaBytes that MUMPS can allocate per working processor.
499C If only the host
500C value is non zero, then other processors also use the value on
501C the host. Otherwise, each processor uses the local value
502C provided.
503C
504C ICNTL(24) default value is 0
505C if = 0 no null pivot detection (CNTL(5) and CNTL(3) are inactive),
506C = 1 null pivot row detection; CNTL(3) and CNTL(5) are
507C then used to describe the action taken.
508C
509C
510C ICNTL(25) has default value 0 and is only accessed by the
511C host during the solution stage. It is only significant if
512C a null space basis was requested during the factorization
513C phase (INFOG(28) .GT. 0); otherwise a normal solution step
514C is performed.
515C If ICNTL(25)=0, then a normal solution step is performed,
516C on the internal problem (excluding the null space).
517C No special property on the solution (discussion with Serge)
518C If ICNTL(25)=i, 1 <= i <= INFOG(28), then the i-th vector
519C of the null space basis is computed. In that case, note
520C that NRHS should be set to 1.
521C If ICNTL(25)=-1, then all null space is computed. The
522C user should set NRHS=INFOG(28) in that case.
523C Note that centralized or distributed solutions are
524C applicable in that case, but that iterative refinement,
525C error analysis, etc... are excluded. Note also that the
526C option to solve the transpose system (ICNTL(9)) is ignored.
527C
528C
529C ICNTL(26) has default value 0 and is accessed on the host only
530C at the beginning of the solution step.
531C It is only effective if the Schur option is ON.
532C (copy in KEEP(221))
533C
534C
535C During the solution step, a value of 0 will perform a normal
536C solution step on the reduced problem not involving the Schur
537C variables.
538C During the solution step, if ICNTL(26)=1 or 2, then REDRHS
539C should be allocated of size at least LREDRHS*(NRHS-1)+
540C SIZE_SCHUR, where LREDRHS is the leading dimension of
541C LREDRHS (LREDRHS >= SIZE_SCHUR).
542C
543C If ICNTL(26)=1, then only a forward substitution is performed,
544C and a reduced RHS will be computed and made available in
545C REDRHS(i+(k-1)*LREDRHS), i=1, ..., SIZE_SCHUR, k=1, ..., NRHS.
546C If ICNTL(26)=2, then REDRHS(i+(k-1)*LREDRHS),i=1, SIZE_SCHUR,
547C k=1,NRHS is considered to be the solution corresponding to the
548C Schur variables. It is injected in CMUMPS, that computes the
549C solution on the "internal" problem during the backward
550C substitution.
551C
552C ICNTL(27) controls the blocking factor for multiple right-hand-sides
553C during the solution phase.
554C It influences both the memory used (see INFOG(30-31)) and
555C the solution time
556C (Larger values of ICNTL(27) leads to larger memory requirements).
557C Its tuning can be critical when
558C the factors are written on disk (out-of core, ICNTL(22)=1).
559C A negative value indicates that automatic setting is
560C performed by the solver.
561C
562C
563C ICNTL(28) decides whether parallel or sequential analysis should be used. Three
564C values are possible at the moment:
565C 0: automatic. This defaults to sequential analysis
566C 1: sequential. In this case the ordering strategy is defined by ICNTL(7)
567C 2: parallel. In this case the ordering strategy is defined by ICNTL(29)
568C
569C ICNTL(29) defines the ordering too to be used during the parallel analysis. Three
570C values are possible at the moment:
571C 0: automatic. This defaults to PT-SCOTCH
572C 1: PT-SCOTCH.
573C 2: ParMetis.
574C
575C
576C ICNTL(30) controls the activation of functionality A-1.
577C It has default value 0 and is only accessed by the master
578C during the solution phase. It enables the solver to
579C compute entries in the inverse of the original matrix.
580C Possible values are:
581C 0 normal solution
582C other values: compute entries in A-1
583C When ICNTL(30).NE.0 then the user
584C must describe on entry to the solution phase,
585C in the sparse right-hand-side
586C (NZ_RHS, NRHS, RHS_SPARSE, IRHS_SPARSE, IRHS_PTR)
587C the target entries of A-1 that need be computed.
588C Note that RHS_SPARSE must be allocated but need not be
589C initialized.
590C On output RHS_SPARSE then holds the requested
591C computed values of A-1.
592C Note that when ICNTL(30).NE.0 then
593C - sparse right hand side interface is implicitly used
594C functionality (ICNTL(20)= 1) but RHS need not be
595C allocated since computed A-1 entries will be stored
596C in place.
597C - ICNTL(9) option (solve Ax=b or Atx=b) is ignored
598C In case of duplicate entries in the sparse rhs then
599C on output duplicate entries in the solution are provided
600C in the same place.
601C This need not be mentioned in the spec since it is a
602C "natural" extension.
603C
604C -----------
605C Fwd in facto
606C -----------
607C ICNTL(31) Must be set before analysis to control storage
608C of LU factors. Default value is 0. Out of range
609C values considered as 0.
610C (copied in KEEP(251) and broadcast,
611C when setting of ICNTL(31)
612C results in not factors to be stored then
613C KEEP(201) = -1, OOC is "suppressed")
614C 0 Keep factors needed for solution phase
615C (when option forward during facto is used then
616C on unsymmetric matrices L factors are not stored)
617C 1 Solve not needed (solve phase will never be called).
618C When the user is only interested in the inertia or the
619C determinant then
620C all factor matrices need not be stored.
621C This can also be useful for testing :
622C to experiment facto OOC without
623C effective storage of factors on disk.
624C 2 L factors not stored: meaningful when both
625C - matrix is unsymmetric and fwd performed during facto
626C - the user is only interested in the null-space basis
627C and thus only need the U factors to be stored.
628C Currently, L factors are always stored in IC.
629C
630C -----------
631C Fwd in facto
632C -----------
633C ICNTL(32) Must be set before analysis to indicate whether
634C forward is performed during factorization.
635C Default value is 0 (normal factorization without fwd)
636C (copied in KEEP(252) and broadcast)
637C 0 Normal factorization (default value)
638C 1 Forward performed during factorization
639C
640C
641C ICNTL(33) Must be set before the factorization phase to compute
642C the determinant. See also KEEP(258), KEEP(259),
643C DKEEP(6), DKEEP(7), INFOG(34), RINFOG(12)
644C
645C If ICNTL(33)=0 the determinant is not computed
646C For all other values, the determinant is computed. Note that
647C null pivots and static pivots are excluded from the
648C computation of the determinant.
649C
650C ICNTL(34) Must be set before a call to MUMPS with JOB=-2 in case
651C the save/restore feature was used and user wants to clean
652C save/restore files (and possibly OOC files).
653C ICTNL(34)=0 => user wants to be able to restore instance later
654C ICTNL(34)=1 => user will not restore the instance again (clean
655C to be done)
656C
657C ICNTL(35) : Block Low-Rank (BLR) functionality,
658C need be set before analysis
659C Default value is 0
660C 0: FR factorization and FR solve
661C 1: Automatic BLR option setting (=> 2)
662C 2: BLR factorization + BLR Solve
663C => keep BLR factors only
664C 3: BLR factorization + FR Solve
665C Other values are treated as zero
666C Note that this functionality is currently incompatible
667C with elemental matrices (ICNTL(5) = 1) and with
668C forward elimination during factorization (ICNTL(32) = 1)
669C
670C ICNTL(36) : Block Low-Rank variant choice
671C Default value is 0
672C 0: UFSC variant, no recompression: Compress step is
673C performed after the Solve; the low-rank updates are not
674C recompressed
675C 1: UCFS variant, no recompression: Compress step is
676C performed before the Solve; pivoting strategy is adapted
677C to pe performed on low-rank blocks; the low-rank updates are not
678C recompressed
679C
680C
681C ICNTL(38): Compression rate of LU factors, can be set before
682C analysis/factorization
683C Between 0 and 1000; other values ares treated as 0;
684C ICNTL(38)/10 is a percentage representing the typical
685C compressed factors compression of the factor matrices
686C in BLR fronts:
687C ICNTL(38)/10= compressed/uncompressed factors Ă— 100.
688C Default value: 600
689C (when factors of BLR fronts are compressed,
690C their size is 60% of their full- rank size).
691C
692C ICNTL(58): strategy for symbolic factorization used
693C with centralized ordering based on METIS (ICNTL(7)=5)
694C or with given given ordering (ICNTL(7)=1)
695C
696#if defined(__ve__)
697C Default value 2
698#else
699C Default value 1
700#endif
701C 1 => SYMBQAMD based symbolic factorization
702C 2 => Column count based symbolic factorization
703C Symbolic factorization based on
704C [GIMP94] "An efficient algorithm to compute row and column
705C counts for sparse cholesky factorization"
706C John R. Gilbert, Esmond G. Ng, and Barry W. Peyton
707C SIMAX 1994
708C implementation of the algorithm described in figure 3
709C of the [GINP94] article
710C
711C Other values are treated as 1
712C
713C=========================
714C ARRAYS FOR INFORMATION
715C========================
716C
717C-----
718C INFO is an INTEGER array of length 80 that need not be
719C set by the user.
720C-----
721C
722C INFO(1) is zero if the routine is successful, is negative if an
723C error occurred, and is positive for a warning (see CMUMPS for
724C a partial documentation and the userguide for a full documentation
725C of INFO(1)).
726C
727C INFO(2) holds additional information concerning the
728C error (see CMUMPS).
729C
730C ------------------------------------------
731C Statistics produced after analysis phase
732C ------------------------------------------
733C
734C INFO(3) Estimated real space needed for factors.
735C
736C INFO(4) Estimated integer space needed for factors.
737C
738C INFO(5) Estimated maximum frontal size.
739C
740C INFO(6) Number of nodes in the tree.
741C
742C INFO(7) Minimum value of integer working array IS (old MAXIS)
743C estimated by the analysis phase
744C to run the numerical factorization.
745C
746C INFO(8) Minimum value of real/complex array S (old MAXS)
747C estimated by the analysis phase
748C to run the numerical factorization.
749C
750C INFO(15) Estimated size in MBytes of all CMUMPS internal data
751C structures to run factorization
752C
753C INFO(17) provides an estimation (minimum in Megabytes)
754C of the total memory required to run
755C the numerical phases out-of-core.
756C This memory estimation corresponds to
757C the least memory consuming out-of-core strategy and it can be
758C used as a lower bound if the user wishes to provide ICNTL(23).
759C ---------------------------------------
760C Statistics produced after factorization
761C ---------------------------------------
762C INFO(9) Size of the real space used to store the LU factors possibly
763C including BLR compressed factors
764C
765C INFO(10) Size of the integer space used to store the LU factors
766C
767C INFO(11) Order of largest frontal matrix.
768C
769C INFO(12) Number of off-diagonal pivots.
770C
771C INFO(13) Number of uneliminated variables sent to the father.
772C
773C INFO(14) Number of memory compresses.
774C
775C INFO(18) On exit to factorization:
776C Local number of null pivots (ICNTL(24)=1)
777C on the local processor even on master.
778C (local size of array PIVNUL_LIST).
779C
780C INFO(19) - after analysis:
781C Estimated size of the main internal integer workarray IS
782C (old MAXIS) to run the numerical factorization out-of-core.
783C
784C INFO(21) - after factorization: Effective space used in the main
785C real/complex workarray S -- or in the workarray WK_USER,
786C in the case where WK_USER is provided.
787C
788C INFO(22) - after factorization:
789C Size in millions of bytes of memory effectively used during
790C factorization.
791C This includes the memory effectively used in the workarray
792C WK_USER, in the case where WK_user is provided.
793C
794C INFO(23) - after factorization: total number of pivots eliminated
795C on the processor. In the case of a distributed solution (see
796C ICNTL(21)), this should be used by the user to allocate solution
797C vectors ISOL_loc and SOL_loc of appropriate dimensions
798C (ISOL_LOC of size INFO(23), SOL_LOC of size LSOL_LOC * NRHS
799C where LSOL_LOC >= INFO(23)) on that processor, between the
800C factorization and solve steps.
801C
802C INFO(24) - after analysis: estimated number of entries in factors on
803C the processor. If negative, then
804C the absolute value corresponds to {\it millions} of entries
805C in the factors.
806C Note that in the unsymmetric case, INFO(24)=INFO(3).
807C In the symmetric case, however, INFO(24) < INFO(3).
808C INFO(25) - after factorization: number of tiny pivots (number of
809C pivots modified by static pivoting) detected on the processor.
810C INFO(26) - after solution:
811C effective size in Megabytes of all working space
812C to run the solution phase.
813C (The maximum and sum over all processors are returned
814C respectively in INFOG(30) and INFOG(31)).
815C INFO(27) - after factorization: effective number of entries in factors
816C on the processor. If negative, then
817C the absolute value corresponds to {\it millions} of entries
818C in the factors.
819C Note that in the unsymmetric case, INFO(27)=INFO(9).
820C In the symmetric case, however, INFO(27) < INFO(9).
821C The total number of entries over all processors is
822C available in INFOG(29).
823C
824C
825C -------------------------------------------------------------
826C -------------------------------------------------------------
827C RINFO is a REAL/DOUBLE PRECISION array of length 40 that
828C need not be set by the user. This array supplies
829C local information on the execution of CMUMPS.
830C
831C
832C RINFOG is a REAL/DOUBLE PRECISION array of length 40 that
833C need not be set by the user. This array supplies
834C global information on the execution of CMUMPS.
835C RINFOG is only significant on processor 0
836C
837C
838C RINFO(1) hold the estimated number of floating-point operations
839C for the elimination process on the local processor
840C
841C RINFOG(1) hold the estimated number of floating-point operations
842C for the elimination process on all processors
843C
844C RINFO(2) Number of floating-point operations
845C for the assembly process on local processor.
846C
847C RINFOG(2) Number of floating-point operations
848C for the assembly process.
849C
850C RINFO(3) Number of floating-point operations
851C for the elimination process on the local processor.
852C
853C RINFOG(3) Number of floating-point operations
854C for the elimination process on all processors.
855C
856C----------------------------------------------------
857C Statistics produced after solve with error analysis
858C----------------------------------------------------
859C
860C RINFOG(4) Infinite norm of the input matrix.
861C
862C RINFOG(5) Infinite norm of the computed solution, where
863C
864C RINFOG(6) Norm of scaled residuals
865C
866C RINFOG(7), `RINFOG(8) and `RINFOG(9) are used to hold information
867C on the backward error.
868C We calculate an estimate of the sparse backward error using the
869C theory and measure developed
870C by Arioli, Demmel, and Duff (1989). The scaled residual w1
871C is calculated for all equations except those
872C for which numerator is nonzero and the denominator is small.
873C For the exceptional equations, w2, is used instead.
874C The largest scaled residual (w1) is returned in
875C RINFOG(7) and the largest scaled
876C residual (w2) is returned in `RINFOG(8)>. If all equations are
877C non exceptional then zero is returned in `RINFOG(8).
878C The upper bound error is returned in `RINFOG(9).
879C
880C RINFOG(14) Number of floating-point operations
881C for the elimination process (on all fronts, BLR or not)
882C performed when BLR option is activated on all processors.
883C (equal to zero if BLR option not used, ICNTL(35).EQ.1)
884C
885C RINFOG(15) - after analysis: if the user decides to perform an
886C out-of-core factorization (ICNTL(22)=1), then a rough
887C estimation of the total size of the disk space in MegaBytes of
888C the files written by all processors is provided in RINFOG(15).
889C
890C RINFOG(16) - after factorization: in the case of an out-of-core
891C execution (ICNTL(22)=1), the total
892C size in MegaBytes of the disk space used by the files written
893C by all processors is provided.
894C
895C RINFOG(17) - after each job: sum over all processors of the sizes
896C (in MegaBytes) of the files used to save the instance
897C
898C RINFOG(18) - after each job: sum over all processors of the sizes
899C (in MegaBytes) of the MUMPS structures.
900C
901C RINFOG(19) - after factorization: smallest pivot in absolute
902C value selected during factorization of the preprocessed
903C matrix A_pre and considering also
904C small pivots selected as null-pivots (see ICNTL(24))
905C and pivots on which static pivoting was applied
906C
907C RINFOG(20) - after factorization: smallest pivot in absolute
908C value selected during factorization of the preprocessed
909C matrix A_pre and NOT considering
910C small pivots selected as null-pivots (see ICNTL(24))
911C and pivots on which static pivoting was applied
912C
913C RINFOG(21) - after factorization: largest pivot in absolute
914C value selected during factorization of the preprocessed
915C matrix A_pre.
916C===========================
917C DESCRIPTION OF KEEP8 ARRAY
918C===========================
919C
920C KEEP8 is a 64-bit integer array of length 150 that need not
921C be set by the user
922C
923C===========================
924C DESCRIPTION OF KEEP ARRAY
925C===========================
926C
927C KEEP is an INTEGER array of length 500 that need not
928C be set by the user.
929C
930C
931C=============================
932C Description of DKEEP array
933C=============================
934C
935C DKEEP internal control array for REAL parameters
936C of size 30
937C===================================
938C Default values for control arrays
939C==================================
940C uninitialized values should be 0
941 lwk_user = 0
942 keep(1:500) = 0
943 keep8(1:150)= 0_8
944 info(1:80) = 0
945 infog(1:80) = 0
946 icntl(1:60) = 0
947 rinfo(1:40) = 0.0e0
948 rinfog(1:40)= 0.0e0
949 cntl(1:15) = 0.0e0
950 dkeep(1:230) = 0.0e0
951C ----------------
952C Symmetric code ?
953C ----------------
954 keep( 50 ) = sym
955C Check value of SYM
956 IF (sym.EQ.1) THEN
957C
958C this option is not available with the complex
959C code on symmetric matrices.
960C We set KEEP(50) to 2 and will exploit symmetry
961C up to the root.
962 keep(50) = 2
963 ENDIF
964C -------------------------------------
965C Only options 0, 1, or 2 are available
966C -------------------------------------
967 IF ( keep(50).NE.1 .and. keep(50).NE.2 ) keep( 50 ) = 0
968C threshold value for pivoting
969 IF ( keep(50) .NE. 1 ) THEN
970 cntl(1) = 0.01e0
971 ELSE
972 cntl(1) = 0.0e0
973 END IF
974 cntl(2) = sqrt(epsilon(0.0e0))
975 cntl(3) = 0.0e0
976 cntl(4) = -1.0e0
977 cntl(5) = 0.0e0
978C Working host ?
979 keep(46) = par
980 IF ( keep(46) .NE. 0 .AND.
981 & keep(46) .NE. 1 ) THEN
982C ----------------------
983C If out-of-range value,
984C use a working host
985C ----------------------
986 keep(46) = 1
987 END IF
988C control printing
989 icntl(1) = 6
990 icntl(2) = 0
991 icntl(3) = 6
992 icntl(4) = 2
993C format of input matrix
994 icntl(5) = 0
995C maximum transversal (0=NO, 7=automatic)
996 IF (sym.NE.1) THEN
997 icntl(6) = 7
998 ELSE
999 icntl(6) = 0
1000 ENDIF
1001C Ordering option (icntl(7))
1002C Default is automatic choice done during analysis
1003 icntl(7) = 7
1004C ask for scaling (0=NO, 4=Row and Column)
1005C Default value is 77: automatic choice for analysis
1006 icntl(8) = 77
1007C solve Ax=b (1) or Atx=b (other values)
1008 icntl(9) = 1
1009C Naximum number of IR (0=NO)
1010 icntl(10) = 0
1011C Error analysis (0=NO)
1012 icntl(11) = 0
1013C Control ordering strategy
1014C automatic choice
1015 IF(sym .EQ. 2) THEN
1016 icntl(12) = 0
1017 ELSE
1018 icntl(12) = 1
1019 ENDIF
1020C Control of the use of ScaLAPACK for root node
1021C If null space options asked, ScaLAPACK is always ignored
1022C and ICNTL(13) is not significant
1023C ICNTL(13) = 0 : Root parallelism on (if size large enough)
1024C ICNTL(13) = 1 : Root parallelism off
1025 icntl(13) = 0
1026C Default value for the memory relaxation
1027 IF (sym.eq.1.AND.nslaves.EQ.1) THEN
1028 icntl(14) = 5 ! it should work with 0
1029 ELSE
1030 icntl(14) = 20
1031 END IF
1032 IF (nslaves.GT.4) icntl(14) = icntl(14) + 5
1033 IF (nslaves.GT.8) icntl(14) = icntl(14) + 5
1034 IF (nslaves.GT.16) icntl(14)= icntl(14) + 5
1035C Distributed matrix entry
1036 icntl(18) = 0
1037C Schur (default is not active)
1038 icntl(19) = 0
1039C dense RHS by default
1040 icntl(20) = 0
1041C solution vector centralized on host
1042 icntl(21) = 0
1043C out-of-core flag
1044 icntl(22) = 0
1045C MEM_ALLOWED (0: not provided)
1046 icntl(23) = 0
1047C null pivots
1048 icntl(24) = 0
1049C blocking factor for multiple RHS during solution phase
1050 icntl(27) = -32
1051C analysis strategy: 0=auto, 1=sequential, 2=parallel
1052 icntl(28) = 1
1053C tool used for parallel ordering computation :
1054C 0 = auto, 1 = PT-SCOTCH, 2 = ParMETIS
1055 icntl(29) = 0
1056C Default BLR compression rate of factors (60%)
1057 icntl(38) = 600
1058 icntl(55) = 0
1059 icntl(56) = 0
1060 icntl(57) = 0
1061#if defined(__ve__)
1062 icntl(58) = 2
1063#else
1064 icntl(58) = 1
1065#endif
1066C===================================
1067C Default values for some components
1068C of KEEP array
1069C===================================
1070 keep(12) = 0
1071 keep(24) = 18
1072 keep(68) = 0
1073 keep(30) = 2000
1074 keep(36) = 1
1075 keep(1) = 5
1076 keep(7) = 150
1077 keep(8) = 120
1078 keep(57) = 2000
1079 keep(58) = 1000
1080 IF ( sym .eq. 0 ) THEN
1081 keep(4) = 32
1082 keep(3) = 96
1083 keep(5) = 16
1084 keep(6) = 32
1085 keep(9) = 700
1086 keep(85) = 300
1087 keep(62) = 50
1088 ELSE
1089 keep(4) = 24
1090 keep(3) = 96
1091 keep(5) = 16
1092 keep(6) = 32
1093 keep(9) = 400
1094 keep(85) = 100
1095 keep(62) = 50
1096 END IF
1097 keep(63) = 60
1098 keep(48) = 5
1099 CALL cmumps_set_type_sizes( keep(34), keep(35),
1100 & keep(16), keep(10) )
1101 keep(51) = 70
1102 keep(37) = max(800, int(sqrt(real(nslaves+1))*real(keep(51))))
1103 IF ( nslaves > 256 ) THEN
1104 keep(39) = 10000
1105 ELSEIF ( nslaves > 128 ) THEN
1106 keep(39) = 20000
1107 ELSEIF ( nslaves > 64 ) THEN
1108 keep(39) = 40000
1109 ELSEIF ( nslaves > 16 ) THEN
1110 keep(39) = 80000
1111 ELSE
1112 keep(39) = 160000
1113 END IF
1114 keep(40) = -1 - 456789
1115 keep(45) = 0
1116 keep(47) = 2
1117 keep(64) = 20
1118 keep(69) = 4
1119C To disable SMP management when using new mapping strategy
1120C KEEP(69) = 1
1121C Forcing proportional is ok with strategy 5
1122 keep(75) = 1
1123 keep(76) = 2
1124 keep(77) = 30
1125 keep(79) = 0 ! old splitting
1126 IF (nslaves.GT.4) THEN
1127 keep(78)=max(
1128 & int(log(real(nslaves))/log(real(2))) - 2
1129 & , 0 )
1130 ENDIF
1131 keep(210) = 2
1132 keep8(79) = -10_8
1133 keep(80) = 1
1134 keep(81) = 0
1135 keep(82) = 30
1136 keep(83) = min(8,nslaves/4)
1137 keep(83) = max(min(4,nslaves),max(keep(83),1))
1138 keep(86)=1
1139 keep(87)=0
1140 keep(88)=0
1141 keep(90)=1
1142 keep(91)=min(8, nslaves)
1143 keep(91) = max(min(4,nslaves),min(keep(83),keep(91)))
1144 IF(nslaves.LT.48)THEN
1145 keep(102)=150
1146 ELSEIF(nslaves.LT.128)THEN
1147 keep(102)=150
1148 ELSEIF(nslaves.LT.256)THEN
1149 keep(102)=200
1150 ELSEIF(nslaves.LT.512)THEN
1151 keep(102)=300
1152 ELSEIF(nslaves.GE.512)THEN
1153 keep(102)=400
1154 ENDIF
1155#if defined(OLD_OOC_NOPANEL)
1156 keep(99)=0 ! no panel -> synchronous / no buffer
1157#else
1158 keep(99)=4 ! new OOC -> asynchronous + buffer
1159#endif
1160 keep(100)=0
1161 keep(114) = 1
1162C Threshold value for null pîvot detection during
1163C LU factorization on root in case of RR
1164 keep(118)=41
1165C strategy for MUMPS_BLOC2_GET_NSLAVESMIN
1166 keep(119)=0
1167C Scaling is enabled by default with the Schur complement option
1168 keep(125) = 1
1169 keep(197)=0
1170#if defined(__ve__)
1171C More amalgamation of small fronts to better exploit vector engine
1172 keep(197)=1
1173#endif
1174C KEEP(199) for MUMPS_PROCNODE, MUMPS_TYPENODE, etc
1175C KEEP(199)=NSLAVES + 7
1176 keep(199)=-1
1177 keep(200)=0 ! root pre-assembled in id%S
1178C Pre-assemble type 3 root in id%S if no L0-OMP,
1179C allocate id%S later otherwise.
1180 keep(200) = -1
1181 keep(204)=0
1182 keep(205)=0
1183 keep(209)=-1
1184 keep(104) = 16
1185 keep(107)=0
1186 keep(121)=-999999
1187 keep(122)=15
1188 keep(141)=1 ! min needed
1189 keep(206)=1
1190 keep(207) = 1
1191 keep(211)=2
1192 IF (nslaves .EQ. 2) THEN
1193 keep(213) = 101
1194 ELSE
1195 keep(213) = 201
1196 ENDIF
1197 keep(217)=0
1198 keep(215)=0
1199 keep(216)=1
1200 keep(218)=250
1201 keep(219)=1
1202 IF (keep(50).EQ.2) THEN
1203 keep(227)= max(2,32)
1204 ELSE
1205 keep(227)= max(1,32)
1206 ENDIF
1207 keep(231) = 1
1208 keep(232) = 3
1209 keep(233) = 0
1210 keep(239) = 1
1211 keep(240) = 10
1212 dkeep(4) = -1.0e0
1213 dkeep(5) = -1.0e0
1214 dkeep(10) = -9e0 ! default value is 10E-1 set in fac_driver.F
1215 dkeep(13) = -9e0 ! to define SEUIL for postponing with RR
1216 ! (default value is 10 set in fac_driver.F)
1217 dkeep(24) = 1000.0e0 ! gap should be larger than dkeep(14)
1218 dkeep(25) = 10.0e0 ! gap precision
1219 keep(238)=18
1220 keep(234)= 1
1221 keep(235)=-1
1222 dkeep(3) =-5.0e0
1223 dkeep(18)= 1.0e12
1224 keep(242) = -9
1225 keep(243) = -1
1226 keep(249)=1
1227!$ KEEP(249) = OMP_GET_MAX_THREADS()
1228 keep(250) = 1
1229 keep(261) = 1
1230 keep(262) = 0
1231 keep(263) = 1
1232 keep(266) = 0
1233 keep(267) = 0
1234 keep(268)=77
1235 keep(350) = 1
1236 keep(351) = 0
1237 keep(360) = 256
1238 keep(361) = 2048
1239 keep(362) = 4
1240 keep(363) = 512
1241 keep(364) = 32768
1242 keep(378) = 1
1243C OMP parallelization of arrowheads
1244 keep(399) = 1
1245 keep(397) = -1
1246#if defined(__ve__)
1247 keep(401) = 1
1248#else
1249 keep(401) = 0
1250#endif
1251 keep(402) = 1
1252 keep(405) = 0 ! 1 under L0OMP
1253 keep(406) = 2
1254C 0.9 equilibration
1255 keep(408) = 90
1256 keep(420) = 4*keep(6) ! if KEEP(6)=32 then 128
1257#if defined(GEMMT_AVAILABLE)
1258 keep(421) = -1
1259#endif
1260C Default size of KEEP(424) is defined below.
1261C It does not depend on arithmetic,
1262C it is related to L1 cache size: 250 * 64 bytes
1263C is about half of the cache size (32768 bytes).
1264C This leaves space in cache for the destination,
1265C of size 250*sizeof(arith). (4k bytes for z)
1266C At each new block of size KEEP(424), there is
1267C probably a cache miss on the pivot.
1268 keep(424) = 250
1269 keep(459) = 10 ! max number of panels
1270 keep(460) = 63 ! min panel size
1271 keep(461) = 10
1272 keep(462) = 10
1273 keep(466) = 1
1274 keep(468) = 3
1275 keep(469) = 3
1276 keep(471) = -1
1277 keep(479) = 1
1278 keep(480) = 3
1279 keep(472) = 1
1280 keep(476) = 50
1281 keep(477) = 100
1282 keep(483) = 50
1283 keep(484) = 50
1284 keep(487) = 1
1285 IF (keep(472).EQ.1) THEN
1286 keep(488) = 512
1287 ELSE
1288 keep(488) = 8*keep(6) ! if KEEP(6)=32 then 256
1289 ENDIF
1290 keep(490) = 128
1291 keep(491) = 1000
1292 keep(492) = 1
1293 keep(82) = 30
1294 keep(493) = 0
1295 keep(496) = 1
1296 keep(495) = -1
1297 keep(497) = -1
1298C
1299 RETURN
1300 END SUBROUTINE cmumpsid
1301 SUBROUTINE cmumps_set_keep72(id, LP)
1303 IMPLICIT NONE
1304 TYPE (CMUMPS_STRUC) :: id
1305 INTEGER LP
1306 IF (id%KEEP(72)==1) THEN
1307 id%KEEP(37) = 2*id%NSLAVES
1308 id%KEEP(3)=3
1309 id%KEEP(4)=2
1310 id%KEEP(5)=1
1311 id%KEEP(6)=2
1312 id%KEEP(9)=3
1313 id%KEEP(39)=300
1314 id%KEEP(7) = 3
1315 id%KEEP(8) = 2
1316 id%KEEP(57)= 3
1317 id%KEEP(58)= 2
1318 id%KEEP(63)=3
1319 id%CNTL(1)=0.1e0
1320 id%KEEP(213) = 101
1321 id%KEEP(85)=2
1322 id%KEEP(85)=-4
1323 id%KEEP(62) = 2
1324 id%KEEP(1) = 1
1325 id%KEEP(51) = 2
1326!$ id%KEEP(360) = 2
1327!$ id%KEEP(361) = 2
1328!$ id%KEEP(362) = 1
1329!$ id%KEEP(363) = 2
1330 id%KEEP(364) = 10
1331 id%KEEP(420) = 4
1332 id%KEEP(488) = 4
1333 id%KEEP(490) = 5
1334 id%KEEP(491) = 5
1335 id%ICNTL(27)=-3
1336 id%KEEP(227)=3
1337 id%KEEP(30) = 1000
1338C Activate L0OMP with test_mumps in case KEEP(72)=1
1339 id%KEEP(401) = 1
1340 ELSE IF (id%KEEP(72)==2) THEN
1341 id%KEEP(85)=2 ! default is
1342 id%KEEP(85)=-10000 ! default is 160
1343 id%KEEP(62) = 10 ! default is 50
1344 id%KEEP(210) = 1 ! defaults is 0 (automatic)
1345 id%KEEP8(79) = 160000_8
1346 id%KEEP(1) = 2 ! default is 8
1347 id%KEEP(102) = 110 ! defaults is 150 up to 48 procs
1348 id%KEEP(213) = 121 ! default is 201
1349 END IF
1350 RETURN
1351 END SUBROUTINE cmumps_set_keep72
subroutine cmumps_set_type_sizes(k34, k35, k16, k10)
subroutine cmumpsid(nslaves, lwk_user, cntl, icntl, keep, keep8, info, infog, rinfo, rinfog, sym, par, dkeep, myid)
subroutine cmumps_set_keep72(id, lp)
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21