OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
zfac_driver.F
Go to the documentation of this file.
1C
2C This file is part of MUMPS 5.5.1, released
3C on Tue Jul 12 13:17:24 UTC 2022
4C
5C
6C Copyright 1991-2022 CERFACS, CNRS, ENS Lyon, INP Toulouse, Inria,
7C Mumps Technologies, University of Bordeaux.
8C
9C This version of MUMPS is provided to you free of charge. It is
10C released under the CeCILL-C license
11C (see doc/CeCILL-C_V1-en.txt, doc/CeCILL-C_V1-fr.txt, and
12C https://cecill.info/licences/Licence_CeCILL-C_V1-en.html)
13C
14 SUBROUTINE zmumps_fac_driver( id)
15 USE zmumps_buf
16 USE zmumps_load
17 USE zmumps_ooc
26#if ! defined(NO_FDM_DESCBAND)
28#endif
29#if ! defined(NO_FDM_MAPROW)
31#endif
32!$ USE OMP_LIB
33C Derived datatype to pass pointers with implicit interfaces
35 IMPLICIT NONE
36C
37C Purpose
38C =======
39C
40C Performs scaling, sorting in arrowhead, then
41C distributes the matrix, and perform
42C factorization.
43C
44C
45 INTERFACE
46 SUBROUTINE zmumps_anorminf(id, ANORMINF, LSCAL, EFF_SIZE_SCHUR)
48 TYPE (ZMUMPS_STRUC), TARGET :: id
49 DOUBLE PRECISION, INTENT(OUT) :: ANORMINF
50 LOGICAL, INTENT(IN) :: LSCAL
51 INTEGER, INTENT(IN) :: EFF_SIZE_SCHUR
52 END SUBROUTINE zmumps_anorminf
53 SUBROUTINE zmumps_free_id_data_modules(id_FDM_F_ENCODING,
54 & id_BLRARRAY_ENCODING, KEEP8, K34)
55# if defined(MUMPS_F2003)
56 CHARACTER, DIMENSION(:), POINTER, intent(inout) ::
57 & id_blrarray_encoding
58 CHARACTER, DIMENSION(:), POINTER, intent(inout) ::
59 & id_fdm_f_encoding
60# else
61 CHARACTER, DIMENSION(:), POINTER :: id_BLRARRAY_ENCODING
62 CHARACTER, DIMENSION(:), POINTER :: id_FDM_F_ENCODING
63# endif
64 INTEGER(8), intent(inout) :: KEEP8(150)
65 INTEGER, intent(in) :: K34
66 END SUBROUTINE zmumps_free_id_data_modules
67 END INTERFACE
68C
69C Parameters
70C ==========
71C
72 TYPE(zmumps_struc), TARGET :: id
73C
74C MPI
75C ===
76C
77 include 'mpif.h'
78 include 'mumps_tags.h'
79 INTEGER :: STATUS(MPI_STATUS_SIZE)
80 INTEGER :: IERR
81 INTEGER, PARAMETER :: MASTER = 0
82C
83C Local variables
84C ===============
85C
86 include 'mumps_headers.h'
87 INTEGER(8) :: NSEND8, NSEND_TOT8
88 INTEGER(8) :: NLOCAL8, NLOCAL_TOT8
89 INTEGER(4) :: I4
90 INTEGER :: LDPTRAR, NELT_arg, NBRECORDS
91 INTEGER :: ITMP, JTMP
92 INTEGER :: KEEP464COPY, KEEP465COPY
93 DOUBLE PRECISION :: RATIOK465
94 INTEGER(8) :: KEEP826_SAVE
95 INTEGER(8) :: K67, K68, K70, K74, K75
96 INTEGER(8) ITMP8
97 INTEGER MUMPS_PROCNODE
98 EXTERNAL mumps_procnode
99 INTEGER MP, LP, MPG, allocok
100 LOGICAL PROK, PROKG, LSCAL, LPOK, COMPUTE_ANORMINF
101C Reception buffer
102 INTEGER :: ZMUMPS_LBUFR, ZMUMPS_LBUFR_BYTES
103 INTEGER(8) :: ZMUMPS_LBUFR_BYTES8 ! for intermediate computation
104 INTEGER, ALLOCATABLE, DIMENSION(:) :: BUFR
105C Size of send buffers (in bytes)
106 INTEGER :: ZMUMPS_LBUF, ZMUMPS_LBUF_INT
107 INTEGER(8) :: ZMUMPS_LBUF8 ! for intermediate computation
108C
109 INTEGER PTRIST, PTRWB, MAXELT_SIZE,
110 & itloc, ipool, k28, lpool
111 INTEGER IRANK, ID_ROOT
112 INTEGER KKKK
113 INTEGER(8) :: NZ_locMAX8
114 INTEGER(8) MEMORY_MD_ARG
115 INTEGER(8) MAXS_BASE8, MAXS_BASE_RELAXED8
116 DOUBLE PRECISION CNTL4, AVG_FLOPS
117 INTEGER MIN_PERLU, MAXIS_ESTIM
118 INTEGER SUM_INFO22_THIS_NODE, MAX_SUM_INFO22_THIS_NODE
119C
120 TYPE (S_IS_POINTERS_T) :: S_IS_POINTERS
121 INTEGER MAXIS
122 INTEGER(8) :: MAXS
123C For S argument to arrowhead routines:
124 INTEGER(8) :: MAXS_ARG
125 COMPLEX(kind=8), TARGET :: S_DUMMY_ARG(1)
126 COMPLEX(kind=8), POINTER, DIMENSION(:) :: S_PTR_ARG
127 INTEGER NB_THREADS, NOMP
128 DOUBLE PRECISION TIMEAVG, TIMEMAX,
129 & flopavg, flopmax
130 DOUBLE PRECISION TMPTIME, TMPFLOP
131 INTEGER NPIV_CRITICAL_PATH, EFF_SIZE_SCHUR
132 DOUBLE PRECISION TIME, TIMEET
133 DOUBLE PRECISION ZERO, ONE, MONE
134 parameter( zero = 0.0d0, one = 1.0d0, mone = -1.0d0)
135 COMPLEX(kind=8) CZERO
136 parameter( czero = (0.0d0, 0.0d0) )
137 INTEGER PERLU, TOTAL_MBYTES, K231, K232, K233, BLR_STRAT
138 INTEGER, PARAMETER :: IDUMMY = -9999
139 LOGICAL, PARAMETER :: BDUMMY =.false.
140 INTEGER, PARAMETER :: PANEL_TABSIZE = 20
141 INTEGER COLOUR, COMM_FOR_SCALING ! For Simultaneous scaling
142 INTEGER LIWK, LWK_REAL
143 INTEGER(8) :: LWK
144C I_AM_SLAVE: used to determine if proc has the role of a slave
145C WK_USER_PROVIDED is set to true when WK_USER is provided by user
146 LOGICAL I_AM_SLAVE, PERLU_ON, WK_USER_PROVIDED, EARLYT3ROOTINS
147 LOGICAL PRINT_MAXAVG, PRINT_NODEINFO
148 DOUBLE PRECISION :: ANORMINF, SEUIL, SEUIL_LDLT_NIV2, Thresh_Seuil
149 DOUBLE PRECISION :: CNTL1, CNTL3, CNTL5, CNTL6, EPS
150 INTEGER N, LPN_LIST,POSBUF
151 INTEGER, DIMENSION (:), ALLOCATABLE :: ITMP2
152 INTEGER I,K
153 INTEGER(8) :: ITEMP8
154 INTEGER :: PARPIV_T1
155 INTEGER FRONTWISE
156C temporary variables for collecting stats from all processors
157 DOUBLE PRECISION :: TMP_MRY_LU_FR
158 DOUBLE PRECISION :: TMP_MRY_LU_LRGAIN
159 DOUBLE PRECISION :: TMP_MRY_CB_FR
160 DOUBLE PRECISION :: TMP_MRY_CB_LRGAIN
161 DOUBLE PRECISION :: TMP_FLOP_LRGAIN
162 DOUBLE PRECISION :: TMP_FLOP_TRSM
163 DOUBLE PRECISION :: TMP_FLOP_PANEL
164 DOUBLE PRECISION :: TMP_FLOP_FRFRONTS
165 DOUBLE PRECISION :: TMP_FLOP_TRSM_FR
166 DOUBLE PRECISION :: TMP_FLOP_TRSM_LR
167 DOUBLE PRECISION :: TMP_FLOP_UPDATE_FR
168 DOUBLE PRECISION :: TMP_FLOP_UPDATE_LR
169 DOUBLE PRECISION :: TMP_FLOP_UPDATE_LRLR3
170 DOUBLE PRECISION :: TMP_FLOP_COMPRESS
171 DOUBLE PRECISION :: TMP_FLOP_DECOMPRESS
172 DOUBLE PRECISION :: TMP_FLOP_MIDBLK_COMPRESS
173 DOUBLE PRECISION :: TMP_FLOP_FRSWAP_COMPRESS
174 DOUBLE PRECISION :: TMP_FLOP_ACCUM_COMPRESS
175 DOUBLE PRECISION :: TMP_FLOP_CB_COMPRESS
176 DOUBLE PRECISION :: TMP_FLOP_CB_DECOMPRESS
177 DOUBLE PRECISION :: TMP_FLOP_FACTO_FR
178 INTEGER :: TMP_CNT_NODES
179 DOUBLE PRECISION :: TMP_TIME_UPDATE
180 DOUBLE PRECISION :: TMP_TIME_UPDATE_LRLR1
181 DOUBLE PRECISION :: TMP_TIME_UPDATE_LRLR2
182 DOUBLE PRECISION :: TMP_TIME_UPDATE_LRLR3
183 DOUBLE PRECISION :: TMP_TIME_UPDATE_FRLR
184 DOUBLE PRECISION :: TMP_TIME_UPDATE_FRFR
185 DOUBLE PRECISION :: TMP_TIME_COMPRESS
186 DOUBLE PRECISION :: TMP_TIME_MIDBLK_COMPRESS
187 DOUBLE PRECISION :: TMP_TIME_FRSWAP_COMPRESS
188 DOUBLE PRECISION :: TMP_TIME_CB_COMPRESS
189 DOUBLE PRECISION :: TMP_TIME_PANEL
190 DOUBLE PRECISION :: TMP_TIME_FAC_I
191 DOUBLE PRECISION :: TMP_TIME_FAC_MQ
192 DOUBLE PRECISION :: TMP_TIME_FAC_SQ
193 DOUBLE PRECISION :: TMP_TIME_LRTRSM
194 DOUBLE PRECISION :: TMP_TIME_FRTRSM
195 DOUBLE PRECISION :: TMP_TIME_FRFRONTS
196 DOUBLE PRECISION :: TMP_TIME_LR_MODULE
197 DOUBLE PRECISION :: TMP_TIME_DIAGCOPY
198 DOUBLE PRECISION :: TMP_TIME_DECOMP
199 DOUBLE PRECISION :: TMP_TIME_DECOMP_UCFS
200 DOUBLE PRECISION :: TMP_TIME_DECOMP_ASM1
201 DOUBLE PRECISION :: TMP_TIME_DECOMP_LOCASM2
202 DOUBLE PRECISION :: TMP_TIME_DECOMP_MAPLIG1
203 DOUBLE PRECISION :: TMP_TIME_DECOMP_ASMS2S
204 DOUBLE PRECISION :: TMP_TIME_DECOMP_ASMS2M
205C
206C Workspace.
207C
208 INTEGER, DIMENSION(:), ALLOCATABLE :: IWK
209 COMPLEX(kind=8), DIMENSION(:), ALLOCATABLE :: WK
210 DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: WK_REAL
211 INTEGER(8), DIMENSION(:), ALLOCATABLE :: IWK8
212 INTEGER, DIMENSION(:), ALLOCATABLE :: BURP
213 INTEGER, DIMENSION(:), ALLOCATABLE :: BUCP
214 INTEGER, DIMENSION(:), ALLOCATABLE :: BURS
215 INTEGER, DIMENSION(:), ALLOCATABLE :: BUCS
216 INTEGER BUREGISTRE(12)
217 INTEGER BUINTSZ, BURESZ, BUJOB
218 INTEGER BUMAXMN, M, SCMYID, SCNPROCS
219 DOUBLE PRECISION SCONEERR, SCINFERR
220C
221C Parameters arising from the structure
222C =====================================
223C
224 INTEGER, POINTER :: JOB
225* Control parameters: see description in ZMUMPSID
226 DOUBLE PRECISION,DIMENSION(:),POINTER::RINFO, RINFOG
227 DOUBLE PRECISION,DIMENSION(:),POINTER:: CNTL
228 INTEGER,DIMENSION(:),POINTER:: INFOG, KEEP
229 INTEGER, DIMENSION(:), POINTER :: MYIRN_loc, MYJCN_loc
230 COMPLEX(kind=8), DIMENSION(:), POINTER :: MYA_loc
231 INTEGER, TARGET :: DUMMYIRN_loc(1), DUMMYJCN_loc(1)
232 COMPLEX(kind=8), TARGET :: DUMMYA_loc(1)
233 INTEGER,DIMENSION(:),POINTER::ICNTL
234 EXTERNAL mumps_get_pool_length
235 INTEGER MUMPS_GET_POOL_LENGTH
236 INTEGER(8) :: TOTAL_BYTES
237 INTEGER(8) :: I8TMP, LWK_USER_SUM8
238C
239C External references
240C ===================
241 INTEGER numroc
242 EXTERNAL numroc
243 INTEGER:: NWORKING
244 LOGICAL:: MEM_EFF_ALLOCATED
245 INTEGER :: TOTAL_MBYTES_UNDER_L0
246 INTEGER(8):: TOTAL_BYTES_UNDER_L0
247C Fwd in facto:
248 COMPLEX(kind=8), DIMENSION(:), POINTER :: RHS_MUMPS
249 LOGICAL :: RHS_MUMPS_ALLOCATED
250 INTEGER :: NB_ACTIVE_FRONTS_ESTIM
251 INTEGER :: NB_FRONTS_F_ESTIM
252C
253C
254 job=>id%JOB
255 rinfo=>id%RINFO
256 rinfog=>id%RINFOG
257 cntl=>id%CNTL
258 infog=>id%INFOG
259 keep=>id%KEEP
260 icntl=>id%ICNTL
261 IF (id%KEEP8(29) .NE. 0) THEN
262 myirn_loc=>id%IRN_loc
263 myjcn_loc=>id%JCN_loc
264 mya_loc=>id%A_loc
265 ELSE
266 myirn_loc=>dummyirn_loc
267 myjcn_loc=>dummyjcn_loc
268 mya_loc=>dummya_loc
269 ENDIF
270 n = id%N
271 eps = epsilon( zero )
272C TIMINGS: reset to 0
273 id%DKEEP(92)=0.0d0
274 id%DKEEP(93)=0.0d0
275 id%DKEEP(94)=0.0d0
276 id%DKEEP(97)=0.0d0
277 id%DKEEP(98)=0.0d0
278 id%DKEEP(56)=0.0d0
279C Count of MPI messages: reset to 0
280 id%KEEP(266)=0
281 id%KEEP(267)=0
282C MIN/MAX pivots reset to 0
283 id%DKEEP(19)=huge(0.0d0)
284 id%DKEEP(20)=huge(0.0d0)
285 id%DKEEP(21)=0.0d0
286C Number of symmetric swaps
287 id%KEEP8(80)=0_8
288C Largest increase of internal panel size
289 id%KEEP(425) =0
290C
291 print_maxavg = .NOT.(id%NSLAVES.EQ.1 .AND. keep(46).EQ.1)
292C Print per node informtation only in case ther are several
293C compute nodes (id%KEEP(412): #MPI procs on comupte node)
294 print_nodeinfo = print_maxavg .AND. id%NPROCS .NE. id%KEEP(412)
295C
296C BEGIN CASE OF ALLOCATED DATA FROM PREVIOUS CALLS
297C Data from factorization is now freed asap
298C id%S, id%IS
299 IF (id%KEEP8(24).EQ.0_8) THEN
300C -- deallocate only when not provided/allocated by the user
301 IF (associated(id%S)) THEN
302 DEALLOCATE(id%S)
303 id%KEEP8(23)=0_8
304 NULLIFY(id%S)
305 ENDIF
306 ENDIF
307 IF (associated(id%IS)) THEN
308 DEALLOCATE(id%IS)
309 NULLIFY(id%IS)
310 ENDIF
311C Free BLR factors, if any
312 CALL zmumps_free_id_data_modules(id%FDM_F_ENCODING,
313 & id%BLRARRAY_ENCODING, id%KEEP8(1), id%KEEP(34))
314 IF (associated(id%root%RG2L_ROW))THEN
315 DEALLOCATE(id%root%RG2L_ROW)
316 NULLIFY(id%root%RG2L_ROW)
317 ENDIF
318 IF (associated(id%root%RG2L_COL))THEN
319 DEALLOCATE(id%root%RG2L_COL)
320 NULLIFY(id%root%RG2L_COL)
321 ENDIF
322 IF (associated( id%PTLUST_S )) THEN
323 DEALLOCATE(id%PTLUST_S)
324 NULLIFY(id%PTLUST_S)
325 ENDIF
326 IF (associated(id%PTRFAC)) THEN
327 DEALLOCATE(id%PTRFAC)
328 NULLIFY(id%PTRFAC)
329 END IF
330 IF (associated(id%RHSCOMP)) THEN
331 DEALLOCATE(id%RHSCOMP)
332 NULLIFY(id%RHSCOMP)
333 id%KEEP8(25)=0_8
334 ENDIF
335 IF (associated(id%POSINRHSCOMP_ROW)) THEN
336 DEALLOCATE(id%POSINRHSCOMP_ROW)
337 NULLIFY(id%POSINRHSCOMP_ROW)
338 ENDIF
339 IF (id%POSINRHSCOMP_COL_ALLOC) THEN
340 DEALLOCATE(id%POSINRHSCOMP_COL)
341 NULLIFY(id%POSINRHSCOMP_COL)
342 id%POSINRHSCOMP_COL_ALLOC = .false.
343 ENDIF
344C
345C END CASE OF ALLOCATED DATA FROM PREVIOUS CALLS
346C
347C Related to forward in facto functionality (referred to as "Fwd in facto")
348 NULLIFY(rhs_mumps)
349 rhs_mumps_allocated = .false.
350C -----------------------------------------------------------------------
351C Set WK_USER_PROVIDED to true when workspace WK_USER is provided by user
352C We can accept WK_USER to be provided on only one proc and
353C different values of WK_USER per processor
354C
355 IF (id%KEEP8(24).GT.0_8) THEN
356C We nullify S so that later when we test
357C if (associated(S) we can free space and reallocate it).
358 NULLIFY(id%S)
359 ENDIF
360C
361C -- KEEP8(24) can now then be reset safely
362 wk_user_provided = (id%LWK_USER.NE.0)
363 IF (wk_user_provided) THEN
364 IF (id%LWK_USER.GT.0) THEN
365 id%KEEP8(24) = int(id%LWK_USER,8)
366 ELSE
367 id%KEEP8(24) = -int(id%LWK_USER,8)* 1000000_8
368 ENDIF
369 ELSE
370 id%KEEP8(24) = 0_8
371 ENDIF
372C Compute sum of LWK_USER provided by user
373 lwk_user_sum8 = 0_8
374 CALL mpi_reduce ( id%KEEP8(24), lwk_user_sum8, 1, mpi_integer8,
375 & mpi_sum, master, id%COMM, ierr )
376C
377C KEEP8(26) might be modified
378C (element entry format)
379C but need be restore for
380C future factorisation
381C with different scaling option
382C
383 keep826_save = id%KEEP8(26)
384C In case of loop on factorization with
385C different scaling options, initialize
386C DKEEP(4:5) to 0.
387 id%DKEEP(4)=-1.0d0
388 id%DKEEP(5)=-1.0d0
389C Mapping information used during solve. In case of several facto+solve
390C it has to be recomputed. In case of several solves with the same
391C facto, it is not recomputed.
392 IF (associated(id%IPTR_WORKING)) THEN
393 DEALLOCATE(id%IPTR_WORKING)
394 NULLIFY(id%IPTR_WORKING)
395 END IF
396 IF (associated(id%WORKING)) THEN
397 DEALLOCATE(id%WORKING)
398 NULLIFY(id%WORKING)
399 END IF
400C
401C Units for printing
402C MP: diagnostics
403C LP: errors
404C
405 lp = icntl( 1 )
406 mp = icntl( 2 )
407 mpg = icntl( 3 )
408 lpok = ((lp.GT.0).AND.(id%ICNTL(4).GE.1))
409 prok = ((mp.GT.0).AND.(id%ICNTL(4).GE.2))
410 prokg = ( mpg .GT. 0 .and. id%MYID .eq. master )
411 prokg = (prokg.AND.(id%ICNTL(4).GE.2))
412 IF ( prok ) WRITE( mp, 130 )
413 IF ( prokg ) WRITE( mpg, 130 )
414C -------------------------------------
415C Depending on the type of parallelism,
416C the master can now (soon) potentially
417C have the role of a slave
418C -------------------------------------
419 i_am_slave = ( id%MYID .ne. master .OR.
420 & ( id%MYID .eq. master .AND.
421 & keep(46) .eq. 1 ) )
422C
423C Prepare work for out-of-core
424C
425 IF (id%MYID .EQ. master .AND. keep(201) .NE. -1) THEN
426C Note that if KEEP(201)=-1, then we have decided
427C at analysis phase that factors will not be stored
428C (neither in memory nor on disk). In that case,
429C ICNTL(22) is ignored.
430C -- ICNTL(22) must be set before facto phase
431C (=1 OOC on; =0 OOC off)
432C and cannot be changed for subsequent solve phases.
433 keep(201)=id%ICNTL(22)
434 IF (keep(201) .NE. 0) THEN
435# if defined(OLD_OOC_NOPANEL)
436 keep(201)=2
437# else
438 keep(201)=1
439# endif
440 ENDIF
441 ENDIF
442C ----------------------
443C Broadcast KEEP options
444C defined for facto:
445C ----------------------
446 CALL mpi_bcast( keep(12), 1, mpi_integer,
447 & master, id%COMM, ierr )
448 CALL mpi_bcast( keep(19), 1, mpi_integer,
449 & master, id%COMM, ierr )
450 CALL mpi_bcast( keep(21), 1, mpi_integer,
451 & master, id%COMM, ierr )
452 CALL mpi_bcast( keep(201), 1, mpi_integer,
453 & master, id%COMM, ierr )
454 CALL mpi_bcast( keep(459), 1, mpi_integer,
455 & master, id%COMM, ierr )
456 CALL mpi_bcast( keep(460), 1, mpi_integer,
457 & master, id%COMM, ierr )
458 IF ( keep(459) .GE. panel_tabsize ) THEN
459 IF ( lpok ) THEN
460 WRITE(lp,'(A,I4,A,I3)') " ** WARNING ** KEEP(459)=",keep(459),
461 & " too large, resetting to",panel_tabsize-1
462 ENDIF
463 keep(459) = panel_tabsize - 1
464 ENDIF
465 perlu = keep(12)
466 IF (id%MYID.EQ.master) THEN
467C KEEP(50) case
468C ==============
469C
470C KEEP(50) = 0 : matrix is unsymmetric
471C KEEP(50) /= 0 : matrix is symmetric
472C KEEP(50) = 1 : Ask L L^T on the root. Matrix is PSD.
473C KEEP(50) = 2 : Ask for L U on the root
474C KEEP(50) = 3 ... L D L^T ??
475C
476 cntl1 = id%CNTL(1)
477C ---------------------------------------
478C For symmetric (non general) matrices
479C set (directly) CNTL1 = 0.0
480C ---------------------------------------
481 keep(17)=0
482 IF ( keep(50) .eq. 1 ) THEN
483 IF (cntl1 .ne. zero ) THEN
484 IF ( prokg ) THEN
485 WRITE(mpg,'(A)')
486 & '** Warning : SPD solver called, resetting CNTL(1) to 0.0D0'
487 END IF
488 END IF
489 cntl1 = zero
490 END IF
491C CNTL1 threshold value must be between
492C 0.0 and 1.0 (for SYM=0) and 0.5 (for SYM=1,2)
493 IF (cntl1.GT.one) cntl1=one
494 IF (cntl1.LT.zero) cntl1=zero
495 IF (keep(50).NE.0.AND.cntl1.GT.0.5d0) THEN
496 cntl1 = 0.5d0
497 ENDIF
498 parpiv_t1 = id%KEEP(268)
499 IF (parpiv_t1.EQ.77) THEN
500 parpiv_t1 = 0
501#if defined(__ve__)
502 parpiv_t1 = -2
503#endif
504 ENDIF
505 IF (parpiv_t1.EQ.-3) THEN
506 parpiv_t1 = 0
507 ENDIF
508 IF ((parpiv_t1.LT.-3).OR.(parpiv_t1.GT.1)) THEN
509C out of range values
510 parpiv_t1 =0
511 ENDIF
512C note that KEEP(50).EQ.1 => CNTL1=0.0
513 IF (cntl1.EQ.0.0.OR.(keep(50).eq.1)) parpiv_t1 = 0
514C
515 IF (parpiv_t1.EQ.-2) THEN
516 IF (keep(19).NE.0) THEN
517C switch off PARPIV_T1 if RR activated
518C but do NOT switch off PARPIV_1 with null pivot detection
519 parpiv_t1 = 0
520 ENDIF
521 ENDIF
522 id%KEEP(269) = parpiv_t1
523 ENDIF
524 CALL mpi_bcast(cntl1, 1, mpi_double_precision,
525 & master, id%COMM, ierr)
526 CALL mpi_bcast( keep(269), 1, mpi_integer,
527 & master, id%COMM, ierr )
528 IF (id%MYID.EQ.master) THEN
529C -----------------------------------------------------
530C Decoding of ICNTL(35) for factorization: same as
531C at analysis except that we store a copy of ICNTL(35)
532C in KEEP(486) instead of KEEP(494) and need to check
533C compatibility of KEEP(486) and KEEP(494): If LR was
534C not activated during analysis, it cannot be activated
535C at factorization.
536C ------------------------------------------------------
537 id%KEEP(486) = id%ICNTL(35)
538 IF (id%KEEP(486).EQ.1) THEN
539C -- Automatic BLR option setting
540 id%KEEP(486)= 2
541 ENDIF
542 IF ( id%KEEP(486).EQ.4) id%KEEP(486)=0
543 IF ((id%KEEP(486).LT.0).OR.(id%KEEP(486).GT.4)) THEN
544C Out of range values treated as 0
545 id%KEEP(486) = 0
546 ENDIF
547 IF ((keep(486).NE.0).AND.(keep(494).EQ.0)) THEN
548C To activate BLR during factorization,
549C ICNTL(35) must have been set at analysis.
550 IF (lpok) THEN
551 WRITE(lp,'(a)')
552 & " *** Error with BLR setting "
553 WRITE(LP,'(a)') " *** BLR was not activated during ",
554 & " analysis but is requested during factorization."
555 ENDIF
556 id%INFO(1)=-54
557 id%INFO(2)=0
558 GOTO 105
559 ENDIF
560 KEEP464COPY = id%ICNTL(38)
561.LT..OR..GT. IF (KEEP464COPY0KEEP464COPY1000) THEN
562C Out of range values treated as 1000
563 KEEP464COPY = 1000
564 ENDIF
565.LT. IF (id%KEEP(461)1) THEN
566 id%KEEP(461) = 10
567 ENDIF
568 KEEP465COPY=0
569.EQ..OR..EQ. IF (id%ICNTL(36)1id%ICNTL(36)3) THEN
570.EQ..OR..LE. IF (CNTL1ZERO KEEP(468)1) THEN
571 KEEP(475) = 3
572.GT..OR..EQ. ELSE IF ( (KEEP(269)0) (KEEP(269)-2)) THEN
573 KEEP(475) = 2
574.EQ. ELSE IF (KEEP(468)2) THEN
575 KEEP(475) = 2
576 ELSE
577 KEEP(475) = 1
578 ENDIF
579 ELSE
580 KEEP(475) = 0
581 ENDIF
582 KEEP(481)=0
583.LT..OR..GE. IF (id%ICNTL(36)0 id%ICNTL(36)2) THEN
584C Only options 1 and 2 are allowed
585 KEEP(475) = 0
586 ENDIF
587C K489 is set according to ICNTL(37)
588.EQ..OR..EQ. IF (id%ICNTL(37)0id%ICNTL(37)1) THEN
589 KEEP(489) = id%ICNTL(37)
590 ELSE
591C Other values treated as zero
592 KEEP(489) = 0
593 ENDIF
594.GE. IF (KEEP(79)1) THEN
595C CompressCB incompatible with type4,5,6 nodes
596 KEEP(489)=0
597 ENDIF
598 KEEP(489)=0
599C id%KEEP(476) \in [1,100]
600.GT..OR..LT. IF ((id%KEEP(476)100)(id%KEEP(476)1)) THEN
601 id%KEEP(476)= 50
602 ENDIF
603C id%KEEP(477) \in [1,100]
604.GT..OR..LT. IF ((id%KEEP(477)100)(id%KEEP(477)1)) THEN
605 id%KEEP(477)= 100
606 ENDIF
607C id%KEEP(483) \in [1,100]
608.GT..OR..LT. IF ((id%KEEP(483)100)(id%KEEP(483)1)) THEN
609 id%KEEP(483)= 50
610 ENDIF
611C id%KEEP(484) \in [1,100]
612.GT..OR..LT. IF ((id%KEEP(484)100)(id%KEEP(484)1)) THEN
613 id%KEEP(484)= 50
614 ENDIF
615C id%KEEP(480)=0,2,3,4,5,6
616.GT..OR..LT. IF ((id%KEEP(480)6)(id%KEEP(480)0)
617.OR..EQ. & (id%KEEP(480)1)) THEN
618 id%KEEP(480)=0
619 ENDIF
620C id%KEEP(473)=0 or 1
621.NE..AND..NE. IF ((id%KEEP(473)0)(id%KEEP(473)1)) THEN
622 id%KEEP(473)=0
623 ENDIF
624C id%KEEP(474)=0,1,2,3
625.GT..OR..LT. IF ((id%KEEP(474)3)(id%KEEP(474)0)) THEN
626 id%KEEP(474)=0
627 ENDIF
628C id%KEEP(479)>0
629.LE. IF (id%KEEP(479)0) THEN
630 id%KEEP(479)=1
631 ENDIF
632.NE..AND..EQ. IF (id%KEEP(474)0id%KEEP(480)0) THEN
633 id%KEEP(474) = 0
634 ENDIF
635.NE..AND..LT. IF (id%KEEP(478)0id%KEEP(480)4) THEN
636 id%KEEP(478) = 0
637 ENDIF
638.GE..OR. IF (id%KEEP(480)5
639.NE..AND..EQ. & (id%KEEP(480)0id%KEEP(474)3)) THEN
640.LT. IF (id%KEEP(475)2) THEN
641C Reset to 3 if 5 or to 4 if 6
642 id%KEEP(480) = id%KEEP(480) - 2
643 write(*,*) ' resetting keep(480) to ', id%KEEP(480)
644 ENDIF
645 ENDIF
646 105 CONTINUE
647.EQ. ENDIF ! id%MYID MASTER
648 CALL MUMPS_PROPINFO( id%ICNTL(1), id%INFO(1),
649 & id%COMM, id%MYID )
650C
651.LT. IF (id%INFO(1)0) GOTO 530
652 CALL MPI_BCAST( KEEP(473), 14, MPI_INTEGER,
653 & MASTER, id%COMM, IERR )
654.NE. IF (KEEP(486)0) THEN
655 CALL MPI_BCAST( KEEP(489), 1, MPI_INTEGER,
656 & MASTER, id%COMM, IERR )
657 CALL MPI_BCAST( KEEP464COPY, 1, MPI_INTEGER,
658 & MASTER, id%COMM, IERR )
659 CALL MPI_BCAST( KEEP465COPY, 1, MPI_INTEGER,
660 & MASTER, id%COMM, IERR )
661 ENDIF
662.EQ. IF (id%MYIDMASTER) THEN
663.GT..OR..LT. IF (KEEP(217)2KEEP(217)0) THEN
664 KEEP(217)=0
665 ENDIF
666 KEEP(214)=KEEP(217)
667.EQ. IF (KEEP(214)0) THEN
668.NE. IF (KEEP(201)0) THEN ! OOC or no factors
669 KEEP(214)=1
670 ELSE
671 KEEP(214)=2
672 ENDIF
673.EQ. IF (KEEP(486)2) THEN
674 KEEP(214)=1
675 ENDIF
676 ENDIF
677 ENDIF
678 CALL MPI_BCAST( KEEP(214), 1, MPI_INTEGER,
679 & MASTER, id%COMM, IERR )
680.NE. IF (KEEP(201)0) THEN
681C -- Low Level I/O strategy
682 CALL MPI_BCAST( KEEP(99), 1, MPI_INTEGER,
683 & MASTER, id%COMM, IERR )
684 CALL MPI_BCAST( KEEP(205), 1, MPI_INTEGER,
685 & MASTER, id%COMM, IERR )
686 CALL MPI_BCAST( KEEP(211), 1, MPI_INTEGER,
687 & MASTER, id%COMM, IERR )
688 ENDIF
689C Fwd in facto: explicitly forbid
690C sparse RHS and A-1 computation
691.EQ..AND..EQ. IF (id%KEEP(252)1 id%MYIDMASTER) THEN
692.EQ. IF (id%ICNTL(20)1) THEN ! out-of-range => 0
693C NB: in doc ICNTL(20) only accessed during solve
694C In practice, will have failed earlier if RHS not allocated.
695C Still it looks safer to keep this test.
696 id%INFO(1)=-43
697 id%INFO(2)=20
698 IF (LPOK) WRITE(LP,'(a)')
699 & ' error: sparse rhs is incompatible with forward',
700 & ' performed during factorization(icntl(32)=1)'
701.NE. ELSE IF (id%ICNTL(30)0) THEN ! out-of-range => 1
702 id%INFO(1)=-43
703 id%INFO(2)=30
704 IF (LPOK) WRITE(LP,'(a)')
705 & ' error: a-1 functionality incompatible with forward',
706 & ' performed during factorization (ICNTL(32)=1)'
707 ELSE IF (id%ICNTL(9) .NE. 1) THEN
708 id%INFO(1)=-43
709 id%INFO(2)=9
710 IF (lpok) WRITE(lp,'(A)')
711 & .NE.' ERROR: Transpose system (ICNTL(9)0) not ',
712 & ' compatible with forward performed during',
713 & ' factorization (ICNTL(32)=1)'
714 ENDIF
715 ENDIF
716 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
717 & id%COMM, id%MYID )
718C
719 IF (id%INFO(1).LT.0) GOTO 530
720C
721C The memory allowed is given by ICNTL(23) in Mbytes
722C 0 means that nothing is provided.
723C Save memory available, ICNTL(23) in KEEP8(4)
724C
725 IF ( icntl(23) .GT. 0 ) THEN
726 itmp = 1
727 ELSE
728 itmp = 0
729 ENDIF
730 CALL mpi_allreduce( itmp, jtmp, 1, mpi_integer,
731 & mpi_sum, id%COMM, ierr)
732 IF ( id%MYID.EQ.master ) THEN
733C Negative values considered 0
734 itmp = max(icntl(23),0)
735 END IF
736 CALL mpi_bcast( itmp, 1, mpi_integer,
737 & master, id%COMM, ierr )
738C JTMP: nb of procs with nonzero ICNTL(23)
739C ITMP: value of ICNTL(23) on master
740 IF ( itmp .GT. 0 .AND. jtmp .EQ. 1 ) THEN
741C ICNTL(23)>0 only on master
742 ELSE
743C Local values of ICNTL(23) are used, note that
744C they could all be zeros
745 itmp = icntl(23)
746 ENDIF
747C
748 itmp8 = int(itmp, 8)
749 id%KEEP8(4) = itmp8 * 1000000_8 ! convert to nb of bytes
750C Compute \sum of memories allowed
751 CALL mpi_reduce( id%KEEP8(4), itmp8, 1, mpi_integer8,
752 & mpi_sum, master, id%COMM, ierr )
753 itmp8 = itmp8 / 1000000_8 ! Use to print \sum_{ICNTL(23)}
754 IF ( prokg ) THEN
755 nworking = id%NSLAVES
756 WRITE( mpg, 172 ) nworking, id%ICNTL(22), keep(486),
757 & keep(12),
758 & id%KEEP8(111), keep(126), keep(127), keep(28),
759 & id%KEEP8(4)/1000000_8, itmp8, lwk_user_sum8, cntl1
760 IF (keep(252).GT.0)
761 & WRITE(mpg,173) keep(253)
762 IF (keep(269).NE.0)
763 & WRITE(mpg,174) keep(269)
764 ENDIF
765 IF (keep(201).LE.0) THEN
766C In-core version or no factors
767 keep(ixsz)=xsize_ic
768 ELSE IF (keep(201).EQ.2) THEN
769C OOC version, no panels
770 keep(ixsz)=xsize_ooc_nopanel
771 ELSE IF (keep(201).EQ.1) THEN
772C Panel versions:
773 IF (keep(50).EQ.0) THEN
774 keep(ixsz)=xsize_ooc_unsym
775 ELSE
776 keep(ixsz)=xsize_ooc_sym
777 ENDIF
778 ENDIF
779 IF ( keep(486) .NE. 0 ) THEN !LR is activated
780C Stats initialization for LR
781 CALL init_stats_global(id)
782 END IF
783C
784* **********************************
785* Begin intializations regarding the
786* computation of the determinant
787* **********************************
788 IF (id%MYID.EQ.master) keep(258)=icntl(33)
789 CALL mpi_bcast(keep(258), 1, mpi_integer,
790 & master, id%COMM, ierr)
791 IF (keep(258) .NE. 0) THEN
792 keep(259) = 0 ! Initial exponent of the local determinant
793 keep(260) = 1 ! Number of permutations
794 id%DKEEP(6) = 1.0d0 ! real part of the local determinant
795 id%DKEEP(7) = 0.0d0 ! imaginary part of the local determinant
796 ENDIF
797* ********************************
798* End intializations regarding the
799* computation of the determinant
800* ********************************
801C
802* **********************
803* Begin of Scaling phase
804* **********************
805C
806C SCALING MANAGEMENT
807C * Options 1, 3, 4 centralized only
808C
809C * Options 7, 8 : also works for distributed matrix
810C
811C At this point, we have the scaling arrays allocated
812C on the master. They have been allocated on the master
813C inside the main MUMPS driver.
814C
815 CALL mpi_bcast(keep(52), 1, mpi_integer,
816 & master, id%COMM, ierr)
817 lscal = ((keep(52) .GT. 0) .AND. (keep(52) .LE. 8))
818 IF (lscal) THEN
819C
820 IF ( id%MYID.EQ.master ) THEN
821 CALL mumps_secdeb(timeet)
822 ENDIF
823C -----------------------
824C Retrieve parameters for
825C simultaneous scaling
826C -----------------------
827 IF (keep(52) .EQ. 7) THEN
828C -- Cheap setting of SIMSCALING (it is the default in 4.8.4)
829 k231= keep(231)
830 k232= keep(232)
831 k233= keep(233)
832 ELSEIF (keep(52) .EQ. 8) THEN
833C -- More expensive setting of SIMSCALING (it was the default in 4.8.1,2,3)
834 k231= keep(239)
835 k232= keep(240)
836 k233= keep(241)
837 ENDIF
838 CALL mpi_bcast(id%DKEEP(3),1,mpi_double_precision,master,
839 & id%COMM,ierr)
840C
841 IF ( ((keep(52).EQ.7).OR.(keep(52).EQ.8)) .AND.
842 & keep(54).NE.0 ) THEN
843C ------------------------------
844C Scaling for distributed matrix
845C We need to allocate scaling
846C arrays on all processors, not
847C only the master.
848C ------------------------------
849 IF ( id%MYID .NE. master ) THEN
850 IF ( associated(id%COLSCA))
851 & DEALLOCATE( id%COLSCA )
852 IF ( associated(id%ROWSCA))
853 & DEALLOCATE( id%ROWSCA )
854 ALLOCATE( id%COLSCA(n), stat=ierr)
855 IF (ierr .GT.0) THEN
856 id%INFO(1)=-13
857 id%INFO(2)=n
858 ENDIF
859 ALLOCATE( id%ROWSCA(n), stat=ierr)
860 IF (ierr .GT.0) THEN
861 id%INFO(1)=-13
862 id%INFO(2)=n
863 ENDIF
864 ENDIF
865 m = n
866 bumaxmn=m
867 IF(n > bumaxmn) bumaxmn = n
868 liwk = 4*bumaxmn
869 ALLOCATE (iwk(liwk),burp(m),bucp(n),
870 & burs(2* (id%NPROCS)),bucs(2* (id%NPROCS)),
871 & stat=allocok)
872 IF (allocok > 0) THEN
873 id%INFO(1)=-13
874 id%INFO(2)=liwk+m+n+4* (id%NPROCS)
875 ENDIF
876C --- Propagate enventual error
877 CALL mumps_propinfo( icntl(1), id%INFO(1),
878 & id%COMM, id%MYID )
879 IF (id%INFO(1).LT.0) GOTO 517
880C -- estimation of memory and construction of partvecs
881 bujob = 1
882C -- LWK not used
883 lwk_real = 1
884 ALLOCATE(wk_real(lwk_real),
885 & stat=allocok)
886 IF (allocok > 0) THEN
887 id%INFO(1)=-13
888 id%INFO(2)=lwk_real
889 ENDIF
890C --- Propagate enventual error
891 CALL mumps_propinfo( icntl(1), id%INFO(1),
892 & id%COMM, id%MYID )
893 IF (id%INFO(1).LT.0) GOTO 517
895 & myirn_loc(1), myjcn_loc(1), mya_loc(1),
896 & id%KEEP8(29),
897 & m, n, id%NPROCS, id%MYID, id%COMM,
898 & burp, bucp,
899 & burs, bucs, buregistre,
900 & iwk, liwk,
901 & buintsz, buresz, bujob,
902 & id%ROWSCA(1), id%COLSCA(1), wk_real, lwk_real,
903 & id%KEEP(50),
904 & k231, k232, k233,
905 & id%DKEEP(3),
906 & sconeerr, scinferr)
907 IF(liwk < buintsz) THEN
908 DEALLOCATE(iwk)
909 liwk = buintsz
910 ALLOCATE(iwk(liwk), stat=allocok)
911 IF (allocok > 0) THEN
912 id%INFO(1)=-13
913 id%INFO(2)=liwk
914 ENDIF
915 ENDIF
916 lwk_real = buresz
917 DEALLOCATE(wk_real)
918 ALLOCATE (wk_real(lwk_real), stat=allocok)
919 IF (allocok > 0) THEN
920 id%INFO(1)=-13
921 id%INFO(2)=lwk_real
922 ENDIF
923C --- Propagate enventual error
924 CALL mumps_propinfo( icntl(1), id%INFO(1),
925 & id%COMM, id%MYID )
926 IF (id%INFO(1).LT.0) GOTO 517
927C -- estimation of memory and construction of partvecs
928 bujob = 2
930 & myirn_loc(1), myjcn_loc(1), mya_loc(1),
931 & id%KEEP8(29),
932 & m, n, id%NPROCS, id%MYID, id%COMM,
933 & burp, bucp,
934 & burs, bucs, buregistre,
935 & iwk, liwk,
936 & buintsz, buresz, bujob,
937 & id%ROWSCA(1), id%COLSCA(1), wk_real, lwk_real,
938 & id%KEEP(50),
939 & k231, k232, k233,
940 & id%DKEEP(3),
941 & sconeerr, scinferr)
942 id%DKEEP(4) = sconeerr
943 id%DKEEP(5) = scinferr
944CXXXX
945 DEALLOCATE(iwk, wk_real,burp,bucp,burs, bucs)
946 ELSE IF ( keep(54) .EQ. 0 ) THEN
947C ------------------
948C Centralized matrix
949C ------------------
950 IF ((keep(52).EQ.7).OR.(keep(52).EQ.8)) THEN
951C -------------------------------
952C Create a communicator of size 1
953C -------------------------------
954 IF (id%MYID.EQ.master) THEN
955 colour = 0
956 ELSE
957 colour = mpi_undefined
958 ENDIF
959 CALL mpi_comm_split( id%COMM, colour, 0,
960 & comm_for_scaling, ierr )
961 IF (id%MYID.EQ.master) THEN
962 m = n
963 bumaxmn=n
964 IF(n > bumaxmn) bumaxmn = n
965 liwk = 1
966 ALLOCATE(iwk(liwk),burp(1),bucp(1),
967 & burs(1),bucs(1),
968 & stat=allocok)
969 IF (allocok > 0) THEN
970 id%INFO(1)=-13
971 id%INFO(2)=liwk+1+1+1+1
972 GOTO 400
973 ENDIF
974 lwk_real = m + n
975 ALLOCATE (wk_real(lwk_real), stat=allocok)
976 IF (allocok > 0) THEN
977 id%INFO(1)=-13
978 id%INFO(2)=lwk_real
979 GOTO 400
980 ENDIF
981 CALL mpi_comm_rank(comm_for_scaling, scmyid, ierr)
982 CALL mpi_comm_size(comm_for_scaling, scnprocs, ierr)
983 bujob = 1
985 & id%IRN(1), id%JCN(1), id%A(1),
986 & id%KEEP8(28),
987 & m, n, scnprocs, scmyid, comm_for_scaling,
988 & burp, bucp,
989 & burs, bucs, buregistre,
990 & iwk, liwk,
991 & buintsz, buresz, bujob,
992 & id%ROWSCA(1), id%COLSCA(1), wk_real, lwk_real,
993 & id%KEEP(50),
994 & k231, k232, k233,
995 & id%DKEEP(3),
996 & sconeerr, scinferr)
997 IF(lwk_real < buresz) THEN
998 id%INFO(1) = -136
999 GOTO 400
1000 ENDIF
1001 bujob = 2
1002 CALL zmumps_simscaleabs(id%IRN(1),
1003 & id%JCN(1), id%A(1),
1004 & id%KEEP8(28),
1005 & m, n, scnprocs, scmyid, comm_for_scaling,
1006 & burp, bucp,
1007 & burs, bucs, buregistre,
1008 & iwk, liwk,
1009 & buintsz, buresz, bujob,
1010 & id%ROWSCA(1), id%COLSCA(1), wk_real, lwk_real,
1011 & id%KEEP(50),
1012 & k231, k232, k233,
1013 & id%DKEEP(3),
1014 & sconeerr, scinferr)
1015 id%DKEEP(4) = sconeerr
1016 id%DKEEP(5) = scinferr
1017 400 CONTINUE
1018 IF (allocated(wk_real)) DEALLOCATE(wk_real)
1019 IF (allocated(iwk)) DEALLOCATE(iwk)
1020 IF (allocated(burp)) DEALLOCATE(burp)
1021 IF (allocated(bucp)) DEALLOCATE(bucp)
1022 IF (allocated(burs)) DEALLOCATE(burs)
1023 IF (allocated(bucs)) DEALLOCATE(bucs)
1024 ENDIF
1025C Centralized matrix: make DKEEP(4:5) available to all processors
1026 CALL mpi_bcast( id%DKEEP(4),2,mpi_double_precision,
1027 & master, id%COMM, ierr )
1028 IF (id%MYID.EQ.master) THEN
1029C Communicator should only be
1030C freed on the master process
1031 CALL mpi_comm_free(comm_for_scaling, ierr)
1032 ENDIF
1033 CALL mumps_propinfo(icntl(1), id%INFO(1),
1034 & id%COMM, id%MYID)
1035 IF (id%INFO(1).LT.0) GOTO 517
1036 ELSE IF (id%MYID.EQ.master) THEN
1037C ----------------------------------
1038C Centralized scaling, options 1 to 6
1039C ----------------------------------
1040 IF (keep(52).GT.0 .AND. keep(52).LE.6) THEN
1041C ---------------------
1042C Allocate temporary
1043C workspace for scaling
1044C ---------------------
1045 IF ( keep(52) .eq. 5 .or.
1046 & keep(52) .eq. 6 ) THEN
1047C We have an explicit copy of the original
1048C matrix in complex format which should probably
1049C be avoided (but do we want to keep all
1050C those old scaling options ?)
1051 lwk = id%KEEP8(28)
1052 ELSE
1053 lwk = 1_8
1054 END IF
1055 lwk_real = 5 * n
1056 ALLOCATE( wk_real( lwk_real ), stat = ierr )
1057 IF ( ierr .GT. 0 ) THEN
1058 id%INFO(1) = -13
1059 id%INFO(2) = lwk_real
1060 GOTO 137
1061 END IF
1062 ALLOCATE( wk( lwk ), stat = ierr )
1063 IF ( ierr .GT. 0 ) THEN
1064 id%INFO(1) = -13
1065 CALL mumps_set_ierror(lwk, id%INFO(2))
1066 GOTO 137
1067 END IF
1068 CALL zmumps_fac_a(n, id%KEEP8(28), keep(52), id%A(1),
1069 & id%IRN(1), id%JCN(1),
1070 & id%COLSCA(1), id%ROWSCA(1),
1071 & wk, lwk, wk_real, lwk_real, icntl(1), id%INFO(1) )
1072 DEALLOCATE( wk_real )
1073 DEALLOCATE( wk )
1074 ENDIF
1075 ENDIF
1076 ENDIF ! Scaling distributed matrices or centralized
1077 IF (keep(125).NE.0) THEN
1078C ------------------------
1079C If we enable the scaling of the |A11 A12| block
1080C we et to 1 the scaling corresponding to the Schur
1081C complement matrix A22
1082C ------------------------
1083 IF ((keep(60).GT.0) .and. (keep(116).GT.0)) THEN
1084C Schur is active, reset Schur entries to ONE
1085 IF ( ((keep(52).EQ.7).OR.(keep(52).EQ.8)) .AND.
1086 & keep(54).NE.0 ) THEN
1087C Scaling available on all procs
1088 DO i=1, n
1089 IF (id%SYM_PERM(i).GT.id%N-keep(116)) THEN
1090 id%COLSCA(i) = one
1091 id%ROWSCA(i) = one
1092 ENDIF
1093 ENDDO
1094 ELSE IF ( id%MYID .EQ. master) THEN
1095C Scaling available on master
1096 DO i=1, n
1097 IF (id%SYM_PERM(i).GT.id%N-keep(116)) THEN
1098 id%COLSCA(i) = one
1099 id%ROWSCA(i) = one
1100 ENDIF
1101 ENDDO
1102 ENDIF
1103 ENDIF
1104 ENDIF
1105 IF (id%MYID.EQ.master) THEN
1106 CALL mumps_secfin(timeet)
1107 id%DKEEP(92)=timeet
1108C Print inf-norm after last KEEP(233) iterations of
1109C scaling option KEEP(52)=7 or 8 (SimScale)
1110C
1111 IF (prokg.AND.(keep(52).EQ.7.OR.keep(52).EQ.8)
1112 & .AND. (k233+k231+k232).GT.0) THEN
1113 IF (k232.GT.0) WRITE(mpg, 166) id%DKEEP(4)
1114 ENDIF
1115 ENDIF
1116 ENDIF ! LSCAL
1117C
1118C scaling might also be provided by the user
1119 lscal = (lscal .OR. (keep(52) .EQ. -1) .OR. keep(52) .EQ. -2)
1120 IF (lscal .AND. keep(258).NE.0 .AND. id%MYID .EQ. master) THEN
1121 DO i = 1, id%N
1122 CALL zmumps_updatedeter_scaling(id%ROWSCA(i),
1123 & id%DKEEP(6), ! determinant
1124 & keep(259)) ! exponent of the determinant
1125 ENDDO
1126 IF (keep(50) .EQ. 0) THEN ! unsymmetric
1127 DO i = 1, id%N
1128 CALL zmumps_updatedeter_scaling(id%COLSCA(i),
1129 & id%DKEEP(6), ! determinant
1130 & keep(259)) ! exponent of the determinant
1131 ENDDO
1132 ELSE
1133C -----------------------------------------
1134C In this case COLSCA = ROWSCA
1135C Since determinant was initialized to 1,
1136C compute square of the current determinant
1137C rather than going through COLSCA.
1138C -----------------------------------------
1139 CALL zmumps_deter_square(id%DKEEP(6), keep(259))
1140 ENDIF
1141C Now we should have taken the
1142C inverse of the scaling vectors
1143 CALL zmumps_deter_scaling_inverse(id%DKEEP(6), keep(259))
1144 ENDIF
1145C
1146C ********************
1147C End of Scaling phase
1148C At this point: either (matrix is distributed and KEEP(52)=7 or 8)
1149C in which case scaling arrays are allocated on all processors,
1150C or scaling arrays are only on the host processor.
1151C In case of distributed matrix input, we will free the scaling
1152C arrays on procs with MYID .NE. 0 after the all-to-all distribution
1153C of the original matrix.
1154C ********************
1155C
1156 137 CONTINUE
1157C Fwd in facto: in case of repeated factorizations
1158C with different Schur options we prefer to free
1159C systematically this array now than waiting for
1160C the root node. We rely on the fact that it is
1161C allocated or not during the solve phase so if
1162C it was allocated in a 1st call to facto and not
1163C in a second, we don't want the solve to think
1164C it was allocated in the second call.
1165 IF (associated(id%root%RHS_CNTR_MASTER_ROOT)) THEN
1166 DEALLOCATE (id%root%RHS_CNTR_MASTER_ROOT)
1167 NULLIFY (id%root%RHS_CNTR_MASTER_ROOT)
1168 ENDIF
1169C Fwd in facto: check that id%NRHS has not changed
1170 IF ( id%MYID.EQ.master.AND. keep(252).EQ.1 .AND.
1171 & id%NRHS .NE. id%KEEP(253) ) THEN
1172C Error: NRHS should not have
1173C changed since the analysis
1174 id%INFO(1)=-42
1175 id%INFO(2)=id%KEEP(253)
1176 ENDIF
1177C Fwd in facto: allocate and broadcast RHS_MUMPS
1178C to make it available on all processors.
1179 IF (id%KEEP(252) .EQ. 1) THEN
1180 IF ( id%MYID.NE.master ) THEN
1181 id%KEEP(254) = n ! Leading dimension
1182 id%KEEP(255) = n*id%KEEP(253) ! Tot size
1183 ALLOCATE(rhs_mumps(id%KEEP(255)),stat=ierr)
1184 IF (ierr > 0) THEN
1185 id%INFO(1)=-13
1186 id%INFO(2)=id%KEEP(255)
1187 IF (lpok)
1188 & WRITE(lp,*) 'ERROR while allocating RHS on a slave'
1189 NULLIFY(rhs_mumps)
1190 ENDIF
1191 rhs_mumps_allocated = .true.
1192 ELSE
1193C Case of non working master
1194 id%KEEP(254)=id%LRHS ! Leading dimension
1195 id%KEEP(255)=id%LRHS*(id%KEEP(253)-1)+id%N ! Tot size
1196 rhs_mumps=>id%RHS
1197 rhs_mumps_allocated = .false.
1198 IF (lscal) THEN
1199C Scale before broadcast: apply row
1200C scaling (remark that we assume no
1201C transpose).
1202 DO k=1, id%KEEP(253)
1203 DO i=1, n
1204 rhs_mumps( id%KEEP(254) * (k-1) + i )
1205 & = rhs_mumps( id%KEEP(254) * (k-1) + i )
1206 & * id%ROWSCA(i)
1207 ENDDO
1208 ENDDO
1209 ENDIF
1210 ENDIF
1211 ELSE
1212 id%KEEP(255)=1
1213 ALLOCATE(rhs_mumps(1),stat=ierr)
1214 IF (ierr > 0) THEN
1215 id%INFO(1)=-13
1216 id%INFO(2)=1
1217 IF (lpok)
1218 & WRITE(lp,*) 'ERREUR while allocating RHS on a slave'
1219 NULLIFY(rhs_mumps)
1220 ENDIF
1221 rhs_mumps_allocated = .true.
1222 ENDIF
1223 CALL mumps_propinfo( icntl(1), id%INFO(1),
1224 & id%COMM, id%MYID )
1225 IF ( id%INFO(1).lt.0 ) GOTO 517
1226 IF (keep(252) .EQ. 1) THEN
1227C
1228C Broadcast the columns of the right-hand side
1229C one by one. Leading dimension is keep(254)=N
1230C on procs with MYID > 0 but may be larger on
1231C the master processor.
1232 DO i= 1, id%KEEP(253)
1233 CALL mpi_bcast(rhs_mumps((i-1)*id%KEEP(254)+1), n,
1234 & mpi_double_complex, master,id%COMM,ierr)
1235 END DO
1236 ENDIF
1237C Keep a copy of ICNTL(24) and make it
1238C available on all working processors.
1239 keep(110)=id%ICNTL(24)
1240 CALL mpi_bcast(keep(110), 1, mpi_integer,
1241 & master, id%COMM, ierr)
1242C KEEP(110) defaults to 0 for out of range values
1243 IF (keep(110).NE.1) keep(110)=0
1244 IF (keep(219).NE.0) THEN
1245 CALL zmumps_buf_max_array_minsize(max(keep(108),1),ierr)
1246 IF (ierr .NE. 0) THEN
1247C ------------------------
1248C Error allocating ZMUMPS_BUF
1249C ------------------------
1250 id%INFO(1) = -13
1251 id%INFO(2) = max(keep(108),1)
1252 END IF
1253 ENDIF
1254C -----------------------------------------------
1255C Depending on the option used for
1256C -detecting null pivots (ICNTL(24)/KEEP(110))
1257C CNTL(3) is used to set DKEEP(1)
1258C ( A row is considered as null if ||row|| < DKEEP(1) )
1259C CNTL(5) is then used to define if a large
1260C value is set on the diagonal or if a 1 is set
1261C and other values in the row are reset to zeros.
1262C SEUIL* corresponds to the minimum required
1263C absolute value of pivot.
1264C SEUIL_LDLT_NIV2 is used only in the
1265C case of SYM=2 within a niv2 node for which
1266C we have only a partial view of the fully summed rows.
1267 IF (id%MYID .EQ. master) cntl3 = id%CNTL(3)
1268 CALL mpi_bcast(cntl3, 1, mpi_double_precision,
1269 & master, id%COMM, ierr)
1270 IF (id%MYID .EQ. master) cntl5 = id%CNTL(5)
1271 CALL mpi_bcast(cntl5, 1, mpi_double_precision,
1272 & master, id%COMM, ierr)
1273 IF (id%MYID .EQ. master) cntl6 = id%CNTL(6)
1274 CALL mpi_bcast(cntl6, 1, mpi_double_precision,
1275 & master, id%COMM, ierr)
1276 IF (id%MYID .EQ. master) id%DKEEP(8) = id%CNTL(7)
1277 CALL mpi_bcast(id%DKEEP(8), 1, mpi_double_precision,
1278 & master, id%COMM, ierr)
1279 id%DKEEP(11) = id%DKEEP(8)/id%KEEP(461)
1280 id%DKEEP(12) = id%DKEEP(8)/id%KEEP(462)
1281 IF (keep(486).EQ.0) id%DKEEP(8) = zero
1282 compute_anorminf = .false.
1283 IF ( (keep(486) .NE. 0).AND. (id%DKEEP(8).LT.zero)) THEN
1284 compute_anorminf = .true.
1285 ENDIF
1286 IF (keep(19).NE.0) THEN
1287C Rank revealing factorisation
1288 compute_anorminf = .true.
1289 ENDIF
1290 IF (keep(110).NE.0) THEN
1291C Null pivot detection
1292 compute_anorminf = .true.
1293 ENDIF
1294 IF (id%DKEEP(8).LT.zero) THEN
1295C Experimental setting of CNTL(7)
1296 IF (compute_anorminf) THEN
1297 eff_size_schur = 0
1298 CALL zmumps_anorminf( id , anorminf, lscal, eff_size_schur )
1299C If no schur ANORMINF fine for other cases
1300 ELSE
1301 anorminf = zero
1302 ENDIF
1303 id%DKEEP(8) = abs(id%DKEEP(8))*anorminf
1304C ANORMINF need be recomputed in case of schur
1305 IF ((keep(60).GT.0).AND.keep(116).GT.0) anorminf=zero
1306 ENDIF
1307C -------------------------------------------------------
1308C We compute ANORMINF, when needed, based on
1309C the infinite norm of Rowsca *A*Colsca
1310C and make it available on all working processes.
1311 IF (compute_anorminf) THEN
1312 eff_size_schur = 0
1313 IF (keep(60).GT.0) eff_size_schur = keep(116)
1314 CALL zmumps_anorminf( id , anorminf, lscal, eff_size_schur )
1315 ELSE
1316 anorminf = zero
1317 ENDIF
1318C
1319 IF ((keep(19).NE.0).OR.(keep(110).NE.0)) THEN
1320 IF (prokg) THEN
1321 IF (keep(19).NE.0) THEN
1322 WRITE(mpg,'(A,1PD16.4)')
1323 & ' CNTL(3) for null pivot rows/singularities =',cntl3
1324 ELSE
1325 WRITE(mpg,'(A,1PD16.4)')
1326 & ' CNTL(3) for null pivot row detection =',cntl3
1327 ENDIF
1328 ENDIF
1329 ENDIF
1330 IF (keep(19).EQ.0) THEN
1331C -- RR is off
1332 seuil = zero
1333 id%DKEEP(9) = zero
1334 ELSE
1335C -- RR is on
1336C
1337C CNTL(3) is the threshold used in the following to compute
1338C DKEEP(9) the threshold under which the sing val. are considered
1339C as null and from which we start to look for a gap between two
1340C sing val.
1341 IF (cntl3 .LT. zero) THEN
1342 id%DKEEP(9) = abs(cntl(3))
1343 ELSE IF (cntl3 .GT. zero) THEN
1344 id%DKEEP(9) = cntl3*anorminf
1345 ELSE ! (CNTL(3) .EQ. ZERO) THEN
1346 ENDIF
1347 IF (prokg) THEN
1348 WRITE(mpg, '(A,I16)')
1349 & ' ICNTL(56) rank revealing effective value =',keep(19)
1350 WRITE(mpg,'(A,1PD16.4)')
1351 & ' ...Threshold for singularities on the root =',id%DKEEP(9)
1352 ENDIF
1353C RR postponing considers that pivot rows with norm smaller
1354C than SEUIL should be postponed.
1355C SEUIL should be bigger than DKEEP(9), this means that
1356C DKEEP(13) should be bigger than 1.
1357 thresh_seuil = id%DKEEP(13)
1358 IF (id%DKEEP(13).LT.1) thresh_seuil = 10
1359 seuil = id%DKEEP(9)*thresh_seuil
1360 IF (prokg) WRITE(mpg,'(A,1PD16.4)')
1361 & ' ...Threshold for postponing =',seuil
1362 ENDIF !end KEEP(19)
1363 seuil_ldlt_niv2 = seuil
1364C -------------------------------
1365C -- Null pivot row detection
1366C -------------------------------
1367 IF (keep(110).EQ.0) THEN
1368C -- Null pivot is off
1369C Initialize DKEEP(1) to a negative value
1370C in order to avoid detection of null pivots
1371C (test max(AMAX,RMAX,abs(PIVOT)).LE.PIVNUL
1372C in ZMUMPS_FAC_I, where PIVNUL=DKEEP(1))
1373 id%DKEEP(1) = -1.0d0
1374 id%DKEEP(2) = zero
1375 ELSE
1376C -- Null pivot is on
1377 IF (keep(19).NE.0) THEN
1378C -- RR is on
1379C RR postponing considers that pivot rows of norm smaller that SEUIL
1380C should be postponed, but pivot rows smaller than DKEEP(1) are
1381C directly added to null space and thus considered as null pivot rows.
1382 IF ((id%DKEEP(10).LE.0).OR.(id%DKEEP(10).GT.1)) THEN
1383C DKEEP(10) is out of range, set to the default value 10-1
1384 id%DKEEP(1) = id%DKEEP(9)*1d-1
1385 ELSE
1386 id%DKEEP(1) = id%DKEEP(9)*id%DKEEP(10)
1387 ENDIF
1388 ELSE
1389C -- RR is off
1390C -- only Null pivot detection
1391C We keep strategy currently used in MUMPS 4.10.0
1392 IF (cntl3 .LT. zero) THEN
1393 id%DKEEP(1) = abs(cntl(3))
1394 ELSE IF (cntl3 .GT. zero) THEN
1395 id%DKEEP(1) = cntl3*anorminf
1396 ELSE ! (CNTL(3) .EQ. ZERO) THEN
1397c id%DKEEP(1) = NPIV_CRITICAL_PATH*EPS*ANORMINF
1399 & n, keep(28), id%STEP(1), id%FRERE_STEPS(1), id%FILS(1),
1400 & id%NA(1), id%LNA, id%NE_STEPS(1), npiv_critical_path )
1401 id%DKEEP(1) = sqrt(dble(npiv_critical_path))*eps*anorminf
1402 ENDIF
1403 ENDIF ! fin rank revealing
1404 IF ((keep(110).NE.0).AND.(prokg)) THEN
1405 WRITE(mpg, '(A,I16)')
1406 & ' ICNTL(24) null pivot rows detection =',keep(110)
1407 WRITE(mpg,'(A,1PD16.4)')
1408 & ' ...Zero pivot detection threshold =',id%DKEEP(1)
1409 ENDIF
1410 IF (cntl5.GT.zero) THEN
1411 id%DKEEP(2) = cntl5 * anorminf
1412 IF (prokg) WRITE(mpg,'(A,1PD10.3)')
1413 & ' ...Fixation for null pivots =',id%DKEEP(2)
1414 ELSE
1415 IF (prokg) WRITE(mpg,*) '...Infinite fixation '
1416 IF (id%KEEP(50).EQ.0) THEN
1417C Unsym
1418 ! the user let us choose a fixation. set in NEGATIVE
1419 ! to detect during facto when to set row to zero !
1420 id%DKEEP(2) = -max(1.0d10*anorminf,
1421 & sqrt(huge(anorminf))/1.0d8)
1422 ELSE
1423C Sym
1424 id%DKEEP(2) = zero
1425 ENDIF
1426 ENDIF
1427 ENDIF ! fin null pivot detection.
1428C Find id of root node if RR is on
1429 IF (keep(53).NE.0) THEN
1430 id_root =mumps_procnode(id%PROCNODE_STEPS(id%STEP(keep(20))),
1431 & id%KEEP(199))
1432 IF ( keep( 46 ) .NE. 1 ) THEN
1433 id_root = id_root + 1
1434 END IF
1435 ENDIF
1436C Second pass: set parameters for null pivot detection
1437C Allocate PIVNUL_LIST in case of null pivot detection
1438 lpn_list = 1
1439 IF ( associated( id%PIVNUL_LIST) ) DEALLOCATE(id%PIVNUL_LIST)
1440 IF(keep(110) .EQ. 1) THEN
1441 lpn_list = n
1442 ENDIF
1443 IF (keep(19).NE.0 .AND.
1444 & (id_root.EQ.id%MYID .OR. id%MYID.EQ.master)) THEN
1445 lpn_list = n
1446 ENDIF
1447 ALLOCATE( id%PIVNUL_LIST(lpn_list),stat = ierr )
1448 IF ( ierr .GT. 0 ) THEN
1449 id%INFO(1)=-13
1450 id%INFO(2)=lpn_list
1451 END IF
1452 id%PIVNUL_LIST(1:lpn_list) = 0
1453 keep(109) = 0
1454C end set parameter for null pivot detection
1455 CALL mumps_propinfo( icntl(1), id%INFO(1),
1456 & id%COMM, id%MYID )
1457 IF ( id%INFO(1).lt.0 ) GOTO 517
1458C --------------------------------------------------------------
1459C STATIC PIVOTING
1460C -- Static pivoting only when RR and Null pivot detection OFF
1461C --------------------------------------------------------------
1462 keep(97) = 0
1463 IF ((keep(19).EQ.0).AND.(keep(110).EQ.0)) THEN
1464 IF (id%MYID .EQ. master) cntl4 = id%CNTL(4)
1465 CALL mpi_bcast( cntl4, 1, mpi_double_precision,
1466 & master, id%COMM, ierr )
1467C
1468 IF ( cntl4 .GE. zero ) THEN
1469 keep(97) = 1
1470 IF ( cntl4 .EQ. zero ) THEN
1471C -- set seuil to sqrt(eps)*||A||
1472 IF(anorminf .EQ. zero) THEN
1473 eff_size_schur = 0
1474 IF (keep(60).GT.0) eff_size_schur = keep(116)
1475 CALL zmumps_anorminf( id , anorminf, lscal,
1476 & eff_size_schur )
1477 ENDIF
1478 seuil = sqrt(eps) * anorminf
1479 ELSE
1480 seuil = cntl4
1481 ENDIF
1482 seuil_ldlt_niv2 = seuil
1483 ELSE
1484 seuil = zero
1485 ENDIF
1486 ENDIF
1487C set number of tiny pivots / 2x2 pivots in types 1 /
1488C 2x2 pivots in types 2, to zero. This is because the
1489C user can call the factorization step several times.
1490 keep(98) = 0
1491 keep(103) = 0
1492 keep(105) = 0
1493 maxs = 1_8
1494*
1495* Start allocations
1496* *****************
1497*
1498C
1499C The slaves can now perform the factorization
1500C
1501C
1502C Allocate id%S on all nodes
1503C or point to user provided data WK_USER when LWK_USER>0
1504C =======================
1505C
1506C Compute BLR_STRAT and a first estimation
1507C of MAXS, the size of id%S
1509 & maxs_base8, maxs_base_relaxed8,
1510 & blr_strat,
1511 & id%KEEP(1), id%KEEP8(1))
1512C
1513 maxs = maxs_base_relaxed8
1514 IF (wk_user_provided) THEN
1515C -- Set MAXS to size of WK_USER_
1516 maxs = id%KEEP8(24)
1517 ENDIF
1518 CALL mumps_propinfo( icntl(1), id%INFO(1),
1519 & id%COMM, id%MYID )
1520 IF (id%INFO(1) .LT. 0) THEN
1521 GOTO 517
1522 ENDIF
1523C
1524 id%KEEP8(75) = huge(id%KEEP8(75))
1525 id%KEEP8(76) = huge(id%KEEP8(76))
1526 IF (i_am_slave) THEN
1527C
1528 IF (id%KEEP8(4) .NE. 0_8) THEN
1529C
1530 IF ( .NOT. wk_user_provided ) THEN
1531C Set MAXS given BLR_STRAT, KEEP(201) and MAXS_BASE_RELAXED8
1533 & maxs,
1534 & blr_strat, id%KEEP(201), maxs_base_relaxed8,
1535 & id%KEEP(1), id%KEEP8(1), id%MYID, id%N, id%NELT,
1536 & id%NA(1), id%LNA, id%NSLAVES,
1537 & keep464copy, keep465copy,
1538 & id%INFO(1), id%INFO(2)
1539 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1540 & size(id%I8_L0_OMP,2)
1541 & )
1542C Given MAXS and max memory allowed KEEP8(4)
1543C compute in KEEP8(75) the number of real/complex
1544C available for dynamic allocations
1546 & maxs, id%MYID,
1547 & .false., ! UNDER_L0_OMP
1548 & n, id%NELT, id%NA(1), id%LNA, id%NSLAVES,
1549 & blr_strat, id%KEEP(201),
1550 & id%KEEP, id%KEEP8, id%INFO(1), id%INFO(2)
1551 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1552 & size(id%I8_L0_OMP,2)
1553 & )
1554 ELSE
1555C KEEP8(75) dow not include MAXS, since WK_USER is provided
1557 & 0_8, id%MYID,
1558 & .false., ! UNDER_L0_OMP
1559 & n, id%NELT, id%NA(1), id%LNA, id%NSLAVES,
1560 & blr_strat, id%KEEP(201),
1561 & id%KEEP, id%KEEP8, id%INFO(1), id%INFO(2)
1562 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1563 & size(id%I8_L0_OMP,2)
1564 & )
1565 ENDIF
1566 IF (keep(400) .GT.0) THEN
1567C ------------------------------
1568C compute KEEP8(75) under L0_OMP
1569C ------------------------------
1570C Save KEEP8(75) above L0_OMP to reset KEEP8(75)
1571C when starting FAC_PAR_M
1572 id%KEEP8(76) = id%KEEP8(75)
1574 & 0_8, ! MAXS=0_8
1575 & id%MYID,
1576 & .true., ! UNDER_L0_OMP
1577 & id%N, id%NELT, id%NA(1), id%LNA, id%NSLAVES,
1578 & blr_strat, id%KEEP(201),
1579 & id%KEEP, id%KEEP8, id%INFO(1), id%INFO(2)
1580 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1581 & size(id%I8_L0_OMP,2)
1582 & )
1583C KEEP8(75) holds the number of entries that
1584C can be allocated underL0.
1585C It will be used during ZMUMPS_FAC_L0_OMP to adjust the
1586C the size of MUMPS_TPS_ARR(ITH)%LA
1587 ENDIF
1588 ENDIF ! MEM_ALLOWED
1589C
1590 ENDIF ! I_AM_SLAVE THEN
1591C
1592 IF (i_am_slave) THEN
1593 IF ( (keep(400).GT.0) .AND. (keep(406).EQ.2) ) THEN
1594C Compute KEEP8(77) the peak authorized used by
1595C ZMUMPS_PERFORM_COPIES
1597 & id%MYID, id%N,
1598 & id%NELT, id%NA(1), id%LNA, id%NSLAVES,
1599 & blr_strat, id%KEEP(201),
1600 & id%KEEP(1), id%KEEP8(1), id%INFO(1), id%INFO(2)
1601 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1602 & size(id%I8_L0_OMP,2)
1603 & )
1604 ENDIF
1605 ENDIF ! I_AM_SLAVE)
1606C
1607 CALL mumps_propinfo( icntl(1), id%INFO(1),
1608 & id%COMM, id%MYID )
1609 IF (id%INFO(1) .LT. 0) THEN
1610 GOTO 517
1611 ENDIF
1612 CALL mumps_seti8toi4(maxs, id%INFO(39))
1613 CALL zmumps_avgmax_stat8(prokg, mpg, maxs, id%NSLAVES,
1614 & print_maxavg,
1615 & id%COMM, " Effective size of S (based on INFO(39))= ")
1616C
1617 IF ( i_am_slave ) THEN
1618C ------------------
1619C Dynamic scheduling
1620C ------------------
1621 CALL zmumps_load_set_inicost( dble(id%COST_SUBTREES),
1622 & keep(64), id%DKEEP(15), keep(375), maxs )
1623 k28=keep(28)
1624 memory_md_arg = min(int(perlu,8) * ( maxs_base8 / 100_8 + 1_8 ),
1625C Restrict freedom from dynamic scheduler when
1626C MEM_ALLOWED=ICNTL(23) is small (case where KEEP8(4)-MAXS_BASE8
1627C is negative after call to ZMUMPS_MAX_MEM)
1628 & max(0_8, maxs-maxs_base8))
1629 CALL zmumps_load_init( id, memory_md_arg, maxs )
1630C
1631C Out-Of-Core (OOC) issues. Case where we ran one factorization OOC
1632C and the second one is in-core: we try to free OOC
1633C related data from previous factorization.
1634C
1635 CALL zmumps_clean_ooc_data(id, ierr)
1636 IF (ierr < 0) THEN
1637 id%INFO(1) = -90
1638 id%INFO(2) = 0
1639 GOTO 112
1640 ENDIF
1641 IF (keep(201) .GT. 0) THEN
1642C -------------------
1643C OOC initializations
1644C -------------------
1645 IF (keep(201).EQ.1 !PANEL Version
1646 & .AND.keep(50).EQ.0 ! Unsymmetric
1647 & .AND.keep(251).NE.2 ! Store L to disk
1648 & ) THEN
1649 id%OOC_NB_FILE_TYPE=2 ! declared in MUMPS_OOC_COMMON
1650 ELSE
1651 id%OOC_NB_FILE_TYPE=1 ! declared in MUMPS_OOC_COMMON
1652 ENDIF
1653C ------------------------------
1654C Dimension IO buffer, KEEP(100)
1655C ------------------------------
1656 IF (keep(205) .GT. 0) THEN
1657 keep(100) = keep(205)
1658 ELSE
1659 IF (keep(201).EQ.1) THEN ! PANEL version
1660 i8tmp = int(id%OOC_NB_FILE_TYPE,8) *
1661 & 2_8 * int(keep(226),8)
1662 ELSE
1663 i8tmp = 2_8 * id%KEEP8(119)
1664 ENDIF
1665 i8tmp = i8tmp + int(max(keep(12),0),8) *
1666 & (i8tmp/100_8+1_8)
1667C we want to avoid too large IO buffers.
1668C 12M corresponds to 100Mbytes given to buffers.
1669 i8tmp = min(i8tmp, 12000000_8)
1670 keep(100)=int(i8tmp)
1671 ENDIF
1672 IF (keep(201).EQ.1) THEN
1673C Panel version. Force the use of a buffer.
1674 IF ( keep(99) < 3 ) THEN
1675 keep(99) = keep(99) + 3
1676 ENDIF
1677 ENDIF
1678C --------------------------
1679C Reset KEEP(100) to 0 if no
1680C buffer is used for OOC.
1681C --------------------------
1682 IF (keep(99) .LT.3) keep(100)=0
1683 IF((dble(keep(100))*dble(keep(35))/dble(2)).GT.
1684 & (dble(1999999999)))THEN
1685 IF (prokg) THEN
1686 WRITE(mpg,*)id%MYID,': Warning: DIM_BUF_IO might be
1687 & too big for Filesystem'
1688 ENDIF
1689 ENDIF
1690 ALLOCATE (id%OOC_INODE_SEQUENCE(keep(28),
1691 & id%OOC_NB_FILE_TYPE),
1692 & stat=ierr)
1693 IF ( ierr .GT. 0 ) THEN
1694 id%INFO(1) = -13
1695 id%INFO(2) = id%OOC_NB_FILE_TYPE*keep(28)
1696 NULLIFY(id%OOC_INODE_SEQUENCE)
1697 GOTO 112
1698 ENDIF
1699 ALLOCATE (id%OOC_TOTAL_NB_NODES(id%OOC_NB_FILE_TYPE),
1700 & stat=ierr)
1701 IF ( ierr .GT. 0 ) THEN
1702 id%INFO(1) = -13
1703 id%INFO(2) = id%OOC_NB_FILE_TYPE
1704 NULLIFY(id%OOC_TOTAL_NB_NODES)
1705 GOTO 112
1706 ENDIF
1707 ALLOCATE (id%OOC_SIZE_OF_BLOCK(keep(28),
1708 & id%OOC_NB_FILE_TYPE),
1709 & stat=ierr)
1710 IF ( ierr .GT. 0 ) THEN
1711 id%INFO(1) = -13
1712 id%INFO(2) = id%OOC_NB_FILE_TYPE*keep(28)
1713 NULLIFY(id%OOC_SIZE_OF_BLOCK)
1714 GOTO 112
1715 ENDIF
1716 ALLOCATE (id%OOC_VADDR(keep(28),id%OOC_NB_FILE_TYPE),
1717 & stat=ierr)
1718 IF ( ierr .GT. 0 ) THEN
1719 id%INFO(1) = -13
1720 id%INFO(2) = id%OOC_NB_FILE_TYPE*keep(28)
1721 NULLIFY(id%OOC_VADDR)
1722 GOTO 112
1723 ENDIF
1724 ENDIF
1725 ENDIF
1726 112 CALL mumps_propinfo( icntl(1), id%INFO(1),
1727 & id%COMM, id%MYID )
1728 IF (id%INFO(1) < 0) THEN
1729C LOAD_END must be done but not OOC_END_FACTO
1730 GOTO 513
1731 ENDIF
1732 IF (i_am_slave) THEN
1733 IF (keep(201) .GT. 0) THEN
1734 IF ((keep(201).EQ.1).OR.(keep(201).EQ.2)) THEN
1735 CALL zmumps_ooc_init_facto(id,maxs)
1736 ELSE
1737 WRITE(*,*) "Internal error in ZMUMPS_FAC_DRIVER"
1738 CALL mumps_abort()
1739 ENDIF
1740 IF(id%INFO(1).LT.0)THEN
1741 GOTO 111
1742 ENDIF
1743 ENDIF
1744C First increment corresponds to the number of
1745C floating-point operations for subtrees allocated
1746C to the local processor.
1747 CALL zmumps_load_update(0,.false.,dble(id%COST_SUBTREES),
1748 & id%KEEP(1),id%KEEP8(1))
1749 IF (id%INFO(1).LT.0) GOTO 111
1750 END IF
1751C -----------------------
1752C Manage main workarray S
1753C -----------------------
1754 earlyt3rootins = keep(200) .EQ.0
1755 & .OR. ( keep(200) .LT. 0 .AND. keep(400) .EQ. 0 )
1756#if defined (LARGEMATRICES)
1757 IF ( id%MYID .ne. master ) THEN
1758#endif
1759 IF (.NOT.wk_user_provided) THEN
1760 IF ( earlyt3rootins ) THEN
1761C Standard allocation strategy
1762 ALLOCATE (id%S(maxs),stat=ierr)
1763 id%KEEP8(23) = maxs
1764 IF ( ierr .GT. 0 ) THEN
1765 id%INFO(1) = -13
1766 CALL mumps_set_ierror(maxs, id%INFO(2))
1767C On some platforms (IBM for example), an
1768C allocation failure returns a non-null pointer.
1769C Therefore we nullify S
1770 NULLIFY(id%S)
1771 id%KEEP8(23)=0_8
1772 ENDIF
1773 ENDIF
1774 ELSE
1775 id%S => id%WK_USER(1:id%KEEP8(24))
1776 id%KEEP8(23) = 0_8
1777 ENDIF
1778#if defined (LARGEMATRICES)
1779 END IF
1780#endif
1781C
1782 111 CALL mumps_propinfo( icntl(1), id%INFO(1),
1783 & id%COMM, id%MYID )
1784 IF ( id%INFO(1).LT.0 ) GOTO 514
1785C --------------------------
1786C Initialization of modules
1787C related to data management
1788C --------------------------
1789 nb_active_fronts_estim = 3
1790 nb_threads = 1
1791!$ NB_THREADS = OMP_GET_MAX_THREADS()
1792C
1793 nb_active_fronts_estim = 3*nb_threads
1794 IF (i_am_slave) THEN
1795C
1796 CALL mumps_fdm_init('A',nb_active_fronts_estim, id%INFO)
1797C
1798 IF ( (keep(486).EQ.2)
1799 & .OR. ((keep(489).NE.0).AND.(keep(400).GT.1))
1800 & ) THEN
1801C In case of LRSOLVE or CompressCB,
1802C initialize nb of handlers to nb of BLR
1803C nodes estimated at analysis
1804 nb_fronts_f_estim = keep(470)
1805 ELSE
1806 IF (keep(489).NE.0) THEN
1807C Compress CB and no L0 OMP (or 1 thread under L0):
1808C NB_ACTIVE_FRONTS_ESTIM is too small,
1809C to limit nb of reallocations make it twice larger
1810 nb_fronts_f_estim = 2*nb_active_fronts_estim
1811 ELSE
1812 nb_fronts_f_estim = nb_active_fronts_estim
1813 ENDIF
1814 ENDIF
1815 CALL mumps_fdm_init('F',nb_fronts_f_estim, id%INFO )
1816 IF (id%INFO(1) .LT. 0 ) GOTO 114
1817#if ! defined(NO_FDM_DESCBAND)
1818C Storage of DESCBAND information
1819 CALL mumps_fdbd_init( nb_active_fronts_estim, id%INFO )
1820#endif
1821#if ! defined(NO_FDM_MAPROW)
1822C Storage of MAPROW and ROOT2SON information
1823 CALL mumps_fmrd_init( nb_active_fronts_estim, id%INFO )
1824#endif
1825 CALL zmumps_blr_init_module( nb_fronts_f_estim, id%INFO )
1826 114 CONTINUE
1827 ENDIF
1828 CALL mumps_propinfo( icntl(1), id%INFO(1),
1829 & id%COMM, id%MYID )
1830C GOTO 500: one of the above module initializations failed
1831 IF ( id%INFO(1).LT.0 ) GOTO 500
1832C
1833C
1834C Allocate space for matrix in arrowhead
1835C ======================================
1836C
1837C CASE 1 : Matrix is assembled
1838C CASE 2 : Matrix is elemental
1839C
1840 IF ( keep(55) .eq. 0 ) THEN
1841C ------------------------------------
1842C Space has been allocated already for
1843C the integer part during analysis
1844C Only slaves need the arrowheads.
1845C ------------------------------------
1846 IF (associated( id%DBLARR)) THEN
1847 DEALLOCATE(id%DBLARR)
1848 NULLIFY(id%DBLARR)
1849 ENDIF
1850 IF ( i_am_slave .and. id%KEEP8(26) .ne. 0_8 ) THEN
1851 ALLOCATE( id%DBLARR( id%KEEP8(26) ), stat = ierr )
1852 ELSE
1853 ALLOCATE( id%DBLARR( 1 ), stat =ierr )
1854 END IF
1855 IF ( ierr .NE. 0 ) THEN
1856 IF (lpok) THEN
1857 WRITE(lp,*) id%MYID,
1858 & ': Allocation error for DBLARR(',id%KEEP8(26),')'
1859 ENDIF
1860 id%INFO(1)=-13
1861 CALL mumps_set_ierror(id%KEEP8(26), id%INFO(2))
1862 NULLIFY(id%DBLARR)
1863 GOTO 100
1864 END IF
1865 ELSE
1866C ----------------------------------------
1867C Allocate variable lists. Systematically.
1868C ----------------------------------------
1869 IF ( associated( id%INTARR ) ) THEN
1870 DEALLOCATE( id%INTARR )
1871 NULLIFY( id%INTARR )
1872 END IF
1873 IF ( i_am_slave .and. id%KEEP8(27) .ne. 0_8 ) THEN
1874 ALLOCATE( id%INTARR( id%KEEP8(27) ), stat = allocok )
1875 IF ( allocok .GT. 0 ) THEN
1876 id%INFO(1) = -13
1877 CALL mumps_set_ierror(id%KEEP8(27), id%INFO(2))
1878 NULLIFY(id%INTARR)
1879 GOTO 100
1880 END IF
1881 ELSE
1882 ALLOCATE( id%INTARR(1),stat=allocok )
1883 IF ( allocok .GT. 0 ) THEN
1884 id%INFO(1) = -13
1885 id%INFO(2) = 1
1886 NULLIFY(id%INTARR)
1887 GOTO 100
1888 END IF
1889 END IF
1890C -----------------------------
1891C Allocate real values.
1892C On master, if hybrid host and
1893C no scaling, avoid the copy.
1894C -----------------------------
1895 IF (associated( id%DBLARR)) THEN
1896 DEALLOCATE(id%DBLARR)
1897 NULLIFY(id%DBLARR)
1898 ENDIF
1899 IF ( i_am_slave ) THEN
1900 IF ( id%MYID_NODES .eq. master
1901 & .AND. keep(46) .eq. 1
1902 & .AND. keep(52) .eq. 0 ) THEN
1903C --------------------------
1904C Simple pointer association
1905C --------------------------
1906 id%DBLARR => id%A_ELT
1907 ELSE
1908C ----------
1909C Allocation
1910C ----------
1911 IF ( id%KEEP8(26) .ne. 0_8 ) THEN
1912 ALLOCATE( id%DBLARR( id%KEEP8(26) ), stat = allocok )
1913 IF ( allocok .GT. 0 ) THEN
1914 id%INFO(1) = -13
1915 CALL mumps_set_ierror(id%KEEP8(26), id%INFO(2))
1916 NULLIFY(id%DBLARR)
1917 GOTO 100
1918 END IF
1919 ELSE
1920 ALLOCATE( id%DBLARR(1), stat = allocok )
1921 IF ( allocok .GT. 0 ) THEN
1922 id%INFO(1) = -13
1923 id%INFO(2) = 1
1924 NULLIFY(id%DBLARR)
1925 GOTO 100
1926 END IF
1927 END IF
1928 END IF
1929 ELSE
1930 ALLOCATE( id%DBLARR(1), stat = allocok )
1931 IF ( allocok .GT. 0 ) THEN
1932 id%INFO(1) = -13
1933 id%INFO(2) = 1
1934 NULLIFY(id%DBLARR)
1935 GOTO 100
1936 END IF
1937 END IF
1938 END IF
1939C -----------------
1940C Also prepare some
1941C data for the root
1942C -----------------
1943 IF ( keep(38).NE.0 .AND. i_am_slave ) THEN
1944 CALL zmumps_init_root_fac( id%N,
1945 & id%root, id%FILS(1), keep(38), id%KEEP(1), id%INFO(1) )
1946 END IF
1947C
1948C
1949 100 CONTINUE
1950C ----------------
1951C Check for errors
1952C ----------------
1953 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
1954 & id%COMM, id%MYID )
1955 IF ( id%INFO(1).LT.0 ) GOTO 500
1956C
1957C -----------------------------------
1958C
1959C DISTRIBUTION OF THE ORIGINAL MATRIX
1960C
1961C -----------------------------------
1962C
1963C TIMINGS: computed (and printed) on the host
1964C Next line: global time for distrib(arrowheads,elts)
1965C on the host. Synchronization has been performed.
1966 IF (id%MYID.EQ.master) CALL mumps_secdeb(time)
1967C -------------------------------------------
1968C S_PTR_ARG / MAXS_ARG will be used for id%S
1969C argument to arrowhead/element distribution
1970C routines: if id%S is not allocated, we pass
1971C S_DUMMY_ARG instead, which is not accessed.
1972C -------------------------------------------
1973 IF (earlyt3rootins) THEN
1974 s_ptr_arg => id%S
1975 maxs_arg = maxs
1976 ELSE
1977 s_ptr_arg => s_dummy_arg
1978 maxs_arg = 1
1979 ENDIF
1980C
1981 IF ( keep( 55 ) .eq. 0 ) THEN
1982C ----------------------------
1983C Original matrix is assembled
1984C Arrowhead format to be used.
1985C ----------------------------
1986C KEEP8(26) and KEEP8(27) hold the number of entries for real/integer
1987C for the matrix in arrowhead format. They have been set by the
1988C analysis phase (ZMUMPS_ANA_F and ZMUMPS_ANA_G)
1989C
1990C ------------------------------------------------------------------
1991C Blocking is used for sending arrowhead records (I,J,VAL)
1992C buffer(1) is used to store number of bytes already packed
1993C buffer(2) number of records already packed
1994C KEEP(39) : Number of records (blocking factor)
1995C ------------------------------------------------------------------
1996C
1997C ---------------------------------------------
1998C In case of parallel root compute minimum
1999C size of workspace to receive arrowheads
2000C of root node. Will be used to check that
2001C MAXS is large enough for arrowheads (case
2002C of EARLYT3ROOTINS (KEEP(200)=0); if .NOT.
2003C EARLYT3ROOTINS (KEEP(200)=1), root will
2004C be assembled into id%S later and size of
2005C id%S will be checked later)
2006C ---------------------------------------------
2007 IF (earlyt3rootins .AND. keep(38).NE.0 .AND.
2008 & keep(60) .EQ.0 .AND. i_am_slave) THEN
2009 lwk = int(numroc( id%root%ROOT_SIZE, id%root%MBLOCK,
2010 & id%root%MYROW, 0, id%root%NPROW ),8)
2011 lwk = max( 1_8, lwk )
2012 lwk = lwk*
2013 & int(numroc( id%root%ROOT_SIZE, id%root%NBLOCK,
2014 & id%root%MYCOL, 0, id%root%NPCOL ),8)
2015 lwk = max( 1_8, lwk )
2016 ELSE
2017 lwk = 1_8
2018 ENDIF
2019C MAXS must be at least 1, and in case of
2020C parallel root, large enough to receive
2021C arrowheads of root.
2022 IF (maxs .LT. int(lwk,8)) THEN
2023 id%INFO(1) = -9
2024 CALL mumps_set_ierror(lwk, id%INFO(2))
2025 ENDIF
2026 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
2027 & id%COMM, id%MYID )
2028 IF ( id%INFO(1).LT.0 ) GOTO 500
2029C
2030 IF ( keep(54) .eq. 0 ) THEN
2031C ================================================
2032C FIRST CASE : MATRIX IS NOT INITIALLY DISTRIBUTED
2033C ================================================
2034C A small integer workspace is needed to
2035C send the arrowheads.
2036 IF ( id%MYID .eq. master ) THEN
2037 ALLOCATE(iwk(id%N), stat=allocok)
2038 IF ( allocok .NE. 0 ) THEN
2039 id%INFO(1)=-13
2040 id%INFO(2)=id%N
2041 END IF
2042#if defined(LARGEMATRICES)
2043 ALLOCATE (wk(lwk),stat=ierr)
2044 IF ( ierr .GT. 0 ) THEN
2045 id%INFO(1) = -13
2046 CALL mumps_set_ierror(lwk, id%INFO(2))
2047 write(6,*) ' PB1 ALLOC LARGEMAT'
2048 ENDIF
2049#endif
2050 ENDIF
2051 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
2052 & id%COMM, id%MYID )
2053 IF ( id%INFO(1).LT.0 ) GOTO 500
2054 IF ( id%MYID .eq. master ) THEN
2055C
2056C --------------------------------
2057C MASTER sends arowheads using the
2058C global communicator with ranks
2059C also in global communicator
2060C IWK is used as temporary
2061C workspace of size N.
2062C --------------------------------
2063 IF ( .not. associated( id%INTARR ) ) THEN
2064 ALLOCATE( id%INTARR( 1 ),stat=ierr)
2065 IF ( ierr .GT. 0 ) THEN
2066 id%INFO(1) = -13
2067 id%INFO(2) = 1
2068 NULLIFY(id%INTARR)
2069 write(6,*) ' PB2 ALLOC INTARR'
2070 CALL mumps_abort()
2071 ENDIF
2072 ENDIF
2073 nbrecords = keep(39)
2074 IF (id%KEEP8(28) .LT. int(nbrecords,8)) THEN
2075 nbrecords = int(id%KEEP8(28))
2076 ENDIF
2077#if defined(LARGEMATRICES)
2078 CALL zmumps_facto_send_arrowheads(id%N, id%KEEP8(28), id%A(1),
2079 & id%IRN(1), id%JCN(1), id%SYM_PERM(1),
2080 & lscal, id%COLSCA(1), id%ROWSCA(1),
2081 & id%MYID, id%NSLAVES, id%PROCNODE_STEPS(1),
2082 & nbrecords,
2083 & lp, id%COMM, id%root, keep,id%KEEP8,
2084 & id%FILS(1), iwk(1), ! workspace of size N
2085 &
2086 & id%INTARR(1), id%KEEP8(27), id%DBLARR(1), id%KEEP8(26),
2087 & id%PTRAR(1), id%PTRAR(id%N+1),
2088 & id%FRERE_STEPS(1), id%STEP(1), wk(1), lwk,
2089 & id%ISTEP_TO_INIV2(1), id%I_AM_CAND(1),
2090 & id%CANDIDATES(1,1))
2091C write(6,*) '!!! A,IRN,JCN are freed during factorization '
2092 DEALLOCATE (id%A)
2093 NULLIFY(id%A)
2094 DEALLOCATE (id%IRN)
2095 NULLIFY (id%IRN)
2096 DEALLOCATE (id%JCN)
2097 NULLIFY (id%JCN)
2098 IF (.NOT.wk_user_provided) THEN
2099 IF (earlyt3rootins) THEN
2100 ALLOCATE (id%S(maxs),stat=ierr)
2101 id%KEEP8(23) = maxs
2102 IF ( ierr .GT. 0 ) THEN
2103 id%INFO(1) = -13
2104 id%INFO(2) = maxs
2105 NULLIFY(id%S)
2106 id%KEEP8(23)=0_8
2107 write(6,*) ' PB2 ALLOC LARGEMAT',maxs
2108 CALL mumps_abort()
2109 ENDIF
2110 ENDIF
2111 ENDIF
2112 ELSE
2113 id%S => id%WK_USER(1:id%KEEP8(24))
2114 ENDIF
2115 IF (earlyt3rootins) THEN
2116 id%S(maxs-lwk+1_8:maxs) = wk(1_8:lwk)
2117 ENDIF
2118 DEALLOCATE (wk)
2119#else
2120 CALL zmumps_facto_send_arrowheads(id%N, id%KEEP8(28), id%A(1),
2121 & id%IRN(1), id%JCN(1), id%SYM_PERM(1),
2122 & lscal, id%COLSCA(1), id%ROWSCA(1),
2123 & id%MYID, id%NSLAVES, id%PROCNODE_STEPS(1),
2124 & nbrecords,
2125 & lp, id%COMM, id%root, keep(1),id%KEEP8(1),
2126 & id%FILS(1), iwk(1),
2127 &
2128 & id%INTARR(1), id%KEEP8(27), id%DBLARR(1), id%KEEP8(26),
2129 & id%PTRAR(1), id%PTRAR(id%N+1),
2130 & id%FRERE_STEPS(1), id%STEP(1), s_ptr_arg(1), maxs_arg,
2131 & id%ISTEP_TO_INIV2(1), id%I_AM_CAND(1),
2132 & id%CANDIDATES(1,1) )
2133#endif
2134 DEALLOCATE(iwk)
2135 ELSE
2136 nbrecords = keep(39)
2137 IF (id%KEEP8(28) .LT. int(nbrecords,8)) THEN
2138 nbrecords = int(id%KEEP8(28))
2139 ENDIF
2140 CALL zmumps_facto_recv_arrowhd2( id%N,
2141 & id%DBLARR(1), id%KEEP8(26),
2142 & id%INTARR(1), id%KEEP8(27),
2143 & id%PTRAR( 1 ),
2144 & id%PTRAR(id%N+1),
2145 & keep( 1 ), id%KEEP8(1), id%MYID, id%COMM,
2146 & nbrecords,
2147 &
2148 & s_ptr_arg(1), maxs_arg,
2149 & id%root,
2150 & id%PROCNODE_STEPS(1), id%NSLAVES,
2151 & id%SYM_PERM(1), id%FRERE_STEPS(1), id%STEP(1),
2152 & id%INFO(1), id%INFO(2) )
2153 ENDIF
2154 ELSE
2155C
2156C =============================================
2157C SECOND CASE : MATRIX IS INITIALLY DISTRIBUTED
2158C =============================================
2159C Timing on master.
2160 IF (id%MYID.EQ.master) THEN
2161 CALL mumps_secdeb(time)
2162 END IF
2163 IF ( i_am_slave ) THEN
2164C ---------------------------------------------------
2165C In order to have possibly IRN_loc/JCN_loc/A_loc
2166C of size 0, avoid to pass them inside REDISTRIBUTION
2167C and pass id instead
2168C NZ_locMAX8 gives as a maximum buffer size (send/recv) used
2169C an upper bound to limit buffers on small matrices
2170C ---------------------------------------------------
2171 CALL mpi_allreduce(id%KEEP8(29), nz_locmax8, 1, mpi_integer8,
2172 & mpi_max, id%COMM_NODES, ierr)
2173 nbrecords = keep(39)
2174 IF (nz_locmax8 .LT. int(nbrecords,8)) THEN
2175 nbrecords = int(nz_locmax8)
2176 ENDIF
2177 CALL zmumps_redistribution( id%N,
2178 & id%KEEP8(29),
2179 & id,
2180 & id%DBLARR(1), id%KEEP8(26), id%INTARR(1),
2181 & id%KEEP8(27), id%PTRAR(1), id%PTRAR(id%N+1),
2182 & keep(1), id%KEEP8(1), id%MYID_NODES,
2183 & id%COMM_NODES, nbrecords,
2184 & s_ptr_arg(1), maxs_arg, id%root, id%PROCNODE_STEPS(1),
2185 & id%NSLAVES, id%SYM_PERM(1), id%STEP(1),
2186 & id%ICNTL(1), id%INFO(1), nsend8, nlocal8,
2187 & id%ISTEP_TO_INIV2(1),
2188 & id%CANDIDATES(1,1) )
2189 IF ( ( keep(52).EQ.7 ).OR. (keep(52).EQ.8) ) THEN
2190C -------------------------------------------------
2191C In that case, scaling arrays have been allocated
2192C on all processors. They were useful for matrix
2193C distribution. But we now really only need them
2194C on the host. In case of distributed solution, we
2195C will have to broadcast either ROWSCA or COLSCA
2196C (depending on MTYPE) but this is done later.
2197C
2198C In other words, on exit from the factorization,
2199C we want to have scaling arrays available only
2200C on the host.
2201C -------------------------------------------------
2202 IF ( id%MYID > 0 ) THEN
2203 IF (associated(id%ROWSCA)) THEN
2204 DEALLOCATE(id%ROWSCA)
2205 NULLIFY(id%ROWSCA)
2206 ENDIF
2207 IF (associated(id%COLSCA)) THEN
2208 DEALLOCATE(id%COLSCA)
2209 NULLIFY(id%COLSCA)
2210 ENDIF
2211 ENDIF
2212 ENDIF
2213#if defined(LARGEMATRICES)
2214C deallocate id%IRN_loc, id%JCN(loc) to free extra space
2215C Note that in this case IRN_loc cannot be used
2216C anymore during the solve phase for IR and Error analysis.
2217 IF (associated(id%IRN_loc)) THEN
2218 DEALLOCATE(id%IRN_loc)
2219 NULLIFY(id%IRN_loc)
2220 ENDIF
2221 IF (associated(id%JCN_loc)) THEN
2222 DEALLOCATE(id%JCN_loc)
2223 NULLIFY(id%JCN_loc)
2224 ENDIF
2225 IF (associated(id%A_loc)) THEN
2226 DEALLOCATE(id%A_loc)
2227 NULLIFY(id%A_loc)
2228 ENDIF
2229 write(6,*) ' Warning :',
2230 & ' id%A_loc, IRN_loc, JCN_loc deallocated !!! '
2231#endif
2232 IF (prok) THEN
2233 WRITE(mp,120) nlocal8, nsend8
2234 END IF
2235 END IF
2236 IF ( keep(46) .eq. 0 .AND. id%MYID.eq.master ) THEN
2237C ------------------------------
2238C The host is not working -> had
2239C no data from initial matrix
2240C ------------------------------
2241 nsend8 = 0_8
2242 nlocal8 = 0_8
2243 END IF
2244C --------------------------
2245C Put into some info/infog ?
2246C --------------------------
2247 CALL mpi_reduce( nsend8, nsend_tot8, 1, mpi_integer8,
2248 & mpi_sum, master, id%COMM, ierr )
2249 CALL mpi_reduce( nlocal8, nlocal_tot8, 1, mpi_integer8,
2250 & mpi_sum, master, id%COMM, ierr )
2251 IF ( prokg ) THEN
2252 WRITE(mpg,125) nlocal_tot8, nsend_tot8
2253 END IF
2254C
2255C -------------------------
2256C Check for possible errors
2257C -------------------------
2258 CALL mumps_propinfo( icntl(1), id%INFO(1),
2259 & id%COMM, id%MYID )
2260 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500
2261C
2262 ENDIF
2263 ELSE
2264C -------------------
2265C Matrix is elemental,
2266C provided on the
2267C master only
2268C -------------------
2269 IF ( id%MYID.eq.master)
2270 & CALL zmumps_maxelt_size( id%ELTPTR(1),
2271 & id%NELT,
2272 & maxelt_size )
2273C
2274C Perform the distribution of the elements.
2275C A this point,
2276C PTRAIW/PTRARW have been computed.
2277C INTARR/DBLARR have been allocated
2278C ELTPROC gives the mapping of elements
2279C
2280 CALL zmumps_elt_distrib( id%N, id%NELT, id%KEEP8(30),
2281 & id%COMM, id%MYID,
2282 & id%NSLAVES, id%PTRAR(1),
2283 & id%PTRAR(id%NELT+2),
2284 & id%INTARR(1), id%DBLARR(1), id%KEEP8(27), id%KEEP8(26),
2285 & id%KEEP(1), id%KEEP8(1), maxelt_size,
2286 & id%FRTPTR(1), id%FRTELT(1),
2287 & s_ptr_arg(1), maxs_arg, id%FILS(1),
2288 & id, id%root )
2289C ----------------
2290C Broadcast errors
2291C ----------------
2292 CALL mumps_propinfo( icntl(1), id%INFO(1),
2293 & id%COMM, id%MYID )
2294 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500
2295 END IF ! Element entry
2296C ------------------------
2297C Time the redistribution:
2298C ------------------------
2299 IF ( id%MYID.EQ.master) THEN
2300 CALL mumps_secfin(time)
2301 id%DKEEP(93) = time
2302 IF (prokg) WRITE(mpg,160) id%DKEEP(93)
2303 END IF
2304 IF ( keep(400) .GT. 0 ) THEN
2305C L0-OMP was active at analysis and
2306C thus will be active at factorization
2307C We check the number of threads.
2308 nomp=1
2309!$ NOMP = omp_get_max_threads()
2310 IF ( nomp .NE. keep(400) ) THEN
2311 id%INFO(1)=-58
2312 id%INFO(2)=keep(400)
2313 IF (lpok) WRITE(lp,'(A,A,I5,A,I5)')
2314 &" FAILURE DETECTED IN FACTORIZATION: #threads for KEEP(401)",
2315 &" changed from",keep(400)," at analysis to", nomp
2316 ENDIF
2317C error check
2318 CALL mumps_propinfo( icntl(1), id%INFO(1),
2319 & id%COMM, id%MYID )
2320 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500
2321 ENDIF
2322C
2323C TIMINGS:
2324C Next line: elapsed time for factorization
2325 IF (id%MYID.EQ.master) CALL mumps_secdeb(time)
2326C
2327C Allocate buffers on the workers
2328C ===============================
2329C
2330 IF ( i_am_slave ) THEN
2331 CALL zmumps_buf_ini_myid(id%MYID_NODES)
2332C
2333C Some buffers are required to pack/unpack data and for
2334C receiving MPI messages.
2335C For packing/unpacking : the buffer must be large
2336C enough to send several messages while receives might not
2337C be posted yet.
2338C It is assumed that the size of an integer is held in KEEP(34)
2339C while the size of a complex is held in KEEP(35).
2340C BUFR and LBUFR are declared of type integer, since byte is not
2341C a standard datatype.
2342C We now use KEEP(43) or KEEP(379) and KEEP(44) or KEEP(380)
2343C as estimated at analysis to allocate appropriate buffer sizes
2344C
2345C Reception buffer
2346C ----------------
2347 IF (keep(486).NE.0) THEN
2348 zmumps_lbufr_bytes8 = int(keep( 380 ),8) * int(keep( 35 ),8)
2349 ELSE
2350 zmumps_lbufr_bytes8 = int(keep( 44 ),8) * int(keep( 35 ),8)
2351 ENDIF
2352C ---------------------------------------
2353C Ensure a reasonable minimal buffer size
2354C ---------------------------------------
2355 zmumps_lbufr_bytes8 = max( zmumps_lbufr_bytes8,
2356 & 100000_8 )
2357 IF ((keep(50).NE.0).AND.(keep(489).GT.0).AND.
2358 & (id%NSLAVES.GE.2)) THEN
2359C ----------------------------------------------------------
2360C Ensure large enough receive buffer in case of BLR with
2361C CompressCB for symmetric matrices.
2362C -----------------------------------------------------------
2363 ratiok465 = dble(keep465copy)/dble(1000)
2364 zmumps_lbufr_bytes8 = max(zmumps_lbufr_bytes8,
2365 & int(
2366 & ratiok465*
2367 & dble(
2368 & int(keep(2)+1,8)*int(keep(142),8)*int(keep(35),8)
2369 & )
2370 & ,8)
2371 & )
2372 ENDIF
2373C
2374C If there is pivoting, size of the message might still increase.
2375C We use a relaxation (so called PERLU) to increase the estimate.
2376C
2377C Note: PERLU is a global estimate for pivoting.
2378C It may happen that one large contribution block size is increased
2379C by more than that.
2380C This is why we use an extra factor 2 relaxation coefficient for
2381C the relaxation of
2382C the reception buffer in the case where pivoting is allowed.
2383C A more dynamic strategy could be applied: if message to
2384C be received is larger than expected, reallocate a larger
2385C buffer. (But this won't work with IRECV.)
2386C Finally, one may want (as we are currently doing it for
2387C most messages)
2388C to cut large messages into a series of smaller ones.
2389C
2390 IF (keep(48).EQ.5) THEN
2391 min_perlu = 2
2392 ELSE
2393 min_perlu = 0
2394 ENDIF
2395C
2396 zmumps_lbufr_bytes8 = zmumps_lbufr_bytes8
2397 & + int( 2.0d0 * dble(max(perlu,min_perlu))*
2398 & dble(zmumps_lbufr_bytes8)/100d0, 8)
2399 zmumps_lbufr_bytes8 = min(zmumps_lbufr_bytes8,
2400 & int(huge(i4)-100,8))
2401 zmumps_lbufr_bytes = int( zmumps_lbufr_bytes8 )
2402 IF (keep(48)==5) THEN
2403C Since the buffer is going to be allocated, use
2404C it as the constraint for memory/granularity
2405C in hybrid scheduler
2406C
2407 id%KEEP8(21) = id%KEEP8(22) +
2408 & int( dble(max(perlu,min_perlu))*
2409 & dble(id%KEEP8(22))/100d0,8)
2410 ENDIF
2411C
2412C Now estimate the size for the buffer for asynchronous
2413C sends of contribution blocks (so called CB). We want to be able to send at
2414C least KEEP(213)/100 (two in general) messages at the
2415C same time.
2416C
2417C Send buffer
2418C -----------
2419 IF (keep(486).NE.0) THEN
2420 zmumps_lbuf8 = int( dble(keep(213)) / 100.0d0 *
2421 & dble(keep(379)) * dble(keep(35)), 8 )
2422 ELSE
2423 zmumps_lbuf8 = int( dble(keep(213)) / 100.0d0 *
2424 & dble(keep(43)) * dble(keep(35)), 8 )
2425 ENDIF
2426 zmumps_lbuf8 = max( zmumps_lbuf8, 100000_8 )
2427 zmumps_lbuf8 = zmumps_lbuf8
2428 & + int( 2.0d0 * dble(max(perlu,min_perlu))*
2429 & dble(zmumps_lbuf8)/100d0, 8)
2430C Make ZMUMPS_LBUF8 small enough to be stored in a standard integer
2431 zmumps_lbuf8 = min(zmumps_lbuf8, int(huge(i4)-100,8))
2432C
2433C No reason to have send buffer smaller than receive buffer.
2434C This should never occur with the formulas above but just
2435C in case:
2436 zmumps_lbuf8 = max(zmumps_lbuf8, zmumps_lbufr_bytes8+3*keep(34))
2437 zmumps_lbuf = int(zmumps_lbuf8)
2438 IF(id%KEEP(48).EQ.4)THEN
2439 zmumps_lbufr_bytes=zmumps_lbufr_bytes*5
2440 zmumps_lbuf=zmumps_lbuf*5
2441 ENDIF
2442C
2443C Estimate size of buffer for small messages
2444C Each node can send ( NSLAVES - 1 ) messages to (NSLAVES-1) nodes
2445C
2446C KEEP(56) is the number of nodes of level II.
2447C Messages will be sent for the symmetric case
2448C for synchronisation issues.
2449C
2450C We take an upperbound
2451C
2452 zmumps_lbuf_int = ( keep(56) + id%NSLAVES * id%NSLAVES ) * 5
2453 & * keep(34)
2454 IF ( keep( 38 ) .NE. 0 ) THEN
2455C
2456C
2457 kkkk = mumps_procnode( id%PROCNODE_STEPS(id%STEP(keep(38))),
2458 & id%KEEP(199) )
2459 IF ( kkkk .EQ. id%MYID_NODES ) THEN
2460 zmumps_lbuf_int = zmumps_lbuf_int + 4 * keep(34) *
2461 & ( id%NSLAVES + id%NE_STEPS(id%STEP(keep(38)))
2462 & + min(keep(56), id%NE_STEPS(id%STEP(keep(38)))) * id%NSLAVES
2463 & )
2464 END IF
2465 END IF
2466C At this point, ZMUMPS_LBUFR_BYTES, ZMUMPS_LBUF
2467C and ZMUMPS_LBUF_INT have been computed (all
2468C are in numbers of bytes).
2469 IF ( prok ) THEN
2470 WRITE( mp, 9999 ) zmumps_lbufr_bytes,
2471 & zmumps_lbuf, zmumps_lbuf_int
2472 END IF
2473 9999 FORMAT( /,' Allocated buffers',/,' ------------------',/,
2474 & ' Size of reception buffer in bytes ...... = ', i10,
2475 & /,
2476 & ' Size of async. emission buffer (bytes).. = ', i10,/,
2477 & ' Small emission buffer (bytes) .......... = ', i10)
2478C --------------------------
2479C Allocate small send buffer
2480C required for ZMUMPS_FAC_B
2481C --------------------------
2482 CALL zmumps_buf_alloc_small_buf( zmumps_lbuf_int, ierr )
2483 IF ( ierr .NE. 0 ) THEN
2484 id%INFO(1)= -13
2485C convert to size in integer id%INFO(2)= ZMUMPS_LBUF_INT
2486 id%INFO(2)= (zmumps_lbuf_int+keep(34)-1)/keep(34)
2487 IF (lpok) THEN
2488 WRITE(lp,*) id%MYID,
2489 & ':Allocation error in ZMUMPS_BUF_ALLOC_SMALL_BUF'
2490 & ,id%INFO(2)
2491 ENDIF
2492 GO TO 110
2493 END IF
2494C
2495C --------------------------------------
2496C Allocate reception buffer on all procs
2497C This is done now.
2498C --------------------------------------
2499 zmumps_lbufr = (zmumps_lbufr_bytes+keep(34)-1)/keep(34)
2500 ALLOCATE( bufr( zmumps_lbufr ),stat=ierr )
2501 IF ( ierr .NE. 0 ) THEN
2502 id%INFO(1) = -13
2503 id%INFO(2) = zmumps_lbufr
2504 IF (lpok) THEN
2505 WRITE(lp,*)
2506 & ': Allocation error for BUFR(', zmumps_lbufr,
2507 & ') on MPI process',id%MYID
2508 ENDIF
2509 GO TO 110
2510 END IF
2511C -----------------------------------------
2512C Estimate MAXIS. IS will be allocated in
2513C ZMUMPS_FAC_B. It will contain factors and
2514C contribution blocks integer information
2515C -----------------------------------------
2516C Relax integer workspace based on PERLU
2517 perlu = keep( 12 )
2518 IF (keep(201).GT.0) THEN
2519C OOC panel or non panel (note that
2520C KEEP(15)=KEEP(225) if non panel)
2521 maxis_estim = keep(225)
2522 ELSE
2523C In-core or reals for factors not stored
2524 maxis_estim = keep(15)
2525 ENDIF
2526 maxis = max( 1, int( min( int(huge(maxis),8),
2527 & int(maxis_estim,8) + 3_8 * max(int(perlu,8),10_8) *
2528 & ( int(maxis_estim,8) / 100_8 + 1_8 )
2529 & ) ! min
2530 & ) ! int
2531 & ) !max
2532C ----------------------------
2533C Allocate PTLUST_S and PTRFAC
2534C They will be used to access
2535C factors in the solve phase.
2536C They are also needed for
2537C ZMUMPS_FAC_L0_OMP.
2538C ----------------------------
2539 ALLOCATE( id%PTLUST_S( id%KEEP(28) ), stat = ierr )
2540 IF ( ierr .NE. 0 ) THEN
2541 id%INFO(1)=-13
2542 id%INFO(2)=id%KEEP(28)
2543 IF (lpok) THEN
2544 WRITE(lp,*) id%MYID,
2545 & ': Allocation error for id%PTLUST_S(', id%KEEP(28),')'
2546 ENDIF
2547 NULLIFY(id%PTLUST_S)
2548 GOTO 110
2549 END IF
2550 ALLOCATE( id%PTRFAC( id%KEEP(28) ), stat = ierr )
2551 IF ( ierr .NE. 0 ) THEN
2552 id%INFO(1)=-13
2553 id%INFO(2)=id%KEEP(28)
2554 NULLIFY(id%PTRFAC)
2555 IF (lpok) THEN
2556 WRITE(lp,*) id%MYID,
2557 & ': Allocation error for id%PTRFAC(', id%KEEP(28),')'
2558 ENDIF
2559 GOTO 110
2560 END IF
2561C -----------------------------
2562C Reserve temporary workspace :
2563C IPOOL, PTRWB, ITLOC, PTRIST
2564C PTRWB will be subdivided again
2565C in routine ZMUMPS_FAC_B
2566C -----------------------------
2567 ptrist = 1
2568 ptrwb = ptrist + id%KEEP(28)
2569 itloc = ptrwb + 2 * id%KEEP(28)
2570C Fwd in facto: ITLOC of size id%N + id%KEEP(253)
2571 ipool = itloc + id%N + id%KEEP(253)
2572C
2573C --------------------------------
2574C NA(1) is an upperbound for LPOOL
2575C --------------------------------
2576C Structure of the pool:
2577C ____________________________________________________
2578C | Subtrees | | Top nodes | 1 2 3 |
2579C ----------------------------------------------------
2580 lpool = mumps_get_pool_length(id%NA(1), id%KEEP(1),id%KEEP8(1))
2581 ALLOCATE( iwk( ipool + lpool - 1 ), stat = ierr )
2582 IF ( ierr .NE. 0 ) THEN
2583 id%INFO(1)=-13
2584 id%INFO(2)=ipool + lpool - 1
2585 IF (lpok) THEN
2586 WRITE(lp,*) id%MYID,
2587 & ': Allocation error for IWK(',ipool+lpool-1,')'
2588 ENDIF
2589 GOTO 110
2590 END IF
2591 ALLOCATE(iwk8( 2 * id%KEEP(28)), stat = ierr)
2592 IF ( ierr .NE. 0 ) THEN
2593 id%INFO(1)=-13
2594 id%INFO(2)=2 * id%KEEP(28)
2595 IF (lpok) THEN
2596 WRITE(lp,*) id%MYID,
2597 & ': Allocation error for IWKB(', 2*id%KEEP(28),')'
2598 ENDIF
2599 GOTO 110
2600 END IF
2601C
2602C Return to SPMD
2603C
2604 ENDIF
2605C
2606 110 CONTINUE
2607C ----------------
2608C Broadcast errors
2609C ----------------
2610 CALL mumps_propinfo( icntl(1), id%INFO(1),
2611 & id%COMM, id%MYID )
2612 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500
2613C
2614 IF ( i_am_slave ) THEN
2615C Store size of receive buffers in ZMUMPS_LBUF module
2616 CALL zmumps_buf_dist_irecv_size( zmumps_lbufr_bytes )
2617 IF (prok) THEN
2618 WRITE( mp, 170 ) maxs, maxis, id%KEEP8(12), keep(15),
2619 & id%KEEP8(26), id%KEEP8(27), id%KEEP8(11), keep(26), keep(27)
2620 ENDIF
2621 END IF
2622C ===============================================================
2623C Before calling the main driver, ZMUMPS_FAC_B,
2624C some statistics should be initialized to 0,
2625C even on the host node because they will be
2626C used in REDUCE operations afterwards.
2627C --------------------------------------------
2628C Size of factors written. It will be set to POSFAC in
2629C IC, otherwise we accumulate written factors in it.
2630 id%KEEP8(31)= 0_8
2631C Size of factors under L0 will be returned
2632C in id%KEEP8(64), not included in KEEP8(31))
2633C Number of entries in factors
2634 id%KEEP8(10) = 0_8
2635C KEEP8(8) will hold the volume of extra copies due to
2636C in-place stacking in fac_mem_stack.F
2637 id%KEEP8(8)=0_8
2638 id%INFO(9:14)=0
2639 rinfo(2:3)=zero
2640 IF ( i_am_slave ) THEN
2641C ------------------------------------
2642C Call effective factorization routine
2643C ------------------------------------
2644 IF ( keep(55) .eq. 0 ) THEN
2645 ldptrar = id%N
2646 ELSE
2647 ldptrar = id%NELT + 1
2648 END IF
2649 IF ( id%KEEP(55) .NE. 0 ) THEN
2650 nelt_arg = id%NELT
2651 ELSE
2652C ------------------------------
2653C Use size 1 to avoid complaints
2654C when using check bound options
2655C ------------------------------
2656 nelt_arg = 1
2657 END IF
2658 ENDIF
2659 IF (i_am_slave) THEN
2660 IF (associated(id%L0_OMP_MAPPING))
2661 & DEALLOCATE(id%L0_OMP_MAPPING)
2662 IF (keep(400) .GT. 0) THEN
2663 id%LL0_OMP_MAPPING = keep(28)
2664 ELSE
2665 id%LL0_OMP_MAPPING = 1
2666 ENDIF
2667 ALLOCATE(id%L0_OMP_MAPPING(id%LL0_OMP_MAPPING), stat=allocok)
2668 IF ( allocok > 0) THEN
2669 write(*,*) "problem allocating l0_omp_mapping",
2670 & IERR, KEEP(28)
2671 GOTO 115
2672 ENDIF
2673 IF (associated(id%L0_OMP_FACTORS)) THEN
2674 CALL ZMUMPS_FREE_L0_OMP_FACTORS(id%L0_OMP_FACTORS)
2675 ENDIF
2676.GT. IF (KEEP(400) 0) THEN
2677 id%LL0_OMP_FACTORS = KEEP(400)
2678 ELSE
2679 id%LL0_OMP_FACTORS = 1
2680 ENDIF
2681 ALLOCATE(id%L0_OMP_FACTORS(id%LL0_OMP_FACTORS),stat = allocok)
2682 IF (allocok > 0) THEN
2683 id%INFO(1)=-7
2684 id%INFO(2)=NB_THREADS
2685 GOTO 111
2686 ENDIF
2687 CALL ZMUMPS_INIT_L0_OMP_FACTORS(id%L0_OMP_FACTORS)
2688 ENDIF
2689 115 CONTINUE
2690 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1),
2691 & id%COMM, id%MYID )
2692.LT. IF ( id%INFO( 1 ) 0 ) GOTO 500
2693C Compute DKEEP(17)
2694 AVG_FLOPS = RINFOG(1)/(dble(id%NSLAVES))
2695 id%DKEEP(17) = max ( id%DKEEP(18), AVG_FLOPS/dble(50) )
2696 &
2697.AND..EQ. IF (PROKid%MYIDMASTER) THEN
2698.LE. IF (id%NSLAVES1) THEN
2699 WRITE(MP,'(/A,A,1PD10.3)')
2700 &' Start factorization with total',
2701 &' estimated flops (RINFOG(1)) = ',
2702 & RINFOG(1)
2703 ELSE
2704 WRITE(MP,'(/A,A,1PD10.3,A,1PD10.3)')
2705 &' Start factorization with total',
2706 &' estimated flops RINFOG(1) / Average per MPI proc = ',
2707 & RINFOG(1), ' / ', AVG_FLOPS
2708 ENDIF
2709 ENDIF
2710 IF (I_AM_SLAVE) THEN
2711C IS/S pointers passed to ZMUMPS_FAC_B with
2712C implicit interface through intermediate
2713C structure S_IS_POINTERS. IS will be allocated
2714C during ZMUMPS_FAC_B.
2715C In case of L0OMP, id%IS and id%S are allocated during
2716C ZMUMPS_FAC_B, and only after L0OMP nodes are processed,
2717C in order to limit the global memory peak.
2718 S_IS_POINTERS%IW => id%IS; NULLIFY(id%IS)
2719 S_IS_POINTERS%A => id%S ; NULLIFY(id%S)
2720 CALL ZMUMPS_FAC_B(id%N,S_IS_POINTERS,MAXS,MAXIS,id%SYM_PERM(1),
2721 & id%NA(1),id%LNA,id%NE_STEPS(1),id%ND_STEPS(1), id%FILS(1),
2722 & id%STEP(1),id%FRERE_STEPS(1),id%DAD_STEPS(1),id%CANDIDATES(1,1),
2723 & id%ISTEP_TO_INIV2(1),id%TAB_POS_IN_PERE(1,1), id%PTRAR(1),
2724 & LDPTRAR,IWK(PTRIST),id%PTLUST_S(1),id%PTRFAC(1),IWK(PTRWB),IWK8,
2725 & IWK(ITLOC),RHS_MUMPS(1),IWK(IPOOL),LPOOL,CNTL1,ICNTL(1),
2726 & id%INFO(1), RINFO(1),KEEP(1),id%KEEP8(1),id%PROCNODE_STEPS(1),
2727 & id%NSLAVES,id%COMM_NODES,id%MYID,id%MYID_NODES,BUFR,ZMUMPS_LBUFR
2728 & , ZMUMPS_LBUFR_BYTES, ZMUMPS_LBUF, id%INTARR(1),id%DBLARR(1),
2729 & id%root, NELT_arg, id%FRTPTR(1), id%FRTELT(1),id%COMM_LOAD,
2730 & id%ASS_IRECV, SEUIL, SEUIL_LDLT_NIV2, id%MEM_DIST(0),
2731 & id%DKEEP(1), id%PIVNUL_LIST(1), LPN_LIST, id%LRGROUPS(1)
2732 & ,id%IPOOL_B_L0_OMP(1),id%LPOOL_B_L0_OMP,
2733 & id%IPOOL_A_L0_OMP(1),id%LPOOL_A_L0_OMP,id%L_VIRT_L0_OMP,
2734 & id%VIRT_L0_OMP(1), id%VIRT_L0_OMP_MAPPING(1),id%L_PHYS_L0_OMP,
2735 & id%PHYS_L0_OMP(1), id%PERM_L0_OMP(1), id%PTR_LEAFS_L0_OMP(1),
2736 & id%L0_OMP_MAPPING(1),id%LL0_OMP_MAPPING,
2737 & id%THREAD_LA, id%L0_OMP_FACTORS(1), id%LL0_OMP_FACTORS,
2738 & id%I4_L0_OMP(1,1), size(id%I4_L0_OMP,1), size(id%I4_L0_OMP,2),
2739 & id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1), size(id%I8_L0_OMP,2)
2740 & )
2741 id%IS => S_IS_POINTERS%IW; NULLIFY(S_IS_POINTERS%IW)
2742 id%S => S_IS_POINTERS%A ; NULLIFY(S_IS_POINTERS%A)
2743C
2744C ------------------------------
2745C Deallocate temporary workspace
2746C ------------------------------
2747 DEALLOCATE( IWK )
2748 DEALLOCATE( IWK8 )
2749 ENDIF
2750C ---------------------------------
2751C Free some workspace corresponding
2752C to the original matrix in
2753C arrowhead or elemental format.
2754C -----
2755C Note : INTARR was not allocated
2756C during factorization in the case
2757C of an assembled matrix.
2758C ---------------------------------
2759.eq. IF ( KEEP(55) 0 ) THEN
2760C
2761C ----------------
2762C Assembled matrix
2763C ----------------
2764 IF (associated( id%DBLARR)) THEN
2765 DEALLOCATE(id%DBLARR)
2766 NULLIFY(id%DBLARR)
2767 ENDIF
2768C
2769 ELSE
2770C
2771C ----------------
2772C Elemental matrix
2773C ----------------
2774 IF (associated(id%INTARR)) THEN
2775 DEALLOCATE( id%INTARR)
2776 NULLIFY( id%INTARR )
2777 ENDIF
2778C ------------------------------------
2779C For the master from an hybrid host
2780C execution without scaling, then real
2781C values have not been copied !
2782C -------------------------------------
2783.eq. IF ( id%MYID_NODES MASTER
2784.AND..eq. & KEEP(46) 1
2785.AND..eq. & KEEP(52) 0 ) THEN
2786 NULLIFY( id%DBLARR )
2787 ELSE
2788 IF (associated( id%DBLARR)) THEN
2789 DEALLOCATE(id%DBLARR)
2790 NULLIFY(id%DBLARR)
2791 ENDIF
2792 END IF
2793 END IF
2794C Memroy statistics
2795C -----------------------------------
2796C If QR (Keep(19)) is not zero, and if
2797C the host does not have the information
2798C (ie is not slave), send information
2799C computed on the slaves during facto
2800C to the host.
2801C -----------------------------------
2802.NE. IF ( KEEP(19) 0 ) THEN
2803.NE. IF ( KEEP(46) 1 ) THEN
2804C Host was not working during facto_root
2805C Send him the information
2806.eq. IF ( id%MYID MASTER ) THEN
2807 CALL MPI_RECV( KEEP(17), 1, MPI_INTEGER, 1, DEFIC_TAG,
2808 & id%COMM, STATUS, IERR )
2809 CALL MPI_RECV( KEEP(143), 1, MPI_INTEGER, 1, DEFIC_TAG,
2810 & id%COMM, STATUS, IERR )
2811.EQ. ELSE IF ( id%MYID 1 ) THEN
2812 CALL MPI_SEND( KEEP(17), 1, MPI_INTEGER, 0, DEFIC_TAG,
2813 & id%COMM, IERR )
2814 CALL MPI_SEND( KEEP(143), 1, MPI_INTEGER, 0, DEFIC_TAG,
2815 & id%COMM, IERR )
2816 END IF
2817 END IF
2818 END IF
2819C --------------------------------
2820C Deallocate communication buffers
2821C They will be reallocated
2822C in the solve.
2823C --------------------------------
2824 IF (allocated(BUFR)) DEALLOCATE(BUFR)
2825 CALL ZMUMPS_BUF_DEALL_SMALL_BUF( IERR )
2826C//PIV
2827.NE. IF (KEEP(219)0) THEN
2828 CALL ZMUMPS_BUF_DEALL_MAX_ARRAY()
2829 ENDIF
2830C
2831C Check for errors.
2832C After ZMUMPS_FAC_B every slave is aware of an error.
2833C If master is included in computations, the call below should
2834C not be necessary.
2835 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1),
2836 & id%COMM, id%MYID )
2837C
2838 CALL ZMUMPS_EXTRACT_SCHUR_REDRHS(id)
2839.GT. IF (KEEP(201) 0) THEN
2840.EQ..OR..EQ. IF ((KEEP(201)1) (KEEP(201)2)) THEN
2841 IF ( I_AM_SLAVE ) THEN
2842 CALL ZMUMPS_OOC_CLEAN_PENDING(IERR)
2843.LT. IF(IERR0)THEN
2844 id%INFO(1)=IERR
2845 id%INFO(2)=0
2846 ENDIF
2847 ENDIF
2848 CALL MUMPS_PROPINFO( id%ICNTL(1), id%INFO(1),
2849 & id%COMM, id%MYID )
2850C We want to collect statistics even in case of
2851C error to understand if it is due to numerical
2852C issues
2853CC IF ( id%INFO(1) < 0 ) GOTO 500
2854 END IF
2855 END IF
2856.EQ. IF (id%MYIDMASTER) THEN
2857 CALL MUMPS_SECFIN(TIME)
2858 id%DKEEP(94)=TIME
2859.GT. IF (KEEP(400)0) THEN
2860C Facto time above L0_OMP = total time - facto time under L0_OMP
2861 id%DKEEP(96)=id%DKEEP(94)-id%DKEEP(95)
2862 ENDIF
2863 ENDIF
2864C =====================================================================
2865C COMPUTE MEMORY ALLOCATED BY MUMPS, INFO(16)
2866C ---------------------------------------------
2867 MEM_EFF_ALLOCATED = .TRUE.
2868 CALL ZMUMPS_MAX_MEM( id%KEEP(1),id%KEEP8(1),
2869 & id%MYID, N, id%NELT, id%NA(1), id%LNA, id%KEEP8(28),
2870 & id%KEEP8(30),
2871 & id%NSLAVES, TOTAL_MBYTES, .TRUE., id%KEEP(201),
2872 & BLR_STRAT, .TRUE., TOTAL_BYTES,
2873 & IDUMMY, BDUMMY, MEM_EFF_ALLOCATED
2874 & , .FALSE. ! UNDER_L0_OMP
2875 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
2876 & size(id%I8_L0_OMP,2)
2877 & )
2878.GT. IF (KEEP(400) 0 ) THEN ! L0 activated
2879 CALL ZMUMPS_MAX_MEM( id%KEEP(1),id%KEEP8(1),
2880 & id%MYID, N, id%NELT, id%NA(1), id%LNA, id%KEEP8(28),
2881 & id%KEEP8(30),
2882 & id%NSLAVES, TOTAL_MBYTES_UNDER_L0, .TRUE., id%KEEP(201),
2883 & BLR_STRAT, .TRUE., TOTAL_BYTES_UNDER_L0,
2884 & IDUMMY, BDUMMY, MEM_EFF_ALLOCATED
2885 & , .TRUE. ! UNDER_L0_OMP
2886 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
2887 & size(id%I8_L0_OMP,2)
2888 & )
2889 TOTAL_MBYTES = max (TOTAL_MBYTES,TOTAL_MBYTES_UNDER_L0)
2890 TOTAL_BYTES = max (TOTAL_BYTES, TOTAL_BYTES_UNDER_L0)
2891 ENDIF
2892.NE. IF (id%KEEP8(24)0) THEN
2893C WK_USER is not part of memory allocated by MUMPS
2894C and is not counted, id%KEEP8(23) should be zero
2895 id%INFO(16) = TOTAL_MBYTES
2896 ELSE
2897C Note that even for the case of ICNTL(23)>0
2898C we report here the memory effectively allocated
2899C that can be smaller than ICNTL(23) !
2900 id%INFO(16) = TOTAL_MBYTES
2901 ENDIF
2902C ----------------------------------------------------
2903C Centralize memory statistics on the host
2904C id%INFOG(18) = size of mem in Mbytes for facto,
2905C for the processor using largest memory
2906C id%INFOG(19) = size of mem in Mbytes for facto,
2907C sum over all processors
2908C ----------------------------------------------------
2909 CALL MUMPS_MEM_CENTRALIZE( id%MYID, id%COMM,
2910 & id%INFO(16), id%INFOG(18), IRANK )
2911 CALL ZMUMPS_PRINT_ALLOCATED_MEM( PROK, PROKG, PRINT_MAXAVG,
2912 & MP, MPG, id%INFO(16), id%INFOG(18), id%INFOG(19),
2913 & id%NSLAVES, IRANK,
2914 & id%KEEP(1) )
2915C If WK_USER is provided, this excludes WK_USER
2916 IF (PROK ) THEN
2917 WRITE(MP,'(A,I12) ')
2918 & ' ** Eff. min. Space MBYTES for facto (INFO(16)):',
2919 & TOTAL_MBYTES
2920 ENDIF
2921C ========================(INFO(16) RELATED)======================
2922C ---------------------------------------
2923C COMPUTE EFFECTIVE MEMORY USED INFO(22)
2924C ---------------------------------------
2925 PERLU_ON = .TRUE.
2926 MEM_EFF_ALLOCATED = .FALSE.
2927 CALL ZMUMPS_MAX_MEM( id%KEEP(1),id%KEEP8(1),
2928 & id%MYID, N, id%NELT, id%NA(1), id%LNA, id%KEEP8(28),
2929 & id%KEEP8(30),
2930 & id%NSLAVES, TOTAL_MBYTES, .TRUE., id%KEEP(201),
2931 & BLR_STRAT, PERLU_ON, TOTAL_BYTES,
2932 & IDUMMY, BDUMMY, MEM_EFF_ALLOCATED
2933 & , .FALSE. ! UNDER_L0_OMP
2934 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
2935 & size(id%I8_L0_OMP,2)
2936 & )
2937.GT. IF (KEEP(400) 0 ) THEN ! L0 activated
2938 CALL ZMUMPS_MAX_MEM( id%KEEP(1),id%KEEP8(1),
2939 & id%MYID, N, id%NELT, id%NA(1), id%LNA, id%KEEP8(28),
2940 & id%KEEP8(30),
2941 & id%NSLAVES, TOTAL_MBYTES_UNDER_L0, .TRUE., id%KEEP(201),
2942 & BLR_STRAT, PERLU_ON, TOTAL_BYTES_UNDER_L0,
2943 & IDUMMY, BDUMMY, MEM_EFF_ALLOCATED
2944 & , .TRUE. ! UNDER_L0_OMP
2945 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
2946 & size(id%I8_L0_OMP,2)
2947 & )
2948 TOTAL_MBYTES = max (TOTAL_MBYTES,TOTAL_MBYTES_UNDER_L0)
2949 TOTAL_BYTES = max (TOTAL_BYTES, TOTAL_BYTES_UNDER_L0)
2950 ENDIF
2951C -- TOTAL_BYTES and TOTAL_MBYTES includes both static
2952C -- (MAXS) and BLR structures computed as the SUM of the PEAKS
2953C -- (KEEP8(67) + KEEP8(70))
2954 id%KEEP8(7) = TOTAL_BYTES
2955C -- INFO(22) holds the effective space (in Mbytes) used by MUMPS
2956C -- (it includes part of WK_USER used if provided by user)
2957 id%INFO(22) = TOTAL_MBYTES
2958C ----------------------------------------------------
2959C Centralize memory statistics on the host
2960C INFOG(21) = size of effective mem (Mbytes) for facto,
2961C for the processor using largest memory
2962C INFOG(22) = size of effective mem (Mbytes) for facto,
2963C sum over all processors
2964C ----------------------------------------------------
2965 CALL MUMPS_MEM_CENTRALIZE( id%MYID, id%COMM,
2966 & id%INFO(22), id%INFOG(21), IRANK )
2967 IF ( PROKG ) THEN
2968 IF (PRINT_MAXAVG) THEN
2969 WRITE( MPG,'(A,I12) ')
2970 & ' ** Memory effectively used, max in Mbytes (INFOG(21)):',
2971 & id%INFOG(21)
2972 ENDIF
2973 WRITE( MPG,'(A,I12) ')
2974 & ' ** Memory effectively used, total in Mbytes (INFOG(22)):',
2975 & id%INFOG(22)
2976 END IF
2977 SUM_INFO22_THIS_NODE=0
2978 CALL MPI_REDUCE( id%INFO(22), SUM_INFO22_THIS_NODE, 1,
2979 & MPI_INTEGER,
2980 & MPI_SUM, 0, id%KEEP(411), IERR )
2981 CALL MPI_REDUCE( SUM_INFO22_THIS_NODE, MAX_SUM_INFO22_THIS_NODE,
2982 & 1, MPI_INTEGER, MPI_MAX, 0, id%COMM, IERR )
2983.AND. IF (PROKG PRINT_NODEINFO) THEN
2984 WRITE(MPG,'(A,I12)')
2985 & ' ** Max. effective space per compute node, in MBytes :',
2986 & MAX_SUM_INFO22_THIS_NODE
2987 ENDIF
2988C
2989 IF (I_AM_SLAVE) THEN
2990 K67 = id%KEEP8(67)
2991 K68 = id%KEEP8(68)
2992 K70 = id%KEEP8(70)
2993 K74 = id%KEEP8(74)
2994 K75 = id%KEEP8(75)
2995 ELSE
2996 K67 = 0_8
2997 K68 = 0_8
2998 K70 = 0_8
2999 K74 = 0_8
3000 K75 = 0_8
3001 ENDIF
3002C -- Save the number of entries effectively used
3003C in main working array S
3004 CALL MUMPS_SETI8TOI4(K67,id%INFO(21))
3005C
3006C
3007.GT. IF (KEEP(400) 0 ) THEN
3008.NOT. IF ( I_AM_SLAVE) THEN
3009 id%DKEEP(95) = 0.0D0
3010 id%DKEEP(16) = 0.0D0
3011 ENDIF
3012.GT. IF (id%NPROCS 1) THEN
3013C Compute average and max (across MPI's)
3014 CALL MPI_REDUCE(id%DKEEP(95), TMPTIME, 1,
3015 & MPI_DOUBLE_PRECISION, MPI_SUM, MASTER, id%COMM, IERR)
3016.EQ. IF (id%MYIDMASTER) TIMEAVG = TMPTIME
3017 CALL MPI_REDUCE(id%DKEEP(16), TMPFLOP, 1,
3018 & MPI_DOUBLE_PRECISION, MPI_SUM, MASTER, id%COMM, IERR)
3019.EQ. IF (id%MYIDMASTER) FLOPAVG = TMPFLOP
3020.EQ. IF (id%MYIDMASTER) THEN
3021 TIMEAVG = TIMEAVG / id%NSLAVES
3022 FLOPAVG = FLOPAVG / id%NSLAVES
3023 ENDIF
3024 CALL MPI_REDUCE(id%DKEEP(95), TIMEMAX, 1,
3025 & MPI_DOUBLE_PRECISION, MPI_MAX, MASTER, id%COMM, IERR)
3026 CALL MPI_REDUCE(id%DKEEP(16), FLOPMAX, 1,
3027 & MPI_DOUBLE_PRECISION, MPI_MAX, MASTER, id%COMM, IERR)
3028C (PROKG may only be true on master)
3029 IF ( PROKG ) THEN
3030 WRITE(MPG,190) FLOPAVG, FLOPMAX
3031 WRITE(MPG,188) TIMEAVG, TIMEMAX
3032 ENDIF
3033 ELSE
3034C Print DKEEP(95) directly without reduction
3035 IF ( PROKG ) THEN
3036 WRITE(MPG,189) id%DKEEP(16)
3037 WRITE(MPG,187) id%DKEEP(95)
3038 ENDIF
3039 ENDIF
3040 ENDIF
3041 IF ( PROKG ) THEN
3042.GE. IF (id%INFO(1) 0) THEN
3043 WRITE(MPG,180) id%DKEEP(94)
3044 ELSE
3045 WRITE(MPG,185) id%DKEEP(94)
3046 ENDIF
3047 ENDIF
3048C
3049C Sum RINFO(2) : total number of flops for assemblies
3050C Sum RINFO(3) : total number of flops for eliminations
3051C Initialize RINFO(4) in case BLR was not activated
3052 RINFO(4) = RINFO(3)
3053C
3054C Should work even if the master does some work
3055C
3056 CALL MPI_REDUCE( RINFO(2), RINFOG(2), 2,
3057 & MPI_DOUBLE_PRECISION,
3058 & MPI_SUM, MASTER, id%COMM, IERR)
3059C Reduce needed to dimension small working array
3060C on all procs during ZMUMPS_GATHER_SOLUTION
3061 KEEP(247) = 0
3062 CALL MPI_REDUCE( KEEP(246), KEEP(247), 1, MPI_INTEGER,
3063 & MPI_MAX, MASTER, id%COMM, IERR)
3064C
3065C Reduce compression times: get max compression times
3066 CALL MPI_REDUCE( id%DKEEP(97), id%DKEEP(98), 1,
3067 & MPI_DOUBLE_PRECISION,
3068 & MPI_MAX, MASTER, id%COMM, IERR)
3069C
3070 CALL MPI_REDUCE( RINFO(2), RINFOG(2), 2,
3071 & MPI_DOUBLE_PRECISION,
3072 & MPI_SUM, MASTER, id%COMM, IERR)
3073 CALL MUMPS_REDUCEI8( id%KEEP8(31)+id%KEEP8(64),id%KEEP8(6),
3074 & MPI_SUM, MASTER, id%COMM )
3075C
3076.EQ. IF (id%MYID0) THEN
3077C In MegaBytes
3078 RINFOG(16) = dble(id%KEEP8(6)*int(KEEP(35),8))/dble(1D6)
3079.LE. IF (KEEP(201)0) THEN
3080 RINFOG(16) = ZERO
3081 ENDIF
3082 ENDIF
3083 CALL MUMPS_REDUCEI8( id%KEEP8(48),id%KEEP8(148), MPI_SUM,
3084 & MASTER, id%COMM )
3085 CALL MUMPS_SETI8TOI4(id%KEEP8(148), INFOG(9))
3086C
3087 CALL MPI_REDUCE( int(id%INFO(10),8), id%KEEP8(128),
3088 & 1, MPI_INTEGER8,
3089 & MPI_SUM, MASTER, id%COMM, IERR)
3090.EQ. IF (id%MYIDMASTER) THEN
3091 CALL MUMPS_SETI8TOI4(id%KEEP8(128), id%INFOG(10))
3092 ENDIF
3093C Use MPI_MAX for this one to get largest front size
3094 CALL MPI_ALLREDUCE( id%INFO(11), INFOG(11), 1, MPI_INTEGER,
3095 & MPI_MAX, id%COMM, IERR)
3096C make maximum effective frontal size available on all procs
3097C for solve phase
3098C (Note that INFO(11) includes root size on root master)
3099 KEEP(133) = INFOG(11)
3100 CALL MPI_REDUCE( id%INFO(12), INFOG(12), 3, MPI_INTEGER,
3101 & MPI_SUM, MASTER, id%COMM, IERR)
3102 CALL MPI_REDUCE( KEEP(103), INFOG(25), 1, MPI_INTEGER,
3103 & MPI_SUM, MASTER, id%COMM, IERR)
3104 KEEP(229) = INFOG(25)
3105 CALL MPI_REDUCE( KEEP(105), INFOG(25), 1, MPI_INTEGER,
3106 & MPI_SUM, MASTER, id%COMM, IERR)
3107 KEEP(230) = INFOG(25)
3108C
3109 id%INFO(25) = KEEP(98)
3110 CALL MPI_ALLREDUCE( id%INFO(25), INFOG(25), 1, MPI_INTEGER,
3111 & MPI_SUM, id%COMM, IERR)
3112C Extra copies due to in-place stacking
3113 CALL MUMPS_REDUCEI8( id%KEEP8(8), id%KEEP8(108), MPI_SUM,
3114 & MASTER, id%COMM )
3115C Entries in factors
3116 CALL MUMPS_SETI8TOI4(id%KEEP8(10), id%INFO(27))
3117 CALL MUMPS_REDUCEI8( id%KEEP8(10),id%KEEP8(110), MPI_SUM,
3118 & MASTER, id%COMM )
3119 CALL MUMPS_SETI8TOI4(id%KEEP8(110), INFOG(29))
3120C Initialize INFO(28)/INFOG(35) in case BLR not activated
3121 id%INFO(28) = id%INFO(27)
3122 INFOG(35) = INFOG(29)
3123C ==============================
3124C LOW-RANK
3125C ==============================
3126.NE. IF ( KEEP(486) 0 ) THEN !LR is activated
3127C Compute and Save local amount of flops in case of BLR
3128 RINFO(4) = dble(FLOP_FRFRONTS + FLOP_FACTO_FR - FLOP_LRGAIN
3129 & + FLOP_COMPRESS + FLOP_FRFRONTS)
3130C
3131C Compute and Save local number of entries in compressed factors
3132C
3133 ITMP8 = id%KEEP8(10) - int(MRY_LU_LRGAIN,8)
3134 CALL MUMPS_SETI8TOI4( ITMP8, id%INFO(28))
3135C
3136 CALL MPI_REDUCE( MRY_LU_LRGAIN, TMP_MRY_LU_LRGAIN
3137 & , 1, MPI_DOUBLE_PRECISION,
3138 & MPI_SUM, MASTER, id%COMM, IERR)
3139 CALL MPI_REDUCE( MRY_LU_FR, TMP_MRY_LU_FR
3140 & , 1, MPI_DOUBLE_PRECISION,
3141 & MPI_SUM, MASTER, id%COMM, IERR)
3142 CALL MPI_REDUCE( MRY_CB_FR, TMP_MRY_CB_FR
3143 & , 1, MPI_DOUBLE_PRECISION,
3144 & MPI_SUM, MASTER, id%COMM, IERR)
3145 CALL MPI_REDUCE( MRY_CB_LRGAIN, TMP_MRY_CB_LRGAIN
3146 & , 1, MPI_DOUBLE_PRECISION,
3147 & MPI_SUM, MASTER, id%COMM, IERR)
3148 CALL MPI_REDUCE( FLOP_LRGAIN, TMP_FLOP_LRGAIN
3149 & , 1, MPI_DOUBLE_PRECISION,
3150 & MPI_SUM, MASTER, id%COMM, IERR)
3151 CALL MPI_REDUCE( FLOP_TRSM_FR, TMP_FLOP_TRSM_FR
3152 & , 1, MPI_DOUBLE_PRECISION,
3153 & MPI_SUM, MASTER, id%COMM, IERR)
3154 CALL MPI_REDUCE( FLOP_TRSM_LR, TMP_FLOP_TRSM_LR
3155 & , 1, MPI_DOUBLE_PRECISION,
3156 & MPI_SUM, MASTER, id%COMM, IERR)
3157 CALL MPI_REDUCE( FLOP_UPDATE_FR, TMP_FLOP_UPDATE_FR
3158 & , 1, MPI_DOUBLE_PRECISION,
3159 & MPI_SUM, MASTER, id%COMM, IERR)
3160 CALL MPI_REDUCE( FLOP_UPDATE_LR, TMP_FLOP_UPDATE_LR
3161 & , 1, MPI_DOUBLE_PRECISION,
3162 & MPI_SUM, MASTER, id%COMM, IERR)
3163 CALL MPI_REDUCE( FLOP_FRSWAP_COMPRESS,
3164 & TMP_FLOP_FRSWAP_COMPRESS
3165 & , 1, MPI_DOUBLE_PRECISION,
3166 & MPI_SUM, MASTER, id%COMM, IERR)
3167 CALL MPI_REDUCE( FLOP_MIDBLK_COMPRESS,
3168 & TMP_FLOP_MIDBLK_COMPRESS
3169 & , 1, MPI_DOUBLE_PRECISION,
3170 & MPI_SUM, MASTER, id%COMM, IERR)
3171 CALL MPI_REDUCE( FLOP_UPDATE_LRLR3, TMP_FLOP_UPDATE_LRLR3
3172 & , 1, MPI_DOUBLE_PRECISION,
3173 & MPI_SUM, MASTER, id%COMM, IERR)
3174 CALL MPI_REDUCE(FLOP_ACCUM_COMPRESS, TMP_FLOP_ACCUM_COMPRESS
3175 & , 1, MPI_DOUBLE_PRECISION,
3176 & MPI_SUM, MASTER, id%COMM, IERR)
3177 CALL MPI_REDUCE( FLOP_TRSM, TMP_FLOP_TRSM
3178 & , 1, MPI_DOUBLE_PRECISION,
3179 & MPI_SUM, MASTER, id%COMM, IERR)
3180 CALL MPI_REDUCE( FLOP_PANEL, TMP_FLOP_PANEL
3181 & , 1, MPI_DOUBLE_PRECISION,
3182 & MPI_SUM, MASTER, id%COMM, IERR)
3183 CALL MPI_REDUCE( FLOP_FRFRONTS, TMP_FLOP_FRFRONTS
3184 & , 1, MPI_DOUBLE_PRECISION,
3185 & MPI_SUM, MASTER, id%COMM, IERR)
3186 CALL MPI_REDUCE( FLOP_COMPRESS, TMP_FLOP_COMPRESS
3187 & , 1, MPI_DOUBLE_PRECISION,
3188 & MPI_SUM, MASTER, id%COMM, IERR)
3189 CALL MPI_REDUCE( FLOP_DECOMPRESS, TMP_FLOP_DECOMPRESS
3190 & , 1, MPI_DOUBLE_PRECISION,
3191 & MPI_SUM, MASTER, id%COMM, IERR)
3192 CALL MPI_REDUCE( FLOP_CB_COMPRESS, TMP_FLOP_CB_COMPRESS
3193 & , 1, MPI_DOUBLE_PRECISION,
3194 & MPI_SUM, MASTER, id%COMM, IERR)
3195 CALL MPI_REDUCE( FLOP_CB_DECOMPRESS,TMP_FLOP_CB_DECOMPRESS
3196 & , 1, MPI_DOUBLE_PRECISION,
3197 & MPI_SUM, MASTER, id%COMM, IERR)
3198 CALL MPI_REDUCE( FLOP_FACTO_FR, TMP_FLOP_FACTO_FR
3199 & , 1, MPI_DOUBLE_PRECISION,
3200 & MPI_SUM, MASTER, id%COMM, IERR)
3201 CALL MPI_REDUCE( CNT_NODES,TMP_CNT_NODES
3202 & , 1, MPI_INTEGER,
3203 & MPI_SUM, MASTER, id%COMM, IERR)
3204.GT. IF (id%NPROCS1) THEN
3205 FLOP_FACTO_LR = FLOP_FACTO_FR - FLOP_LRGAIN
3206 & + FLOP_COMPRESS + FLOP_FRFRONTS
3207 CALL MPI_REDUCE( FLOP_FACTO_LR, AVG_FLOP_FACTO_LR
3208 & , 1, MPI_DOUBLE_PRECISION,
3209 & MPI_SUM, MASTER, id%COMM, IERR)
3210.EQ. IF (id%MYIDMASTER) THEN
3211 AVG_FLOP_FACTO_LR = AVG_FLOP_FACTO_LR/id%NPROCS
3212 ENDIF
3213 CALL MPI_REDUCE( FLOP_FACTO_LR, MIN_FLOP_FACTO_LR
3214 & , 1, MPI_DOUBLE_PRECISION,
3215 & MPI_MIN, MASTER, id%COMM, IERR)
3216 CALL MPI_REDUCE( FLOP_FACTO_LR, MAX_FLOP_FACTO_LR
3217 & , 1, MPI_DOUBLE_PRECISION,
3218 & MPI_MAX, MASTER, id%COMM, IERR)
3219 ENDIF ! NPROCS > 1
3220 CALL MPI_REDUCE( TIME_UPDATE, TMP_TIME_UPDATE
3221 & , 1, MPI_DOUBLE_PRECISION,
3222 & MPI_SUM, MASTER, id%COMM, IERR)
3223 CALL MPI_REDUCE( TIME_UPDATE_LRLR1, TMP_TIME_UPDATE_LRLR1
3224 & , 1, MPI_DOUBLE_PRECISION,
3225 & MPI_SUM, MASTER, id%COMM, IERR)
3226 CALL MPI_REDUCE( TIME_UPDATE_LRLR2, TMP_TIME_UPDATE_LRLR2
3227 & , 1, MPI_DOUBLE_PRECISION,
3228 & MPI_SUM, MASTER, id%COMM, IERR)
3229 CALL MPI_REDUCE( TIME_UPDATE_LRLR3, TMP_TIME_UPDATE_LRLR3
3230 & , 1, MPI_DOUBLE_PRECISION,
3231 & MPI_SUM, MASTER, id%COMM, IERR)
3232 CALL MPI_REDUCE( TIME_UPDATE_FRLR, TMP_TIME_UPDATE_FRLR
3233 & , 1, MPI_DOUBLE_PRECISION,
3234 & MPI_SUM, MASTER, id%COMM, IERR)
3235 CALL MPI_REDUCE( TIME_UPDATE_FRFR, TMP_TIME_UPDATE_FRFR
3236 & , 1, MPI_DOUBLE_PRECISION,
3237 & MPI_SUM, MASTER, id%COMM, IERR)
3238 CALL MPI_REDUCE( TIME_DIAGCOPY, TMP_TIME_DIAGCOPY
3239 & , 1, MPI_DOUBLE_PRECISION,
3240 & MPI_SUM, MASTER, id%COMM, IERR)
3241 CALL MPI_REDUCE( TIME_COMPRESS,TMP_TIME_COMPRESS
3242 & , 1, MPI_DOUBLE_PRECISION,
3243 & MPI_SUM, MASTER, id%COMM, IERR)
3244 CALL MPI_REDUCE( TIME_MIDBLK_COMPRESS,
3245 & TMP_TIME_MIDBLK_COMPRESS
3246 & , 1, MPI_DOUBLE_PRECISION,
3247 & MPI_SUM, MASTER, id%COMM, IERR)
3248 CALL MPI_REDUCE( TIME_FRSWAP_COMPRESS,
3249 & TMP_TIME_FRSWAP_COMPRESS
3250 & , 1, MPI_DOUBLE_PRECISION,
3251 & MPI_SUM, MASTER, id%COMM, IERR)
3252 CALL MPI_REDUCE( TIME_CB_COMPRESS, TMP_TIME_CB_COMPRESS
3253 & , 1, MPI_DOUBLE_PRECISION,
3254 & MPI_SUM, MASTER, id%COMM, IERR)
3255 CALL MPI_REDUCE( TIME_DECOMP, TMP_TIME_DECOMP
3256 & , 1, MPI_DOUBLE_PRECISION,
3257 & MPI_SUM, MASTER, id%COMM, IERR)
3258 CALL MPI_REDUCE( TIME_DECOMP_UCFS, TMP_TIME_DECOMP_UCFS
3259 & , 1, MPI_DOUBLE_PRECISION,
3260 & MPI_SUM, MASTER, id%COMM, IERR)
3261 CALL MPI_REDUCE( TIME_DECOMP_ASM1, TMP_TIME_DECOMP_ASM1
3262 & , 1, MPI_DOUBLE_PRECISION,
3263 & MPI_SUM, MASTER, id%COMM, IERR)
3264 CALL MPI_REDUCE(TIME_DECOMP_LOCASM2, TMP_TIME_DECOMP_LOCASM2
3265 & , 1, MPI_DOUBLE_PRECISION,
3266 & MPI_SUM, MASTER, id%COMM, IERR)
3267 CALL MPI_REDUCE(TIME_DECOMP_MAPLIG1, TMP_TIME_DECOMP_MAPLIG1
3268 & , 1, MPI_DOUBLE_PRECISION,
3269 & MPI_SUM, MASTER, id%COMM, IERR)
3270 CALL MPI_REDUCE( TIME_DECOMP_ASMS2S, TMP_TIME_DECOMP_ASMS2S
3271 & , 1, MPI_DOUBLE_PRECISION,
3272 & MPI_SUM, MASTER, id%COMM, IERR)
3273 CALL MPI_REDUCE( TIME_DECOMP_ASMS2M, TMP_TIME_DECOMP_ASMS2M
3274 & , 1, MPI_DOUBLE_PRECISION,
3275 & MPI_SUM, MASTER, id%COMM, IERR)
3276 CALL MPI_REDUCE( TIME_PANEL, TMP_TIME_PANEL
3277 & , 1, MPI_DOUBLE_PRECISION,
3278 & MPI_SUM, MASTER, id%COMM, IERR)
3279 CALL MPI_REDUCE( TIME_FAC_I, TMP_TIME_FAC_I
3280 & , 1, MPI_DOUBLE_PRECISION,
3281 & MPI_SUM, MASTER, id%COMM, IERR)
3282 CALL MPI_REDUCE( TIME_FAC_MQ, TMP_TIME_FAC_MQ
3283 & , 1, MPI_DOUBLE_PRECISION,
3284 & MPI_SUM, MASTER, id%COMM, IERR)
3285 CALL MPI_REDUCE( TIME_FAC_SQ, TMP_TIME_FAC_SQ
3286 & , 1, MPI_DOUBLE_PRECISION,
3287 & MPI_SUM, MASTER, id%COMM, IERR)
3288 CALL MPI_REDUCE( TIME_LRTRSM, TMP_TIME_LRTRSM
3289 & , 1, MPI_DOUBLE_PRECISION,
3290 & MPI_SUM, MASTER, id%COMM, IERR)
3291 CALL MPI_REDUCE( TIME_FRTRSM, TMP_TIME_FRTRSM
3292 & , 1, MPI_DOUBLE_PRECISION,
3293 & MPI_SUM, MASTER, id%COMM, IERR)
3294 CALL MPI_REDUCE( TIME_FRFRONTS, TMP_TIME_FRFRONTS
3295 & , 1, MPI_DOUBLE_PRECISION,
3296 & MPI_SUM, MASTER, id%COMM, IERR)
3297 CALL MPI_REDUCE( TIME_LR_MODULE, TMP_TIME_LR_MODULE
3298 & , 1, MPI_DOUBLE_PRECISION,
3299 & MPI_SUM, MASTER, id%COMM, IERR)
3300.EQ. IF (id%MYIDMASTER) THEN
3301.GT. IF (id%NPROCS1) THEN
3302C rename the stat variable so that COMPUTE_GLOBAL_GAINS can work for any
3303C number of procs
3304 MRY_LU_FR = TMP_MRY_LU_FR
3305 MRY_LU_LRGAIN = TMP_MRY_LU_LRGAIN
3306 MRY_CB_FR = TMP_MRY_CB_FR
3307 MRY_CB_LRGAIN = TMP_MRY_CB_LRGAIN
3308 FLOP_LRGAIN = TMP_FLOP_LRGAIN
3309 FLOP_PANEL = TMP_FLOP_PANEL
3310 FLOP_TRSM = TMP_FLOP_TRSM
3311 FLOP_TRSM_FR = TMP_FLOP_TRSM_FR
3312 FLOP_TRSM_LR = TMP_FLOP_TRSM_LR
3313 FLOP_UPDATE_FR = TMP_FLOP_UPDATE_FR
3314 FLOP_UPDATE_LR = TMP_FLOP_UPDATE_LR
3315 FLOP_UPDATE_LRLR3 = TMP_FLOP_UPDATE_LRLR3
3316 FLOP_COMPRESS = TMP_FLOP_COMPRESS
3317 FLOP_MIDBLK_COMPRESS = TMP_FLOP_MIDBLK_COMPRESS
3318 FLOP_FRSWAP_COMPRESS = TMP_FLOP_FRSWAP_COMPRESS
3319 FLOP_ACCUM_COMPRESS = TMP_FLOP_ACCUM_COMPRESS
3320 FLOP_CB_COMPRESS = TMP_FLOP_CB_COMPRESS
3321 FLOP_DECOMPRESS = TMP_FLOP_DECOMPRESS
3322 FLOP_CB_DECOMPRESS = TMP_FLOP_CB_DECOMPRESS
3323 FLOP_FRFRONTS = TMP_FLOP_FRFRONTS
3324 FLOP_FACTO_FR = TMP_FLOP_FACTO_FR
3325 CNT_NODES = TMP_CNT_NODES
3326 TIME_UPDATE = TMP_TIME_UPDATE /id%NPROCS
3327 TIME_UPDATE_LRLR1 = TMP_TIME_UPDATE_LRLR1 /id%NPROCS
3328 TIME_UPDATE_LRLR2 = TMP_TIME_UPDATE_LRLR2 /id%NPROCS
3329 TIME_UPDATE_LRLR3 = TMP_TIME_UPDATE_LRLR3 /id%NPROCS
3330 TIME_UPDATE_FRLR = TMP_TIME_UPDATE_FRLR /id%NPROCS
3331 TIME_UPDATE_FRFR = TMP_TIME_UPDATE_FRFR /id%NPROCS
3332 TIME_COMPRESS = TMP_TIME_COMPRESS /id%NPROCS
3333 TIME_MIDBLK_COMPRESS = TMP_TIME_MIDBLK_COMPRESS/id%NPROCS
3334 TIME_FRSWAP_COMPRESS = TMP_TIME_FRSWAP_COMPRESS/id%NPROCS
3335 TIME_DIAGCOPY = TMP_TIME_DIAGCOPY /id%NPROCS
3336 TIME_CB_COMPRESS = TMP_TIME_CB_COMPRESS /id%NPROCS
3337 TIME_PANEL = TMP_TIME_PANEL /id%NPROCS
3338 TIME_FAC_I = TMP_TIME_FAC_I /id%NPROCS
3339 TIME_FAC_MQ = TMP_TIME_FAC_MQ /id%NPROCS
3340 TIME_FAC_SQ = TMP_TIME_FAC_SQ /id%NPROCS
3341 TIME_LRTRSM = TMP_TIME_LRTRSM /id%NPROCS
3342 TIME_FRTRSM = TMP_TIME_FRTRSM /id%NPROCS
3343 TIME_FRFRONTS = TMP_TIME_FRFRONTS /id%NPROCS
3344 TIME_LR_MODULE = TMP_TIME_LR_MODULE /id%NPROCS
3345 TIME_DECOMP = TMP_TIME_DECOMP /id%NPROCS
3346 TIME_DECOMP_UCFS = TMP_TIME_DECOMP_UCFS /id%NPROCS
3347 TIME_DECOMP_ASM1 = TMP_TIME_DECOMP_ASM1 /id%NPROCS
3348 TIME_DECOMP_LOCASM2 = TMP_TIME_DECOMP_LOCASM2 /id%NPROCS
3349 TIME_DECOMP_MAPLIG1 = TMP_TIME_DECOMP_MAPLIG1 /id%NPROCS
3350 TIME_DECOMP_ASMS2S = TMP_TIME_DECOMP_ASMS2S /id%NPROCS
3351 TIME_DECOMP_ASMS2M = TMP_TIME_DECOMP_ASMS2M /id%NPROCS
3352 ENDIF
3353 CALL COMPUTE_GLOBAL_GAINS(id%KEEP8(110),id%RINFOG(3),
3354 & id%KEEP8(49), PROKG, MPG)
3355C Number of entries in factor INFOG(35) in
3356C compressed form is updated as long as
3357C BLR is activated, this independently of the
3358C fact that factors are saved in LR.
3359 CALL MUMPS_SETI8TOI4(id%KEEP8(49), id%INFOG(35))
3360 FRONTWISE = 0
3361C WRITE gains also compute stats stored in DKEEP array
3362 IF (LPOK) THEN
3363 IF (CNTL(7) < 0.0D0) THEN
3364C Warning : using negative values is an experimental and
3365C non recommended setting.
3366 WRITE(LP,'(/A/,A/,A/,A,A)')
3367 & ' WARNING in BLR input setting',
3368 & ' CNTL(7) < 0 is experimental: ',
3369 & ' RRQR precision = |CNTL(7| x ||A_pre||, ',
3370 & ' where A_pre is the preprocessed matrix as defined',
3371 & ' in the Users guide '
3372 ENDIF
3373 ENDIF
3374 CALL SAVEandWRITE_GAINS(FRONTWISE,
3375 & KEEP(489), id%DKEEP, N, id%ICNTL(36),
3376 & KEEP(487), KEEP(488), KEEP(490),
3377 & KEEP(491), KEEP(50), KEEP(486),
3378 & KEEP(472), KEEP(475), KEEP(478), KEEP(480),
3379 & KEEP(481),
3380 & KEEP(483), KEEP(484),
3381 & id%KEEP8(110), id%KEEP8(49),
3382 & KEEP(28), id%NPROCS, MPG, PROKG)
3383C flops when BLR activated
3384 RINFOG(14) = id%DKEEP(56)
3385 ELSE
3386 RINFOG(14) = 0.0D00
3387 ENDIF
3388 ENDIF
3389C ==============================
3390C NULL PIVOTS AND RANK-REVEALING
3391C ==============================
3392.EQ. IF(KEEP(110) 1) THEN
3393C -- make available to users the local number of null pivots detected
3394C -- with ICNTL(24) = 1.
3395 id%INFO(18) = KEEP(109)
3396 CALL MPI_ALLREDUCE( KEEP(109), KEEP(112), 1, MPI_INTEGER,
3397 & MPI_SUM, id%COMM, IERR)
3398 ELSE
3399 id%INFO(18) = 0
3400 KEEP(109) = 0
3401 KEEP(112) = 0
3402 ENDIF
3403.EQ. IF (id%MYIDMASTER) THEN
3404C INFOG(28) deficiency resulting from ICNTL(24) and ICNTL(56).
3405 INFOG(28)=KEEP(112)+KEEP(17)
3406 ENDIF
3407C ========================================
3408C We now provide to the host the part of
3409C PIVNUL_LIST resulting from the processing
3410C of the root node and we update id%INFO(18)
3411C on the processor holding the root to
3412C include null pivots relative to the root
3413C ========================================
3414.NE. IF (KEEP(17) 0) THEN
3415.EQ. IF (id%MYID ID_ROOT) THEN
3416C Include in id%INFO(18) null pivots resulting
3417C from deficiency on the root. In this way,
3418C the sum of all id%INFO(18) is equal to INFOG(28).
3419 id%INFO(18)=id%INFO(18)+KEEP(17)
3420 ENDIF
3421.EQ. IF (ID_ROOT MASTER) THEN
3422.EQ. IF (id%MYIDMASTER) THEN
3423C --------------------------------------------------
3424C Null pivots of root have been stored in
3425C PIVNUL_LIST(KEEP(109)+1:KEEP(109)+KEEP(17).
3426C Shift them at the end of the list because:
3427C * this is what we need to build the null space
3428C * we would otherwise overwrite them on the host
3429C when gathering null pivots from other processors
3430C --------------------------------------------------
3431 DO I= KEEP(17), 1, -1
3432c DO I=1, KEEP(17) % incorrect
3433C when KEEP(112) < KEEP(109)+ KEEP(17)
3434 id%PIVNUL_LIST(KEEP(112)+I)=id%PIVNUL_LIST(KEEP(109)+I)
3435 ENDDO
3436 ENDIF
3437 ELSE
3438C ---------------------------------
3439C Null pivots of root must be sent
3440C from the processor responsible of
3441C the root to the host (or MASTER).
3442C ---------------------------------
3443.EQ. IF (id%MYID ID_ROOT) THEN
3444 CALL MPI_SEND(id%PIVNUL_LIST(KEEP(109)+1), KEEP(17),
3445 & MPI_INTEGER, MASTER, ZERO_PIV,
3446 & id%COMM, IERR)
3447.EQ. ELSE IF (id%MYID MASTER) THEN
3448 CALL MPI_RECV(id%PIVNUL_LIST(KEEP(112)+1), KEEP(17),
3449 & MPI_INTEGER, ID_ROOT, ZERO_PIV,
3450 & id%COMM, STATUS, IERR )
3451 ENDIF
3452 ENDIF
3453 ENDIF
3454C ===========================
3455C gather zero pivots indices
3456C on the host node
3457C ===========================
3458C In case of non working host, the following code also
3459C works considering that KEEP(109) is equal to 0 on
3460C the non-working host
3461.EQ. IF(KEEP(110) 1) THEN
3462 ALLOCATE(ITMP2(id%NPROCS),stat = IERR ) ! deallocated in 490
3463.GT. IF ( IERR 0 ) THEN
3464 id%INFO(1)=-13
3465 id%INFO(2)=id%NPROCS
3466 END IF
3467 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1),
3468 & id%COMM, id%MYID )
3469.LT. IF (id%INFO(1)0) GOTO 490
3470 CALL MPI_GATHER ( KEEP(109),1, MPI_INTEGER,
3471 & ITMP2(1), 1, MPI_INTEGER,
3472 & MASTER, id%COMM, IERR)
3473.EQ. IF(id%MYID MASTER) THEN
3474 POSBUF = ITMP2(1)+1
3475C First null pivot of master is in
3476C position 1 of global list
3477 KEEP(220)=1
3478 DO I = 1,id%NPROCS-1
3479 CALL MPI_RECV(id%PIVNUL_LIST(POSBUF), ITMP2(I+1),
3480 & MPI_INTEGER,I,
3481 & ZERO_PIV, id%COMM, STATUS, IERR)
3482C Send position POSBUF of first null pivot of proc I
3483C in global list. Will allow to quickly identify during
3484C the solve step if one is concerned by a global position
3485C K, 0 <= K <= INFOG(28).
3486 CALL MPI_SEND(POSBUF, 1, MPI_INTEGER, I, ZERO_PIV,
3487 & id%COMM, IERR)
3488 POSBUF = POSBUF + ITMP2(I+1)
3489 ENDDO
3490 ELSE
3491 CALL MPI_SEND( id%PIVNUL_LIST(1), KEEP(109), MPI_INTEGER,
3492 & MASTER,ZERO_PIV, id%COMM, IERR)
3493 CALL MPI_RECV( KEEP(220), 1, MPI_INTEGER, MASTER, ZERO_PIV,
3494 & id%COMM, STATUS, IERR )
3495 ENDIF
3496 ENDIF
3497C =====================================
3498C Statistics relative to min/max pivots
3499C =====================================
3500 CALL MPI_REDUCE( id%DKEEP(19), RINFOG(19), 1,
3501 & MPI_DOUBLE_PRECISION,
3502 & MPI_MIN, MASTER, id%COMM, IERR )
3503 CALL MPI_REDUCE( id%DKEEP(20), RINFOG(20), 1,
3504 & MPI_DOUBLE_PRECISION,
3505 & MPI_MIN, MASTER, id%COMM, IERR )
3506 CALL MPI_REDUCE( id%DKEEP(21), RINFOG(21), 1,
3507 & MPI_DOUBLE_PRECISION,
3508 & MPI_MAX, MASTER, id%COMM, IERR )
3509C =========================================
3510C Centralized number of swaps for pivoting
3511C =========================================
3512 CALL MPI_REDUCE( id%KEEP8(80), ITEMP8, 1, MPI_INTEGER8,
3513 & MPI_SUM, MASTER, id%COMM, IERR )
3514.EQ. IF (id%MYID MASTER) THEN
3515 CALL MUMPS_SETI8TOI4(ITEMP8,id%INFOG(48))
3516 ENDIF
3517C ==========================================
3518C Centralized largest increase of panel size
3519C ==========================================
3520 CALL MPI_REDUCE( id%KEEP(425), id%INFOG(49), 1, MPI_INTEGER,
3521 & MPI_MAX, MASTER, id%COMM, IERR )
3522C =====================================
3523C Statistics concerning the determinant
3524C =====================================
3525C
3526C 1/ on the host better take into account null pivots if scaling:
3527C
3528C Since null pivots are excluded from the computation
3529C of the determinant, we also exclude the corresponding
3530C scaling entries. Since those entries have already been
3531C taken into account before the factorization, we multiply
3532C the determinant on the host by the scaling values corresponding
3533C to pivots in PIVNUL_LIST.
3534.EQ..AND..NE. IF (id%MYIDMASTER LSCAL. AND. KEEP(258)0) THEN
3535 K = min(KEEP(143), KEEP(17))
3536 K = max(K, 0)
3537 DO I = 1, KEEP(112)+ K
3538c DO I = 1, id%INFOG(28) ! all null pivots + singular values
3539 CALL ZMUMPS_UPDATEDETER_SCALING(
3540 & id%ROWSCA(id%PIVNUL_LIST(I)),
3541 & id%DKEEP(6), KEEP(259))
3542 CALL ZMUMPS_UPDATEDETER_SCALING(
3543 & id%COLSCA(id%PIVNUL_LIST(I)),
3544 & id%DKEEP(6), KEEP(259))
3545 ENDDO
3546 ENDIF
3547C
3548C 2/ Swap signs depending on pivoting on each proc
3549C
3550.NE. IF (KEEP(258)0) THEN
3551C Return the determinant in INFOG(34) and RINFOG(12/13)
3552.EQ. IF (KEEP(260)-1) THEN ! Local to each processor
3553 id%DKEEP(6)=-id%DKEEP(6)
3554 id%DKEEP(7)=-id%DKEEP(7)
3555 ENDIF
3556C
3557C 3/ Perform a reduction
3558C
3559 CALL ZMUMPS_DETER_REDUCTION(
3560 & id%COMM, id%DKEEP(6), KEEP(259),
3561 & RINFOG(12), INFOG(34), id%NPROCS)
3562C
3563C 4/ Swap sign if needed
3564C
3565.EQ..AND..EQ. IF (id%KEEP(50)0 id%MYID MASTER) THEN
3566C Modify sign of determinant according
3567C to unsymmetric permutation (max-trans
3568C of max-weighted matching)
3569.NE. IF (id%KEEP(23)0) THEN
3570 CALL ZMUMPS_DETER_SIGN_PERM(
3571 & RINFOG(12), id%N,
3572C id%STEP: used as workspace of size N still
3573C allocated on master; restored on exit
3574 & id%STEP(1),
3575 & id%UNS_PERM(1) )
3576C Remark that RINFOG(12/13) are modified only
3577C on the host but will be broadcast on exit
3578C from MUMPS (see ZMUMPS_DRIVER)
3579 ENDIF
3580 ENDIF
3581 ENDIF
3582 490 IF (allocated(ITMP2)) DEALLOCATE(ITMP2)
3583 IF ( PROKG ) THEN
3584C -----------------------------
3585C PRINT STATISTICS (on master)
3586C -----------------------------
3587 WRITE(MPG,99984) RINFOG(2),RINFOG(3),KEEP(52),
3588 & id%KEEP8(148),
3589 & id%KEEP8(128), INFOG(11), id%KEEP8(110)
3590 IF (id%KEEP(50) == 0) THEN
3591 ! off diag pivots
3592 WRITE(MPG, 99985) INFOG(12)
3593 END IF
3594.NE. IF (id%KEEP(50) 1) THEN
3595 ! delayed pivots
3596 WRITE(MPG, 99982) INFOG(13)
3597 END IF
3598.NE. IF (KEEP(97) 0) THEN
3599 ! tiny pivots
3600 WRITE(MPG, '(A,D16.4)')
3601 & ' Effective static pivoting thresh., CNTL(4) =', SEUIL
3602 WRITE(MPG, 99986) INFOG(25)
3603 ENDIF
3604 IF (id%KEEP(50) == 2) THEN
3605 !number of 2x2 pivots in type 1 nodes
3606 WRITE(MPG, 99988) KEEP(229)
3607 !number of 2x2 pivots in type 2 nodes
3608 WRITE(MPG, 99989) KEEP(230)
3609 ENDIF
3610 !number of zero pivots
3611.NE. IF (KEEP(110) 0) THEN
3612 WRITE(MPG, 99991) KEEP(112)
3613 ENDIF
3614 !Deficiency on root
3615.ne. IF ( KEEP(19) 0 )
3616c IF ( KEEP(17) .ne. 0 )
3617 & WRITE(MPG, 99983) KEEP(17)
3618 !Total deficiency
3619.NE..OR..NE. IF (KEEP(110)0KEEP(19)0)
3620c IF (KEEP(110).NE.0.OR.KEEP(17).NE.0)
3621 & WRITE(MPG, 99992) KEEP(17)+KEEP(112)
3622 ! Memory compress
3623 WRITE(MPG, 99981) INFOG(14)
3624 ! Extra copies due to ip stack in unsym case
3625 ! in core case (or OLD_OOC_PANEL)
3626.GT. IF (id%KEEP8(108) 0_8) THEN
3627 WRITE(MPG, 99980) id%KEEP8(108)
3628 ENDIF
3629.NE..AND..GT. IF ((KEEP(60)0) INFOG(25)0) THEN
3630 ! Schur on and tiny pivots set in last level
3631 ! before the Schur if KEEP(114)=0
3632 WRITE(MPG, '(A)')
3633 & " ** warning static pivoting was necessary"
3634 WRITE(MPG, '(A)')
3635 & " ** to factor interior variables with schur on"
3636 ENDIF
3637.NE. IF (KEEP(258)0) THEN
3638 WRITE(MPG,99978) RINFOG(12)
3639 WRITE(MPG,99979) RINFOG(13)
3640 WRITE(MPG,99977) INFOG(34)
3641 ENDIF
3642 END IF
3643* ==========================================
3644*
3645* End of Factorization Phase
3646*
3647* ==========================================
3648C
3649C Goto 500 is done when
3650C LOAD_INIT
3651C OOC_INIT_FACTO
3652C MUMPS_FDM_INIT
3653#if ! defined(NO_FDM_DESCBAND)
3654C MUMPS_FDBD_INIT
3655#endif
3656#if ! defined(NO_FDM_MAPROW)
3657C MUMPS_FMRD_INIT
3658#endif
3659C are all called.
3660C
3661 500 CONTINUE
3662C Redo free DBLARR (as in end_driver.F)
3663C in case an error occurred after allocating
3664C DBLARR and before freeing it above.
3665.EQ..AND. IF (id%KEEP(46)1
3666.NE..AND. & id%KEEP(55)0
3667.EQ..AND. & id%MYIDMASTER
3668.EQ. & id%KEEP(52) 0) THEN
3669 NULLIFY(id%DBLARR)
3670 ELSE
3671 IF (associated(id%DBLARR)) THEN
3672 DEALLOCATE(id%DBLARR)
3673 NULLIFY(id%DBLARR)
3674 ENDIF
3675 ENDIF
3676#if ! defined(NO_FDM_DESCBAND)
3677 IF (I_AM_SLAVE) THEN
3678 CALL MUMPS_FDBD_END(id%INFO(1)) ! INFO(1): input only
3679 ENDIF
3680#endif
3681#if ! defined(NO_FDM_MAPROW)
3682 IF (I_AM_SLAVE) THEN
3683 CALL MUMPS_FMRD_END(id%INFO(1)) ! INFO(1): input only
3684 ENDIF
3685#endif
3686 IF (I_AM_SLAVE) THEN
3687C Terminate BLR module except if it is still needed for solve
3688 IF (
3689 & (
3690.EQ. & (KEEP(486)2)
3691 & )
3692.AND..GE. & id%INFO(1)0
3693 & ) THEN
3694C Store pointer to BLR_ARRAY in MUMPS structure
3695C (requires successful factorization otherwise module is freed)
3696 CALL ZMUMPS_BLR_MOD_TO_STRUC(id%BLRARRAY_ENCODING)
3697 ELSE
3698C INFO(1) positive or negative
3699 CALL ZMUMPS_BLR_END_MODULE(id%INFO(1), id%KEEP8, id%KEEP(34))
3700 ENDIF
3701 ENDIF
3702 IF (I_AM_SLAVE) THEN
3703 CALL MUMPS_FDM_END('A')
3704C Terminate BLR module except if it is still needed for solve
3705 IF (
3706 & (
3707.EQ. & (KEEP(486)2)
3708 & )
3709.AND..GE. & id%INFO(1)0
3710 & ) THEN
3711 CALL MUMPS_FDM_MOD_TO_STRUC('F', id%FDM_F_ENCODING,
3712 & id%INFO(1))
3713.NOT. IF ( associated(id%FDM_F_ENCODING)) THEN
3714 WRITE(*,*) "internal error 2 in zmumps_fac_driver"
3715 ENDIF
3716 ELSE
3717 CALL MUMPS_FDM_END('F')
3718 ENDIF
3719 ENDIF
3720C
3721C Goto 514 is done when an
3722C error occurred in MUMPS_FDM_INIT
3723C or (after FDM_INIT but before
3724C OOC_INIT)
3725C
3726 514 CONTINUE
3727 IF ( I_AM_SLAVE ) THEN
3728.EQ..OR..EQ. IF ((KEEP(201)1)(KEEP(201)2)) THEN
3729 CALL ZMUMPS_OOC_END_FACTO(id,IERR)
3730 IF (id%ASSOCIATED_OOC_FILES) THEN
3731 id%ASSOCIATED_OOC_FILES = .FALSE.
3732 ENDIF
3733.LT..AND..GE. IF (IERR0 id%INFO(1) 0) id%INFO(1) = IERR
3734 ENDIF
3735 IF (WK_USER_PROVIDED) THEN
3736C at the end of a phase S is always freed when WK_USER provided
3737 NULLIFY(id%S)
3738.NE. ELSE IF (KEEP(201)0) THEN
3739C ----------------------------------------
3740C In OOC or if KEEP(201).EQ.-1 we always
3741C free S at end of factorization. As id%S
3742C may be unassociated in case of error
3743C during or before the allocation of id%S,
3744C we only free S when it was associated.
3745C ----------------------------------------
3746 IF (associated(id%S)) DEALLOCATE(id%S)
3747 NULLIFY(id%S) ! in all cases
3748 id%KEEP8(23)=0_8
3749 ENDIF
3750 ELSE ! host not working
3751 IF (WK_USER_PROVIDED) THEN
3752C at the end of a phase S is always freed when WK_USER provided
3753 NULLIFY(id%S)
3754 ELSE
3755 IF (associated(id%S)) DEALLOCATE(id%S)
3756 NULLIFY(id%S) ! in all cases
3757 id%KEEP8(23)=0_8
3758 END IF
3759 END IF
3760C
3761C Goto 513 is done in case of error where LOAD_INIT was
3762C called but not OOC_INIT_FACTO.
3763 513 CONTINUE
3764 IF ( I_AM_SLAVE ) THEN
3765 CALL ZMUMPS_LOAD_END( id%INFO(1), id%NSLAVES, IERR )
3766.LT..AND..GE. IF (IERR0 id%INFO(1) 0) id%INFO(1) = IERR
3767 ENDIF
3768 CALL MUMPS_PROPINFO( ICNTL(1), id%INFO(1),
3769 & id%COMM, id%MYID )
3770C
3771C Goto 517 is done when an error occurs when GPU initialization
3772C has been performed but not LOAD_INIT or OOC_INIT_FACTO
3773C
3774 517 CONTINUE
3775C
3776C Goto 530 is done when an error occurs before
3777C the calls to GPU_INIT, LOAD_INIT and OOC_INIT_FACTO
3778 530 CONTINUE
3779C Fwd in facto: free RHS_MUMPS in case
3780C it was allocated.
3781 IF (RHS_MUMPS_ALLOCATED) DEALLOCATE(RHS_MUMPS)
3782 NULLIFY(RHS_MUMPS)
3783C
3784 id%KEEP8(26) = KEEP826_SAVE
3785 RETURN
3786 120 FORMAT(/' Local redistrib: data local/sent =',I16,I16)
3787 125 FORMAT(/' Redistrib: total data local/sent =',I16,I16)
3788 130 FORMAT(//'****** FACTORIZATION STEP ********'/)
3789 160 FORMAT(
3790 & /' Elapsed time to reformat/distribute matrix =',F12.4)
3791 166 FORMAT(' Max difference from 1 after scaling the entries',
3792 & ' for ONE-NORM (option 7/8) =',D9.2)
3793 170 FORMAT(' STATISTICS PRIOR NUMERICAL FACTORIZATION ...'/
3794 & ' Size of internal working array S =',I16/
3795 & ' Size of internal working array IS =',I16/
3796 & ' Minimum (ICNTL(14)=0) size of S =',I16/
3797 & ' Minimum (ICNTL(14)=0) size of IS =',I16/
3798 & ' Real space for original matrix =',I16/
3799 & ' Integer space for original matrix =',I16/
3800 & ' INFO(3) Real space for factors (estimated) =',I16/
3801 & ' INFO(4) Integer space for factors (estim.) =',I16/
3802 & ' Maximum frontal size (estimated) =',I16)
3803 172 FORMAT(' GLOBAL STATISTICS PRIOR NUMERICAL FACTORIZATION ...'/
3804 & ' Number of working processes =',I16/
3805 & ' ICNTL(22) Out-of-core option =',I16/
3806 & ' ICNTL(35) BLR activation (eff. choice) =',I16/
3807 & ' ICNTL(14) Memory relaxation =',I16/
3808 & ' INFOG(3) Real space for factors (estimated)=',I16/
3809 & ' INFOG(4) Integer space for factors (estim.)=',I16/
3810 & ' Maximum frontal size (estimated) =',I16/
3811 & ' Number of nodes in the tree =',I16/
3812 & ' ICNTL(23) Memory allowed (value on host) =',I16/
3813 & ' Sum over all procs =',I16/
3814 & ' Memory provided by user, sum of LWK_USER =',I16/
3815 & ' Effective threshold for pivoting, CNTL(1) =',D16.4)
3816 173 FORMAT( ' Perform forward during facto, NRHS =',I16)
3817 174 FORMAT( ' KEEP(268) Relaxed pivoting effective value =',I16)
3818 180 FORMAT(/' Elapsed time for factorization =',F12.4)
3819 185 FORMAT(/' Elapsed time for (failed) factorization =',F12.4)
3820 187 FORMAT( ' Elapsed time under L0 =',F12.4)
3821 188 FORMAT( ' Elapsed time under L0 (avg/max across MPI) =',
3822 & F12.4,F12.4)
3823 189 FORMAT(/' Flops under L0 layer =',1PD12.3)
3824 190 FORMAT(/' Flops under L0 Layer (avg/max across MPI) =',
3825 & 1PD12.3,1PD12.3)
382699977 FORMAT( ' INFOG(34) Determinant (base 2 exponent) =',I16)
382799978 FORMAT( ' RINFOG(12) Determinant (real part) =',F16.8)
382899979 FORMAT( ' RINFOG(12) Determinant (imaginary part) =',F16.8)
382999980 FORMAT( ' Extra copies due to In-Place stacking =',I16)
383099981 FORMAT( ' INFOG(14) Number of memory compress =',I16)
383199982 FORMAT( ' INFOG(13) Number of delayed pivots =',I16)
383299983 FORMAT( ' Nb of singularities detected by ICNTL(56) =',I16)
383399991 FORMAT( ' Nb of null pivots detected by ICNTL(24) =',I16)
383499992 FORMAT( ' INFOG(28) Estimated deficiency =',I16)
383599984 FORMAT(/'Leaving factorization with ...'/
3836 & ' RINFOG(2) Operations in node assembly =',1PD10.3/
3837 & ' ------(3) Operations in node elimination =',1PD10.3/
3838 & ' ICNTL (8) Scaling effectively used =',I16/
3839 & ' INFOG (9) Real space for factors =',I16/
3840 & ' INFOG(10) Integer space for factors =',I16/
3841 & ' INFOG(11) Maximum front size =',I16/
3842 & ' INFOG(29) Number of entries in factors =',I16)
384399985 FORMAT( ' INFOG(12) Number of off diagonal pivots =',I16)
384499986 FORMAT( ' INFOG(25) Number of tiny pivots(static) =',I16)
384599988 FORMAT( ' Number of 2x2 pivots in type 1 nodes =',I16)
384699989 FORMAT( ' Number of 2x2 pivots in type 2 nodes =',I16)
3847 END SUBROUTINE ZMUMPS_FAC_DRIVER
3848C
3849 SUBROUTINE ZMUMPS_PRINT_ALLOCATED_MEM( PROK, PROKG, PRINT_MAXAVG,
3850 & MP, MPG, INFO16, INFOG18, INFOG19, NSLAVES, IRANK, KEEP )
3851 IMPLICIT NONE
3852C
3853C Purpose:
3854C =======
3855C Print memory allocated during factorization
3856C - called at beginning of factorization in full-rank
3857C - called at end of factorization in low-rank (because
3858C of dynamic allocations)
3859C
3860 LOGICAL, INTENT(IN) :: PROK, PROKG, PRINT_MAXAVG
3861 INTEGER, INTENT(IN) :: MP, MPG, INFO16, INFOG18, INFOG19
3862 INTEGER, INTENT(IN) :: IRANK, NSLAVES
3863 INTEGER, INTENT(IN) :: KEEP(500)
3864C
3865 IF ( PROKG ) THEN
3866 IF (PRINT_MAXAVG) THEN
3867 WRITE( MPG,'(A,I12) ')
3868 & ' ** Memory allocated, max in Mbytes (INFOG(18)):',
3869 & INFOG18
3870 ENDIF
3871 WRITE( MPG,'(/A,I12) ')
3872 & ' ** Memory allocated, total in Mbytes (INFOG(19)):',
3873 & INFOG19
3874 END IF
3875 RETURN
3876 END SUBROUTINE ZMUMPS_PRINT_ALLOCATED_MEM
3877 SUBROUTINE ZMUMPS_AVGMAX_STAT8(PROKG, MPG, VAL, NSLAVES,
3878 & PRINT_MAXAVG, COMM, MSG)
3879 IMPLICIT NONE
3880 INCLUDE 'mpif.h'
3881 LOGICAL, intent(in) :: PROKG
3882 INTEGER, intent(in) :: MPG
3883 INTEGER(8), intent(in) :: VAL
3884 INTEGER, intent(in) :: NSLAVES
3885 LOGICAL, intent(in) :: PRINT_MAXAVG
3886 INTEGER, intent(in) :: COMM
3887 CHARACTER*48 MSG
3888C Local
3889 INTEGER(8) MAX_VAL
3890 INTEGER IERR, MASTER
3891 DOUBLE PRECISION LOC_VAL, AVG_VAL
3892 PARAMETER(MASTER=0)
3893C
3894 CALL MUMPS_REDUCEI8( VAL, MAX_VAL, MPI_MAX, MASTER, COMM)
3895 LOC_VAL = dble(VAL)/dble(NSLAVES)
3896 CALL MPI_REDUCE( LOC_VAL, AVG_VAL, 1, MPI_DOUBLE_PRECISION,
3897 & MPI_SUM, MASTER, COMM, IERR )
3898 IF (PROKG) THEN
3899 IF (PRINT_MAXAVG) THEN
3900 WRITE(MPG,100) " average", MSG, int(AVG_VAL,8)
3901 ELSE
3902 WRITE(MPG,110) MSG, MAX_VAL
3903 ENDIF
3904 ENDIF
3905 RETURN
3906 100 FORMAT(A8,A48,I18)
3907 110 FORMAT(A48,I18)
3908 END SUBROUTINE ZMUMPS_AVGMAX_STAT8
3909C
3910 SUBROUTINE ZMUMPS_EXTRACT_SCHUR_REDRHS(id)
3911 USE ZMUMPS_STRUC_DEF
3912 IMPLICIT NONE
3913C
3914C Purpose
3915C =======
3916C
3917C Extract the Schur and possibly also the reduced right-hand side
3918C (if Fwd in facto) from the processor working on Schur and copy
3919C it into the user datastructures id%SCHUR and id%REDRHS on the host.
3920C This routine assumes that the integer list of the Schur has not
3921C been permuted and still corresponds to LISTVAR_SCHUR.
3922C
3923C If the Schur is centralized, the master of the Schur holds the
3924C Schur and possibly also the reduced right-hand side.
3925C If the Schur is distribued (already built in user's datastructure),
3926C then the master of the Schur may hold the reduced right-hand side,
3927C in which case it is available in root%RHS_CNTR_MASTER_ROOT.
3928C
3929 TYPE(ZMUMPS_STRUC) :: id
3930C
3931C Local variables
3932C ===============
3933C
3934 INCLUDE 'mpif.h'
3935 INCLUDE 'mumps_tags.h'
3936 INCLUDE 'mumps_headers.h'
3937 INTEGER :: STATUS(MPI_STATUS_SIZE)
3938 INTEGER :: IERR
3939 INTEGER, PARAMETER :: MASTER = 0
3940 INTEGER :: ID_SCHUR, SIZE_SCHUR, LD_SCHUR, IB, BL4
3941 INTEGER(4) :: I4 ! 32-bit even in 64-bit version
3942 INTEGER :: ROW_LENGTH, I
3943 INTEGER(8) :: SURFSCHUR8, BL8, SHIFT8
3944 INTEGER(8) :: ISCHUR_SRC, ISCHUR_DEST, ISCHUR_SYM, ISCHUR_UNS
3945C
3946C External functions
3947C ==================
3948C
3949 INTEGER MUMPS_PROCNODE
3950 EXTERNAL MUMPS_PROCNODE
3951C Quick return in case factorization did not terminate correctly
3952.LT. IF (id%INFO(1) 0) RETURN
3953C Quick return if Schur option off
3954.EQ. IF (id%KEEP(60) 0) RETURN
3955C Get Schur id
3956 ID_SCHUR =MUMPS_PROCNODE(
3957 & id%PROCNODE_STEPS(id%STEP(max(id%KEEP(20),id%KEEP(38)))),
3958 & id%KEEP(199))
3959.NE. IF ( id%KEEP( 46 ) 1 ) THEN
3960 ID_SCHUR = ID_SCHUR + 1
3961 END IF
3962C Get size of Schur
3963.EQ. IF (id%MYIDID_SCHUR) THEN
3964.EQ. IF (id%KEEP(60)1) THEN
3965C Sequential Schur
3966 LD_SCHUR =
3967 & id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))+2+id%KEEP(IXSZ))
3968 SIZE_SCHUR = LD_SCHUR - id%KEEP(253)
3969 ELSE
3970C Parallel Schur
3971 LD_SCHUR = -999999 ! not used
3972 SIZE_SCHUR = id%root%TOT_ROOT_SIZE
3973 ENDIF
3974.EQ. ELSE IF (id%MYID MASTER) THEN
3975 SIZE_SCHUR = id%KEEP(116)
3976 LD_SCHUR = -44444 ! Not used
3977 ELSE
3978C Proc is not concerned with Schur, return
3979 RETURN
3980 ENDIF
3981 SURFSCHUR8 = int(SIZE_SCHUR,8)*int(SIZE_SCHUR,8)
3982C =================================
3983C Case of parallel Schur: if REDRHS
3984C was requested, obtain it directly
3985C from id%root%RHS_CNTR_MASTER_ROOT
3986C =================================
3987.GT. IF (id%KEEP(60) 1) THEN
3988.EQ..AND..GT. IF (id%KEEP(221)1 id%KEEP(252)0) THEN
3989 DO I = 1, id%KEEP(253)
3990.EQ. IF (ID_SCHURMASTER) THEN ! Necessarily = id%MYID
3991 CALL zcopy(SIZE_SCHUR,
3992 & id%root%RHS_CNTR_MASTER_ROOT((I-1)*SIZE_SCHUR+1), 1,
3993 & id%REDRHS((I-1)*id%LREDRHS+1), 1)
3994 ELSE
3995.EQ. IF (id%MYIDID_SCHUR) THEN
3996C Send
3997 CALL MPI_SEND(
3998 & id%root%RHS_CNTR_MASTER_ROOT((I-1)*SIZE_SCHUR+1),
3999 & SIZE_SCHUR,
4000 & MPI_DOUBLE_COMPLEX,
4001 & MASTER, TAG_SCHUR,
4002 & id%COMM, IERR )
4003.EQ. ELSE ! MYIDMASTER
4004C Receive
4005 CALL MPI_RECV( id%REDRHS((I-1)*id%LREDRHS+1),
4006 & SIZE_SCHUR,
4007 & MPI_DOUBLE_COMPLEX, ID_SCHUR, TAG_SCHUR,
4008 & id%COMM, STATUS, IERR )
4009 ENDIF
4010 ENDIF
4011 ENDDO
4012C ------------------------------
4013C In case of parallel Schur, we
4014C free root%RHS_CNTR_MASTER_ROOT
4015C ------------------------------
4016.EQ. IF (id%MYIDID_SCHUR) THEN
4017 DEALLOCATE(id%root%RHS_CNTR_MASTER_ROOT)
4018 NULLIFY (id%root%RHS_CNTR_MASTER_ROOT)
4019 ENDIF
4020 ENDIF
4021C return because this is all we need to do
4022C in case of parallel Schur complement
4023 RETURN
4024 ENDIF
4025C ============================
4026C Centralized Schur complement
4027C ============================
4028C PTRAST has been freed at the moment of calling this
4029C routine. Schur is available through
4030C PTRFAC(IW( PTLUST_S( STEP(KEEP(20)) ) + 4 +KEEP(IXSZ) ))
4031.EQ. IF (id%KEEP(252)0) THEN
4032C CASE 1 (ORIGINAL CODE):
4033C Schur is contiguous on ID_SCHUR
4034.EQ. IF ( ID_SCHUR MASTER ) THEN ! Necessarily equals id%MYID
4035C ---------------------
4036C Copy Schur complement
4037C ---------------------
4038 CALL ZMUMPS_COPYI8SIZE( SURFSCHUR8,
4039 & id%S(id%PTRFAC(id%STEP(id%KEEP(20)))),
4040 & id%SCHUR(1) )
4041 ELSE
4042C -----------------------------------------
4043C The processor responsible of the Schur
4044C complement sends it to the host processor
4045C Use blocks to avoid too large messages.
4046C -----------------------------------------
4047 BL8=int(huge(I4)/id%KEEP(35)/10,8)
4048 DO IB=1, int((SURFSCHUR8+BL8-1_8) / BL8)
4049 SHIFT8 = int(IB-1,8) * BL8 ! Where to send
4050 BL4 = int(min(BL8,SURFSCHUR8-SHIFT8)) ! Size of block
4051.eq. IF ( id%MYID ID_SCHUR ) THEN
4052C Send Schur complement
4053 CALL MPI_SEND( id%S( SHIFT8 +
4054 & id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))
4055 & +4+id%KEEP(IXSZ)))),
4056 & BL4,
4057 & MPI_DOUBLE_COMPLEX,
4058 & MASTER, TAG_SCHUR,
4059 & id%COMM, IERR )
4060.eq. ELSE IF ( id%MYID MASTER ) THEN
4061C Receive Schur complement
4062 CALL MPI_RECV( id%SCHUR(1_8 + SHIFT8),
4063 & BL4,
4064 & MPI_DOUBLE_COMPLEX, ID_SCHUR, TAG_SCHUR,
4065 & id%COMM, STATUS, IERR )
4066 END IF
4067 ENDDO
4068 END IF
4069 ELSE
4070C CASE 2 (Fwd in facto): Schur is not contiguous on ID_SCHUR,
4071C process it row by row.
4072C
4073C 2.1: We first centralize Schur complement into id%SCHUR
4074 ISCHUR_SRC = id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))
4075 & +4+id%KEEP(IXSZ)))
4076 ISCHUR_DEST= 1_8
4077 DO I=1, SIZE_SCHUR
4078 ROW_LENGTH = SIZE_SCHUR
4079.EQ. IF (ID_SCHURMASTER) THEN ! Necessarily = id%MYID
4080 CALL zcopy(ROW_LENGTH, id%S(ISCHUR_SRC), 1,
4081 & id%SCHUR(ISCHUR_DEST),1)
4082 ELSE
4083.EQ. IF (id%MYIDID_SCHUR) THEN
4084C Send
4085 CALL MPI_SEND( id%S(ISCHUR_SRC), ROW_LENGTH,
4086 & MPI_DOUBLE_COMPLEX,
4087 & MASTER, TAG_SCHUR,
4088 & id%COMM, IERR )
4089 ELSE
4090C Recv
4091 CALL MPI_RECV( id%SCHUR(ISCHUR_DEST),
4092 & ROW_LENGTH,
4093 & MPI_DOUBLE_COMPLEX, ID_SCHUR, TAG_SCHUR,
4094 & id%COMM, STATUS, IERR )
4095 ENDIF
4096 ENDIF
4097 ISCHUR_SRC = ISCHUR_SRC+int(LD_SCHUR,8)
4098 ISCHUR_DEST= ISCHUR_DEST+int(SIZE_SCHUR,8)
4099 ENDDO
4100C 2.2: Get REDRHS on host
4101C 2.2.1: Symmetric => REDRHS is available in last KEEP(253)
4102C rows of Schur structure on ID_SCHUR
4103C 2.2.2: Unsymmetric => REDRHS corresponds to last KEEP(253)
4104C columns. However it must be transposed.
4105.EQ. IF (id%KEEP(221)1) THEN ! Implies Fwd in facto
4106 ISCHUR_SYM = id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))
4107 & +4+id%KEEP(IXSZ))) + int(SIZE_SCHUR,8) *
4108 & int(LD_SCHUR,8)
4109 ISCHUR_UNS =
4110 & id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))
4111 & +4+id%KEEP(IXSZ))) + int(SIZE_SCHUR,8)
4112 ISCHUR_DEST = 1_8
4113 DO I = 1, id%KEEP(253)
4114.EQ. IF (ID_SCHUR MASTER) THEN ! necessarily = id%MYID
4115.EQ. IF (id%KEEP(50) 0) THEN
4116 CALL zcopy(SIZE_SCHUR, id%S(ISCHUR_UNS), LD_SCHUR,
4117 & id%REDRHS(ISCHUR_DEST), 1)
4118 ELSE
4119 CALL zcopy(SIZE_SCHUR, id%S(ISCHUR_SYM), 1,
4120 & id%REDRHS(ISCHUR_DEST), 1)
4121 ENDIF
4122 ELSE
4123.NE. IF (id%MYID MASTER) THEN
4124.EQ. IF (id%KEEP(50) 0) THEN
4125C Use id%S(ISCHUR_SYM) as temporary contig. workspace
4126C of size SIZE_SCHUR.
4127 CALL zcopy(SIZE_SCHUR, id%S(ISCHUR_UNS), LD_SCHUR,
4128 & id%S(ISCHUR_SYM), 1)
4129 ENDIF
4130 CALL MPI_SEND(id%S(ISCHUR_SYM), SIZE_SCHUR,
4131 & MPI_DOUBLE_COMPLEX, MASTER, TAG_SCHUR,
4132 & id%COMM, IERR )
4133 ELSE
4134 CALL MPI_RECV(id%REDRHS(ISCHUR_DEST),
4135 & SIZE_SCHUR, MPI_DOUBLE_COMPLEX, ID_SCHUR, TAG_SCHUR,
4136 & id%COMM, STATUS, IERR )
4137 ENDIF
4138 ENDIF
4139.EQ. IF (id%KEEP(50)0) THEN
4140 ISCHUR_UNS = ISCHUR_UNS + int(LD_SCHUR,8)
4141 ELSE
4142 ISCHUR_SYM = ISCHUR_SYM + int(LD_SCHUR,8)
4143 ENDIF
4144 ISCHUR_DEST = ISCHUR_DEST + int(id%LREDRHS,8)
4145 ENDDO
4146 ENDIF
4147 ENDIF
4148 RETURN
4149 END SUBROUTINE ZMUMPS_EXTRACT_SCHUR_REDRHS
#define mumps_abort
Definition VE_Metis.h:25
subroutine mumps_propinfo(icntl, info, comm, id)
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine mpi_reduce(sendbuf, recvbuf, cnt, datatype, op, root, comm, ierr)
Definition mpi.f:120
subroutine mpi_comm_split(comm, color, key, comm2, ierr)
Definition mpi.f:272
subroutine mpi_allreduce(sendbuf, recvbuf, cnt, datatype, operation, comm, ierr)
Definition mpi.f:103
subroutine mpi_comm_size(comm, size, ierr)
Definition mpi.f:263
subroutine mpi_bcast(buffer, cnt, datatype, root, comm, ierr)
Definition mpi.f:205
subroutine mpi_comm_free(comm, ierr)
Definition mpi.f:238
subroutine mpi_comm_rank(comm, rank, ierr)
Definition mpi.f:254
subroutine, public mumps_fdbd_init(initial_size, info)
subroutine, public mumps_fmrd_init(initial_size, info)
subroutine, public mumps_fdm_init(what, initial_size, info)
subroutine, public zmumps_buf_max_array_minsize(nfs4father, ierr)
subroutine, public zmumps_buf_alloc_small_buf(size, ierr)
subroutine, public zmumps_buf_dist_irecv_size(zmumps_lbufr_bytes)
subroutine, public zmumps_buf_ini_myid(myid)
subroutine, public zmumps_free_l0_omp_factors(id_l0_omp_factors)
subroutine, public zmumps_init_l0_omp_factors(id_l0_omp_factors)
subroutine, public zmumps_load_init(id, memory_md_arg, maxs)
subroutine, public zmumps_load_set_inicost(cost_subtree_arg, k64, dk15, k375, maxs)
subroutine, public zmumps_load_update(check_flops, process_bande, inc_load, keep, keep8)
subroutine, public zmumps_blr_init_module(initial_size, info)
subroutine, public zmumps_blr_mod_to_struc(id_blrarray_encoding)
subroutine, public zmumps_blr_end_module(info1, keep8, k34, lrsolve_act_opt)
subroutine init_stats_global(id)
Definition zlr_stats.F:344
subroutine zmumps_clean_ooc_data(id, ierr)
Definition zmumps_ooc.F:568
subroutine, public zmumps_ooc_init_facto(id, maxs)
Definition zmumps_ooc.F:114
subroutine static(v, vr, a, ar, ms, in, igrnod, weight_md, wfext)
Definition static.F:33
subroutine mumps_secfin(t)
subroutine mumps_seti8toi4(i8, i)
integer function mumps_get_pool_length(max_active_nodes, keep, keep8)
subroutine mumps_set_ierror(size8, ierror)
subroutine mumps_secdeb(t)
subroutine mumps_npiv_critical_path(n, nsteps, step, frere, fils, na, lna, ne, maxnpivtree)
subroutine zmumps_facto_recv_arrowhd2(n, dblarr, ldblarr, intarr, lintarr, ptraiw, ptrarw, keep, keep8, myid, comm, nbrecords, a, la, root, procnode_steps, slavef, perm, frere_steps, step, info1, info2)
subroutine zmumps_facto_send_arrowheads(n, nz, aspk, irn, icn, perm, lscal, colsca, rowsca, myid, slavef, procnode_steps, nbrecords, lp, comm, root, keep, keep8, fils, rg2l, intarr, lintarr, dblarr, ldblarr, ptraiw, ptrarw, frere_steps, step, a, la, istep_to_iniv2, i_am_cand, candidates)
subroutine zmumps_free_id_data_modules(id_fdm_f_encoding, id_blrarray_encoding, keep8, k34)
subroutine zmumps_deter_square(deter, nexp)
subroutine zmumps_deter_scaling_inverse(deter, nexp)
subroutine zmumps_updatedeter_scaling(piv, deter, nexp)
subroutine zmumps_elt_distrib(n, nelt, na_elt8, comm, myid, slavef, ielptr_loc8, relptr_loc8, eltvar_loc, eltval_loc, lintarr, ldblarr, keep, keep8, maxelt_size, frtptr, frtelt, a, la, fils, id, root)
subroutine zmumps_maxelt_size(eltptr, nelt, maxelt_size)
subroutine zmumps_redistribution(n, nz_loc8, id, dblarr, ldblarr, intarr, lintarr, ptraiw, ptrarw, keep, keep8, myid, comm, nbrecords a, la, root, procnode_steps, slavef, perm, step, icntl, info, nsend8, nlocal8, istep_to_iniv2, candidates)
subroutine zmumps_avgmax_stat8(prokg, mpg, val, nslaves, print_maxavg, comm, msg)
subroutine zmumps_fac_driver(id)
Definition zfac_driver.F:15
subroutine zmumps_anorminf(id, anorminf, lscal, eff_size_schur)
subroutine zmumps_fac_a(n, nz8, nsca, aspk, irn, icn, colsca, rowsca, wk, lwk8, wk_real, lwk_real, icntl, info)
subroutine zmumps_simscaleabs(irn_loc, jcn_loc, a_loc, nz_loc, m, n, numprocs, myid, comm, rpartvec, cpartvec, rsndrcvsz, csndrcvsz, registre, iwrk, iwrksz, intsz, resz, op, rowsca, colsca, wrkrc, iszwrkrc, sym, nb1, nb2, nb3, eps, onenormerr, infnormerr)
subroutine zmumps_mem_allowed_set_maxs(maxs, blr_strat, ooc_strat, maxs_estim_relaxed8, keep, keep8, myid, n, nelt, na, lna, nslaves, icntl38, icntl39, iflag, ierror, i8_l0_omp, nbstats_i8, nbcols_i8)
Definition ztools.F:1250
subroutine zmumps_set_blrstrat_and_maxs_k8(maxs_base8, maxs_base_relaxed8, blr_strat, keep, keep8)
Definition ztools.F:1165
subroutine zmumps_l0_compute_peak_allowed(myid, n, nelt, na, lna, nslaves, blr_strat, ooc_strat, keep, keep8, iflag, ierror, i8_l0_omp, nbstats_i8, nbcols_i8)
Definition ztools.F:1479
subroutine zmumps_mem_allowed_set_k75(maxs, myid, under_l0_omp, n, nelt, na, lna, nslaves, blr_strat, ooc_strat, keep, keep8, iflag, ierror, i8_l0_omp, nbstats_i8, nbcols_i8)
Definition ztools.F:1432
subroutine zmumps_init_root_fac(n, root, fils, iroot, keep, info)