23 & FILS,STEP, FRERE, DAD, ISTEP_TO_INIV2, TAB_POS_IN_PERE,
24 & PTRIST, PTRAST, PIMASTER, PAMASTER, PTRARW, PTRAIW, ITLOC,
25 & RHS_MUMPS, RINFO, NBROOT, NBRTOT, NBROOT_UNDER_L0, UU, ICNTL,
26 & PTLUST_S, PTRFAC, INFO, KEEP, KEEP8, PROCNODE_STEPS,SLAVEF,
27 & COMM_NODES, MYID_NODES, BUFR, LBUFR, LBUFR_BYTES,
28 & INTARR, DBLARR, root, PERM, NELT, FRTPTR, FRTELT, LPTRAR,
29 & COMM_LOAD, ASS_IRECV, SEUIL, SEUIL_LDLT_NIV2,
30 & MEM_DISTRIB, NE, DKEEP,PIVNUL_LIST,LPN_LIST,
31 & LPOOL_B_L0_OMP, IPOOL_B_L0_OMP,
32 & L_VIRT_L0_OMP, VIRT_L0_OMP, VIRT_L0_OMP_MAPPING,
33 & L_PHYS_L0_OMP, PHYS_L0_OMP, PERM_L0_OMP, PTR_LEAFS_L0_OMP,
34 & L0_OMP_MAPPING, LL0_OMP_MAPPING, THREAD_LA,
35 & MUMPS_TPS_ARR, SMUMPS_TPS_ARR,
36 & NSTEPSW, OPASSW, OPELIW, NELVAW, COMP,
37 & MAXFRW, NMAXNPIVW, NPVW, NOFFNEGW,
38 & NB22T1W, NBTINYW, DET_EXPW, DET_MANTW, DET_SIGNW,
39 & LRGROUPS, L0_OMP_FACTORS, LL0_OMP_FACTORS,
40 & I4_L0_OMP, NBSTATS_I4, NBCOLS_I4,
41 & I8_L0_OMP, NBSTATS_I8, NBCOLS_I8 )
53 include
'mumps_tags.h'
54 include
'mumps_headers.h'
55 TYPE (SMUMPS_ROOT_STRUC) :: root
56 INTEGER N,LIW, LPTRAR,
58 INTEGER SLAVEF, COMM_NODES, MYID_NODES
59 INTEGER,
DIMENSION(0: SLAVEF - 1) :: MEM_DISTRIB
60 INTEGER KEEP(500), ICNTL(60)
62 INTEGER(8),
INTENT(IN) :: THREAD_LA
63 INTEGER PROCNODE_STEPS(KEEP(28))
64 INTEGER ITLOC(N+KEEP(253))
65 REAL :: RHS_MUMPS(KEEP(255))
66 INTEGER NSTK_STEPS(KEEP(28))
67 INTEGER(8),
INTENT(IN) :: PTRARW(LPTRAR), PTRAIW(LPTRAR)
69 INTEGER FILS(N),PTRIST(KEEP(28))
70 INTEGER STEP(N), FRERE(KEEP(28)), DAD(KEEP(28))
71 INTEGER PIMASTER(KEEP(28))
72 INTEGER PTLUST_S(KEEP(28)), PERM(N)
73 INTEGER ISTEP_TO_INIV2(KEEP(71)),
74 & tab_pos_in_pere(slavef+2,
max(1,keep(56)))
77 INTEGER(8) :: PAMASTER(KEEP(28)), PTRAST(KEEP(28))
78 INTEGER(8) :: PTRFAC(KEEP(28))
81 INTEGER,
intent(out) :: NBROOT_UNDER_L0
83 REAL , SEUIL, SEUIL_LDLT_NIV2
85 INTEGER FRTPTR( N+1 ), FRTELT( NELT )
86 INTEGER LBUFR, LBUFR_BYTES
88 REAL DBLARR( KEEP8(26) )
89 INTEGER INTARR( KEEP8(27) )
91 INTEGER PIVNUL_LIST(LPN_LIST)
93 DOUBLE PRECISION :: OPASSW, OPELIW
95 INTEGER,
INTENT ( IN ) :: LPOOL_B_L0_OMP
96 INTEGER,
INTENT ( IN ) :: IPOOL_B_L0_OMP
98 INTEGER,
INTENT ( IN ) :: L_PHYS_L0_OMP
99 INTEGER,
INTENT ( IN ) :: PHYS_L0_OMP( L_PHYS_L0_OMP )
100 INTEGER,
INTENT ( IN ) :: L_VIRT_L0_OMP
101 INTEGER,
INTENT ( IN ) :: VIRT_L0_OMP( L_VIRT_L0_OMP )
102 INTEGER,
INTENT ( IN ) :: VIRT_L0_OMP_MAPPING( L_VIRT_L0_OMP )
103 INTEGER,
INTENT ( IN ) :: PERM_L0_OMP( L_PHYS_L0_OMP )
104 INTEGER,
INTENT ( IN ) :: PTR_LEAFS_L0_OMP( L_PHYS_L0_OMP + 1)
105 INTEGER,
INTENT ( IN ) :: LL0_OMP_MAPPING
106 INTEGER,
INTENT ( OUT ) :: L0_OMP_MAPPING( LL0_OMP_MAPPING )
107 TYPE (MUMPS_TPS_T),
DIMENSION(:) :: MUMPS_TPS_ARR
108 TYPE (SMUMPS_TPS_T),
DIMENSION(:) :: SMUMPS_TPS_ARR
109 INTEGER,
INTENT ( IN ) :: LL0_OMP_FACTORS
110 TYPE (),
INTENT(INOUT) ::
111 & l0_omp_factors(ll0_omp_factors)
112 INTEGER,
INTENT (IN) :: NBSTATS_I4, NBSTATS_I8
113 INTEGER,
INTENT (IN) :: NBCOLS_I4, NBCOLS_I8
114 INTEGER,
INTENT (IN) :: I4_L0_OMP(NBSTATS_I4, NBCOLS_I4)
115 INTEGER(8),
INTENT (IN) :: I8_L0_OMP(NBSTATS_I8, NBCOLS_I8)
116 EXTERNAL MUMPS_TYPENODE, MUMPS_PROCNODE
117 INTEGER MUMPS_TYPENODE, MUMPS_PROCNODE
118 LOGICAL ,MUMPS_ROOTSSARBR
120 LOGICAL SMUMPS_POOL_EMPTY
124 INTEGER :: MYTHREAD_ID, ITH
125 INTEGER :: THREAD_ID_P
126 DOUBLE PRECISION,
PARAMETER :: DZERO = 0.0d0, done = 1.0d0
131 LOGICAL AVOID_DELAYED
132 INTEGER NBROOT_PROCESSED
133 INTEGER MAXFRW, NPVW, NMAXNPIVW, NOFFNEGW, NELVAW, COMP
134 INTEGER :: NB22T1W, NBTINYW, DET_EXPW, DET_SIGNW
136 DOUBLE PRECISION FLOP_ESTIM_ACC
139 INTEGER LIWFAC, STRAT, TYPEFile, NextPiv2beWritten
143 INTEGER,
DIMENSION(:),
ALLOCATABLE :: IPOOL_P
144 INTEGER(8) :: TO_ALLOCATE
145 INTEGER,
DIMENSION(:),
ALLOCATABLE :: ID
146 INTEGER(8),
DIMENSION(:),
ALLOCATABLE :: VAL
147 INTEGER(8),
ALLOCATABLE,
DIMENSION(:) :: STATE, SIZE_COPIED
148 INTEGER :: NbWaitMem, NbFinished, NbOnGoingCopies, NbUnderL0
149 INTEGER(8) :: KEEP8_77_SAVE
150 DOUBLE PRECISION :: GTIME
151 INTEGER(8) :: MEMDISPO_UNDERL0, MEMDISPO_PERTHREAD
153 INTEGER :: VIRTUAL_TASK, PHYSICAL_TASK
155 INTEGER :: I, INFO_P(2)
172 flop_estim_acc = dzero
177 det_mantw =
cmplx(1.0e0,0.0e0, kind=kind(1.0e0))
179 DO ith = 1, keep(400)
180 NULLIFY(mumps_tps_arr(ith)%IW)
181 NULLIFY(mumps_tps_arr(ith)%ITLOC)
182 NULLIFY(smumps_tps_arr(ith)%A)
184 & mumps_tps_arr(ith)%LA,
185 & mumps_tps_arr(ith)%LIW, blr_strat,
187 & i4_l0_omp(1,ith), nbstats_i4,
188 & i8_l0_omp(1,ith), nbstats_i8)
190 IF (keep8(4) .NE. 0_8)
THEN
192 & mumps_tps_arr, keep(400),keep8, keep,
193 & n, blr_strat, lpool_b_l0_omp,
194 & i8_l0_omp, nbstats_i8,
197 memdispo_underl0 =
max(memdispo_underl0/2_8,0_8)
198 keep8(77) = keep8(77) + memdispo_underl0
199 memdispo_perthread = 0_8
200 IF (memdispo_underl0.GT.0)
THEN
201 memdispo_perthread = memdispo_underl0/(int(keep(400),8))
203 DO ith = 1, keep(400)
204 mumps_tps_arr(ith)%LA = mumps_tps_arr(ith)%LA +
208 DO ith = 1, keep(400)
209 mumps_tps_arr(ith)%LRLU = mumps_tps_arr(ith)%LA
210 mumps_tps_arr(ith)%LRLUS = mumps_tps_arr(ith)%LA
211 mumps_tps_arr(ith)%LRLUSM = mumps_tps_arr(ith)%LA
212 mumps_tps_arr(ith)%IPTRLU = mumps_tps_arr(ith)%LA
213 mumps_tps_arr(ith)%POSFAC = 1_8
214 mumps_tps_arr(ith)%IWPOS = 1
215 mumps_tps_arr(ith)%IWPOSCB = mumps_tps_arr(ith)%LIW
217 IF (keep(406) .EQ. 2 )
THEN
218 ALLOCATE(state(keep(400)), size_copied(keep(400)), stat=allocok)
219 IF (allocok .GT. 0 )
THEN
220 WRITE(*,*)
"Problem allocating STATE/SIZE_COPIED", keep(400)
224 & nbwaitmem, nbfinished, nbongoingcopies, nbunderl0,
230!$omp& ipool_p, lpool_p, leaf, inode, ifath, info_p, i, i8,
243 lpool_p = lpool_b_l0_omp
246 avoid_delayed = .false.
255 & info_p(1), info_p(2) )
256 IF (info_p(1) .LT. 0)
GOTO 700
257 ALLOCATE ( smumps_tps_arr(thread_id_p)%A(
258 &
max(1_8,mumps_tps_arr(thread_id_p)%LA) ),
260 IF (allocok.GT.0)
THEN
267 & mumps_tps_arr(thread_id_p)%LA,
269 & keep8, info_p(1), info_p(2),
271 IF (info_p(1) .LT. 0)
THEN
276 & ((int(mumps_tps_arr(thread_id_p)%LIW,8) * int(keep(34),8 )) /
278 & ((int(lpool_p,8) * int(keep(34),8 )) / int(keep(35),8 ))+
279 & ((int(n+keep(253),8) * int(keep(34),8 )) / int(keep(35),8 ))
281 & keep8, info_p(1), info_p(2) )
282 IF ( info_p(1) .LT. 0 )
GOTO 700
283 ALLOCATE ( mumps_tps_arr(thread_id_p)%IW(
284 & mumps_tps_arr(thread_id_p)%LIW ),
285 & ipool_p( lpool_p ),
286 & mumps_tps_arr(thread_id_p)%ITLOC ( n + keep(253) ),
288 IF ( allocok .GT. 0 )
THEN
290 info_p(2) = mumps_tps_arr(thread_id_p)%LIW +
291 & lpool_p + n+keep(253)
295 & keep(405).EQ.1, keep8,
296 & info_p(1), info_p(2), .true., .false. )
297 IF (info_p(1) .LT. 0)
THEN
302 & .false., .false., myid_nodes, n, keep, keep8, dkeep,
303 & mumps_tps_arr(thread_id_p)%IW(1),
304 & mumps_tps_arr(thread_id_p)%LIW,
305 & smumps_tps_arr(thread_id_p)%A(1),
306 & mumps_tps_arr(thread_id_p)%LA,
307 & mumps_tps_arr(thread_id_p)%LRLU,
308 & mumps_tps_arr(thread_id_p)%IPTRLU,
309 & mumps_tps_arr(thread_id_p)%IWPOS,
310 & mumps_tps_arr(thread_id_p)%IWPOSCB,
311 & slavef, procnode_steps, dad,
312 & ptrist, ptrast, step, pimaster,
313 & pamaster, keep(ixsz), 0_8, -444, -444, .true.,
314 & comp, mumps_tps_arr(thread_id_p)%LRLUS,
315 & mumps_tps_arr(thread_id_p)%LRLUSM,
316 & info_p(1), info_p(2)
320 mumps_tps_arr(thread_id_p)%ITLOC = 0
322 virtual_task = virtual_task + 1
323 IF ( virtual_task .LT. l_virt_l0_omp )
THEN
324 IF ( virt_l0_omp_mapping( virtual_task ) .EQ. thread_id_p )
THEN
326 & virt_l0_omp( virtual_task ),
327 & virt_l0_omp( virtual_task + 1 ) - 1
328 DO i = ptr_leafs_l0_omp( perm_l0_omp( physical_task )+1 ) + 1,
329 & ptr_leafs_l0_omp( perm_l0_omp( physical_task ) )
330 IF (ipool_b_l0_omp(i) .GT. 0)
THEN
333 & procnode_steps, slavef, keep(199), keep(28), 3, 0, 1, step,
334 & ipool_b_l0_omp(i) )
338 & .NOT. smumps_pool_empty( ipool_p(1), lpool_p )
339 & .AND. info_p(1) .GE. 0 )
341 & procnode_steps, slavef, step, inode, keep, keep8, myid_nodes,
344 l0_omp_mapping( step( inode ) ) = thread_id_p
345 ifath = dad( step( inode ) )
347 IF ( ifath .NE. 0 )
THEN
353 & inode,
TYPE, typef, mumps_tps_arr(thread_id_p)%LA, MUMPS_TPS_ARR
354 & (thread_id_p)%IW(1), mumps_tps_arr(thread_id_p)%LIW,
355 & smumps_tps_arr(thread_id_p)%A(1), maxfrw, noffnegw, npvw,
356 & nb22t1w, nbtinyw, det_expw, det_mantw, det_signw, info_p, uu,
357 & seuil, seuil_ldlt_niv2, opeliw, nelvaw, nmaxnpivw, nstepsw,
358 & ptrist, ptlust_s, ptrfac, ptrast, step, pimaster, pamaster,
359 & ne, mumps_tps_arr(thread_id_p)%POSFAC,
360 & mumps_tps_arr(thread_id_p)%LRLU,
361 & mumps_tps_arr(thread_id_p)%LRLUS, mumps_tps_arr(thread_id_p)%
362 % LRLUSM, mumps_tps_arr(thread_id_p)%IPTRLU, icntl, keep, keep8,
363 & dkeep, pivnul_list, lpn_list, comp, mumps_tps_arr(thread_id_p)%
364 & iwpos, mumps_tps_arr(thread_id_p)%IWPOSCB, procnode_steps,
365 & slavef, ifath, comm_nodes, myid_nodes, ipool_p, lpool_p, leaf,
366 & perm, nstk_steps, bufr, lbufr, lbufr_bytes,
367 & nbfin, root, opassw, mumps_tps_arr(thread_id_p)%ITLOC(1),
368 & rhs_mumps, fils, ptrarw, ptraiw
369 & lptrar, nelt, frtptr, frtelt, istep_to_iniv2, tab_pos_in_pere,
370 & lrgroups, flop_estim_acc )
371 IF (info_p(1) .LT. 0)
THEN
374 IF ( ifath .NE. 0 )
THEN
375 IF ( phys_l0_omp( perm_l0_omp( physical_task ) )
377 nstk_steps( step( ifath ) ) =
378 & nstk_steps( step ( ifath ) ) - 1
379 IF ( nstk_steps( step( ifath ) ) .EQ. 0 )
THEN
385 nstk_steps( step( ifath ) ) =
386 & nstk_steps( step( ifath ) ) - 1
390 nbroot_processed = nbroot_processed + 1
398 IF (
associated(mumps_tps_arr(thread_id_p)%ITLOC))
THEN
399 DEALLOCATE(mumps_tps_arr(thread_id_p)%ITLOC)
400 NULLIFY(mumps_tps_arr(thread_id_p)%ITLOC)
402 & -(int(n+keep(253),8) * int(keep(34),8 )) / int(keep(35),8),
403 & keep(405).EQ.1, keep8,
404 & info_p(1), info_p(2), .true., .false. )
406 IF (
allocated(ipool_p))
THEN
409 & -(int(lpool_p,8) * int(keep(34),8 )) / int(keep(35),8),
410 & keep(405).EQ.1, keep8,
411 & info_p(1), info_p(2), .true., .false. )
413 IF ( keep(406) .EQ. 2)
THEN
415 & mumps_tps_arr, smumps_tps_arr,
416 & l0_omp_factors, ll0_omp_factors,
417 & state, size_copied,
418 & nbwaitmem, nbfinished, nbongoingcopies, nbunderl0,
419 & myid_nodes, n, slavef,
420 & step, ptrast, pamaster, procnode_steps, dad,
421 & keep, keep8, info_p
424 IF ((keep(407) .EQ. 1) .OR. (keep(406) .EQ.1) )
THEN
425 IF (info_p(1) .GE. 0)
THEN
430 & myid_nodes, n, slavef, keep, keep8,
431 & mumps_tps_arr(thread_id_p)%IW(1),
432 & mumps_tps_arr(thread_id_p)%LIW,
433 & mumps_tps_arr(thread_id_p)%IWPOSCB,
434 & mumps_tps_arr(thread_id_p)%IWPOS,
435 & smumps_tps_arr(thread_id_p)%A(1),
436 & mumps_tps_arr(thread_id_p)%LA,
437 & mumps_tps_arr(thread_id_p)%LRLU,
438 & mumps_tps_arr(thread_id_p)%IPTRLU,
439 & mumps_tps_arr(thread_id_p)%LRLUS,
440 & step, ptrast, pamaster, procnode_steps, dad,
441 & info_p(1), info_p(2) )
444 IF (keep(406) .EQ.1)
THEN
445 IF (info_p(1) .GE.0 )
THEN
446 to_allocate =
max(mumps_tps_arr(thread_id_p)%POSFAC-1,1_8)
448 & keep8, info_p(1), info_p(2) )
450 IF (info_p(1) .GE.0 )
THEN
451 ALLOCATE(l0_omp_factors(thread_id_p)%A(to_allocate),
453 IF (allocok .GT. 0)
THEN
456 l0_omp_factors(thread_id_p)%LA = 0_8
458 l0_omp_factors(thread_id_p)%LA =
459 & mumps_tps_arr(thread_id_p)%POSFAC-1_8
461 & l0_omp_factors(thread_id_p)%LA, keep(405).EQ.1, keep8,
462 & info_p(1), info_p(2), .true., .false. )
465 IF (info_p(1) .GE.0 )
THEN
466 DO i8 = 1_8, mumps_tps_arr(thread_id_p)%POSFAC-1_8
467 l0_omp_factors(thread_id_p)%A(i8) =
468 & smumps_tps_arr(thread_id_p)%A(i8)
471 IF (
associated(smumps_tps_arr(thread_id_p)%A))
THEN
472 DEALLOCATE(smumps_tps_arr(thread_id_p)%A)
473 NULLIFY(smumps_tps_arr(thread_id_p)%A)
475 & -mumps_tps_arr(thread_id_p)%LA,
478 & info_p(1), info_p(2),
480 IF (info_p(1) .GE. 0)
THEN
482 keep8(69) = keep8(69) - l0_omp_factors(thread_id_p)%LA
488 IF (info_p(1) .LT.0)
THEN
493 ELSE IF (info_p(1) .GE. 0)
THEN
495 IF (info(1) .EQ. 0)
THEN
501#if defined(WORKAROUNDINTELILP64OPENMPLIMITATION)
502!$
CALL omp_set_num_threads(int(nomp_save,4))
507 IF (info(1) .LT. 0)
THEN
508 IF (icntl(1) .GT. 0 .AND. icntl(4) .GE.1 )
THEN
509 WRITE(icntl(1),
'(A,I6,I16,A,I5,A)')
510 &
"** ERROR DURING L0_OMP: INFO(1:2)=",
511 & info(1), info(2),
" (MPI worker ", myid_nodes,
")"
514 IF ( keep(406) .EQ. 0 )
THEN
515 ALLOCATE(id(keep(400)), val(keep(400)),
517 IF ( allocok .GT. 0 )
THEN
522 DO mythread_id = 1, keep(400)
523 val(mythread_id) = mumps_tps_arr( mythread_id )%POSFAC-1_8
524 id(mythread_id) = mythread_id
528 mythread_id = id(ith)
529 IF ((keep(407).NE.1) .AND. (keep(406).EQ.0))
THEN
530 IF (info(1) .GE. 0)
THEN
535 & myid_nodes, n, slavef, keep, keep8,
536 & mumps_tps_arr(mythread_id)%IW(1),
537 & mumps_tps_arr(mythread_id)%LIW,
538 & mumps_tps_arr(mythread_id)%IWPOSCB,
539 & mumps_tps_arr(mythread_id)%IWPOS,
540 & smumps_tps_arr(mythread_id)%A(1),
541 & mumps_tps_arr(mythread_id)%LA,
542 & mumps_tps_arr(mythread_id)%LRLU,
543 & mumps_tps_arr(mythread_id)%IPTRLU,
544 & mumps_tps_arr(mythread_id)%LRLUS,
545 & step, ptrast, pamaster, procnode_steps, dad,
549 IF (keep(406).EQ.0)
THEN
550 IF (info(1) .GE. 0 )
THEN
551 to_allocate =
max(mumps_tps_arr(mythread_id)%POSFAC-1,1_8)
553 & keep8, info(1), info(2) )
555 IF (info(1) .GE.0 )
THEN
556 ALLOCATE(l0_omp_factors(mythread_id)%A(to_allocate),
558 IF (allocok .GT. 0)
THEN
561 l0_omp_factors(mythread_id)%LA = 0_8
563 l0_omp_factors(mythread_id)%LA =
564 & mumps_tps_arr(mythread_id)%POSFAC-1_8
566 & l0_omp_factors(mythread_id)%LA,
567 & keep(405).EQ.1, keep8,
568 & info(1), info(2), .true., .false. )
571 IF (info(1) .GE. 0)
THEN
581 DO i8 = 1_8, mumps_tps_arr(mythread_id)%POSFAC-1_8
582 l0_omp_factors(mythread_id)%A(i8) =
583 & smumps_tps_arr(mythread_id)%A(i8)
587 IF (
associated(smumps_tps_arr(mythread_id)%A))
THEN
588 DEALLOCATE(smumps_tps_arr(mythread_id)%A)
589 NULLIFY(smumps_tps_arr(mythread_id)%A)
591 & -mumps_tps_arr(mythread_id)%LA,
592 & keep(405).EQ.1, keep8,
595 IF (info(1).GE.0)
THEN
596 keep8(69) = keep8(69) - l0_omp_factors(mythread_id)%LA
601 IF (
ALLOCATED(id))
DEALLOCATE(id)
602 IF (
ALLOCATED(val))
DEALLOCATE(val)
607 keep8(64) = keep8(64) + mumps_tps_arr(i)%POSFAC - 1_8
611 keep8(62) = keep8(62) + mumps_tps_arr(i)%LRLUSM
613 nbroot_under_l0 = nbroot_processed
846 & MUMPS_TPS_ARR, SMUMPS_TPS_ARR,
847 & L0_OMP_FACTORS, LL0_OMP_FACTORS,
848 & STATE, SIZE_COPIED,
849 & NbWaitMem, NbFinished, NbOngoingCopies, NbUnderL0,
850 & MYID_NODES, N, SLAVEF,
851 & STEP, PTRAST, PAMASTER, PROCNODE_STEPS, DAD,
852 & KEEP, KEEP8, INFO_P
858 INTEGER(8) :: KEEP8(150)
859 INTEGER,
INTENT(IN) :: THREAD_ID_P
860 INTEGER,
INTENT(INOUT) :: INFO_P(2)
861 INTEGER,
INTENT(IN) :: MYID_NODES, N, SLAVEF
862 INTEGER,
INTENT(IN) :: STEP(N), DAD(KEEP(28))
863 INTEGER(8),
INTENT(IN) :: PAMASTER(KEEP(28)), PTRAST(KEEP(28))
864 INTEGER,
INTENT(IN) :: PROCNODE_STEPS(KEEP(28))
865 INTEGER,
INTENT(INOUT) :: NbWaitMem,
869 INTEGER(8),
INTENT(INOUT) :: STATE( KEEP(400) )
870 INTEGER(8),
INTENT(INOUT) :: SIZE_COPIED(KEEP(400) )
871 TYPE (MUMPS_TPS_T),
DIMENSION(:) :: MUMPS_TPS_ARR
872 TYPE (SMUMPS_TPS_T),
DIMENSION(:) ::
873 INTEGER,
INTENT ( IN ) :: LL0_OMP_FACTORS
874 TYPE (SMUMPS_L0OMPFAC_T),
INTENT(INOUT) ::
875 & l0_omp_factors(ll0_omp_factors)
876 INTEGER :: LOCAL_ACTION
877 INTEGER,
PARAMETER :: NOTHING = 0
878 INTEGER,
PARAMETER :: FREE_WORK_MYID = 1
879 INTEGER,
PARAMETER :: COPY_FACTORS = 2
880 INTEGER,
PARAMETER :: AllocateViderCB = 3
881 INTEGER,
PARAMETER :: DORMIR = 4
882 INTEGER(8) :: COPY_START, CHUNK8, I8, TO_ALLOCATE
885 INTEGER(8) :: PeakAuthorized_P
886 INTEGER(8) :: MemNeeded_P, MemNeededForCB_P, MemDispo_P,
887 & CBCopiedToDynamic_P, LRLUS_SAVE_P
888 INTEGER(8) :: KEEP8_71, KEEP8_73
891 IF ( info_p(1) .LT. 0 )
THEN
892 nbfinished = nbfinished + 1
895 DO ith = 1, keep(400)
896 IF ( state(ith) .EQ.
waitmem )
THEN
902 DO WHILE ( nbfinished .NE. keep(400) )
903 local_action = dormir
905 IF ( nbfinished.EQ. keep(400))
THEN
906 local_action = nothing
907 ELSE IF ( (nbfinished+nbwaitmem) .EQ. keep(400) )
THEN
914 memdispo_p = keep8(77) - (keep8_73 -keep8_71)
915 memdispo_p =
min(memdispo_p, keep8(75)-keep8_73)
916 memneeded_p = huge(memneeded_p)
917 DO ith = 1, keep(400)
918 IF (state(ith).EQ.
waitmem)
THEN
919 memneeded_p =
min( memneeded_p,
920 & mumps_tps_arr(ith)%LA -
921 & mumps_tps_arr(ith)%LRLUS )
924 IF ((keep8(75)-keep8_73).LT.memneeded_p)
THEN
927 & memneeded_p-(keep8(75)-keep8_73), info_p(2))
928 DO ith = 1, keep(400)
931 nbfinished = keep(400)
933 keep8(77) = memneeded_p + (keep8_73 -keep8_71)
934 DO ith = 1, keep(400)
935 IF ( state(ith) .EQ.
waitmem )
THEN
941 local_action = nothing
943 SELECT CASE (state(thread_id_p))
945 local_action = free_work_myid
953 peakauthorized_p = keep8(77)
954 memdispo_p = peakauthorized_p - (keep8_73 -keep8_71)
955 memdispo_p =
min(memdispo_p, keep8(75)-keep8_73)
956 memneeded_p = mumps_tps_arr(thread_id_p)%LA -
957 & mumps_tps_arr(thread_id_p)%LRLUS
958 memneededforcb_p = memneeded_p -
959 & ( mumps_tps_arr(thread_id_p)%POSFAC - 1_8 )
960 IF ( memdispo_p .GE. memneeded_p )
THEN
962 keep8(73) = keep8(73) + memneeded_p
968 memdispo_p = peakauthorized_p - (keep8_73 -keep8_71)
969 memdispo_p =
min(memdispo_p, keep8(75)-keep8_73)
970 IF ( memdispo_p .LT. 0 )
THEN
972 keep8(73) = keep8(73) - memneeded_p
974 IF ( state(thread_id_p) .NE.
waitmem )
THEN
976 nbwaitmem = nbwaitmem + 1
980 keep8(74) =
max(keep8(74), keep8_73 )
982 IF ( state( thread_id_p ) .EQ.
waitmem )
THEN
983 nbwaitmem = nbwaitmem - 1
986 local_action = allocatevidercb
987 nbongoingcopies = nbongoingcopies + 1
990 IF ( state(thread_id_p) .NE.
waitmem )
THEN
992 nbwaitmem = nbwaitmem + 1
997 DO k = thread_id_p, thread_id_p + keep(400) - 1
998 IF ( k > keep(400) )
THEN
1003 IF ( state(ith) .GE. 0 .AND.
1004 & state(ith) .LT. mumps_tps_arr(ith)%POSFAC - 1_8 )
THEN
1010 IF ( ith .GT. 0 )
THEN
1011 local_action = copy_factors
1012 copy_start = state(ith) + 1
1017 & (mumps_tps_arr(ith)%POSFAC+keep(400)-2_8) /
1018 & (int(keep(400)*2,8))
1021 IF (keep(72) .EQ. 1)
THEN
1024 chunk8 =
min( chunk8,
1025 & mumps_tps_arr(ith)%POSFAC - 1_8 - copy_start + 1_8
1027 state(ith) = state(ith) + chunk8
1032 SELECT CASE ( local_action )
1033 CASE ( free_work_myid )
1034 IF (
associated(smumps_tps_arr(thread_id_p)%A) )
THEN
1035 DEALLOCATE(smumps_tps_arr(thread_id_p)%A)
1036 NULLIFY(smumps_tps_arr(thread_id_p)%A)
1038 & -mumps_tps_arr(thread_id_p)%LA,
1041 & info_p(1), info_p(2),
1042 & .false., .false. )
1043 IF (info_p(1) .GE. 0)
THEN
1045 keep8(69) = keep8(69) - l0_omp_factors(thread_id_p)%LA
1048 DO ith = 1, keep(400)
1049 IF ( state(ith) .EQ.
waitmem )
THEN
1054 nbfinished = nbfinished + 1
1056 nbongoingcopies = nbongoingcopies -1
1060 CASE ( allocatevidercb )
1061 to_allocate =
max(mumps_tps_arr(thread_id_p)%POSFAC-1_8,1_8)
1062 ALLOCATE( l0_omp_factors(thread_id_p)%A(to_allocate),
1064 IF ( allocok .GT. 0 )
THEN
1067 l0_omp_factors(thread_id_p)%LA = 0_8
1068!$omp critical(l0_copies)
1070 keep8(73) = keep8(73) - memneeded_p
1074 l0_omp_factors(thread_id_p)%LA =
1075 & mumps_tps_arr(thread_id_p)%POSFAC-1_8
1077 keep8(69) = keep8(69) + l0_omp_factors(thread_id_p)%LA
1080 IF ( mumps_tps_arr(thread_id_p)%POSFAC-1_8 == 0_8 )
THEN
1083 state( thread_id_p ) = 0
1084 size_copied( thread_id_p ) = 0
1087 lrlus_save_p = mumps_tps_arr(thread_id_p)%LRLUS
1092 & myid_nodes, n, slavef, keep, keep8,
1093 & mumps_tps_arr(thread_id_p)%IW(1),
1094 & mumps_tps_arr(thread_id_p)%LIW,
1095 & mumps_tps_arr(thread_id_p)%IWPOSCB,
1096 & mumps_tps_arr(thread_id_p)%IWPOS,
1097 & smumps_tps_arr(thread_id_p)%A(1),
1098 & mumps_tps_arr(thread_id_p)%LA,
1099 & mumps_tps_arr(thread_id_p)%LRLU,
1100 & mumps_tps_arr(thread_id_p)%IPTRLU,
1101 & mumps_tps_arr(thread_id_p)%LRLUS,
1102 & step, ptrast, pamaster, procnode_steps, dad,
1103 & info_p(1), info_p(2) )
1104 cbcopiedtodynamic_p =
1105 & mumps_tps_arr(thread_id_p)%LRLUS - lrlus_save_p
1106 IF (info_p(1) .LT. 0 )
THEN
1108 keep8(73) = keep8(73) -
1109 & ( memneededforcb_p - cbcopiedtodynamic_p )
1114 CASE ( copy_factors )
1115 DO i8 = copy_start, copy_start + chunk8 - 1
1116 l0_omp_factors(ith)%A(i8) = smumps_tps_arr(ith)%A(i8)
1119 size_copied(ith) = size_copied(ith) + chunk8
1120 IF ( size_copied(ith) .EQ. l0_omp_factors(ith)%LA )
THEN
1126 CALL mumps_usleep(1000)
1128 WRITE(*,*)
" Internal error in SMUMPS_PERFORM_COPIES",
1136 & ASS_IRECV, N, INODE, TYPE, TYPEF, LA, IW, LIW, A,
1137 & MAXFRW, NOFFNEGW, NPVW, NB22T1W, NBTINYW,
1138 & DET_EXPW, DET_MANTW, DET_SIGNW,
1139 & INFO_P, UU, SEUIL, SEUIL_LDLT_NIV2,
1140 & OPELIW, NELVAW, NMAXNPIVW, NSTEPSW, PTRIST, PTLUST_S,
1141 & PTRFAC, PTRAST, STEP, PIMASTER, PAMASTER, NE, POSFAC,
1142 & LRLU, LRLUS, LRLUSM, IPTRLU, ICNTL, KEEP, KEEP8, DKEEP,
1143 & PIVNUL_LIST, LPN_LIST, COMP, IWPOS, IWPOSCB, PROCNODE_STEPS,
1144 & SLAVEF, IFATH, COMM_NODES, MYID_NODES, IPOOL_P,
1145 & LPOOL_P, LEAF, PERM, NSTK_STEPS, BUFR, LBUFR,
1146 & LBUFR_BYTES, NBFIN, root, OPASSW, ITLOC, RHS_MUMPS, FILS,
1147 & PTRARW, PTRAIW, INTARR, DBLARR, ND, FRERE, DAD, LPTRAR, NELT,
1148 & FRTPTR, FRTELT, ISTEP_TO_INIV2, TAB_POS_IN_PERE, LRGROUPS,
1156 include
'mumps_headers.h'
1157 TYPE (smumps_root_struc) :: root
1158 INTEGER comm_load, ass_irecv
1159 INTEGER comm_nodes, myid_nodes,
TYPE,
1160 INTEGER n, liw, inode,info_p(2)
1161 INTEGER icntl(60), keep(500)
1163 REAL , seuil, seuil_ldlt_niv2
1164 INTEGER(8) keep8(150)
1165 INTEGER(8) :: la, posfac, lrlu, lrlus, lrlusm, iptrlu
1166 INTEGER iwposcb, iwpos,
1167 & ifath, slavef, nelvaw, nmaxnpivw, nstepsw
1168 INTEGER iw(liw),procnode_steps(keep(28))
1169 INTEGER(8) :: ptrast (keep(28))
1170 INTEGER(8) :: ptrfac (keep(28))
1171 INTEGER(8) :: pamaster(keep(28))
1172 INTEGER ptrist(keep(28)), ptlust_s(keep(28))
1173 INTEGER step(n), pimaster(keep(28)), ne(keep(28))
1175 INTEGER :: maxfrw, noffnegw, npvw, nbtinyw
1176 INTEGER,
intent(in) :: lrgroups(n)
1177 DOUBLE PRECISION opassw, opeliw
1178 REAL dblarr(keep8(26))
1179 INTEGER intarr(keep8(27))
1180 INTEGER itloc( n + keep(253) ), fils( n ),
1181 & nd( keep(28) ), frere( keep(28) )
1182 INTEGER dad( keep(28) )
1183 REAL :: rhs_mumps(keep(255))
1184 INTEGER istep_to_iniv2((71)),
1185 & tab_pos_in_pere(slavef+2,
max(1,keep(56)))
1186 INTEGER nelt, lptrar
1187 INTEGER frtptr( n+1 ), frtelt( nelt )
1188 INTEGER(8),
INTENT(IN) :: ptraiw( lptrar ), ptrarw( lptrar )
1190 INTEGER :: nb22t1w, det_expw, det_signw
1193 INTEGER nstk_steps( keep(28) )
1194 INTEGER lbufr, lbufr_bytes
1195 INTEGER bufr( lbufr )
1198 INTEGER :: pivnul_list(lpn_list)
1199 DOUBLE PRECISION flop_estim_acc
1200 INTEGER,
INTENT(IN) :: lpool_p
1201 INTEGER,
INTENT(IN) :: ipool_p(lpool_p)
1202 INTEGER :: ioldps, jobass, etatass
1203 INTEGER(8) :: poselt
1204 LOGICAL :: avoid_delayed, son_level2
1207 IF ( keep(55) .EQ. 0 )
THEN
1211 & info_p, nd, fils, frere, dad, maxfrw, root, opassw,
1212 & opeliw, ptrist, ptlust_s, ptrfac, ptrast, step, pimaster,
1213 & pamaster, ptrarw, ptraiw, itloc, rhs_mumps, nstepsw,
1214 & son_level2,
comp, lrlu, iptrlu,
1215 & iwpos, iwposcb, posfac,
1216 & lrlus, lrlusm, icntl, keep, keep8, dkeep,
1217 & intarr, keep8(27), dblarr, keep8(26),
1218 & nstk_steps, procnode_steps, slavef, comm_nodes,
1219 & myid_nodes, bufr, lbufr, lbufr_bytes, nbfin
1220 & lpool_p, leaf, perm, istep_to_iniv2, tab_pos_in_pere,
1226 & frtelt, n, inode, iw, liw, a,
1227 & la, info_p, nd, fils, frere, dad, maxfrw,
1228 & root, opassw, opeliw, ptrist, ptlust_s, ptrfac, ptrast, step,
1229 & pimaster, pamaster, ptrarw, ptraiw, itloc,
1230 & rhs_mumps, nstepsw, son_level2,
comp, lrlu,
1231 & iptrlu, iwpos, iwposcb,
1232 & posfac, lrlus, lrlusm, icntl, keep, keep8, dkeep,
1233 & intarr, keep8(27), dblarr, keep8(26),
1234 & nstk_steps, procnode_steps, slavef,
1235 & comm_nodes, myid_nodes, bufr, lbufr, lbufr_bytes, nbfin,
1236 & ipool_p, lpool_p, leaf, perm, istep_to_iniv2,
1241 IF (info_p(1) .LT. 0)
THEN
1244 avoid_delayed = ( ( ifath .EQ. keep(20)
1246 & ifath .EQ. keep(38) )
1248 & ( keep(60) .NE. 0 ) )
1249 poselt = ptrast(step(inode))
1250 ioldps = ptlust_s(step(inode))
1251 IF ( keep(50) .EQ. 0 )
THEN
1256 & info_p(1), info_p(2), uu, noffnegw, npvw, nbtinyw,
1257 & det_expw, det_mantw, det_signw,
1259 & step, procnode_steps, myid_nodes, slavef,
1260 & seuil, avoid_delayed, etatass,
1261 & dkeep(1), pivnul_list(1), lpn_list, iwpos
1266 iw( ioldps + 4 + keep(ixsz) ) = 1
1271 & info_p(1), info_p(2), uu, noffnegw, npvw,
1272 & nb22t1w, nbtinyw, det_expw, det_mantw, det_signw,
1273 & keep, keep8, myid_nodes, seuil,
1276 & dkeep(1), pivnul_list(1), lpn_list, iwpos
1280 iw(ioldps + 4 + keep(ixsz)) = step(inode)
1282 IF (info_p(1) .LT. 0)
THEN
1286 &typef, la, iw, liw, a,
1287 &info_p(1), info_p(2), opeliw, nelvaw, nmaxnpivw, ptrist, ptlust_s,
1288 &ptrfac, ptrast, step, pimaster, pamaster, ne, posfac,
1289 &lrlu, lrlus, lrlusm, iptrlu, icntl, keep,
1291 &
comp,iwpos, iwposcb, procnode_steps,
1292 &slavef, , comm_nodes, myid_nodes, ipool_p,
1293 &lpool_p, leaf, nstk_steps, perm, bufr
1294 &lbufr_bytes, nbfin, root, opassw, itloc, rhs_mumps,
1295 &fils, dad, ptrarw, ptraiw, intarr, dblarr, nd, frere,
1296 &lptrar, nelt, frtptr, frtelt, istep_to_iniv2, tab_pos_in_pere,
subroutine smumps_fac_l0_omp(n, liw, nstk_steps, nd, fils, step, frere, dad, istep_to_iniv2, tab_pos_in_pere, ptrist, ptrast, pimaster, pamaster, ptrarw, ptraiw, itloc, rhs_mumps, rinfo, nbroot, nbrtot, nbroot_under_l0, uu, icntl, ptlust_s, ptrfac, info, keep, keep8, procnode_steps, slavef, comm_nodes, myid_nodes, bufr, lbufr, lbufr_bytes, intarr, dblarr, root, perm, nelt, frtptr, frtelt, lptrar, comm_load, ass_irecv, seuil, seuil_ldlt_niv2, mem_distrib, ne, dkeep, pivnul_list, lpn_list, lpool_b_l0_omp, ipool_b_l0_omp, l_virt_l0_omp, virt_l0_omp, virt_l0_omp_mapping, l_phys_l0_omp, phys_l0_omp, perm_l0_omp, ptr_leafs_l0_omp, l0_omp_mapping, ll0_omp_mapping, thread_la, mumps_tps_arr, smumps_tps_arr, nstepsw, opassw, opeliw, nelvaw, comp, maxfrw, nmaxnpivw, npvw, noffnegw, nb22t1w, nbtinyw, det_expw, det_mantw, det_signw, lrgroups, l0_omp_factors, ll0_omp_factors, i4_l0_omp, nbstats_i4, nbcols_i4, i8_l0_omp, nbstats_i8, nbcols_i8)
recursive subroutine smumps_process_front_niv1(comm_load, ass_irecv, n, inode, type, typef, la, iw, liw, a, maxfrw, noffnegw, npvw, nb22t1w, nbtinyw, det_expw, det_mantw, det_signw, info_p, uu, seuil, seuil_ldlt_niv2, opeliw, nelvaw, nmaxnpivw, nstepsw, ptrist, ptlust_s, ptrfac, ptrast, step, pimaster, pamaster, ne, posfac, lrlu, lrlus, lrlusm, iptrlu, icntl, keep, keep8, dkeep, pivnul_list, lpn_list, comp, iwpos, iwposcb, procnode_steps, slavef, ifath, comm_nodes, myid_nodes, ipool_p, lpool_p, leaf, perm, nstk_steps, bufr, lbufr, lbufr_bytes, nbfin, root, opassw, itloc, rhs_mumps, fils, ptrarw, ptraiw, intarr, dblarr, nd, frere, dad, lptrar, nelt, frtptr, frtelt, istep_to_iniv2, tab_pos_in_pere, lrgroups, flop_estim_acc)