15 & COMM_LOAD, ASS_IRECV,
17 & LBUFR_BYTES, PROCNODE_STEPS, MSGSOU,
18 & SLAVEF, IWPOS, IWPOSCB, IPTRLU, LRLU, LRLUS, N, IW, LIW,
19 & A, LA, PTRIST, PTRAST, NSTK_S, PERM,
20 & COMP, STEP, PIMASTER, PAMASTER, POSFAC,
21 & MYID, COMM, IFLAG, IERROR, NBFIN,
23 & PTLUST_S, PTRFAC, root, OPASSW, OPELIW,
24 & ITLOC, RHS_MUMPS, FILS, DAD,
25 & PTRARW, PTRAIW, INTARR, DBLARR,
26 & ICNTL, KEEP,KEEP8, DKEEP,
27 & IPOOL, LPOOL, LEAF, ND, FRERE_STEPS,
28 & LPTRAR, NELT, FRTPTR, FRTELT,
29 & ISTEP_TO_INIV2, TAB_POS_IN_PERE
45 include
'mumps_headers.h'
46 TYPE (zmumps_root_struc) :: root
47 INTEGER icntl( 60 ), keep( 500 )
49 DOUBLE PRECISION dkeep(230)
50 INTEGER lbufr, lbufr_bytes
51 INTEGER comm_load, ass_irecv
53 INTEGER n, slavef, iwpos, iwposcb, liw
54 INTEGER(8) :: iptrlu, lrlu, lrlus, la
57 INTEGER iflag, ierror, nbfin, msgsou
58 INTEGER procnode_steps(keep(28)), ptrist(keep(28)),
60 INTEGER(8) :: pamaster(keep(28))
61 INTEGER(8) :: ptrast(keep(28))
62 INTEGER(8) :: ptrfac(keep(28))
63 INTEGER perm(n), step(n),
66 COMPLEX(kind=8) a( la )
67 INTEGER,
intent(in) :: (n)
70 INTEGER frtptr( n+1 ), frtelt( nelt )
71 INTEGER ptlust_s(keep(28)),
72 & itloc(n+keep(253)), fils(n), dad(keep(28)), nd(keep(28))
73 COMPLEX(kind=8) :: rhs_mumps(keep(255))
74 INTEGER(8),
INTENT(IN) :: ptraiw( lptrar ), ptrarw( lptrar )
75 INTEGER frere_steps(keep(28))
76 DOUBLE PRECISION opassw, opeliw
77 DOUBLE PRECISION flop1
78 INTEGER intarr( keep8(27) )
79 COMPLEX(kind=8) dblarr( keep8(26) )
81 INTEGER ipool( lpool )
82 INTEGER istep_to_iniv2(keep(71)),
83 & tab_pos_in_pere(slavef+2,
max(1,keep(56)))
85 include
'mumps_tags.h'
86 INTEGER :: status(mpi_status_size)
87 LOGICAL :: i_have_set_k117
88 INTEGER inode, position, npiv, ierr, lp
90 INTEGER(8) :: posblocfacto
92 INTEGER(8) :: la_blocfacto
95 COMPLEX(kind=8),
DIMENSION(:),
POINTER :: a_ptr
96 INTEGER ioldps, lcont1, nass1, nrow1, ncol1, npiv1
97 INTEGER nslav1, hs, isw
98 INTEGER (8) :: lpos, upos, lpos2, ipos, kpos
100 INTEGER i, ipiv, fpere
101 LOGICAL lastbl, keep_begs_blr_l
102 LOGICAL blocking, set_irecv, message_received
103 COMPLEX(kind=8) one,
alpha
104 parameter(one=(1.0d0,0.0d0),
alpha=(-1.0d0,0.0d0))
105 INTEGER liwfac, strat,
109 INTEGER :: info_tmp(2)
111 INTEGER :: nelim, npartsass_master, npartsass_master_aux,
114 & nb_blr_l, nb_blr_u, nb_blr_col
115 TYPE (
lrb_type),
POINTER,
DIMENSION(:,:) :: cb_lrb
116 TYPE (
lrb_type),
DIMENSION(:),
POINTER :: blr_u, blr_l
117 LOGICAL :: lr_activated, compress_cb, compress_panel
118 LOGICAL oocwrite_compatible_with_blr
119 INTEGER :: lr_activated_int
120 INTEGER,
POINTER,
DIMENSION(:) :: begs_blr_l, begs_blr_u,
122 COMPLEX(kind=8),
ALLOCATABLE,
DIMENSION(:) :: work, tau
123 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: jpvt
124 DOUBLE PRECISION,
ALLOCATABLE,
DIMENSION(:) :: rwork
125 COMPLEX(kind=8),
ALLOCATABLE,
DIMENSION(:,:) :: block
127 INTEGER npartsass, npartscb, , lwork,
128 & maxi_cluster_l, maxi_cluster_u, maxi_cluster_col
132 keep_begs_blr_l = .false.
136 i_have_set_k117 = .false.
139 CALL mpi_unpack( bufr, lbufr_bytes, position, inode, 1,
140 & mpi_integer, comm, ierr )
141 CALL mpi_unpack( bufr, lbufr_bytes, position, npiv, 1,
142 & mpi_integer, comm, ierr )
146 CALL mpi_unpack( bufr, lbufr_bytes, position, fpere, 1,
147 & mpi_integer, comm, ierr )
149 CALL mpi_unpack( bufr, lbufr_bytes, position, ncol, 1,
150 & mpi_integer, comm, ierr )
151 CALL mpi_unpack( bufr, lbufr_bytes, position, nelim, 1,
152 & mpi_integer, comm, ierr )
154 & npartsass_master , 1,
155 & mpi_integer, comm, ierr )
156 CALL mpi_unpack( bufr, lbufr_bytes, position, ipanel,
157 & 1, mpi_integer, comm, ierr )
158 CALL mpi_unpack( bufr, lbufr_bytes, position, lr_activated_int,
159 & 1, mpi_integer, comm, ierr )
160 lr_activated = (lr_activated_int.EQ.1)
161 IF ( lr_activated )
THEN
162 la_blocfacto = int(npiv,8) * int(npiv+nelim,8)
164 la_blocfacto = int(npiv,8) * int(ncol,8)
167 & npiv, la_blocfacto, .false.,
171 & iwpos, iwposcb, ptrist, ptrast,
172 & step, pimaster, pamaster, lrlus,
173 & keep(ixsz),
comp,dkeep(97),
myid,slavef, procnode_steps,
174 & dad, iflag, ierror)
175 IF (iflag.LT.0)
GOTO 700
176 lrlu = lrlu - la_blocfacto
177 lrlus = lrlus - la_blocfacto
178 keep8(67) =
min(lrlus, keep8(67))
179 keep8(69) = keep8(69) + la_blocfacto
180 keep8(68) =
max(keep8(69), keep8(68))
181 posblocfacto = posfac
182 posfac = posfac + la_blocfacto
184 & la-lrlus,0_8,la_blocfacto,keep,keep8,lrlus)
191 IF (npiv .GT. 0)
THEN
194 & mpi_integer, comm, ierr )
196 IF ( lr_activated )
THEN
198 & a(posblocfacto), npiv*(npiv+nelim),
199 & mpi_double_complex,
201 ld_blocfacto = npiv+nelim
203 & nb_blr_u, 1, mpi_integer,
205 ALLOCATE(blr_u(
max(nb_blr_u,1)), stat=allocok)
206 IF (allocok > 0 )
THEN
208 ierror =
max(nb_blr_u,1)
210 IF (icntl(4) .LE. 0) lp=-1
211 IF (lp > 0)
WRITE(lp,*)
myid,
212 &
': ERROR allocation during ZMUMPS_PROCESS_BLOCFACTO'
215 ALLOCATE(begs_blr_u(nb_blr_u+2), stat=allocok)
216 IF (allocok > 0 )
THEN
220 IF (icntl(4) .LE. 0) lp=-1
221 IF (lp > 0)
WRITE(lp,*)
myid,
222 &
': ERROR allocation during ZMUMPS_PROCESS_BLOCFACTO'
226 & position, npiv, nelim,
'H',
227 & blr_u(1), nb_blr_u,
229 & keep8, comm, ierr, iflag, ierror)
230 IF (iflag.LT.0)
GOTO 700
233 & a(posblocfacto), npiv*ncol,
234 & mpi_double_complex,
241 & mpi_integer, comm, ierr )
242 IF (ptrist(step( inode )) .EQ. 0)
THEN
245 & bufr, lbufr, lbufr_bytes, procnode_steps, posfac,
246 & iwpos, iwposcb, iptrlu,
247 & lrlu, lrlus, n, iw, liw, a, la, ptrist,
249 & ptrast, step, pimaster, pamaster, nstk_s,
comp,
250 & iflag, ierror, comm,
251 & perm, ipool, lpool, leaf, nbfin,
myid, slavef,
253 & root, opassw, opeliw, itloc, rhs_mumps,
254 & fils, dad, ptrarw, ptraiw,
255 & intarr, dblarr, icntl,keep,keep8,dkeep,nd, frere_steps,
256 & lptrar, nelt, frtptr, frtelt,
257 & istep_to_iniv2, tab_pos_in_pere, .true.
260 IF ( iflag .LT. 0 )
GOTO 600
262 IF ( iw( ptrist(step(inode)) + 3 +keep(ixsz)) .EQ. 0 )
THEN
263 DO WHILE ( iw(ptrist(step(inode)) + xxnbpr) .NE. 0)
266 message_received = .false.
268 & ass_irecv, blocking, set_irecv, message_received,
269 & mpi_any_source, contrib_type2,
271 & bufr, lbufr, lbufr_bytes, procnode_steps, posfac,
272 & iwpos, iwposcb, iptrlu,
273 & lrlu, lrlus, n, iw, liw, a, la, ptrist,
275 & ptrast, step, pimaster, pamaster, nstk_s,
comp,
276 & iflag, ierror, comm,
277 & perm, ipool, lpool, leaf, nbfin,
myid, slavef,
279 & root, opassw, opeliw, itloc, rhs_mumps,
280 & fils, dad, ptrarw, ptraiw,
281 & intarr, dblarr, icntl,keep,keep8,dkeep,nd, frere_steps,
282 & lptrar, nelt, frtptr, frtelt,
283 & istep_to_iniv2, tab_pos_in_pere, .true.
286 IF ( iflag .LT. 0 )
GOTO 600
291 message_received = .true.
293 & blocking, set_irecv, message_received,
294 & mpi_any_source, mpi_any_tag,
296 & bufr, lbufr, lbufr_bytes, procnode_steps, posfac,
297 & iwpos, iwposcb, iptrlu,
298 & lrlu, lrlus, n, iw, liw, a, la, ptrist,
300 & ptrast, step, pimaster, pamaster, nstk_s,
comp,
301 & iflag, ierror, comm,
302 & perm, ipool, lpool, leaf, nbfin,
myid, slavef,
304 & root, opassw, opeliw, itloc, rhs_mumps,
305 & fils, dad, ptrarw, ptraiw,
306 & intarr, dblarr,icntl,keep,keep8,dkeep,nd, frere_steps,
307 & lptrar, nelt, frtptr, frtelt,
308 & istep_to_iniv2, tab_pos_in_pere, .true.
311 ioldps = ptrist(step(inode))
313 & ptrast(step(inode)), iw(ioldps+xxd), iw(ioldps+xxr),
314 & a_ptr, poselt, la_ptr )
315 lcont1 = iw( ioldps + keep(ixsz))
316 nass1 = iw( ioldps + 1 + keep(ixsz))
317 compress_panel = (iw(ioldps+xxlr).GE.2)
318 oocwrite_compatible_with_blr =
319 & ( .NOT.lr_activated.OR. (.NOT.compress_panel).OR.
322 IF ( nass1 < 0 )
THEN
324 iw( ioldps + 1 + keep(ixsz)) = nass1
325 IF (keep(55) .EQ. 0)
THEN
327 & ioldps, a_ptr(poselt), la_ptr, 1_8, keep, keep8, itloc,
329 & ptrarw, intarr, dblarr, keep8(27), keep8(26), rhs_mumps,
333 & ioldps, a_ptr(poselt), la_ptr, 1_8, keep, keep8, itloc,
335 & ptrarw, intarr, dblarr, keep8(27), keep8(26),
336 & frtptr, frtelt, rhs_mumps, lrgroups)
339 nrow1 = iw( ioldps + 2 +keep(ixsz))
340 npiv1 = iw( ioldps + 3 +keep(ixsz))
341 nslav1 = iw( ioldps + 5 + keep(ixsz))
342 hs = 6 + nslav1 + keep(ixsz)
343 ncol1 = lcont1 + npiv1
345 ict11 = ioldps+hs+nrow1+npiv1 - 1
347 IF (iw(ipiv+i-1).EQ.i) cycle
349 iw(ict11+i) = iw(ict11+iw(ipiv+i-1))
350 iw(ict11+iw(ipiv+i-1)) = isw
351 ipos = poselt + int(npiv1 + i - 1,8)
352 kpos = poselt + int(npiv1 + iw(ipiv+i-1) - 1,8)
353 CALL zswap(nrow1, a_ptr(ipos), ncol1, a_ptr(kpos), ncol1)
355 lpos2 = poselt + int(npiv1,8)
356 lpos = lpos2 + int(npiv,8)
357 IF ((.NOT. lr_activated).OR.keep(475).EQ.0)
THEN
358 CALL ztrsm(
'L',
'L',
'N',
'N', npiv, nrow1, one,
359 & a(posblocfacto), ld_blocfacto,
360 & a_ptr(lpos2), ncol1)
363 compress_cb = .false.
364 IF ( lr_activated)
THEN
365 compress_cb = ((iw(ioldps+xxlr).EQ.1).OR.
366 & (iw(ioldps+xxlr).EQ.3))
367 IF (compress_cb.AND.npiv.EQ.0)
THEN
368 compress_cb = .false.
369 iw(ioldps+xxlr) = iw(ioldps+xxlr) -1
374 ioldps = ptrist(step(inode))
375 CALL get_cut(iw(ioldps+hs:ioldps+hs+nrow1-1), 0,
376 & nrow1, lrgroups, npartscb,
377 & npartsass, begs_blr_l)
378 CALL regrouping2(begs_blr_l, npartsass, 0, npartscb,
379 & nrow1-0, keep(488), .true., keep(472))
381 IF (ipanel.EQ.1)
THEN
382 begs_blr_col=>begs_blr_u
384 ALLOCATE(begs_blr_col(
size(begs_blr_u)+ipanel-1),
386 IF (allocok > 0 )
THEN
388 ierror =
size(begs_blr_u)+ipanel-1
390 IF (icntl(4) .LE. 0) lp=-1
391 IF (lp > 0)
WRITE(lp,*)
myid,
392 &
': ERROR allocation during ZMUMPS_PROCESS_BLOCFACTO'
395 begs_blr_col(1:ipanel-1) = 1
396 DO i=1,
size(begs_blr_u)
397 begs_blr_col(ipanel+i-1) = begs_blr_u(i)
402 IF (iflag.LT.0)
GOTO 700
410 & huge(npartsass_master),
414 IF (ipanel.NE.1)
THEN
415 DEALLOCATE(begs_blr_col)
417 IF (iflag.LT.0)
GOTO 700
421 keep_begs_blr_l = .true.
422 nb_blr_l =
size(begs_blr_l) - 2
430 IF (lr_activated)
THEN
431 call max_cluster(begs_blr_l,nb_blr_l+1,maxi_cluster_l)
432 call max_cluster(begs_blr_u,nb_blr_u+1,maxi_cluster_u)
433 IF (lastbl.AND.compress_cb)
THEN
434 maxi_cluster=
max(maxi_cluster_u+nelim,maxi_cluster_l)
436 maxi_cluster=
max(maxi_cluster_u,maxi_cluster_l)
438 lwork = maxi_cluster*maxi_cluster
443 ALLOCATE(block(maxi_cluster, omp_num*maxi_cluster),
444 & rwork(2*maxi_cluster*omp_num),
445 & tau(maxi_cluster*omp_num),
446 & jpvt(maxi_cluster*omp_num),
447 & work(lwork*omp_num), stat=allocok)
448 IF (allocok > 0 )
THEN
450 ierror = maxi_cluster*omp_num*maxi_cluster
451 & + 2*maxi_cluster*omp_num
452 & + maxi_cluster*omp_num
453 & + maxi_cluster*omp_num
456 IF (icntl(4) .LE. 0) lp=-1
457 IF (lp > 0)
WRITE(lp,*)
myid,
458 &
': ERROR allocation during ZMUMPS_PROCESS_BLOCFACTO'
462 ALLOCATE(blr_l(nb_blr_l), stat=allocok)
463 IF (allocok > 0 )
THEN
467 IF (icntl(4) .LE. 0) lp=-1
468 IF (lp > 0)
WRITE(lp,*)
myid,
469 &
': ERROR allocation during ZMUMPS_PROCESS_BLOCFACTO'
476 & (a_ptr(poselt), la_ptr, 1_8,
477 & iflag, ierror, ncol1,
478 & begs_blr_l(1),
size(begs_blr_l), nb_blr_l+1,
479 & dkeep(8), keep(466), keep(473),
481 & current_blr,
'V', work, tau, jpvt, lwork,
482 & block, maxi_cluster, nelim,
485 & 2, keep(483), keep8,
490 IF ( (keep(486).EQ.2)
501 IF (iflag.LT.0)
GOTO 300
502 IF (keep(475).GE.1)
THEN
504 & ld_blocfacto, -6666,
506 & blr_l, current_blr, current_blr+1, nb_blr_l+1,
512 IF (keep(486).NE.2)
THEN
514 & a_ptr(poselt), la_ptr, 1_8,
519 & nb_blr_l+1, blr_l(1), current_blr,
'V', 1)
526 IF (iflag.LT.0)
GOTO 700
529 IF ( (keep(201).eq.1) .AND.
530 & (oocwrite_compatible_with_blr .OR. npiv.EQ.0) )
THEN
531 monbloc%INODE = inode
532 monbloc%MASTER = .false.
537 monbloc%LastPiv = npiv1 + npiv
538 monbloc%LastPanelWritten_L = -9999
539 monbloc%LastPanelWritten_U = -9999
540 NULLIFY(monbloc%INDICES)
541 monbloc%Last = lastbl
544 liwfac = iw(ioldps+xxi)
548 & la_ptr, monbloc, nextpivdummy, nextpivdummy,
549 & iw(ioldps), liwfac,
myid, keep8(31), iflag,last_call)
553 IF (lr_activated)
THEN
555 upos = 1_8+int(npiv,8)
557 & a(posblocfacto), la_blocfacto, upos,
558 & a_ptr(poselt), la_ptr, lpos-poselt+1_8,
559 & iflag, ierror, ld_blocfacto, ncol1,
560 & begs_blr_l(1),
size(begs_blr_l),
561 & current_blr, blr_l(1), nb_blr_l+1,
562 & current_blr+1, nelim,
'N')
568 & a_ptr(poselt), la_ptr, 1_8,
569 & iflag, ierror, ncol1,
570 & begs_blr_l(1),
size(begs_blr_l),
571 & begs_blr_u(1),
size(begs_blr_u), current_blr,
572 & blr_l(1), nb_blr_l+1,
573 & blr_u(1), nb_blr_u+1,
578 & keep(481), dkeep(11), keep(466), keep(477)
583 IF (iflag.LT.0)
GOTO 700
585 upos = posblocfacto+int(npiv,8)
586 CALL zgemm(
'N',
'N', ncol-npiv, nrow1, npiv,
587 &
alpha,a(upos), ncol,
588 & a_ptr(lpos2), ncol1, one, a_ptr(lpos), ncol1)
591 iw(ioldps+keep(ixsz) ) = iw(ioldps+keep(ixsz) ) - npiv
592 iw(ioldps + 3+keep(ixsz) ) = iw(ioldps+3+keep(ixsz) ) + npiv
594 iw(ioldps+1+keep(ixsz) ) = iw(ioldps + 3+keep(ixsz) )
596 IF ( .not. lastbl .AND.
597 & (iw(ioldps+1+keep(ixsz)) .EQ. iw(ioldps + 3+keep(ixsz))) )
THEN
598 write(*,*)
'Internal ERROR 1 **** IN BLACFACTO '
601 IF (lr_activated)
THEN
606 IF (keep(486).EQ.3)
THEN
615 lrlu = lrlu + la_blocfacto
616 lrlus = lrlus + la_blocfacto
617 keep8(69) = keep8(69) - la_blocfacto
618 posfac = posfac - la_blocfacto
620 & la-lrlus,0_8,-la_blocfacto,keep
622 flop1 = dble( npiv1*nrow1 ) +
623 & dble(nrow1*npiv1)*dble(2*ncol1-npiv1-1)
625 & dble((npiv1+npiv)*nrow1 ) -
626 & dble(nrow1*(npiv1+npiv))*dble(2*ncol1-npiv1-npiv-1)
629 IF (keep(486).NE.0)
THEN
630 IF (lr_activated)
THEN
638 IF (lr_activated)
THEN
639 IF (compress_cb)
THEN
641 & begs_blr_col, npartsass_master_aux)
642 begs_blr_col(1+npartsass_master) =
643 & begs_blr_col(1+npartsass_master) - nelim
644 nb_blr_col =
size(begs_blr_col
646 call max_cluster(begs_blr_l,nb_blr_l+1,maxi_cluster_l)
647 call max_cluster(begs_blr_col,nb_blr_col,maxi_cluster_col)
648 IF (compress_cb)
THEN
649 maxi_cluster=
max(maxi_cluster_col+nelim,maxi_cluster_l)
651 maxi_cluster=
max(maxi_cluster_col,maxi_cluster_l)
653 lwork = maxi_cluster*maxi_cluster
656!$ omp_num = omp_get_max_threads()
658 ALLOCATE(block(maxi_cluster, omp_num*maxi_cluster),
659 & rwork(2*maxi_cluster*omp_num),
660 & tau(maxi_cluster*omp_num),
661 & jpvt(maxi_cluster*omp_num),
662 & work(lwork*omp_num), stat=allocok)
663 IF (allocok > 0 )
THEN
665 ierror = maxi_cluster*omp_num*maxi_cluster
666 & + 2*maxi_cluster*omp_num
667 & + maxi_cluster*omp_num
668 & + maxi_cluster*omp_num
671 IF (icntl(4) .LE. 0) lp=-1
672 IF (lp > 0)
WRITE(lp,*)
myid,
673 &
': ERROR allocation during ZMUMPS_PROCESS_BLOCFACTO'
677 allocate(cb_lrb(nb_blr_l,nb_blr_col-npartsass_master),
679 IF (allocok > 0)
THEN
681 ierror = nb_blr_l*(nb_blr_col-npartsass_master)
689 IF (compress_cb)
THEN
691 & a_ptr(poselt), la_ptr, 1_8, ncol1,
692 & begs_blr_l(1),
size(begs_blr_l),
693 & begs_blr_col(1),
size(begs_blr_col),
694 & nb_blr_l, nb_blr_col-npartsass_master,
696 & nrow1, ncol1-npiv1-npiv, inode,
697 & iw(ioldps+xxf), 0, 2, iflag, ierror,
698 & dkeep(12), keep(466), keep(484), keep(489),
700 & work, tau, jpvt, lwork, rwork, block,
701 & maxi_cluster, keep8, omp_num,
702 & -9999, -9999, -9999, keep(1),
711 IF (iflag.LT.0)
GOTO 700
714 & comm_load, ass_irecv,
719 & bufr, lbufr, lbufr_bytes, procnode_steps, posfac,
720 & iwpos, iwposcb, iptrlu, lrlu, lrlus, iw, liw, a, la,
721 & ptrist, ptlust_s, ptrfac, ptrast, step, pimaster,
723 & nstk_s,
comp, iflag, ierror, perm,
724 & ipool, lpool, leaf, nbfin, slavef,
725 & opassw, opeliw, itloc, rhs_mumps, fils, dad, ptrarw, ptraiw,
726 & intarr, dblarr,icntl,keep,keep8,dkeep,nd, frere_steps,
727 & lptrar, nelt, frtptr, frtelt,
728 & istep_to_iniv2, tab_pos_in_pere
732 IF (lr_activated)
THEN
733 IF (
allocated(rwork))
DEALLOCATE(rwork)
734 IF (
allocated(work))
DEALLOCATE(work)
735 IF (
allocated(tau))
DEALLOCATE(tau)
736 IF (
allocated(jpvt))
DEALLOCATE(jpvt)
737 IF (
allocated(block))
DEALLOCATE(block
738 IF (
associated(begs_blr_l))
THEN
739 IF (.NOT. keep_begs_blr_l)
DEALLOCATE(begs_blr_l)
743 IF (
associated(begs_blr_u))
DEALLOCATE(begs_blr_u)