20 integer,
pointer,
dimension(:,:),
SAVE::
cv_cand
21 integer,
pointer,
dimension(:),
SAVE::
37 integer,
dimension(:),
allocatable,
save ::
score
43 integer,
pointer,
dimension(:)::t2_nodenumbers
44 integer,
pointer,
dimension(:,:)::t2_cand
45 DOUBLE PRECISION,
pointer,
dimension(:)::t2_candcostw,
50 integer:: new_ison,new_ifather,old_keep2
51 DOUBLE PRECISION:: ncostw_oldinode,ncostm_oldinode,
55 integer,
dimension(:),
pointer :: ind_proc
57 DOUBLE PRECISION,
pointer,
dimension(:) ::
77 integer,
dimension(:),
pointer::
90 & ne,nfsiz,frere,fils,keep,KEEP8,
91 & procnode,ssarbr,nbsa,peak,istat
92 & , SIZEOFBLOCKS, LSIZEOFBLOCKS
95 integer,
intent(in)::n,slavef
96 integer,
intent(inout),
TARGET:: ne(n),nfsiz(n),
97 & procnode(n),ssarbr(n),frere(n),fils(n),keep(500),
99 integer,
intent(in) :: lsizeofblocks
100 integer,
intent(in) :: sizeofblocks(lsizeofblocks)
101 INTEGER(8) keep8(150)
102 integer,
intent(out)::nbsa,istat
103 integer ierr,nmb_thislayer,layernmb,mapalgo,allocok,i
104 integer,
pointer,
dimension(:)::thislayer
105 integer,
parameter::memonly=1,floponly=2,hybrid=3
107 & maxwork,minwork,maxmem,minmem,workbalance,membalance
108 DOUBLE PRECISION:: cost_root_node
109 DOUBLE PRECISION,
dimension(:),
allocatable:: work_per_proc
110 integer,
dimension(:),
allocatable::id_son
112 character (len=48):: err_rep,subname
113 DOUBLE PRECISION peak
115 blkon = (sizeofblocks(1).GT.0)
121 IF (icntl(4).LT.2)
cv_mp=0
125 & frere,fils,nfsiz,ne,keep,keep8,icntl,info,
126 & procnode,ssarbr,peak,ierr
127 & , sizeofblocks, lsizeofblocks
129 if (ierr.ne.0)
goto 99999
132 if (ierr.ne.0)
goto 99999
135 if (ierr.ne.0)
goto 99999
138 if (ierr.ne.0)
goto 99999
141 if (ierr.ne.0)
goto 99999
142 if (ierr.ne.0)
goto 99999
145 if (ierr.ne.0)
goto 99999
149 & maxwork,minwork,maxmem,minmem)
150 if(maxwork.gt.0.0d0)
then
151 workbalance=minwork/maxwork
155 if(maxmem.gt.0.0d0)
then
156 membalance=minmem/maxmem
162 if (allocok.gt.0)
then
166 &
write(
cv_lp,*)
'memory allocation error in ',subname
173 err_rep=
'SELECT_TYPE3'
175 if (ierr.ne.0)
goto 99999
179 &
cv_keep(50), 3, cost_root_node)
180 cost_root_node = cost_root_node / dble(
cv_slavef)
192 if (ierr.ne.0)
goto 99999
193 err_rep=
'DO_SPLITTING'
196 & (layernmb,thislayer,nmb_thislayer,ierr)
198 if (ierr.ne.0)
goto 99999
199 err_rep=
'ASSIGN_TYPES'
202 if (ierr.ne.0)
goto 99999
203 if(layernmb.gt.0)
then
206 err_rep=
'COSTS_LAYER_T2'
211 err_rep=
'COSTS_LAYER_T2PM'
214 err_rep=
'wrong strategy for COSTS_LAYER_T2'
217 if (ierr.ne.0)
goto 99999
221 & maxwork,minwork,maxmem,minmem)
222 if(maxwork.gt.0.0d0)
then
223 workbalance=minwork/maxwork
227 if(maxmem.gt.0.0d0)
then
228 membalance=minmem/maxmem
232 if(mapalgo.eq.memonly)
then
236 if (ierr.ne.0)
goto 99999
237 elseif(mapalgo.eq.floponly)
then
241 if (ierr.ne.0)
goto 99999
242 elseif(mapalgo.eq.hybrid)
then
243 if (workbalance <= membalance)
then
247 if (ierr.ne.0)
goto 99999
252 if (ierr.ne.0)
goto 99999
256 &
write(
cv_lp,*)
'Unknown mapalgo in ',subname
261 err_rep=
'HIGHER_LAYER'
263 & nmb_thislayer,cont,ierr)
264 if (ierr.ne.0)
goto 99999
270 err_rep=
'POSTPROCESS'
276 if (ierr.ne.0)
goto 99999
277 err_rep=
'ENCODE_PROC'
279 if (ierr.ne.0)
goto 99999
282 & info,procnode,ssarbr,nbsa)
283 err_rep=
'mem_dealloc'
284 deallocate(thislayer,stat=ierr)
287 &
write(
cv_lp,*)
'Memory deallocation error in ',subname
293 if (ierr.ne.0)
goto 99999
298 write(
cv_lp,*)
'Error in ',subname,
', layernmb=',layernmb
299 write(
cv_lp,*)
'procedure reporting the error: ',err_rep
309 & map_strat,workload,memused,accepted,
312 integer,
intent(in)::map_strat
313 DOUBLE PRECISION,
dimension(:),
intent(in)::workload, memused
314 logical,
intent(out)::accepted
315 integer,
intent(out)::istat
316 DOUBLE PRECISION maxi,mini,mean,stddev, dpkeep102
318 character (len=48):: subname
319 logical alternative_criterion
345 dpkeep102 = dble(150)
347 dpkeep102 = dble(200)
349 dpkeep102 = dble(250)
351 dpkeep102 = dble(275)
353 dpkeep102 = dble(300)
355 dpkeep102 = dble(400)
360 alternative_criterion=.false.
362 maxi=maxval(workload)
363 mini=minval(workload)
364 if (maxi.lt.minflops)
then
366 elseif(maxi.le.(dpkeep102/dble(100))*mini)
then
369 if ((.NOT.accepted).AND.(alternative_criterion))
then
374 & (abs(workload(i)-mean)*abs(workload(i)-mean))
377 nmb=count(mask=abs(workload-mean)<stddev)
379 & .AND.(stddev.lt.dv_rate*mean)) accepted=.true.
384 if (maxi.lt.minmem)
then
387 if (maxi.le.dble(2)*mini) accepted=.true.
389 if (maxi.le.dble(4)*mini) accepted=.true.
391 if (maxi.le.dble(6)*mini) accepted=.true.
393 if (maxi.le.dble(8)*mini) accepted=.true.
395 if (maxi.le.dble(10)*mini) accepted=.true.
402 & procnode,istat,respect_prop)
404 integer,
intent(in)::map_strat, layerL0end
405 DOUBLE PRECISION,
dimension(:),
intent(out)::workload, memused
406 integer,
intent(out)::procnode(:),istat
407 logical,
intent(in),
OPTIONAL:: respect_prop
408 integer i,j,ierr, nodenumber,proc
409 DOUBLE PRECISION work,mem
410 character (len=48):: err_rep,subname
416 &
write(
cv_lp,*)
'Error:tcost must be allocated in ',subname
432 err_rep=
'FIND_BEST_PROC'
433 if(
present(respect_prop))
then
435 & workload,memused,proc,ierr,respect_prop)
438 & workload,memused,proc,ierr)
441 procnode(nodenumber)=proc
444 &
write(
cv_lp,*)
'Error reported by ',err_rep,
' in ',subname
461 integer,
intent(in)::layernmb,thislayer(:),
463 integer,
intent(out)::istat
464 integer i,in,npiv,nfront,inode,inoderoot,par_nodes_in_layer,
466 character (len=48):: subname
468 subname=
'ASSIGN_TYPES'
469 if((layernmb.lt.0).or.(layernmb.gt.
cv_maxlayer))
return
471 if(layernmb.eq.0)
then
476 else if(layernmb.eq.0)
then
492 if ( inode .ne. inoderoot )
then
526 if(layernmb.gt.0)
then
531 & par_nodes_in_layer=par_nodes_in_layer+1
533 if(par_nodes_in_layer.gt.0)
then
540 if (allocok.gt.0)
then
545 &
write(
cv_lp,*)
'memory allocation error in ',subname
575 integer,
intent(in)::procnumber
586 integer,
intent(in)::inode,procnumber
590 if((procnumber.lt.1).or.(procnumber.gt.
cv_slavef))
return
591 if(.not.
associated(
cv_prop_map(inode)%ind_proc))
return
600 integer,
intent(inout)::procs4node(cv_size_ind_proc)
601 integer,
intent(in)::procnumber
602 integer,
intent(out)::istat
605 if((procnumber.lt.1).or.(procnumber.gt.
cv_slavef))
return
609 procs4node(pos1)=ibset(procs4node(pos1),pos2)
615 integer,
intent(out)::istat
617 DOUBLE PRECISION :: maxcostw_root
623 &
'Error: tcost must be allocated in MUMPS_CALCCOSTS'
646 integer,
intent(in)::npiv,nfront
647 DOUBLE PRECISION,
intent(out)::costw,costm
648 character (len=48):: subname
649 subname=
'CALCNODECOSTS'
650 if((npiv.le.1).and.(nfront.le.1))
then
656 WRITE(*,*)
" *** Temp internal error in MUMPS_CALCNODECOSTS:"
663 costw= 2.0d0*dble(nfront)*dble(npiv)*dble(nfront-npiv-
664 & + dble(npiv)*dble(npiv+1)*dble(2*npiv+1)/dble(3)
665 & + dble(2*nfront-npiv-1) * dble(npiv) / dble(2)
666 costm= dble(npiv)*(dble(2*nfront)-dble(npiv))
669 & (dble(nfront)*dble(nfront)+dble(2*nfront) -
670 & dble(nfront+1) * dble(npiv+1) +
671 & dble(npiv+1) * dble(2*npiv+1) / dble(6))
672 costm= dble(npiv) * dble(nfront)
676 if((costw.lt.0.0d0).or.(costm.lt.0.0d0))
then
681 & K471, K472, K475, K488, SYM)
682 INTEGER,
INTENT(IN) :: NPIV, NFRONT, SYM, K471, K472, K475, K488
683 DOUBLE PRECISION,
INTENT(OUT) :: COSTW, COSTM
685 DOUBLE PRECISION :: B,R,M,N
693 ELSEIF (k471.EQ.1)
THEN
696WRITE'Internal error in MUMPS_CALCNODECOSTS_BLR', k471
701 costw = m/b * b*(b+1.0d0)*(2.0d0*b+1.0d0)/3.0d0
703 costw = costw + 2.0d0*m/(b*b)*(n-(m+b)/2.0d0) * b*b*b
704 ELSEIF (k475.EQ.1)
THEN
705 costw = costw + m/(b*b)*(n-(m+b)/2.0d0) * b*b*(r+b)
706 ELSEIF (k475.EQ.2)
THEN
707 costw = costw + m/(b*b)*(2.0d0*n-3.0d0*m-2.0d0*b) * b*b*r
708 & + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * b*b*b
709 ELSEIF (k475.EQ.3)
THEN
710 costw = costw + 2.0d0*m/(b*b)*(n-(m+b)/2.0d0) * b*b*r
712 costw = costw + 2.0d0*m/(b*b)*(n-(m+b)/2.0d0) * 2.0d0*b*b*r
713 costw = costw + (4.0d0*b*r*r + 2.0d0*b*b*r) * (
714 & (n-m)*(n-m)*m/(b*b*b)
715 & + (n-m)/b*(m/b-1.0d0)*m/b
716 & + (m/b-1.0d0)*m/b*(2.0d0*m/b-1.0d0)/6.0d0
718 costm = m*(2.0d0*n-m)/(b*b) * 2.0d0*b*r
720 costw = m/b * b*(b+1.0d0)*(2.0d0*b+1.0d0)/6.0d0
721 IF (k475.EQ.0.OR.k475.EQ.1)
THEN
722 costw = costw + m/(b*b)*(n-(m+b)/2.0d0) * b*b*b
723 ELSEIF (k475.EQ.2)
THEN
724 costw = costw + (n-m)*m/(b*b) * b*b*r
725 & + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * b*b*b
726 ELSEIF (k475.EQ.3)
THEN
727 costw = costw + m/(b*b)*(n-(m+b)/2.0d0) * b*b*r
729 costw = costw + m/(b*b)*(n-(m+b)/2.0d0) * 2.0d0*b*b*r
730 costw = costw + (4.0d0*b*r*r + 2.0d0*b*b*r) * (
731 & (n-m)*(n-m)*m/(b*b*b)/2.0d0
732 & + (n-m)/b*(m/b-1.0d0)*m/b/2.0d0
733 & + (m/b-1.0d0)*m/b*(m/b+1.0d0)/6.0d0
735 costm = m*n/(b*b) * 2.0d0*b*r
739 & COSTW, COSTM, K471, K472, K475, K488, SYM)
740 INTEGER,
INTENT(IN) :: NPIV, NFRONT, SYM, K471, K472, K475, K488
741 DOUBLE PRECISION,
INTENT(OUT) :: COSTW, COSTM
743 DOUBLE PRECISION :: B,R,M,N
751 ELSEIF (k471.EQ.1)
THEN
754 WRITE(*,*)
'Internal error in ',
755 &
'MUMPS_COSTS_BLR_T2_MASTER', k471
760 costw = m/b * b*(b+1.0d0)*(2.0d0*b+1.0d0)/3.0d0
762 costw = costw + m/(b*b)*(n-(m+b)/2.0d0) * b*b*b
763 & + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * b*b*b
764 ELSEIF (k475.EQ.1)
THEN
765 costw = costw + m/(b*b)*(n-(m+b)/2.0d0)
766 & + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * b*b*r
767 ELSEIF (k475.EQ.2)
THEN
768 costw = costw + m/(b*b)*(n-m) * b*b*r
769 & + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * b*b*(b+r)
770 ELSEIF (k475.EQ.3)
THEN
771 costw = costw + m/(b*b)*(n-(m+b)/2.0d0) * b*b*r
772 & + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * b*b*r
774 costw = costw + m/(b*b)*(n-(m+b)/2.0d0) * 2.0d0*b*b*r
775 & + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * 2.0d0*b*b*r
776 costw = costw + (4.0d0*b*r*r + 2.0d0*b*b*r) * (
777 & (n-m)/b*(m/b-1.0d0)*m/b/2.0d0
778 & + (m/b-1.0d0)*m/b*(2.0d0*m/b-1.0d0)/6.0d0
780 costm = m*n/(b*b) * 2.0d0*b*r
782 costw = m/b * b*(b+1.0d0)*(2.0d0*b+1.0d0)/6.0d0
784 costw = costw + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * b*b*b
785 ELSEIF (k475.EQ.3)
THEN
786 costw = costw + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * b*b*r
788 costw = costw + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0
791 & (m/b-1.0d0)*m/b*(m/b+1.0d0)/6.0d0
793 costm = m*m/(b*b) * 2.0d0*b*r
797 & NROW, COSTW, COSTM, K471, K472, K475, K488, SYM)
798 INTEGER,
INTENT(IN) :: NPIV, NFRONT, SYM, K471, K472,
800 DOUBLE PRECISION,
INTENT(IN) :: NROW
801 DOUBLE PRECISION,
INTENT(OUT) :: COSTW, COSTM
803 DOUBLE PRECISION :: B,R,M,N,P
812 ELSEIF (k471.EQ.1)
THEN
815 WRITE(*,*)
'Internal error in ',
816 &
'MUMPS_COSTS_BLR_T2_SLAVE', k471
822 costw = costw + (m*p)/(b*b) * b*b*b
824 costw = costw + (m*p)/(b*b) * b*b*r
826 costw = costw + (m*p)/(b*b) * 2.0d0*b*b*r
828 costw = costw + (4.0d0*b*r*r + 2.0d0*b*b*r) * (
829 & m/b*(p/b-1.0d0)*p/b/2.0d0
830 & + (n-m)*m*p/(b*b*b)
833 costw = costw + (4.0d0*b*r*r + 2.0d0*b*b*r) * (
834 & m/b*(p/b-1.0d0)*p/b/2.0d0
835 & + (n-m)*m*p/(b*b*b)/2.0d0
838 costm = m*p/(b*b) * 2.0d0*b*r
842 integer,
intent(in)::layernmb,nmb_thislayer
843 integer,
intent(out)::istat
844 integer in,inode,j,kmax,npiv,nfront,ncb,ncol,
845 & min_needed,max_needed,more_than_needed,total_nmb_cand,
846 & nmb_type2_thislayer,fraction,
847 & total_cand_layer,cand_strat, keep48_loc
848 DOUBLE PRECISION ,work_type2_thislayer,
849 & relative_weight,workmaster,nrow
851 character (len=48):: subname
852 integer MUMPS_REG_GETKMAX, MUMPS_BLOC2_GET_NSLAVESMIN,
854 external MUMPS_REG_GETKMAX, MUMPS_BLOC2_GET_NSLAVESMIN,
857 subname=
'COSTS_LAYER_T2'
860 &
write(
cv_lp,*)
'Error in ',subname,
'. Wrong keep24'
863 force_cand=(mod(
cv_keep(24),2).eq.0)
866 if (nmb_type2_thislayer.gt.0)
then
867 work_type2_thislayer=0.0d0
868 do j=1,nmb_type2_thislayer
870 work_type2_thislayer=work_type2_thislayer+
cv_ncostw(inode)
874 &
write(
cv_lp,*)
'Error in ',subname,
'. Wrong cv_relax'
878 do j=1,nmb_type2_thislayer
899 if (
cv_keep(48).EQ.5) keep48_loc = 5
908 if(cand_strat.eq.1)
then
910 elseif (cand_strat.eq.2)
then
911 if(work_type2_thislayer.gt.0.0d0)
then
912 relative_weight=
cv_ncostw(inode)/work_type2_thislayer
914 relative_weight = 0.0d0
916 fraction=nint(relative_weight *
917 & dble(total_cand_layer))
919 &
max(0,fraction-min_needed) )
920 elseif (cand_strat.eq.3)
then
924 &
write(
cv_lp,*)
'Unknown cand. strategy in ',subname
927 total_nmb_cand=
min(min_needed+more_than_needed,
929 total_nmb_cand=
min(total_nmb_cand,max_needed)
936 flop1=dble(2*npiv)*dble(nfront)-
937 & dble(npiv+nfront)*dble(npiv+1)
938 flop1= dble(npiv)*flop1 +
939 & dble(2 * npiv-npiv-1)*dble(npiv)/dble(2)+
940 & dble(npiv)*dble(npiv+1)*dble(2*npiv+1)/dble(3)
943 & ( dble(npiv)*dble(npiv)+dble(npiv)-
944 & dble(npiv*npiv+npiv+1) )+
945 & (dble(npiv)*dble(npiv+1)*dble(2*npiv+1))/dble(6)
948 if(total_nmb_cand.gt.0)
then
949 nrow = dble(
max(
min(dble(ncb)/dble(total_nmb_cand),
953 nrow = dble(
max(dble(kmax),
959 flop1 = dble(npiv)*dble(nrow)+
960 & dble(nrow)*dble(npiv)*dble(2*nfront-npiv-1)
963 flop1 = dble(npiv)*dble(nrow)*
964 & (dble(2*ncol)-dble(nrow)-dble(npiv)+dble(1))
965 workmaster = dble(npiv)*dble(npiv)*dble(npiv)/dble(3)
966 if (workmaster.gt.flop1) flop1=workmaster
976 & =dble(npiv)*dble(nrow)
979 & =dble(npiv)*dble(nrow)
989 integer,
intent(in)::layernmb
990integer,
intent(out)::istat
991 integer in,inode,,jj,kmax,npiv,nfront,ncb,ncol,
992 & total_nmb_cand,nmb_type2_thislayer,
993 & total_cand_layer,npropmap,min_needed,
995 DOUBLE PRECISION flop1,work_type2_thislayer,
996 & relative_weight,workmaster,nrow
997 DOUBLE PRECISION save_ncostw, save_ncostm
998 LOGICAL SPLITNODE, BLRNODE
1000 integer MUMPS_REG_GETKMAX, MUMPS_BLOC2_GET_NSLAVESMIN
1001 external MUMPS_REG_GETKMAX, MUMPS_BLOC2_GET_NSLAVESMIN
1007 subname=
'COSTS_LAYER_T2PM'
1012 &
write(
cv_lp,*)
'Error in ',subname,
'. Wrong keep24'
1016 if (nmb_type2_thislayer.gt.0)
then
1018 work_type2_thislayer=0.0d0
1019 do j=1,nmb_type2_thislayer
1021 work_type2_thislayer=work_type2_thislayer+
cv_ncostw(inode)
1025 & npropmap=npropmap+1
1027 total_cand_layer=total_cand_layer+npropmap
1029 do j=1,nmb_type2_thislayer
1057 if (
cv_keep(48).EQ.5) keep48_loc = 5
1062 if(min_needed.lt.1)
then
1064 &
write(
cv_lp,*)
'Error in ',subname,
'.NEG min_needed'
1072 & npropmap=npropmap+1
1074 total_nmb_cand=
max(npropmap-1,min_needed)
1075 elseif(
cv_keep(24).eq.10)
then
1076 if(work_type2_thislayer.gt.0.0d0)
then
1077 relative_weight=
cv_ncostw(inode)/work_type2_thislayer
1079 relative_weight = 0.0d0
1081 total_nmb_cand=nint(relative_weight *
1082 & dble(total_cand_layer))
1083 total_nmb_cand=
max(total_nmb_cand-1,min_needed)
1088 &
': use 8 on layer',layernmb
1093 & npropmap=npropmap+1
1095 total_nmb_cand=
max(npropmap-1,min_needed)
1099 &
': use 10 on layer',layernmb
1101 if(work_type2_thislayer.gt.0.0d0)
then
1102 relative_weight=
cv_ncostw(inode)/work_type2_thislayer
1104 relative_weight = 0.0d0
1106 total_nmb_cand=nint(relative_weight *
1107 & dble(total_cand_layer))
1108 total_nmb_cand=
max(total_nmb_cand-1,min_needed)
1112 &
write(
cv_lp,*)
'Unknown cand. strategy in ',subname
1115 total_nmb_cand=
max(total_nmb_cand,1)
1117 total_nmb_cand=
min(total_nmb_cand,ncb)
1129 flop1=dble(2*npiv)*dble(nfront)-
1130 & dble(npiv+nfront)*dble(npiv+1)
1131 flop1= dble(npiv)*flop1 +
1132 & dble(2 * npiv-npiv-1)*dble(npiv)/dble(2)+
1133 & dble(npiv)*dble(npiv+1)*dble(2*npiv+1)/dble(3)
1136 & ( dble(npiv)*dble(npiv)+dble(npiv)-
1137 & dble(npiv*npiv+npiv+1) )+
1138 & (dble(npiv)*dble(npiv+1)*dble(2*npiv+1))/dble(6)
1146 if(total_nmb_cand.gt.0)
then
1147 nrow = dble(
max(
min(dble(ncb)/dble(total_nmb_cand),
1151 nrow = dble(
max(dble(kmax),
1165 flop1 = dble(npiv)*dble(nrow)+
1166 & dble(nrow)*dble(npiv)*dble(2*nfront-npiv-1)
1169 flop1 = dble(npiv)*dble(nrow)*
1170 & (dble(2*ncol)-dble(nrow)-dble(npiv)+dble(1))
1171 workmaster = dble(npiv)*dble(npiv)*dble(npiv)/dble(3)
1172 if (workmaster.gt.flop1) flop1=workmaster
1177 IF (.NOT.blrnode)
THEN
1179 cv_ncostm(inode)=dble(npiv)*dble(nfront)
1187 ELSEIF (.NOT.blrnode)
THEN
1190 & =dble(npiv)*dble(nrow)
1193 & =dble(npiv)*dble(nrow)
1202 & layernmb,thislayer,nmb_thislayer,
1205 integer,
intent(in)::layernmb,nmb_thislayer
1206 integer,
intent(in)::thislayer(:)
1207 integer,
intent(out)::istat
1208 integer i,,k1,k2,k3,ierr,inode,nfront,npiv,
1209 & npropmap, inode_tmp, allocok
1211 integer,
allocatable,
dimension(:) :: npivsplit
1212 integer :: lnpivsplit
1214 integer :: k1_temp, npiv_beg, npiv_end
1215 character (len=48):: err_rep,subname
1217 subname=
'SPLIT_DURING_MAPPING'
1218 if((layernmb.lt.0).or.(layernmb.gt.
cv_maxlayer))
return
1227 allocate(npivsplit(lnpivsplit),stat=allocok)
1228 if (allocok .NE. 0)
then
1233 &
write(
cv_lp,*)
'memory allocation error in ',subname
1236 do i=1,nmb_thislayer
1242 do while (inode_tmp.gt.0)
1250 if (inode_tmp .eq. 0) cycle
1257 IF ((keep(376) .EQ.1)
1259 err_rep=
'GET_SPLIT_4_PERF'
1262 & k1, lnpivsplit, npivsplit, n,
cv_frere(1),
1271 err_rep=
'GET_SPLIT_INKPART'
1273 & doit,npiv,nfront,npropmap,k1,k3,
1276 err_rep=
'GET_MEMSPLIT_INKPART'
1278 & doit,npiv,nfront,npropmap,k2,ierr)
1283 if (lnpivsplit < k1)
then
1284 write(*,*)
'error in', subname, lnpivsplit, k1,
cv_keep(108)
1287 bsize =
max(npiv/k1,1)
1293 do while (inode_tmp.gt.0)
1295 if (npiv_end-npiv_beg.ge.bsize)
then
1297 npivsplit(k1_temp) = npiv_end-npiv_beg
1299 if ( ( (npiv-npiv_beg).gt.0) .and.
1300 & (npiv-npiv_beg.LT.2*bsize)
1303 npivsplit(k1_temp) = npiv - npiv_beg
1309 if (k1_temp.eq.0)
then
1313 if (npiv_end.gt.npiv_beg)
then
1315 npivsplit(k1_temp) = npiv_end-npiv_beg
1323 npivsplit(k1) = npiv-bsize*(k1-1)
1329 &
write(
cv_lp,*)
'Error reported by ',
1330 & err_rep,
' in ',subname
1334 if ( ( k1.le.1).or.(k3.le.1).or.(.NOT.doit) ) cycle
1335 err_rep=
'SPLITNODE_INKPART'
1341 & , sizeofblocks, lsizeofblocks
1346 &
write(
cv_lp,*)
'Error reported by ',err_rep,
1351 err_rep=
'SPLITNODE_UPDATE'
1353 & lnpivsplit, npivsplit,
1357 &
write(
cv_lp,*)
'Error reported by ',err_rep,
1365 deallocate(npivsplit)
1369 & doit,npiv,nfront,npropmap,k1,k3,istat)
1371 integer,
intent(in)::inode
1372 logical,
intent(out)::doit
1373 integer,
intent(in) :: npiv, nfront, npropmap
1374 integer,
intent(out) :: istat
1375 integer,
intent(out) ::k1,k3
1376 integer npiv2,nfront2,npiv_son2
1377 integer ncb,kmax,keep48_loc,nslaves_max,
1378 & nslaves_estim,strat,kk
1379 DOUBLE PRECISION wk_master,wk_master2,wk_slave2
1380 integer MUMPS_REG_GETKMAX,
1383 external MUMPS_REG_GETKMAX
1384 external MUMPS_BLOC2_GET_NSLAVESMAX
1385 external MUMPS_BLOC2_GET_NSLAVESMIN
1401 npiv_son2 =
max(npiv/2,1)
1414 if (
cv_keep(48).EQ.5) keep48_loc = 5
1415 if(npropmap .gt.
cv_keep(83))
then
1421 nslaves_estim =
min(npropmap-1,nslaves_max)
1422 nslaves_estim =
max(nslaves_estim,1)
1432 nslaves_estim =
max(nslaves_estim,1)
1433 nslaves_estim =
min(nslaves_estim,nslaves_max)
1436 wk_master = (dble(2)/dble(3))*
1437 & dble(npiv)*dble(npiv)*dble(npiv)+
1438 & dble(npiv)*dble(npiv)*dble(nfront-npiv)
1440 wk_master = dble(npiv)*dble(npiv)*dble(npiv)/dble(3)
1448 nfront2 = nfront-npiv+npiv2
1450 & (nfront2.le.
cv_keep(9)) )
then
1454 wk_master2 = wk_master / dble(kk)
1456 wk_slave2 = ( dble(npiv2)*dble(nfront2-npiv2) *
1457 & dble(2*nfront2-npiv2) ) / dble(nslaves_estim)
1460 & ( dble(npiv2)*dble(nfront2-npiv2)*dble(nfront2) )
1461 & / dble(nslaves_estim)
1464 & (1.0d0 +dble(kk*strat)/dble(100))*wk_slave2)
then
1476 wk_master2 = wk_master / dble(kk)
1478 wk_slave2 = ( dble(npiv2)*dble(nfront2-npiv2) *
1479 & dble(2*nfront2-npiv2) ) / dble(nslaves_estim)
1482 & ( dble(npiv2)*dble(nfront2-npiv2)*dble(nfront2) )
1483 & / dble(nslaves_estim)
1485 if(wk_master2.le.wk_slave2)
then
1493 k1=
min(k1, npropmap-1)
1494 k3=
min(k3, npropmap-1)
1503 & doit,npiv,nfront,npropmap,k2,istat)
1505 integer,
intent(in) :: inode
1506 logical,
intent(out) :: doit
1507 integer,
intent(in) :: npiv,nfront,npropmap
1508 integer,
intent(out) :: istat
1509 integer,
intent(out) :: k2
1510 integer npiv2,npiv_son2
1512 DOUBLE PRECISION mem_master, mem_slave
1528 if ((nfront-npiv).lt.npropmap.OR.
1529 & (npropmap.le.0) )
then
1534 npiv_son2 =
max(npiv/2,1)
1544 if(npiv2 .eq. 0)
then
1548 mem_slave = dble(nfront-npiv)*dble(nfront)/
1549 & dble(npropmap-kk+1)
1550 mem_master = dble(npiv2)*dble(nfront)
1552 & (1.0d0 +dble(
cv_keep(62))/dble(100))*mem_slave)
then
1563 & lnpivsplit, npivsplit,
1566 integer,
intent(in)::nfront,npiv
1567 integer,
intent(in):: k
1568 integer,
intent(in)::lnpivsplit
1569 integer,
intent(in)::npivsplit(lnpivsplit)
1570 integer,
intent(in):: inode
1571 integer,
intent(out)::istat
1572 integer lev,npiv_father,
1573 & npiv_son,nfrontk,npivk,next_father
1574 DOUBLE PRECISION:: ncostm,ncostw,ncostm_ison,ncostw_ison,
1575 & ncostm_ifather,ncostw_ifather
1576 integer::ison,ifather
1577 character (len=48):: subname
1579 subname=
'SPLITNODE_UPDATE'
1580 npiv_son = npivsplit(1)
1582 next_father = -frere(ison)
1588 & ncostw_ison,ncostm_ison)
1596 ifather = next_father
1597 next_father = -frere(ifather)
1598 npiv_son= abs(npivsplit(lev))
1599 npiv_father=abs(npivsplit(lev+1))
1601 & ncostw_ifather,ncostm_ifather)
1613 &
write(
cv_lp,*)
'PROPMAP4SPLIT error in ',subname
1618 nfrontk = nfrontk-npiv_son
1619 npivk = npivk - npiv_son
1622 if (npivk .ne. npiv_father)
then
1623 write(*,*)
"Error 1 in MUMPS_SPLITNODE_UPDATE"
1629 &
write(
cv_lp,*)
'PROPMAP4SPLIT error in ',subname
1640 integer,
intent(in) ::
1658 integer,
intent(out)::istat
1660 character (len=48):: subname
1661 integer,
external :: MUMPS_ENCODE_TPN_IPROC
1663 subname=
'ENCODE_PROCNODE'
1681 &
write(
cv_lp,*)
'Error in ',subname
1703 integer,
intent(in)::ifather
1704 integer,
intent(out)::istat
1705 integer in,son,oldl0end
1706 logical father_has_sons
1707 character (len=48):: subname
1709 subname=
'FATHSON_REPLACE'
1710 father_has_sons=.true.
1718 father_has_sons=.false.
1722 elseif(father_has_sons)
then
1731 if (father_has_sons)
then
1756 &
write(
cv_lp,*)
'Error reported by MUMPS_SORT_MSORT in',
1769 &
'Error reported by MUMPS_SORT_MMERGE in',
1779 & workload,memused,proc,istat,respect_prop)
1782 integer,
intent(in)::inode,map_strat
1783 DOUBLE PRECISION,
intent(in)::work,mem
1784 DOUBLE PRECISION,
dimension(:),
intent(inout)::workload, memused
1785 integer,
intent(out):: proc,istat
1786 logical,
intent(in),
OPTIONAL::respect_prop
1788 logical respect_proportional
1789 DOUBLE PRECISION dummy
1790 character (len=48):: subname
1792 respect_proportional=.false.
1793 if(
present(respect_prop)) respect_proportional=respect_prop
1794 subname=
'FIND_BEST_PROC'
1801 & ((.NOT.respect_proportional)
1805 & (((workload(i).lt.dummy).AND.
1808 & ((memused(i).lt.dummy).AND.
1824 if (proc.ne.-1)
then
1825 workload(proc)=workload(proc)+work
1826 memused(proc)=memused(proc)+mem
1832 & thislayer,nmb_thislayer,istat)
1834 integer,
intent(in)::nmb
1835 integer,
intent(out) :: thislayer(:)
1836 integer,
intent(out) :: nmb_thislayer,istat
1838 character (len=48):: subname
1840 subname=
'FIND_THISLAYER'
1846 nmb_thislayer=nmb_thislayer+1
1849 &
write(
cv_lp,*)
'Problem with nmb_thislayer in ',subname
1852 thislayer(nmb_thislayer)=i
1859 & nmb_thislayer,cont,istat)
1861 integer,
intent(in)::startlayer,nmb_thislayer
1862 integer,
intent(in)::thislayer(:)
1863 logical,
intent(inout)::cont
1864 integer,
intent(out)::istat
1866 integer il,i,current,in,ifather
1867 logical father_valid,upper_layer_exists
1868 character (len=48):: subname
1870 subname=
'HIGHER_LAYER'
1871 if(.NOT.cont)
return
1872 if(startlayer.lt.1)
return
1873 current=startlayer-1
1874 visited = -current-1
1875 upper_layer_exists=.false.
1876 if (current.eq.0)
then
1880 upper_layer_exists=.true.
1886 do il=1,nmb_thislayer
1901 write(6,*)
' Internal error 1 in MUMPS_HIGHER_LAYER'
1907 do il=1,nmb_thislayer
1923 write(6,*)
' Internal error 1 in MUMPS_HIGHER_LAYER',
1934 father_valid=.false.
1944 if (.not.father_valid .or.
cv_frere(in).gt.0)
then
1957 father_valid=.false.
1963 father_valid=.false.
1971 if(father_valid)
then
1973 upper_layer_exists=.true.
1976 if (upper_layer_exists)
then
1984 do il=1,nmb_thislayer
1992 & frere,fils,nfsiz,ne,keep,KEEP8,icntl,info,
1993 & procnode,ssarbr,peak,istat
1994 & , SIZEOFBLOCKS, LSIZEOFBLOCKS
1997 integer,
intent(in)::n,slavef
1998 integer,
intent(in),
TARGET:: frere(n),fils(n),nfsiz(n),ne(n),
1999 & keep(500),icntl(60),info(80),
2000 & procnode(n),ssarbr(n)
2001 INTEGER(8),
intent(in),
TARGET:: KEEP8(150)
2002 integer,
intent(out)::istat
2003 integer,
intent(in) :: LSIZEOFBLOCKS
2004 integer,
intent(in),
TARGET :: SIZEOFBLOCKS(LSIZEOFBLOCKS)
2006 DOUBLE PRECISION peak
2007 character (len=48):: subname
2023 &
'Warning in mumps_static_mapping : splitting is set off'
2028 &
'warning in mumps_static_mapping : keep(83) reset to 0'
2031 if(slavef.gt.1)
then
2041 &
write(
cv_lp,*)
'Problem with bit size in ',subname
2054 if (allocok.gt.0)
then
2059 &
write(
cv_lp,*)
'memory allocation error in ',subname
2064 &
write(
cv_lp,*)
' No splitting during static mapping '
2077 if((keep(28).gt.n).OR.(keep(28).lt.0))
then
2079 &
write(
cv_lp,*)
'problem with nsteps in ',subname
2092 cv_relax=dble(1) + dble(
max(0,keep(68)))/dble(100)
2114 integer,
intent(out)::istat
2115 integer i,allocok,inode,in,inoderoot,ierr,maxcut
2116 character (len=48):: subname
2125 &
write(
cv_lp,*)
'Memory deallocation error in ',subname
2131 &
write(
cv_lp,*)
'problem with maxnsteps in ',subname
2148 if (inode.ne.inoderoot)
then
2167 &
write(
cv_lp,*)
'problem with maxnodenmb in ',subname
2173 if (allocok.gt.0)
then
2178 &
write(
cv_lp,*)
'memory allocation error in ',subname
2193 integer,
intent(in)::nfront,npiv
2195 if( (nfront - npiv >
cv_keep(9))
2196 & .and. ((npiv >
cv_keep(4)).or.(.true.))
2202 integer,
intent(out)::istat
2203 integer i,ierr,inode
2206 character (len=48):: err_rep,subname
2207 logical use_geist_ng_replace, skiparrangeL0
2209 INTEGER CURRENT_SIZE_L0
2227 skiparrangel0 = .false
2228 do while(.not.accepted)
2234 IF ( ( (current_size_l0.LT.minsize_l0)
2235 & .OR. skiparrangel0
2247 &
write(
cv_lp,*)
'Error reported by ',err_rep,
' in ',subname
2257 &
write(
cv_lp,*)
'Error reported by '' in ',subname
2264 & skiparrangel0 = .NOT.skiparrangel0
2270 err_rep=
'MAX_TCOST_L0'
2272 use_geist_ng_replace = .true.
2273 if(use_geist_ng_replace)
then
2274 err_rep=
'FATHSON_REPLACE'
2278 elseif(ierr.ne.0)
then
2281 &
'Error rep. by ',err_rep,
' in ',subname
2295 err_rep=
'LIST2LAYER'
2299 &
write(
cv_lp,*)
'Error reported by ',err_rep,
' in ',subname
2303 err_rep=
'MAKE_PROPMAP'
2307 &
write(
cv_lp,*)
'Error reported by ',err_rep,
' in ',subname
2317 &
write(
cv_lp,*)
'Error reported by ',err_rep,
' in ',subname
2336 integer,
intent(out)::istat
2337 character (len=48):: subname
2340 subname=
'LIST2LAYER'
2358 integer,
intent(out)::istat
2359 integer ,pctr,pctr2,ierr
2360 character (len=48):: subname
2361 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: procindex
2363 subname =
"MUMPS_MAKE_PROPMAP"
2366 IF (allocok > 0)
THEN
2371 &
write(
cv_lp,*)
'Memory allocation error in ',subname
2380 &
'MUMPS_BIT_SET signalled error to'
2391 &
write(
cv_lp,*)
'PROPMAP_INIT signalled error to'
2401 &
'PROPMAP signalled error to',subname
2409 &
'MOD_PROPMAP signalled error to',subname
2418 DEALLOCATE(procindex)
2422 & nmb_thislayer,map_strat,istat)
2424 integer,
intent(in)::layernmb,thislayer(:),
2425 & nmb_thislayer,map_strat
2426 integer,
intent(out)::istat
2427 integer i,inode,j,k,ierr,nmb,aux_int,nmb_cand_needed
2428 DOUBLE PRECISION aux_flop,aux_mem
2429 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: candid, sorted_nmb
2430 DOUBLE PRECISION,
ALLOCATABLE,
DIMENSION(:) ::
2431 & sorted_costw, sorted_costm, old_workload, old_memused
2432 character (len=48):: err_rep,subname
2443 if((layernmb.lt.0).or.(layernmb.gt.
cv_maxlayer))
return
2446 ALLOCATE(candid(
cv_slavef), sorted_nmb(2*nmb_thislayer),
2447 & sorted_costw(2*nmb_thislayer), sorted_costm(2*nmb_thislayer),
2449 if (allocok.gt.0)
then
2454 &
write(
cv_lp,*)
'memory allocation error in ',subname
2457 do i=1,nmb_thislayer
2469 do i=1,nmb_thislayer
2473 sorted_nmb(nmb)=inode
2492 &
write(
cv_lp,*)
'Error in ',subname
2495 if(sorted_costw(nmb).lt.
cv_ncostw(inode))
then
2498 sorted_nmb(nmb)=inode
2500 sorted_nmb(nmb)=-inode
2506 &
write(
cv_lp,*)
'Unknown node type. Error in ',subname
2512 & sorted_costw(1:nmb),sorted_costm(1:nmb))
2515 & sorted_costm(1:nmb),sorted_costw(1:nmb))
2520 &
'Error reported by MUMPS_SORT_MSORT in ',subname
2525 aux_int=sorted_nmb(i)
2526 aux_flop=sorted_costw(i)
2527 aux_mem=sorted_costm(i)
2529 if (aux_int.lt.0)
then
2532 if(use_propmap)
then
2535 & inode=inode,istat=ierr)
2544 &
'Error reported by ',err_rep,
' in ',subname
2561 &
write(
cv_lp,*)
'Error in ',subname
2564 do while((k.le.
cv_slavef).and.(nmb_cand_needed.gt.0))
2584 nmb_cand_needed=nmb_cand_needed-1
2590 &
write(
cv_lp,*)
'Error in ',subname
2595 if(nmb_cand_needed.gt.0)
then
2597 &
write(
cv_lp,*)
'Error in ',subname
2628 &
write(
cv_lp,*)
'Error in ',subname
2636 if(use_propmap)
then
2639 & inode=inode,istat=ierr)
2648 &
'Error reported by ',err_rep,
' in ',subname
2674 &
write(
cv_lp,*)
'Inconsist data in ',subname
2714 &
write(
cv_lp,*)
'Error in ',subname
2734 &
write(
cv_lp,*)
'Error in ',subname
2741 do while((k.le.
cv_slavef).and.(nmb_cand_needed.gt.0))
2763 nmb_cand_needed=nmb_cand_needed-1
2769 &
write(
cv_lp,*)
'Error in ',subname
2774 if(nmb_cand_needed.gt.0)
then
2776 &
write(
cv_lp,*)
'Error in ',subname
2789 if(candid(j).gt.0)
then
2794 if (k.ne.nmb_cand_needed)
then
2796 &
write(
cv_lp,*)
'Error in ',subname
2806 DEALLOCATE(candid, sorted_nmb, sorted_costw, sorted_costm,
2807 & old_workload, old_memused)
2812 integer,
intent(in)::inode,procnmb
2813 integer,
intent(inout)::procnode(:)
2815 procnode(inode)=procnmb
2816 if (
cv_fils(inode).eq.0)
return
2819 procnode(in)=procnmb
2831 integer,
intent(inout)::procnode(:)
2836 procnmb=procnode(inode)
2844 integer candid,inode,index,i,j,layernmb,master,nmbcand,swapper,
2845 & totalnmb,node_of_master,node_of_candid,node_of_swapper
2846 DOUBLE PRECISION::mastermem,slavemem,maxmem
2847 logical swapthem,cand_better_master_arch,cand_better_swapper_arch
2854 if(
ke69 .gt. 1)
then
2858 if (node_of_master .lt. 0 )
then
2859 if(
cv_mp.gt.0)
write(
cv_mp,*)
'node_of_master_not found'
2861 node_of_swapper = node_of_master
2870 if(
ke69 .gt. 1)
then
2872 if (node_of_candid .lt. 0 )
then
2874 &
'node_of_candid_not found'
2877 if(
ke69 .le. 1)
then
2878 if((slavemem.lt.mastermem) .and.
2884 cand_better_master_arch = (
2886 & (slavemem.lt.mastermem) .or.
2891 cand_better_swapper_arch = (
2898 if(cand_better_master_arch .and.
2899 & cand_better_swapper_arch )
then
2901 node_of_swapper = node_of_candid
2906 if(swapper.ne.master)
then
2910 if(mastermem.le.mastermem-
cv_ncostm(inode)
2923 if(
ke69 .gt. 1)
then
2928 if(.NOT.swapthem) cycle
2944 totalnmb = totalnmb+1
2951 DOUBLE PRECISION,
intent(in),
OPTIONAL::maxwork(cv_slavef),
2953 integer,
intent(out)::istat
2955 DOUBLE PRECISION dummy
2956 character (len=48):: subname
2959 if(
present(maxwork))
then
2964 if(
present(maxmem))
then
2975 if (allocok.gt.0)
then
2980 &
write(
cv_lp,*)'memory allocation error in
',subname
2983 allocate(work_per_proc(cv_slavef),id_son(cv_slavef),STAT=allocok)
2984.gt.
if (allocok0) then
2985 cv_info(1) = cv_error_memalloc
2986 cv_info(2) = 2*cv_slavef
2987 istat = cv_error_memalloc
2989 & write(cv_lp,*)'memory allocation error in
',subname
2993 cv_proc_workload(i)=dble(0)
2994 if(cv_constr_work) then
2995 cv_proc_maxwork(i)=maxwork(i)
2997 cv_proc_maxwork(i)=(huge(dummy))
2999 cv_proc_memused(i)=dble(0)
3000 if(cv_constr_mem) then
3001 cv_proc_maxmem(i)=maxmem(i)
3003 cv_proc_maxmem(i)=(huge(dummy))
3011 end subroutine MUMPS_PROCINIT
3012 recursive subroutine MUMPS_MOD_PROPMAP
3013 & (inode_entry,ctr_entry,istat)
3015 integer, intent(in)::inode_entry,ctr_entry
3016 integer, intent(inout)::istat
3017 integer::j,k,in,in1,ierr,son,nmb_procs_inode,nmb_sons_inode,
3019 INTEGER, ALLOCATABLE, DIMENSION(:) :: procs4son
3021 character (len=48):: subname
3022 DOUBLE PRECISION :: relative_weight,costs_sons
3023 DOUBLE PRECISION :: loc_relax
3025 INTEGER :: inode,ctr
3028 integer nmb_propmap_strict,share2,procsrest,current2
3030 INTEGER, ALLOCATABLE, DIMENSION(:) :: procs_inode
3040.eq.
if(cv_frere(inode)cv_n+1) return
3041 subname='mod_propmap
'
3042.NOT.
if(associated(cv_prop_map(inode)%ind_proc)) return
3044 costs_sons = dble(0)
3045.eq.
force_cand=(mod(cv_keep(24),2)0)
3047.gt.
do while (cv_fils(in)0)
3050.eq.
if (cv_fils(in)0) then
3057 nmb_sons_inode = nmb_sons_inode + 1
3058.le.
if(cv_tcostw(in)0.0D0) then
3060 & write(cv_lp,*)'subtree costs
for ',in,
3061 & ' should be positive in
',subname
3064.ne.
if (cv_keep(67) 1) then
3065 costs_sons = costs_sons + cv_tcostw(in)
3067 costs_sons = costs_sons + cv_tcostm(in)
3071.le.
if(costs_sons0D0) then
3073 & write(cv_lp,*)'error in
',subname
3077.eq..AND.
if ((cv_nodelayer(inode)0)
3078.ne.
& (cv_frere(inode)cv_n+1)) then
3082.eq.
IF (nmb_sons_inode1) THEN
3083.NOT.
if(associated(cv_prop_map(son)%ind_proc)) then
3084 WRITE(6,*) son, " cv_prop_map(son)%ind_proc not associated "
3086 cv_prop_map(son)%ind_proc = cv_prop_map(inode)%ind_proc
3090 ALLOCATE(procs_inode(cv_slavef),
3091 & procs4son(cv_size_ind_proc),stat=allocok)
3092.gt.
if (allocok0) then
3093 cv_info(1) = cv_error_memalloc
3094 cv_info(2) = cv_size_ind_proc + cv_slavef
3095 istat = cv_error_memalloc
3097 & write(cv_lp,*)'memory allocation error in
',subname
3103 if( MUMPS_BIT_GET4PROC(inode,j))then
3104 nmb_procs_inode = nmb_procs_inode + 1
3110 call MUMPS_GET_IDP1_PROC(j-1,
3115 if(MUMPS_BIT_GET4PROC(inode,k69onid))then
3117 procs_inode(i)=k69onid
3120.ne.
if(inmb_procs_inode)then
3122 & write(cv_lp,*)'error in
',subname
3126.eq.
if(nmb_procs_inode0) then
3128 & write(cv_lp,*)'error in
',subname
3132 depth= max(cv_mixed_strat_bound - ctr,0)
3133.eq..OR..eq.
if ((cv_keep(24)16)(cv_keep(24)18)) then
3134.ge.
if(depthcv_mixed_strat_bound) then
3137 loc_relax = dble(1) +
3138 & max(dble(cv_keep(77))/dble(100), dble(0))
3147.ge..AND.
if( ( (nmb_sons_inodenmb_procs_inode)
3148.LT.
& (nmb_procs_inode4) )
3149.OR..EQ.
& ( nmb_sons_inode1 )
3151 procs4son = cv_prop_map(inode)%ind_proc
3152.EQ.
IF (nmb_sons_inode1) UPDATE_CTR=.FALSE.
3154 do k=1,cv_size_ind_proc
3155 do j=0,cv_bitsize_of_int-1
3156 procs4son(k)=ibclr(procs4son(k),j)
3159 nmb_propmap_strict=0
3161 if( MUMPS_BIT_GET4PROC(in,k)) then
3162 nmb_propmap_strict=nmb_propmap_strict+1
3163 call MUMPS_BIT_SET(procs4son,k,ierr)
3166.gt.
if(costs_sons0.0D0) then
3167.ne.
if (cv_keep(67) 1) then
3168 relative_weight=cv_tcostw(in)/costs_sons
3170 relative_weight=cv_tcostm(in)/costs_sons
3173 relative_weight=0.0D0
3175 current = nmb_propmap_strict
3177 & max(0,nint(relative_weight*(loc_relax-dble(1))*
3178 & dble(nmb_procs_inode)))
3179 procsrest=nmb_procs_inode - nmb_propmap_strict
3180 share2=min(share2,procsrest)
3181 CALL random_number(Y)
3182 current2=int(dble(Y)*dble(procsrest))
3185.gt..and..le.
do while((share20)(i2))
3186 do j=1,nmb_procs_inode
3187.le.
if(share20) exit
3188 k69onid = procs_inode(j)
3189.AND.
if(( MUMPS_BIT_GET4PROC(inode,k69onid))
3190.NOT.
& (MUMPS_BIT_GET(procs4son,k69onid))) then
3191.ge.
if(kcurrent2)then
3192 call MUMPS_BIT_SET(procs4son,k69onid,ierr)
3194.gt.
if(cv_lp0)write(cv_lp,*)
3195 & 'bit_set signalled error to
',subname
3206.ne.
if(share20) then
3207.gt.
if(cv_lp0) write(cv_lp,*)
3208 & 'error reported in
',subname
3214 cv_prop_map(in1)%ind_proc=procs4son
3215 IF (UPDATE_CTR) THEN
3216 call MUMPS_MOD_PROPMAP(in1,ctr-1,ierr)
3218 call MUMPS_MOD_PROPMAP(in1,ctr,ierr)
3221.gt.
if(cv_lp0) write(cv_lp,*)
3222 & 'error reported in
',subname
3230 if (allocated(procs_inode)) DEALLOCATE(procs_inode)
3231 if (allocated(procs4son)) DEALLOCATE(procs4son)
3233 end subroutine MUMPS_MOD_PROPMAP
3234 recursive subroutine MUMPS_PROPMAP(inode_entry, ctr_entry, istat)
3236 integer, intent(in)::inode_entry,ctr_entry
3237 integer, intent(inout)::istat
3238 integer::j,k,in,in1,ierr,son,nmb_procs_inode,nmb_sons_inode,
3239 & share,current,offset,
3240 & in_tmp,nfront,npiv,ncb,
3241 & keep48_loc,min_cand_needed
3242 integer, dimension(:), allocatable :: procs4son
3243 character (len=48):: subname
3244 DOUBLE PRECISION :: relative_weight,costs_sons, shtemp
3245 DOUBLE PRECISION :: costs_sons_real
3246 DOUBLE PRECISION :: PartofaProc
3247 LOGICAL :: SkipSmallNodes
3248 PARAMETER (PartofaProc=0.01D0)
3249 DOUBLE PRECISION :: loc_relax
3252 integer MUMPS_REG_GETKMAX, MUMPS_BLOC2_GET_NSLAVESMIN
3253 external MUMPS_REG_GETKMAX, MUMPS_BLOC2_GET_NSLAVESMIN
3255 integer nmb_propmap_strict,share2,procsrest,current2
3256 integer k69onid,nb_free_procs,local_son_indice,nb_procs_for_sons,
3257 & ptr_upper_ro_procs
3258 integer :: inode, ctr
3260 logical upper_round_off,are_sons_treated
3261 DOUBLE PRECISION tmp_cost
3270.eq.
if(cv_frere(inode)cv_n+1) return
3274 if( MUMPS_BIT_GET4PROC(inode,j))
3275 & nmb_procs_inode = nmb_procs_inode + 1
3277.eq.
if(nmb_procs_inode0) then
3279 & write(cv_lp,*)'error in
',subname
3283.eq..AND.
if ((cv_nodelayer(inode)0)
3284.ne.
& (cv_frere(inode)cv_n+1)) then
3288 ptr_upper_ro_procs=1
3289 work_per_proc(1:cv_slavef)=0.0D0
3290 id_son(1:cv_slavef)=0
3292 costs_sons = dble(0)
3293.eq.
force_cand=(mod(cv_keep(24),2)0)
3296.gt.
do while (cv_fils(in)0)
3299.eq.
if (cv_fils(in)0) then
3306 nmb_sons_inode = nmb_sons_inode + 1
3307.le.
if(cv_tcostw(in)0.0D0) then
3309 & write(cv_lp,*)'subtree costs
for ',in,
3310 & ' should be positive in
',subname
3313.ne.
if (cv_keep(67) 1) then
3314 costs_sons = costs_sons + cv_tcostw(in)
3316 costs_sons = costs_sons + cv_tcostm(in)
3320.eq.
IF (nmb_sons_inode1) THEN
3321.NOT.
if(associated(cv_prop_map(son)%ind_proc)) then
3322 call MUMPS_PROPMAP_INIT(son,ierr)
3325 & write(cv_lp,*)'propmap_init signalled error to
'
3332 cv_prop_map(son)%ind_proc = cv_prop_map(inode)%ind_proc
3336 costs_sons_real = costs_sons
3337 SkipSmallNodes = .true.
3338.gt.
IF (costs_sons_real0.0D0) then
3341.ne.
if (cv_keep(67) 1) then
3342 relative_weight=cv_tcostw(in)/costs_sons_real
3344 relative_weight=cv_tcostm(in)/costs_sons_real
3346 shtemp = relative_weight*dble(nmb_procs_inode)
3347.lt.
IF (shtempPartofaProc) THEN
3348.ne.
if (cv_keep(67) 1) then
3349 costs_sons = costs_sons - cv_tcostw(in)
3351 costs_sons = costs_sons - cv_tcostm(in)
3356.LT.
IF (costs_sons PartofaProc*costs_sons_real) THEN
3357 costs_sons = costs_sons_real
3358 SkipSmallNodes = .false.
3361.le.
if(costs_sons0.0D0) then
3363 & write(cv_lp,*)'error in
',subname
3367.le.
if(cv_relax0.0D0) then
3369 & write(cv_lp,*)'error in
',subname,'. wrong
cv_relax'
3372 ALLOCATE(procs4son(cv_size_ind_proc),stat=allocok)
3373.GT.
IF (allocok 0) THEN
3374 cv_info(1) = cv_error_memalloc
3375 cv_info(2) = cv_size_ind_proc
3376 istat = cv_error_memalloc
3379 & 'memory allocation error in
',subname
3382 depth= max(cv_n - ctr,0)
3383.eq.
if(cv_keep(24)8) then
3384 loc_relax = cv_relax
3385.eq..OR..eq.
elseif ((cv_keep(24)16)(cv_keep(24)18)) then
3386 loc_relax = cv_relax
3387.eq.
elseif (cv_keep(24)10) then
3388 loc_relax = cv_relax
3389.eq..OR..eq.
elseif ((cv_keep(24)12)(cv_keep(24)14)) then
3390.ge.
if(depthcv_mixed_strat_bound) then
3391 loc_relax = cv_relax
3393 loc_relax = cv_relax +
3394 & max(dble(cv_keep(77))/dble(100), dble(0))
3401 upper_round_off=.FALSE.
3402 are_sons_treated=.TRUE.
3404.ge..AND.
if( (nmb_sons_inodenmb_procs_inode)
3405.LT.
& (nmb_procs_inode4) ) then
3406 procs4son = cv_prop_map(inode)%ind_proc
3407 are_sons_treated=.FALSE.
3408 nb_procs_for_sons=nmb_procs_inode
3409 nmb_propmap_strict=nmb_procs_inode
3410.LE.
elseif(nmb_procs_inode cv_keep(83)) then
3411 procs4son = cv_prop_map(inode)%ind_proc
3412 are_sons_treated=.FALSE.
3413 nb_procs_for_sons=nmb_procs_inode
3414 nmb_propmap_strict=nmb_procs_inode
3416 do k=1,cv_size_ind_proc
3417 do j=0,cv_bitsize_of_int-1
3418 procs4son(k)=ibclr(procs4son(k),j)
3421.gt.
if(costs_sons0.0D0) then
3422.ne.
if (cv_keep(67) 1) then
3423 relative_weight=cv_tcostw(in)/costs_sons
3425 relative_weight=cv_tcostm(in)/costs_sons
3428 relative_weight=dble(0)
3430 shtemp = relative_weight*dble(nmb_procs_inode)
3431.LT.
IF ( (shtempPartofaProc)
3432.AND.
& ( SkipSmallNodes ) ) THEN
3434 do j=current,cv_slavef
3436 call MUMPS_GET_IDP1_PROC(j-1,k69onid,ierr)
3440 if( MUMPS_BIT_GET4PROC(inode,k69onid)) then
3441 call MUMPS_BIT_SET(procs4son,k69onid,ierr)
3443.gt.
if(cv_lp0)write(cv_lp,*)
3444 & 'bit_set signalled error to
',subname
3452.gt.
if (share0) then
3455 call MUMPS_GET_IDP1_PROC(j-1,k69onid,ierr)
3459 if( MUMPS_BIT_GET4PROC(inode,k69onid)) then
3460 call MUMPS_BIT_SET(procs4son,k69onid,ierr)
3462.gt.
if(cv_lp0)write(cv_lp,*)
3463 & 'bit_set signalled error to
',subname
3473.gt.
if(cv_lp0) write(cv_lp,*)
3474 & 'error reported in
',subname
3477.NOT.
if(associated(cv_prop_map(in)%ind_proc)) then
3478 call MUMPS_PROPMAP_INIT(in,ierr)
3481 & write(cv_lp,*)'propmap_init signalled error to
'
3488 cv_prop_map(in)%ind_proc = procs4son
3492 share = max(1,nint(shtemp))
3493.ge.
if (dble(share)shtemp) then
3494 upper_round_off=.TRUE.
3496 upper_round_off = .FALSE.
3498 share=min(share,nmb_procs_inode)
3499 nmb_propmap_strict=share
3500 nb_procs_for_sons=nb_procs_for_sons+nmb_propmap_strict
3502 do j=current,cv_slavef
3504 call MUMPS_GET_IDP1_PROC(j-1,k69onid,ierr)
3508 if( MUMPS_BIT_GET4PROC(inode,k69onid)) then
3509 call MUMPS_BIT_SET(procs4son,k69onid,ierr)
3511.gt.
if(cv_lp0)write(cv_lp,*)
3512 & 'bit_set signalled error to
',subname
3518 current = j + offset
3519.gt.
if(currentcv_slavef) current = 1
3527 call MUMPS_GET_IDP1_PROC(j-1,k69onid,ierr)
3531 if( MUMPS_BIT_GET4PROC(inode,k69onid)) then
3532 call MUMPS_BIT_SET(procs4son,k69onid,ierr)
3534.gt.
if(cv_lp0)write(cv_lp,*)
3535 & 'bit_set signalled error to
',subname
3541 current = j + offset
3542.gt.
if(currentcv_slavef) current = 1
3549.gt.
if(cv_lp0) write(cv_lp,*)
3550 & 'error reported in
',subname
3553.not.
if(upper_round_off)then
3554.lt.
if(local_son_indicecv_slavef)then
3555 id_son(local_son_indice)=in
3556.ne.
if ( cv_keep(67) 1 ) then
3557 work_per_proc(local_son_indice)=cv_tcostw(in)/
3558 & dble(nmb_propmap_strict)
3560 work_per_proc(local_son_indice)=cv_tcostm(in)/
3561 & dble(nmb_propmap_strict)
3563 local_son_indice=local_son_indice+1
3564.eq.
if(local_son_indicecv_slavef)then
3565 CALL MUMPS_SORT_MSORT(ierr,cv_slavef,id_son,
3577.ne.
if (cv_keep(67) 1) then
3578 tmp_cost=cv_tcostw(in)/dble(nmb_propmap_strict)
3580 tmp_cost=cv_tcostm(in)/dble(nmb_propmap_strict)
3582.ge.
do while(current21)
3583.lt.
if(tmp_costwork_per_proc(current2))exit
3586.ne.
if(current2cv_slavef)then
3587.eq.
if(current20)then
3590 do j=cv_slavef-1,current2,-1
3591 id_son(j+1)=id_son(j)
3592 work_per_proc(j+1)=work_per_proc(j)
3595 work_per_proc(current2)=tmp_cost
3599 upper_round_off=.FALSE.
3601.NOT.
if(associated(cv_prop_map(in)%ind_proc)) then
3602 call MUMPS_PROPMAP_INIT(in,ierr)
3605 & write(cv_lp,*)'propmap_init signalled error to
'
3611 cv_prop_map(in)%ind_proc = procs4son
3614 if(are_sons_treated)then
3615.ne.
if(nb_procs_for_sonsnmb_procs_inode)then
3616 do j=1,nmb_procs_inode-nb_procs_for_sons
3617 procs4son=cv_prop_map(id_son(j))%ind_proc
3618.le.
do while(currentcv_slavef)
3620 call MUMPS_GET_IDP1_PROC(current-1,k69onid,ierr)
3624.NOT.
if(MUMPS_BIT_GET4PROC(inode,k69onid)) then
3630 call MUMPS_BIT_SET(procs4son,k69onid,ierr)
3631 cv_prop_map(id_son(j))%ind_proc=procs4son
3633 ptr_upper_ro_procs=min(j,nmb_procs_inode-nb_procs_for_sons)
3639.ge..AND.
if( (nmb_sons_inodenmb_procs_inode)
3640.LT.
& (nmb_procs_inode4) ) then
3641 procs4son = cv_prop_map(inode)%ind_proc
3642.LE.
elseif(nmb_procs_inode cv_keep(83)) then
3643 procs4son = cv_prop_map(inode)%ind_proc
3645 procs4son = cv_prop_map(in)%ind_proc
3647 nfront=cv_nfsiz(in_tmp)
3650.gt.
do while(in_tmp0)
3652 npiv = npiv + cv_SIZEOFBLOCKS(in_tmp)
3656 in_tmp=cv_fils(in_tmp)
3659 if (force_cand) then
3660 if (cv_keep(50) == 0) then
3665.EQ.
if (cv_keep(48)5) keep48_loc = 5
3667 & MUMPS_BLOC2_GET_NSLAVESMIN
3668 & (cv_slavef, keep48_loc,cv_keep8(21),
3671 & cv_keep(375), cv_keep(119))
3672 min_cand_needed=min(cv_slavef,min_cand_needed+1)
3676 min_cand_needed = max(min_cand_needed, cv_keep(91))
3677.gt.
if(costs_sons0.0D0) then
3678.ne.
if (cv_keep(67) 1) then
3679 relative_weight=cv_tcostw(in)/costs_sons
3681 relative_weight=cv_tcostm(in)/costs_sons
3684 relative_weight=dble(0)
3686 nmb_propmap_strict=0
3688 if( MUMPS_BIT_GET(procs4son,k)) then
3689 nmb_propmap_strict=nmb_propmap_strict+1
3694 & max(0,nint(relative_weight*(loc_relax-dble(1))*
3695 & dble(nmb_procs_inode)))
3696 share2 = max(share2, min_cand_needed -nmb_propmap_strict,
3697 & (cv_keep(83)/2) - nmb_propmap_strict)
3698 procsrest=nmb_procs_inode - nmb_propmap_strict
3699 share2=min(share2,procsrest)
3701 CALL random_number(Y)
3702 current2 =int(dble(Y)*dble(procsrest))
3705.le.
if(share20) exit
3707 call MUMPS_GET_IDP1_PROC(j-1,k69onid,ierr)
3711.AND.
if(( MUMPS_BIT_GET4PROC(inode,k69onid))
3712.NOT.
& (MUMPS_BIT_GET(procs4son,k69onid))) then
3713.ge.
if(nb_free_procscurrent2)then
3714 call MUMPS_BIT_SET(procs4son,k69onid,ierr)
3716.gt.
if(cv_lp0)write(cv_lp,*)
3717 & 'bit_set signalled error to
',subname
3723 nb_free_procs=nb_free_procs+1
3726.gt.
if(share20) then
3728.le.
if(share20) exit
3730 call MUMPS_GET_IDP1_PROC(j-1,k69onid,ierr)
3734.AND.
if(( MUMPS_BIT_GET4PROC(inode,k69onid))
3735.NOT.
& (MUMPS_BIT_GET(procs4son,k69onid))) then
3736 call MUMPS_BIT_SET(procs4son,k69onid,ierr)
3738.gt.
if(cv_lp0)write(cv_lp,*)
3739 & 'bit_set signalled error to
',subname
3747.ne.
if(share20) then
3748.gt.
if(cv_lp0) write(cv_lp,*)
3749 & 'error reported in
',subname
3755 cv_prop_map(in1)%ind_proc = procs4son
3756.EQ.
IF (nmb_sons_inode1) DEALLOCATE(procs4son)
3757 call MUMPS_PROPMAP(in1,ctr-1,ierr)
3759.gt.
if(cv_lp0) write(cv_lp,*)
3760 & 'error reported in
',subname
3768 if (allocated(procs4son)) DEALLOCATE(procs4son)
3770 end subroutine MUMPS_PROPMAP
3771 subroutine MUMPS_PROPMAP_INIT(inode,istat)
3773 integer, intent(in)::inode
3774 integer, intent(out)::istat
3776 character (len=48):: subname
3778.eq.
if(cv_frere(inode)cv_n+1) return
3779 subname='propmap_init
'
3781 & cv_prop_map(inode)%ind_proc)) then
3782 allocate(cv_prop_map(inode)%ind_proc
3783 & (cv_size_ind_proc),STAT=allocok)
3784.gt.
if (allocok0) then
3785 cv_info(1) = cv_error_memalloc
3786 cv_info(2) = cv_size_ind_proc
3787 istat = cv_error_memalloc
3790 & 'memory allocation error in
',subname
3794 do k=1,cv_size_ind_proc
3795 do j=0,cv_bitsize_of_int-1
3796 cv_prop_map(inode)%ind_proc(k)=
3797 & ibclr(cv_prop_map(inode)%ind_proc(k),j)
3802 end subroutine MUMPS_PROPMAP_INIT
3803 subroutine MUMPS_PROPMAP_TERM(inode,istat)
3804 integer,intent(in)::inode
3805 integer,intent(out)::istat
3807 character (len=48):: subname
3808 subname='propmap_term
'
3810 if(associated(cv_prop_map(inode)%ind_proc)) then
3811 deallocate(cv_prop_map(inode)%ind_proc, STAT=ierr)
3814 & write(cv_lp,*)'memory deallocation error in
', subname
3815 istat = cv_error_memdeloc
3818 nullify(cv_prop_map(inode)%ind_proc)
3822 end subroutine MUMPS_PROPMAP_TERM
3823 subroutine MUMPS_PROPMAP4SPLIT(inode,ifather,istat)
3825 integer,intent(in)::inode,ifather
3826 integer,intent(out)::istat
3827 character (len=48):: subname
3829 subname='propmap4split
'
3830.eq..OR..eq.
if((cv_frere(inode)cv_n+1)(cv_frere(ifather)cv_n+1)
3831.OR..NOT.
& (associated(cv_prop_map(inode)%ind_proc))) then
3833 & write(cv_lp,*)'tototo signalled error to
'
3837.NOT.
if(associated(cv_prop_map(ifather)%ind_proc)) then
3838 call MUMPS_PROPMAP_INIT(ifather,ierr)
3841 & write(cv_lp,*)'propmap_init signalled error to
'
3847 cv_prop_map(ifather)%ind_proc =
3848 & cv_prop_map(inode)%ind_proc
3851 end subroutine MUMPS_PROPMAP4SPLIT
3852 subroutine MUMPS_ROOTLIST(istat)
3854 integer,intent(out)::istat
3856 character (len=48):: subname
3859 allocate(cv_layerl0_array(cv_maxnsteps),
3860 & cv_layerl0_sorted_costw(cv_maxnsteps),STAT=allocok)
3861.gt.
if (allocok0) then
3862 cv_info(1) = cv_error_memalloc
3863 cv_info(2) = 12*cv_maxnsteps
3864 istat = cv_error_memalloc
3867 & 'memory allocation error in
',subname
3871 cv_layerl0_sorted_costw(i)=dble(0)
3872 cv_layerl0_array(i)=0
3874 cv_layerl0_start = 0
3876 layerL0_endforarrangeL0 = 0
3877.NOT..OR..NOT.
if ((associated(cv_tcostw))(associated(cv_tcostm)))
3880 & write(cv_lp,*)'error:tcost must be
allocated in
',subname
3885.eq.
if (cv_frere(i)0) then
3887 cv_layerl0_end=cv_layerl0_end+1
3888.GT.
IF (cv_tcostw(i)mincostw)
3889 & layerL0_endforarrangeL0 = layerL0_endforarrangeL0+1
3890 cv_layerl0_array(cv_layerl0_end)=i
3891 cv_layerl0_sorted_costw(cv_layerl0_end)=cv_tcostw(i)
3892 cv_costw_layer0=cv_costw_layer0 + cv_tcostw(i)
3893 cv_costm_layer0=cv_costm_layer0 + cv_tcostm(i)
3897.eq.
if(cv_nbsa0) then
3899 & write(cv_lp,*)'error:no root nodes in
',subname
3902 call MUMPS_SORT_MSORT(ierr,cv_layerl0_end-cv_layerl0_start+1,
3903 & cv_layerl0_array(cv_layerl0_start:cv_layerl0_end),
3904 & cv_layerl0_sorted_costw(cv_layerl0_start:cv_layerl0_end))
3905.ne.
IF (ierr 0) then
3912 cv_costw_total=cv_costw_layer0
3913 cv_costm_total=cv_costm_layer0
3916 end subroutine MUMPS_ROOTLIST
3917 subroutine MUMPS_SELECT_TYPE3(istat)
3919 integer,intent(out)::istat
3920 character (len=48):: subname
3921 subname='select_type3
'
3922 CALL MUMPS_SELECT_K38K20(cv_n, slavef, cv_mp, cv_icntl(13),
3923 & cv_keep(1), cv_frere(1), cv_nfsiz(1), istat)
3924.NE.
IF (istat 0) THEN
3927 & 'error: can
''t
select type 3 node in
',subname
3928.ne.
ELSE IF (cv_keep(38) 0) then
3929.eq..and.
IF(cv_nodelayer(cv_keep(38))0
3930.EQ.
& (cv_keep(60)0)) then
3933 cv_nodetype(cv_keep(38))=3
3937 end subroutine MUMPS_SELECT_TYPE3
3938 subroutine MUMPS_SETUP_CAND(istat)
3939 integer,intent(out):: istat
3940 integer :: i,dummy,layernmb,allocok
3941 integer :: montype, nbcand, inode
3942 character (len=48) :: subname
3944 subname='setup_cand
'
3947 if(MUMPS_IS_NODE_OF_TYPE2(i)) cv_nb_niv2=cv_nb_niv2+1
3949 cv_keep(56)=cv_nb_niv2
3950 nullify(cv_par2_nodes,cv_cand)
3951.GT.
if(cv_nb_niv20) then
3952 allocate(cv_par2_nodes(cv_nb_niv2),
3953 & cv_cand(cv_nb_niv2,cv_slavef+1),STAT=allocok)
3954.gt.
if (allocok0) then
3955 cv_info(1) = cv_error_memalloc
3956 cv_info(2) = cv_nb_niv2*(cv_slavef+2)
3957 istat = cv_error_memalloc
3960 & 'memory allocation error in
',subname
3966 do layernmb=1,cv_maxlayer
3967 do i=1,cv_layer_p2node(layernmb)%nmb_t2s
3968 inode = cv_layer_p2node(layernmb)%t2_nodenumbers(i)
3969 cv_par2_nodes(dummy)= inode
3970 nbcand = cv_layer_p2node(layernmb)%t2_cand(i,cv_slavef+1)
3971 cv_cand(dummy,:)=cv_layer_p2node(layernmb)%t2_cand(i,:)
3972 montype= cv_nodetype(inode)
3973.eq.
if (montypetsplit_beg) then
3974 CALL MUMPS_SETUP_CAND_CHAIN(cv_n, cv_nb_niv2,
3975 & cv_frere(1), cv_nodetype(1),
3976 & cv_par2_nodes(1), cv_procnode(1), cv_cand(1,1),
3978 & slavef, dummy, nbcand, istat)
3983.ne.
if(dummycv_nb_niv2+1) then
3985 & write(cv_lp,*)'error in
',subname,
3986 & ' : dummy =
',dummy,'nbniv2 =
',cv_nb_niv2
3992 end subroutine MUMPS_SETUP_CAND
3993 subroutine MUMPS_SORTPROCS(map_strat,workload,memused,
3996 integer,intent(in)::map_strat
3997 DOUBLE PRECISION,dimension(:),intent(in)::workload, memused
3998 integer, optional::inode,istat
3999 integer i,j,aux_int,nmb_procs,pos
4000 character (len=48):: subname
4001 logical enforce_prefsort
4003 logical,SAVE::init1 = .FALSE.
4004 logical,SAVE::init2 = .FALSE.
4006 enforce_prefsort=.TRUE.
4007 use_propmap=present(inode)
4008 if(present(istat))istat=-1
4009.ne..and.
if((map_stratcv_equilib_flops)
4010.ne.
& (map_stratcv_equilib_mem)) then
4012 & write(cv_lp,*)'error in
',subname
4019.not.
if (present(inode)) then
4025.lt.
if(((workload(cv_proc_sorted(j))
4026.AND.
& workload(cv_proc_sorted(i)))
4027.eq.
& (map_stratcv_equilib_flops))
4029.lt.
& ((memused(cv_proc_sorted(j))
4030.AND.
& memused(cv_proc_sorted(i)))
4031.eq.
& (map_stratcv_equilib_mem)))then
4032 aux_int=cv_proc_sorted(j)
4033 cv_proc_sorted(j)=cv_proc_sorted(i)
4034 cv_proc_sorted(i)=aux_int
4038 else if(present(inode)) then
4039 if (use_propmap) then
4045 if( MUMPS_BIT_GET4PROC(inode,pos)) then
4046.le.
if (posnmb_procs) then
4049 nmb_procs=nmb_procs+1
4050 aux_int=cv_proc_sorted(pos)
4051 cv_proc_sorted(pos)=
4052 & cv_proc_sorted(nmb_procs)
4053 cv_proc_sorted(nmb_procs)=aux_int
4061.lt.
if(((workload(cv_proc_sorted(j))
4062.AND.
& workload(cv_proc_sorted(i)))
4063.eq.
& (map_stratcv_equilib_flops))
4065.lt.
& ((memused(cv_proc_sorted(j))
4066.AND.
& memused(cv_proc_sorted(i)))
4067.eq.
& (map_stratcv_equilib_mem)))then
4068 aux_int=cv_proc_sorted(j)
4069 cv_proc_sorted(j)=cv_proc_sorted(i)
4070 cv_proc_sorted(i)=aux_int
4074 do i=nmb_procs+1,cv_slavef-1
4076.lt.
if(((workload(cv_proc_sorted(j))
4077.AND.
& workload(cv_proc_sorted(i)))
4078.eq.
& (map_stratcv_equilib_flops))
4080.lt.
& ((memused(cv_proc_sorted(j))
4081.AND.
& memused(cv_proc_sorted(i)))
4082.eq.
& (map_stratcv_equilib_mem)))then
4083 aux_int=cv_proc_sorted(j)
4084 cv_proc_sorted(j)=cv_proc_sorted(i)
4085 cv_proc_sorted(i)=aux_int
4089.NOT.
if(enforce_prefsort) then
4090.lt.
if(((2.0D0*workload(cv_proc_sorted(nmb_procs+1))
4091.AND.
& workload(cv_proc_sorted(1)))
4092.eq.
& (map_stratcv_equilib_flops))
4094.lt.
& ((2.0D0*memused(cv_proc_sorted(nmb_procs+1))
4095.AND.
& memused(cv_proc_sorted(1)))
4096.eq.
& (map_stratcv_equilib_mem)))then
4099.lt.
if(((workload(cv_proc_sorted(j))
4100.AND.
& workload(cv_proc_sorted(i)))
4101.eq.
& (map_stratcv_equilib_flops))
4103.lt.
& ((memused(cv_proc_sorted(j))
4104.AND.
& memused(cv_proc_sorted(i)))
4105.eq.
& (map_stratcv_equilib_mem)))then
4106 aux_int=cv_proc_sorted(j)
4107 cv_proc_sorted(j)=cv_proc_sorted(i)
4108 cv_proc_sorted(i)=aux_int
4115 if(present(istat))istat=0
4117 end subroutine MUMPS_SORTPROCS
4118 subroutine MUMPS_STORE_GLOBALS(ne,nfsiz,frere,fils,keep,KEEP8,
4119 & info,procnode,ssarbr,nbsa)
4121 integer,dimension(cv_n),intent(inout)::ne,nfsiz,frere,fils,
4123 integer, intent(inout):: keep(500),info(80),nbsa
4124 INTEGER(8) KEEP8(150)
4130 keep(20)=cv_keep(20)
4132 keep(38)=cv_keep(38)
4133 keep(56)=cv_keep(56)
4134 keep(61)=cv_keep(61)
4137 procnode=cv_procnode
4140 end subroutine MUMPS_STORE_GLOBALS
4141 subroutine MUMPS_TERMGLOB(istat)
4143 integer,intent(out)::istat
4144 integer i,ierr,layernmb
4145 character (len=48):: subname
4148 nullify(cv_frere,cv_fils,cv_nfsiz,cv_ne,cv_keep,cv_keep8,
4149 & cv_icntl,cv_info,cv_procnode,cv_ssarbr)
4150 deallocate(cv_proc_workload,cv_proc_maxwork,cv_proc_memused,
4151 & cv_proc_maxmem,cv_nodetype,
4152 & cv_nodelayer,cv_proc_sorted,
4153 & cv_ncostw,cv_ncostm,
4154 & cv_layerworkload,cv_layermemused,
4158 & write(cv_lp,*)'memory deallocation error in
',subname
4159 istat = cv_error_memdeloc
4162 deallocate(work_per_proc,id_son,STAT=ierr)
4165 & write(cv_lp,*)'memory deallocation error in
',subname
4166 istat = cv_error_memdeloc
4169 do layernmb=1,cv_maxlayer
4170.gt.
if(cv_layer_p2node(layernmb)%nmb_t2s0) then
4171 deallocate(cv_layer_p2node(layernmb)%t2_nodenumbers,
4172 & cv_layer_p2node(layernmb)%t2_cand,
4173 & cv_layer_p2node(layernmb)%t2_candcostw,
4174 & cv_layer_p2node(layernmb)%t2_candcostm,
4178 & write(cv_lp,*)'memory deallocation error in
',
4180 istat = cv_error_memdeloc
4185 if(associated(cv_layer_p2node)) then
4186 deallocate(cv_layer_p2node,STAT=ierr)
4189 & write(cv_lp,*)'memory deallocation error in
',subname
4190 istat = cv_error_memdeloc
4195 call MUMPS_PROPMAP_TERM(i,ierr)
4198 & write(cv_lp,*)'propmap_term signalled error in
',
4204 if(associated(cv_prop_map))deallocate(cv_prop_map,STAT=ierr)
4207 & write(cv_lp,*)'memory deallocation error in
',subname
4208 istat = cv_error_memdeloc
4213 end subroutine MUMPS_TERMGLOB
4214 recursive subroutine MUMPS_TREECOSTS(pos)
4216 integer,intent(in)::pos
4217 integer i,nfront,npiv,nextpos
4218.NOT..OR..NOT.
if ((associated(cv_tcostw))(associated(cv_tcostm)))
4222 nfront=cv_nfsiz(pos)
4224 nextpos=cv_fils(pos)
4225.gt.
do while (nextpos0)
4227 npiv = npiv + cv_SIZEOFBLOCKS(nextpos)
4231 nextpos=cv_fils(nextpos)
4233 call MUMPS_CALCNODECOSTS(npiv,nfront,
4234 & cv_ncostw(pos), cv_ncostm(pos))
4235 cv_tcostw(pos)=cv_ncostw(pos)
4236 cv_tcostm(pos)=cv_ncostm(pos)
4237.ne.
if (cv_ne(pos)0) then
4238 nextpos=cv_fils(pos)
4239.gt.
do while(nextpos0)
4240 nextpos=cv_fils(nextpos)
4244 cv_depth(nextpos)=cv_depth(pos)+1
4245 call MUMPS_TREECOSTS(nextpos)
4246 cv_tcostw(pos)=cv_tcostw(pos)+cv_tcostw(nextpos)
4247 cv_tcostm(pos)=cv_tcostm(pos)+cv_tcostm(nextpos)
4248 nextpos=cv_frere(nextpos)
4252 end subroutine MUMPS_TREECOSTS
4253 recursive subroutine MUMPS_TYPEINSSARBR(inode)
4255 integer, intent(in)::inode
4257 cv_nodetype(inode)=-1
4264 call MUMPS_TYPEINSSARBR(in)
4267 end subroutine MUMPS_TYPEINSSARBR
4268 subroutine MUMPS_WORKMEM_IMBALANCE(workload,memused,
4269 & maxwork,minwork,maxmem,minmem)
4271 DOUBLE PRECISION,dimension(:),intent(in)::workload,
4273 DOUBLE PRECISION,intent(out)::maxwork,minwork,maxmem,minmem
4274 maxwork=maxval(workload)
4275 minwork=minval(workload, mask= workload > dble(0))
4276 maxmem=maxval(memused)
4277 minmem=minval(memused, mask= memused > dble(0))
4278 end subroutine MUMPS_WORKMEM_IMBALANCE
4279 subroutine MUMPS_FIX_ACCEPTED_MASTER(layernumber,nodenumber)
4281 integer layernumber,nodenumber
4284 integer current_max,current_proc
4287 allowed_nodes = .FALSE.
4288 inode=cv_layer_p2node(layernumber)%t2_nodenumbers(nodenumber)
4289 do i=1,cv_layer_p2node(layernumber)%t2_cand(nodenumber,
4291 current_proc=cv_layer_p2node(layernumber)%t2_cand(nodenumber,i)
4292.ge.
if ( current_proc 0) then
4293 score(mem_distribmpi(current_proc)) =
4294 & score(mem_distribmpi(current_proc)) + 1
4297 current_proc = cv_procnode(inode) - 1
4298 score(mem_distribmpi(current_proc)) =
4299 & score(mem_distribmpi(current_proc)) + 1
4300 do i=0,nb_arch_nodes - 1
4301.gt.
if ( score(i) current_max ) then
4302 current_max = score(i)
4303 allowed_nodes = .FALSE.
4304 allowed_nodes(i) = .TRUE.
4306.eq.
if(score(i) current_max) then
4307 allowed_nodes(i) = .TRUE.
4312 end subroutine MUMPS_FIX_ACCEPTED_MASTER
4313 end subroutine MUMPS_DISTRIBUTE
4314 subroutine MUMPS_RETURN_CANDIDATES(par2_nodes,cand,
4316 integer, intent(out) :: par2_nodes(cv_nb_niv2), istat
4317 integer, intent(out) :: cand(:,:)
4318 character (len=48):: subname
4322 par2_nodes=cv_par2_nodes
4323 do iloop=1, cv_slavef+1
4324 cand(iloop,:)=cv_cand(:,iloop)
4326 deallocate(cv_par2_nodes,cv_cand,STAT=istat)
4329 & write(cv_lp,*)'memory deallocation error in
',subname
4330 istat = cv_error_memdeloc
4335 end subroutine MUMPS_RETURN_CANDIDATES
4336 subroutine MUMPS_INIT_ARCH_PARAMETERS(
4337 & total_comm,working_comm,keep69,par,
4338 & nbslaves,mem_distrib,informerr)
4342 integer, dimension(0:) :: mem_distrib
4343 integer total_comm,working_comm,keep69,par
4344 integer, dimension(:) ::informerr
4347 integer,dimension(:),allocatable :: buffer_memdistrib
4352 cv_slavef = nbslaves
4353.eq.
if (ke69 1) then
4356 if ( allocated(mem_distribtmp) ) deallocate(mem_distribtmp )
4357 allocate( mem_distribtmp( 0:cv_slavef-1 ),
4358 & buffer_memdistrib( 0:cv_slavef-1 ), stat=ierr )
4359.gt.
if ( ierr 0 ) then
4360.gt.
if(cv_mp0) write(cv_mp,*) 'pb allocation mem_dist
'
4362 informerr(2) = cv_slavef
4366 call MPI_COMM_RANK( total_comm, host, ierr )
4367.eq..or..ne.
if ((par 1) (host 0)) then
4368 call MPI_COMM_RANK( working_comm, myrank, ierr )
4369 call MUMPS_COMPUTE_DISTRIB(ierr,myrank,
4370 & working_comm,mem_distrib)
4371.ne.
if ( ierr 0 ) then
4375 informerr(2) = cv_slavef
4378 mem_distribtmp = mem_distrib
4379 call MUMPS_FIX_NODE_MASTER(ierr)
4380.ne.
if ( ierr 0 ) then
4381.gt.
if(cv_mp0) write(cv_mp,*)
4384 informerr(2) = cv_slavef
4389 deallocate(mem_distribtmp)
4390 deallocate(buffer_memdistrib)
4393 call MPI_ALLREDUCE(mem_distribtmp(0),buffer_memdistrib(0),
4394 & cv_slavef,MPI_INTEGER,
4395 & MPI_MAX,total_comm,ierr)
4396 mem_distribtmp = buffer_memdistrib
4397 deallocate (buffer_memdistrib)
4398 call MUMPS_COMPUTE_NB_ARCH_NODES()
4399.le.
if((cv_slavef/nb_arch_nodes) 4) then
4400 do i = 0, cv_slavef-1
4401.NE.
if ( mem_distrib(i) 1 ) then
4402 mem_distrib(i)=max(ke69/2,2)
4406.eq..or.
if((nb_arch_nodes 1)
4407.eq.
& (nb_arch_nodes cv_slavef)) then
4410 deallocate(mem_distribtmp)
4413.eq.
if (host 0) then
4414 if ( allocated(mem_distribmpi) ) deallocate(mem_distribmpi )
4415 allocate( mem_distribmpi( 0:cv_slavef-1 ), stat=ierr )
4416.gt.
if ( ierr 0 ) then
4417.gt.
if(cv_mp0) write(cv_mp,*) 'pb allocation mem_dist
'
4419 informerr(2) = cv_slavef
4422 call MUMPS_ALLOC_ALLOW_MASTER(ierr)
4423.ne.
if(ierr 0 ) then
4426 mem_distribmpi = mem_distribtmp
4427 call MUMPS_FIX_TABLE_OF_PROCESS(ierr)
4428.ne.
if ( ierr 0 ) then
4432 informerr(2) = cv_slavef
4436 deallocate(mem_distribtmp)
4439 end subroutine MUMPS_INIT_ARCH_PARAMETERS
4440 subroutine MUMPS_COMPUTE_NB_ARCH_NODES()
4445.eq.
if(mem_distribtmp(i) i) then
4446 nb_arch_nodes = nb_arch_nodes + 1
4450 end subroutine MUMPS_COMPUTE_NB_ARCH_NODES
4451 subroutine MUMPS_FIX_TABLE_OF_PROCESS(ierr)
4453 external MUMPS_SORT_INT
4454 integer i,precnode,nodecount
4459 if ( allocated(table_of_process) )
4460 & deallocate(table_of_process )
4461 allocate( table_of_process(0:cv_slavef-1), stat=ierr )
4462.gt.
if ( ierr 0 ) then
4463.gt.
if(cv_mp0) write(cv_mp,*)
4467 do i=0,cv_slavef - 1
4468 table_of_process(i) = i
4470 call MUMPS_SORT_INT(cv_slavef,mem_distribtmp(0),
4471 & table_of_process(0))
4475.eq.
if(mem_distribtmp(i) precnode) then
4476 sizesmp = sizesmp + 1
4477 mem_distribtmp(i) = nodecount
4478 mem_distribmpi(table_of_process(i)) = nodecount
4480 score(nodecount) = sizesmp
4482 nodecount = nodecount + 1
4483 precnode = mem_distribtmp(i)
4484 mem_distribtmp(i) = nodecount
4485 mem_distribmpi(table_of_process(i)) = nodecount
4488 score(nodecount) = sizesmp
4490 mem_distribtmp(i) = score(mem_distribtmp(i))
4492 CALL MUMPS_SORT_INT_DEC(cv_slavef,mem_distribtmp(0),
4493 & table_of_process(0))
4496 end subroutine MUMPS_FIX_TABLE_OF_PROCESS
4497 subroutine MUMPS_FIX_NODE_MASTER(ierr)
4504.eq.
if (mem_distribtmp(i) 1) then
4507.eq.
if (mem_distribtmp(j) 1) then
4508 mem_distribtmp(j) = idmaster
4510 mem_distribtmp(j) = 0
4515 mem_distribtmp(i) = 0
4522 end subroutine MUMPS_FIX_NODE_MASTER
4523 subroutine MUMPS_COMPUTE_DISTRIB(ierr,myrank,working_comm,
4527 integer ierr,resultlen,myrank,i,working_comm
4528 integer , dimension(0:) :: mem_distrib
4530 character(len=MPI_MAX_PROCESSOR_NAME) name
4531 integer, dimension(:),allocatable :: namercv
4532 integer, dimension(:),allocatable :: myname
4534 external MUMPS_COMPARE_TAB
4535 logical MUMPS_COMPARE_TAB
4537 call MPI_GET_PROCESSOR_NAME(name,resultlen,ierr)
4538 allocate(myname(resultlen),stat=allocok)
4539.gt.
if ( allocok 0 ) then
4540.gt.
if(cv_mp0) write(cv_mp,*)
4541 & 'pb allocation in compute_dist
for myname
'
4546 myname(i) = ichar(name(i:i))
4549.eq.
if(myrank i) then
4554 call MPI_BCAST(lenrcv,1,MPI_INTEGER,i,
4555 & working_comm,ierr)
4556 allocate(namercv(lenrcv),stat=allocok)
4557.gt.
if ( allocok 0 ) then
4558.gt.
if(cv_mp0) write(cv_mp,*)
4559 & 'pb allocation in compute_dist
for namercv
'
4563.eq.
if(myrank i) then
4566 call MPI_BCAST(namercv,lenrcv,MPI_INTEGER,i,
4567 & working_comm,ierr)
4568 if( MUMPS_COMPARE_TAB(myname,namercv,
4569 & resultlen,lenrcv)) then
4579 end subroutine MUMPS_COMPUTE_DISTRIB
4580 subroutine MUMPS_GET_IDP1_PROC(current_proc,idarch,ierr)
4582 integer current_proc
4585.ge.
if (current_proc cv_slavef) then
4589.lt.
if (current_proc 0) then
4593 idarch = table_of_process(current_proc) + 1
4596 end subroutine MUMPS_GET_IDP1_PROC
4597 subroutine MUMPS_END_ARCH_CV()
4598 if (allocated(table_of_process)) deallocate(table_of_process)
4599 if (allocated(allowed_nodes)) deallocate(allowed_nodes)
4600 if (allocated(score)) deallocate(score)
4601 if (allocated(mem_distribtmp)) deallocate(mem_distribtmp)
4602 if (allocated(mem_distribmpi)) deallocate(mem_distribmpi)
4604 end subroutine MUMPS_END_ARCH_CV
4605 subroutine MUMPS_ALLOC_ALLOW_MASTER(ierr)
4608 if (allocated(allowed_nodes)) deallocate(allowed_nodes)
4609 allocate( allowed_nodes(0:nb_arch_nodes-1),stat=ierr)
4610.gt.
if ( ierr 0 ) then
4611.gt.
if(cv_mp0) write(cv_mp,*)
4616 allowed_nodes = .FALSE.
4617 if (allocated(score)) deallocate(score)
4618 allocate( score(0:nb_arch_nodes-1),stat=ierr)
4619.gt.
if ( ierr 0 ) then
4620.gt.
if(cv_mp0) write(cv_mp,*)
4628 end subroutine MUMPS_ALLOC_ALLOW_MASTER
4629 SUBROUTINE MUMPS_SORT_MMERGE(start1st,end1st,dim1,
4630 & start2nd,end2nd,dim2,
4634 integer, intent(in):: start1st,end1st,dim1,start2nd,end2nd,dim2
4635 integer, intent(inout):: indx(:)
4636 DOUBLE PRECISION, intent(inout):: val(:)
4637 INTEGER, intent(out) :: istat
4638 INTEGER, ALLOCATABLE, DIMENSION(:) :: index
4639 DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: dummy1
4642 character (len=48):: subname
4643 subname = "MUMPS_SORT_MMERGE"
4645 ALLOCATE(index(dim1+dim2),dummy1(dim1+dim2),stat=allocok)
4646.gt.
if ( allocok 0 ) then
4647 cv_info(1) = cv_error_memalloc
4648 cv_info(2) = dim1+dim2+dim1+dim2
4649 istat = cv_error_memalloc
4652 & 'memory allocation error in
',subname
4658.LT..AND..LT.
do while((aend1st+1)(bend2nd+1))
4659.GT.
if(val(a)val(b))then
4671.LT.
if(aend1st+1) then
4672.LT.
do while(aend1st+1)
4678.LT.
elseif(bend2nd+1) then
4679.LT.
do while(bend2nd+1)
4686 indx(start1st:end1st)=index(1:dim1)
4687 val(start1st:end1st)=dummy1(1:dim1)
4688 indx(start2nd:end2nd)=index(dim1+1:dim1+dim2)
4689 val(start2nd:end2nd)=dummy1(dim1+1:dim1+dim2)
4690 DEALLOCATE(index,dummy1)
4693 end SUBROUTINE MUMPS_SORT_MMERGE
4694 SUBROUTINE MUMPS_SORT_MSORT(istat,dim,indx,val1,val2)
4696 integer, intent(in):: dim
4697 integer, intent(inout):: indx(:)
4698 integer, intent(out)::istat
4699 DOUBLE PRECISION, intent(inout):: val1(:)
4700 DOUBLE PRECISION, intent(inout),optional:: val2(:)
4701 INTEGER, ALLOCATABLE, DIMENSION(:) :: index, dummy1
4702 DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:) :: dummy2
4703 integer, parameter :: ss = 35
4704 integer :: a,b,c,i,k,l,r,s,stackl(ss),stackr(ss)
4706 character (len=48):: subname
4708 subname = "MUMPS_SORT_MSORT"
4709 ALLOCATE(index(dim),dummy1(dim),dummy2(dim),stat=allocok)
4710.gt.
if (allocok0) then
4711 cv_info(1) = cv_error_memalloc
4713 istat = cv_error_memalloc
4715 & write(cv_lp,*)'memory allocation error in
',subname
4729.GE.
if(sss) stop 'maxsize of stack reached
'
4740.GE.
if(sss) stop 'maxsize of stack reached
'
4753.LT..AND..LT.
do while((ak+1)(br+1))
4754.GT.
if(val1(index(a))val1(index(b)))then
4765 dummy1(c:r-l+1)=index(a:k)
4766.LT.
elseif(br+1) then
4767 dummy1(c:r-l+1)=index(b:r)
4769 index(l:r)=dummy1(1:r-l+1)
4772.EQ.
if(lstackl(s)) goto 5512
4773.EQ.
if(rstackr(s)) goto 5513
4776 dummy1(i)=indx(index(i))
4780 dummy2(i)=val1(index(i))
4783 if(present(val2)) then
4785 dummy2(i)=val2(index(i))
4790 DEALLOCATE(index,dummy1,dummy2)
4792 end subroutine MUMPS_SORT_MSORT
4793 END MODULE MUMPS_STATIC_MAPPING
4794 SUBROUTINE MUMPS_SELECT_K38K20(N, SLAVEF, MP,
4795 & ICNTL13, KEEP, FRERE, ND, ISTAT)
4797 INTEGER, intent(in) :: N, SLAVEF, ICNTL13, MP
4799 INTEGER FRERE(N), ND(N)
4800 INTEGER, intent(out) :: ISTAT
4801 INTEGER IROOTTREE, SIZEROOT, NFRONT, I
4803.EQ..or..EQ.
IF (KEEP(60)2 KEEP(60)3 ) THEN
4805.EQ..OR..GT..OR.
IF((SLAVEF1)(ICNTL130)
4806.NE.
& (KEEP(60)0)) THEN
4812.EQ.
IF (FRERE(I)0) THEN
4814.GT.
IF (NFRONT SIZEROOT) THEN
4820.EQ..OR..EQ.
IF ((IROOTTREE-1)(SIZEROOT-1)) THEN
4824.LE.
IF (SIZEROOTSLAVEF) THEN
4826.GT.
ELSE IF((SIZEROOTKEEP(37))
4827.AND..EQ.
& (KEEP(53)0)
4829.GT.
IF (MP0) WRITE(MP,*) 'a root of estimated
size ',
4830 & SIZEROOT,' has been selected
for scalapack.
'
4831 KEEP(38) = IROOTTREE
4834.GT.
IF (MP0) WRITE(MP,'(a,i9,a)
')
4835 & ' warning: largest root node of
size ', SIZEROOT,
4836 & ' not selected
for parallel execution
'
4838.EQ..AND..NE.
IF ((KEEP(38)0)(KEEP(53)0)) THEN
4839 KEEP(20) = IROOTTREE
4840.EQ.
ELSE IF (KEEP(60)0) THEN
4846 END SUBROUTINE MUMPS_SELECT_K38K20
4847 SUBROUTINE MUMPS_SPLITNODE_INTREE(inode,nfront,npiv,k,
4848 & lnpivsplit, npivsplit, keep, n, fils, frere,
4849 & nfsiz, ne, info5_nfrmax, k28_nsteps, nodetype,
4851 & , SIZEOFBLOCKS, LSIZEOFBLOCKS
4855 integer, intent(in)::nfront,npiv
4856 integer, intent(in):: k
4857 integer, intent(in)::lnpivsplit
4858 integer, intent(in)::npivsplit(lnpivsplit)
4859 integer, intent(in):: inode
4860 integer, intent(out)::istat
4861 integer, intent(inout):: keep(500)
4862 integer, intent(inout):: k28_nsteps
4863 integer, intent(in) :: info5_nfrmax
4864 integer, intent(in) :: n
4865 integer, intent(inout)::frere(n), fils(n), nfsiz(n), ne(n)
4866 integer, intent(inout):: nodetype(n)
4867 integer, intent(in) :: LSIZEOFBLOCKS
4868 integer, intent(in) :: SIZEOFBLOCKS(LSIZEOFBLOCKS)
4869 logical,intent(in) :: BLKON
4870 integer i,lev,in,in_son,in_father,in_grandpa,npiv_father,
4871 & npiv_son,nfrontk,npivk,d1,f1,e1,dk,fk,next_father
4872 integer::ison,ifather
4873 character (len=48):: subname
4874 integer, parameter:: tsplit_beg=4
4875 integer, parameter:: tsplit_mid=5
4876 integer, parameter:: tsplit_last=6
4878 subname='splitnode_intree
'
4883 npiv_son = npivsplit(1)
4884 keep(2)=max(keep(2),nfront-npiv_son)
4890.lt.
do while (inpiv_son)
4892 i = i + SIZEOFBLOCKS(f1)
4901 next_father = fils(in_son)
4903 ifather = next_father
4905 npiv_son= abs(npivsplit(lev))
4906 npiv_father=abs(npivsplit(lev+1))
4908 i= SIZEOFBLOCKS(in_father)
4909.lt.
do while (inpiv_father)
4910 in_father=fils(in_father)
4911 i = i + SIZEOFBLOCKS(in_father)
4914 do i=1,npiv_father-1
4915 in_father=fils(in_father)
4918 frere(ison)=-ifather
4919 next_father = fils(in_father)
4920 fils(in_father)=-ison
4922 nfsiz(ifather)=nfrontk-npiv_son
4925.EQ.
IF (keep(79)0) THEN
4926 if( nfront-npiv_son > keep(9)) then
4927 nodetype(ifather) = 2
4929 nodetype(ifather) = 1
4933 nodetype(ison) = tsplit_beg
4935.eq.
if (levk-1) then
4936 nodetype(ifather) = tsplit_last
4938 nodetype(ifather) = tsplit_mid
4940 if (npivsplit(lev+1) < 0) then
4941.eq.
if (levk-1) then
4942 nodetype(ifather)=-tsplit_last
4944 nodetype(ifather)=-tsplit_mid
4948 nfrontk = nfrontk-npiv_son
4949 npivk = npivk - npiv_son
4955# if (check_mumps_static_mapping >= 3)
4956 write(6,*) ' last(
close to root) node in chain :
', ifather
4958 fils(f1) = next_father
4965.gt.
do while(fils(in)0)
4969.eq.
if(fils(in_grandpa)-d1) then
4970 fils(in_grandpa)=-dk
4972 in=-fils(in_grandpa)
4973.ne.
do while(frere(in) d1)
4978 k28_nsteps = k28_nsteps + k-1
4981 END SUBROUTINE MUMPS_SPLITNODE_INTREE
4982 subroutine MUMPS_SETUP_CAND_CHAIN(n, nb_niv2,
4983 & frere, nodetype, par2_nodes,
4984 & procnode, cand, inode_chain, slavef, dummy, nbcand, istat)
4986 integer, intent(in) :: n, nb_niv2, slavef
4987 integer,intent(in)::frere(n)
4988 integer, intent(inout) :: par2_nodes(nb_niv2), procnode(n)
4989 integer,intent(inout)::nodetype(n)
4990 integer,intent(inout)::cand(nb_niv2, slavef+1)
4991 integer,intent(in)::inode_chain
4992 integer,intent(inout)::dummy, nbcand
4993 integer,intent(out):: istat
4994 integer, parameter:: tsplit_beg=4
4995 integer, parameter:: tsplit_mid=5
4996 integer, parameter:: tsplit_last=6
4997 integer, parameter:: invalid=-9999
4998 integer :: inode, ifather, k
4999 logical :: last_iteration_reached
5004.not..lt.
if ( (frere(inode) 0) ) then
5005 write(*,*) " Internal error 0 in SETUP_CAND",
5006 & frere(inode), inode
5009 ifather = -frere(inode)
5010.eq.
last_iteration_reached = (abs(nodetype(ifather))tsplit_last)
5011 par2_nodes(dummy+1) = ifather
5012 procnode(ifather) = cand(dummy,1) + 1
5013.eq..or.
if ( (nodetype(ifather)tsplit_mid)
5014.eq.
& (nodetype(ifather)tsplit_last) ) then
5015.lt.
if (nbcand2) then
5016 par2_nodes(dummy+1) = ifather
5017 procnode(ifather) = procnode(inode)
5018 cand(dummy+1,:) = cand(dummy,:)
5020 write(6,*) ' mapping property
',
5021 & ' of procs in chain lost
'
5024 cand(dummy+1,1:nbcand-1+k-1) = cand(dummy,2:nbcand+k-1)
5025 cand(dummy+1,nbcand-1+k) = procnode(inode)-1
5026 cand(dummy+1,nbcand-1+k+1:slavef) = invalid
5029.eq..or.
else if ( (nodetype(ifather)-tsplit_mid)
5030.eq.
& (nodetype(ifather)-tsplit_last) ) then
5031.eq.
if (nodetype(inode)tsplit_beg) then
5034 nodetype(inode)=tsplit_last
5036.eq.
if (nodetype(ifather) -tsplit_last) then
5037 nodetype(ifather) = 2
5039 nodetype(ifather) = tsplit_beg
5041 cand(dummy+1,1:nbcand-1+k-1) = cand(dummy,2:nbcand+k-1)
5042 cand(dummy+1,nbcand-1+k) = procnode(inode)-1
5046 write(6,*) ' internal error 2 in setup_cand
',
5047 & ' in, ifather =
', inode, ifather,
5048 & ' nodetype(ifather)
', nodetype(ifather)
5051 cand(dummy+1,slavef+1)= nbcand
5053 if (last_iteration_reached) exit
5057 end subroutine MUMPS_SETUP_CAND_CHAIN
5058 subroutine MUMPS_GET_SPLIT_4_PERF(inode, nfront, npiv, nproc,
5059 & k, lnpivsplit, npivsplit,
5061 & fils, BLKON, sizeofblocks,
5064 integer,intent(in)::inode, nfront, npiv, lnpivsplit, n
5065 integer,intent(in)::frere(n)
5066 integer,intent(in) :: fils(n)
5067 logical, intent(in) :: BLKON
5068 integer, intent(in) :: sizeofblocks(*)
5069 integer,intent(in)::keep(500)
5070 double precision, intent(in):: nproc
5071 integer,intent(out)::k, npivsplit(lnpivsplit), istat
5073 integer :: inode_tmp
5074 integer :: kk, optimization_strategy, nass, npiv2
5075 double precision :: nproc2
5076 integer :: npivOld, npivNew
5077 double precision :: timeFacOld, timeFacNew, timeAss
5078 double precision ,parameter :: alpha=8.0D9
5079 double precision ,parameter :: gamma=1.2D9
5080.le.
nosplit = npiv npiv4equilibreRows(nfront, nproc)
5081 optimization_strategy = 0
5082.or..eq.
nosplit = nosplit (frere(inode) 0)
5089.le.
if (nproc 1.0d0) then
5099.lt.
do while (nass npiv)
5100.eq..or.
if ((nproc2 2.0d0)
5101.le.
& (nfront - nass 6*keep(9))) then
5103.gt.
else if (nproc2 2) then
5104.eq.
if (optimization_strategy 0) then
5105 npiv2 = min(npiv - nass,
5106 & npiv4equilibreRows(nfront - nass, nproc2 ))
5107.eq.
else if (optimization_strategy 1) then
5108.eq.
if (nproc2 nproc) then
5109 npiv2 = min(npiv - nass,
5110 & npiv4equilibreFlops(nfront - nass, nproc2 ))
5112 npiv2 = min(npiv - nass,
5113 & npiv4equilibreRows(nfront - nass, nproc2 ))
5116 write(*,*) "Internal error in MUMPS_GET_SPLIT_4_PERF,"
5117 write(*,*) "optimization_strategy not implemented"
5124.LT..and..gt.
DO WHILE (npivsplit(kk) npiv2 inode_tmp 0)
5125 npivsplit(kk) = npivsplit(kk) + sizeofblocks(inode_tmp)
5126 inode_tmp= fils(inode_tmp)
5128 npiv2 = npivsplit(kk)
5130 npivsplit(kk) = npiv2
5133.and..ne.
& kk 1) then
5134.eq.
if (optimization_strategy 0) then
5135 npivOld = min(npiv - nass,
5136 & npiv4equilibreRows(nfront - nass, nproc ))
5137 npivNew = min(npiv - nass,
5138 & npiv4equilibreRows(nfront - nass, nproc2 - 1.0d0))
5139.eq.
else if (optimization_strategy 1) then
5140 npivOld = min(npiv - nass,
5141 & npiv4equilibreFlops(nfront - nass, nproc ))
5142 npivNew = min(npiv - nass,
5143 & npiv4equilibreRows(nfront - nass, nproc2 - 1.0d0))
5145 write(*,*) "Internal error in MUMPS_GET_SPLIT_4_PERF,"
5146 write(*,*) "optimization_strategy not implemented"
5149 timeAss = timeAssembly(int(nfront-nass,8), nproc2)
5150 timeFacOld = timeFacto(int(nfront-nass,8), int(npivOld,8),
5152 timeFacNew = timeFacto(int(nfront-nass,8),int(npivNew,8),
5154 if ( (flopsFactoPanel(int(npivOld,8),int(nfront-nass,8))+
5155 & flopsUpdate(int(nfront-nass-npivOld,8),
5156 & int(nfront-nass-npivOld,8), int(npivOld,8)))/
5157 & (timeFacOld+timeAss)
5158.gt.
& (flopsFactoPanel(int(npivNew,8),int(nfront-nass,8))+
5159 & flopsUpdate(int(nfront-nass-npivNew,8),
5160 & int(nfront-nass-npivNew,8), int(npivNew,8)))/
5162 npivsplit(kk) = -npiv2
5165 nproc2 = nproc2 - 1.0d0
5167 npivsplit(kk)=npivNew
5176 function npiv4equilibreRows(nfront, nproc)
5178 integer npiv4equilibreRows
5179 integer, intent(in) :: nfront
5180 double precision, intent(in) :: nproc
5181 npiv4equilibreRows = max(1, int(dble(nfront)/nproc))
5183 end function npiv4equilibreRows
5184 function npiv4equilibreFlops(nfront, nproc)
5186 integer npiv4equilibreFlops
5187 integer, intent(in) :: nfront
5188 double precision, intent(in) :: nproc
5189 double precision::n,s,a,b,c,sdelta,npiv
5193 b = -3.*n - s*n - s/2.
5194 c = 2.*n**2 + s*n + s/6.
5195 sdelta = (b*b) - 4*a*c
5196 if (sdelta < 0.0E0) then
5197 WRITE(*,*) "Delta < 0 in npiv4equilibreFlops"
5200 sdelta = sqrt(sdelta)
5201 npiv = (-b - sdelta)/(2*a)
5202 npiv4equilibreFlops = max(1, int(npiv))
5204 end function npiv4equilibreFlops
5205 function flopsFactoPanel(nbrows, nbcols)
5206 integer(8) :: nbrows, nbcols
5207 double precision :: flopsFactoPanel
5208 flopsFactoPanel = (nbrows*((-1.d0/3.d0)*nbrows**2 +
5209 & (nbcols + 1.d0/2.d0)*nbrows +
5210 & (nbcols + 1.d0/6.d0)))
5211 end function flopsFactoPanel
5212 function flopsUpdate(m, n, k)
5213 integer(8) :: m, n, k
5214 double precision :: flopsUpdate
5215 flopsUpdate = dble(2*m*n*k + m*k**2)
5216 end function flopsUpdate
5217 function timeFacto(nfront, npiv, nproc)
5218 integer(8), intent(in) :: nfront, npiv
5219 double precision, intent(in) :: nproc
5220 double precision :: timeFacto
5221 timeFacto = (max(flopsFactoPanel(npiv,nfront),
5222 & flopsUpdate(nfront-npiv, nfront-npiv, npiv)/
5224 end function timeFacto
5225 function timeNIV1(nfront, npiv)
5226 integer(8) :: nfront, npiv
5227 double precision :: timeNIV1
5228 timeNIV1 = ((flopsFactoPanel(npiv, nfront) +
5229 & flopsUpdate(nfront - npiv, nfront - npiv, npiv))/alpha)
5230 end function timeNIV1
5231 function timeAssembly(n, p)
5233 double precision, intent(in) :: p
5234 double precision :: timeAssembly
5235 timeAssembly = ((n*n/p)/(gamma/(log(p)/log(2.0d0))))
5236 end function timeAssembly
5237 end subroutine MUMPS_GET_SPLIT_4_PERF
subroutine mumps_get_flops_cost(nfront, npiv, nass, keep50, level, cost)
for(i8=*sizetab-1;i8 >=0;i8--)
subroutine mumps_get_split_4_perf(inode, nfront, npiv, nproc, k, lnpivsplit, npivsplit, n, frere, keep, fils, blkon, sizeofblocks, istat)
subroutine mumps_find_thislayer(nmb, thislayer, nmb_thislayer, istat)
subroutine mumps_map_layer(layernmb, thislayer, nmb_thislayer, map_strat, istat)
subroutine mumps_splitnode_intree(inode, nfront, npiv, k, lnpivsplit, npivsplit, keep, n, fils, frere, nfsiz, ne, info5_nfrmax, k28_nsteps, nodetype, istat, sizeofblocks, lsizeofblocks, blkon)
subroutine mumps_initpart2(istat)
subroutine mumps_procinit(maxwork, maxmem, istat)
subroutine mumps_setup_cand(istat)
subroutine mumps_split_during_mapping(layernmb, thislayer, nmb_thislayer, istat)
subroutine mumps_costs_layer_t2pm(layernmb, nmb_thislayer, istat)
subroutine mumps_costs_blr_t2_slave(npiv, nfront, nrow, costw, costm, k471, k472, k475, k488, sym)
subroutine mumps_find_best_proc(inode, map_strat, work, mem, workload, memused, proc, istat, respect_prop)
subroutine mumps_postprocess_mem()
subroutine mumps_assign_types(layernmb, thislayer, nmb_thislayer, istat)
subroutine mumps_store_globals(ne, nfsiz, frere, fils, keep, keep8, info, procnode, ssarbr, nbsa)
subroutine mumps_select_type3(istat)
subroutine mumps_splitnode_update(inode, nfront, npiv, k, lnpivsplit, npivsplit, istat)
subroutine mumps_encode_procnode(istat)
logical function mumps_bit_get(procs4node, procnumber)
subroutine mumps_initpart1(n, slavef, frere, fils, nfsiz, ne, keep, keep8, icntl, info, procnode, ssarbr, peak, istat, sizeofblocks, lsizeofblocks)
recursive subroutine mumps_treecosts(pos)
subroutine mumps_accept_l0(map_strat, workload, memused, accepted, istat)
subroutine mumps_fathson_replace(ifather, istat)
subroutine mumps_higher_layer(startlayer, thislayer, nmb_thislayer, cont, istat)
subroutine mumps_bit_set(procs4node, procnumber, istat)
logical function mumps_bit_get4proc(inode, procnumber)
subroutine mumps_rootlist(istat)
subroutine mumps_calcnodecosts(npiv, nfront, costw, costm)
subroutine mumps_propmap_init(inode, istat)
recursive subroutine mumps_typeinssarbr(inode)
subroutine mumps_mapsubtree(procnode)
subroutine mumps_get_memsplit_inkpart(inode, doit, npiv, nfront, npropmap, k2, istat)
recursive subroutine mumps_propmap(inode_entry, ctr_entry, istat)
subroutine mumps_sortprocs(map_strat, workload, memused, inode, istat)
subroutine mumps_get_split_inkpart(inode, doit, npiv, nfront, npropmap, k1, k3, istat)
subroutine mumps_calccosts(istat)
subroutine mumps_costs_layer_t2(layernmb, nmb_thislayer, istat)
subroutine mumps_costs_blr_t2_master(npiv, nfront, costw, costm, k471, k472, k475, k488, sym)
subroutine mumps_list2layer(istat)
subroutine mumps_propmap4split(inode, ifather, istat)
subroutine mumps_arrangel0(map_strat, layerl0end, workload, memused, procnode, istat, respect_prop)
logical function mumps_istype2bysize(nfront, npiv)
recursive subroutine mumps_mapbelow(inode, procnmb, procnode)
subroutine mumps_make_propmap(istat)
logical function mumps_is_node_of_type2(inode)
subroutine mumps_termglob(istat)
recursive subroutine mumps_mod_propmap(inode_entry, ctr_entry, istat)
subroutine mumps_workmem_imbalance(workload, memused, maxwork, minwork, maxmem, minmem)
subroutine mumps_layerl0(istat)
subroutine mumps_fix_accepted_master(layernumber, nodenumber)
subroutine mumps_calcnodecosts_blr(npiv, nfront, costw, costm, k471, k472, k475, k488, sym)
integer function mumps_bloc2_get_nslavesmin(slavef, k48, k821, k50, nfront, ncb, k375, k119)
integer function mumps_bloc2_get_nslavesmax(slavef, k48, k821, k50, nfront, ncb, k375, k119)
integer function mumps_reg_getkmax(keep821, ncb)
subroutine compute_blr_vcs(k472, ibcksz, maxsize, nass)
integer, dimension(:), pointer cv_frere
double precision cv_costw_layer0
integer, dimension(:), allocatable, save mem_distribtmp
double precision, dimension(:), pointer cv_layerworkload
subroutine mumps_sort_msort(istat, dim, indx, val1, val2)
integer cv_bitsize_of_int
double precision, dimension(:), pointer cv_proc_workload
double precision, dimension(:), pointer cv_tcostw
integer, dimension(:), allocatable, save score
integer, dimension(:), pointer cv_nodetype
double precision cv_splitthresh
integer, dimension(:), pointer cv_info
subroutine mumps_alloc_allow_master(ierr)
integer, dimension(:), allocatable, save table_of_process
integer, parameter tsplit_beg
type(alloc_arraytype), dimension(:), pointer cv_layer_p2node
integer cv_mixed_strat_bound
logical, dimension(:), allocatable, save allowed_nodes
double precision, dimension(:), pointer cv_ncostw
double precision, dimension(:), pointer cv_layerl0_sorted_costw
double precision cv_costm_upper
double precision cv_costm_total
double precision, dimension(:), pointer cv_ncostm
integer, dimension(:), pointer cv_icntl
double precision cv_costm_layer0
double precision, parameter cv_d_invalid
integer, parameter cv_invalid
subroutine mumps_fix_node_master(ierr)
double precision cv_l0wthresh
integer, parameter tsplit_last
integer, dimension(:), pointer cv_nfsiz
integer, parameter cv_equilib_flops
integer, dimension(:), pointer cv_sizeofblocks
subroutine, public mumps_end_arch_cv()
type(procs4node_t), dimension(:), pointer cv_prop_map
double precision, dimension(:), pointer cv_proc_memused
integer, dimension(:), pointer cv_nodelayer
subroutine, public mumps_init_arch_parameters(total_comm, working_comm, keep69, par, nbslaves, mem_distrib, informerr)
double precision, dimension(:), pointer cv_proc_maxmem
double precision, dimension(:), pointer cv_tcostm
integer, dimension(:), pointer cv_fils
double precision mincostw
integer, parameter cv_error_memdeloc
integer cv_dist_l0_mixed_strat_bound
integer, parameter cv_error_memalloc
subroutine mumps_sort_mmerge(start1st, end1st, dim1, start2nd, end2nd, dim2, indx, val, istat)
double precision, dimension(:), pointer cv_proc_maxwork
integer, dimension(:,:), pointer, save cv_cand
integer, dimension(:), pointer cv_keep
integer, dimension(:), pointer cv_layerl0_array
double precision, dimension(:), pointer cv_layermemused
integer, parameter cv_equilib_mem
subroutine, public mumps_distribute(n, slavef, icntl, info, ne, nfsiz, frere, fils, keep, keep8, procnode, ssarbr, nbsa, peak, istat, sizeofblocks, lsizeofblocks)
double precision cv_costw_total
integer, save nb_arch_nodes
integer, dimension(:), pointer cv_proc_sorted
integer, dimension(:), allocatable, save mem_distribmpi
integer(8), dimension(:), pointer cv_keep8
double precision cv_costw_upper
subroutine mumps_fix_table_of_process(ierr)
integer, parameter tsplit_mid
double precision cv_relax
integer layerl0_endforarrangel0
integer, dimension(:), pointer cv_procnode
integer, dimension(:), pointer cv_depth
type(splitting_data) cv_last_splitting
integer, dimension(:), pointer cv_ne
subroutine, public mumps_return_candidates(par2_nodes, cand, istat)
integer, dimension(:), pointer cv_ssarbr