94 implicit none
95 integer,intent(in)::n,slavef
96 integer, intent(inout),TARGET:: ne(n),nfsiz(n),
97 & procnode(n),ssarbr(n),frere(n),fils(n),keep(500),
98 & icntl(60),info(80)
99 integer, intent(in) :: LSIZEOFBLOCKS
100 integer, intent(in) :: SIZEOFBLOCKS(LSIZEOFBLOCKS)
101 INTEGER(8) KEEP8(150)
102 integer,intent(out)::nbsa,istat
103 integer ierr,nmb_thislayer,layernmb,mapalgo,allocok,i
104 integer,pointer,dimension(:)::thislayer
105 integer,parameter::memonly=1,floponly=2,hybrid=3
106 DOUBLE PRECISION::
107 & maxwork,minwork,maxmem,minmem,workbalance,membalance
108 DOUBLE PRECISION:: cost_root_node
109 DOUBLE PRECISION,dimension(:),allocatable:: work_per_proc
110 integer,dimension(:),allocatable::id_son
111 logical :: cont
112 character (len=48):: err_rep,subname
113 DOUBLE PRECISION peak
114 logical :: BLKON
115 blkon = (sizeofblocks(1).GT.0)
116 cv_blkon = blkon
117 istat=-1
118 subname='DISTRIBUTE'
119 cv_lp=icntl(1)
120 cv_mp=icntl(3)
121 IF (icntl(4).LT.2) cv_mp=0
122 nullify(thislayer)
123 err_rep='INITPART1'
125 & frere,fils,nfsiz,ne,keep,keep8,icntl,info,
126 & procnode,ssarbr,peak,ierr
127 & , sizeofblocks, lsizeofblocks
128 & )
129 if (ierr.ne.0) goto 99999
130 err_rep='PROCINIT'
132 if (ierr.ne.0) goto 99999
133 err_rep='CALCCOST'
135 if (ierr.ne.0) goto 99999
136 err_rep='ROOTLIST'
138 if (ierr.ne.0) goto 99999
139 err_rep='LAYERL0'
141 if (ierr.ne.0) goto 99999
142 if (ierr.ne.0) goto 99999
143 err_rep='INITPART2'
145 if (ierr.ne.0) goto 99999
146 err_rep='WORKMEM_'
148 & cv_proc_workload,cv_proc_memused,
149 & maxwork,minwork,maxmem,minmem)
150 if(maxwork.gt.0.0d0) then
151 workbalance=minwork/maxwork
152 else
153 workbalance=0.0d0
154 endif
155 if(maxmem.gt.0.0d0) then
156 membalance=minmem/maxmem
157 else
158 membalance=0.0d0
159 endif
160 err_rep='mem_alloc'
161 allocate(thislayer(cv_maxnodenmb),stat=allocok)
162 if (allocok.gt.0) then
163 cv_info(1) = cv_error_memalloc
164 cv_info(2) = 2*cv_maxnsteps+cv_maxnodenmb
165 if(cv_lp.gt.0)
166 & write(cv_lp,*)'memory allocation error in ',subname
167 ierr = cv_error_memalloc
168 goto 99999
169 end if
170 cont=.true.
171 layernmb=0
172 mapalgo=floponly
173 err_rep='SELECT_TYPE3'
175 if (ierr.ne.0) goto 99999
176 IF (cv_keep(38) .ne. 0 .and. cv_keep(60) .eq. 0 ) THEN
178 & cv_nfsiz(keep(38)), cv_nfsiz(keep(38)),
179 & cv_keep(50), 3, cost_root_node)
180 cost_root_node = cost_root_node / dble(cv_slavef)
181 do i=1, cv_slavef
182 cv_proc_memused(i)=cv_proc_memused(i)+
183 & dble(cv_nfsiz(keep(38)))*dble(cv_nfsiz(keep(38)))/
184 & dble(cv_slavef)
185 cv_proc_workload(i)=cv_proc_workload(i)+dble(cost_root_node)
186 enddo
187 ENDIF
188 do while((cont).OR.(layernmb.le.cv_maxlayer))
189 err_rep='FIND_THIS'
191 & ierr)
192 if (ierr.ne.0) goto 99999
193 err_rep='DO_SPLITTING'
194 if(cv_keep(82) .gt. 0) then
196 & (layernmb,thislayer,nmb_thislayer,ierr)
197 endif
198 if (ierr.ne.0) goto 99999
199 err_rep='ASSIGN_TYPES'
201 & ierr)
202 if (ierr.ne.0) goto 99999
203 if(layernmb.gt.0) then
204 if ((cv_keep(24).eq.1).OR.(cv_keep(24).eq.2).OR.
205 & (cv_keep(24).eq.4).OR.(cv_keep(24).eq.6)) then
206 err_rep='COSTS_LAYER_T2'
208 elseif((cv_keep(24).eq.8).OR.(cv_keep(24).eq.10)
209 & .OR.(cv_keep(24).eq.12).OR.(cv_keep(24).eq.14)
210 & .OR.(cv_keep(24).eq.16).OR.(cv_keep(24).eq.18)) then
211 err_rep='COSTS_LAYER_T2PM'
213 else
214 err_rep='wrong strategy for COSTS_LAYER_T2'
215 ierr = -9999
216 endif
217 if (ierr.ne.0) goto 99999
218 err_rep='WORKMEM_'
220 & cv_proc_workload,cv_proc_memused,
221 & maxwork,minwork,maxmem,minmem)
222 if(maxwork.gt.0.0d0) then
223 workbalance=minwork/maxwork
224 else
225 workbalance=0.0d0
226 endif
227 if(maxmem.gt.0.0d0) then
228 membalance=minmem/maxmem
229 else
230 membalance=0.0d0
231 endif
232 if(mapalgo.eq.memonly) then
233 err_rep='MAP_LAYER'
235 & nmb_thislayer,cv_equilib_mem,ierr)
236 if (ierr.ne.0) goto 99999
237 elseif(mapalgo.eq.floponly) then
238 err_rep='MAP_LAYER'
240 & nmb_thislayer,cv_equilib_flops,ierr)
241 if (ierr.ne.0) goto 99999
242 elseif(mapalgo.eq.hybrid) then
243 if (workbalance <= membalance) then
244 err_rep='MAP_LAYER'
246 & nmb_thislayer,cv_equilib_flops,ierr)
247 if (ierr.ne.0) goto 99999
248 else
249 err_rep='MAP_LAYER'
251 & nmb_thislayer,cv_equilib_mem,ierr)
252 if (ierr.ne.0) goto 99999
253 endif
254 else
255 if(cv_lp.gt.0)
256 & write(cv_lp,*)'Unknown mapalgo in ',subname
257 return
258 endif
259 endif
260 layernmb=layernmb+1
261 err_rep='HIGHER_LAYER'
263 & nmb_thislayer,cont,ierr)
264 if (ierr.ne.0) goto 99999
265 end do
266 IF ( (cv_keep(79).EQ.0).OR.(cv_keep(79).EQ.3).OR.
267 & (cv_keep(79).EQ.5).OR.(cv_keep(79).EQ.7)
268 & ) THEN
269 if(cv_slavef.gt.4) then
270 err_rep='POSTPROCESS'
272 endif
273 ENDIF
274 err_rep='SETUP_CAND'
276 if (ierr.ne.0) goto 99999
277 err_rep='ENCODE_PROC'
279 if (ierr.ne.0) goto 99999
280 err_rep='STORE_GLOB'
282 & info,procnode,ssarbr,nbsa)
283 err_rep='mem_dealloc'
284 deallocate(thislayer,stat=ierr)
285 if (ierr.ne.0) then
286 if(cv_lp.gt.0)
287 & write(cv_lp,*)'Memory deallocation error in ',subname
288 ierr = cv_error_memdeloc
289 goto 99999
290 endif
291 err_rep='TERMGLOB'
293 if (ierr.ne.0) goto 99999
294 istat=0
295 return
29699999 continue
297 if(cv_lp.gt.0) then
298 write(cv_lp,*)'Error in ',subname,', layernmb=',layernmb
299 write(cv_lp,*)'procedure reporting the error: ',err_rep
300 endif
301 if(ierr.eq.cv_error_memalloc) then
302 info(1) = cv_info(1)
303 info(2) = cv_info(2)
304 endif
305 istat=ierr
306 return
307 CONTAINS
309 & map_strat,workload,memused,accepted,
310 & istat)
311 implicit none
312 integer,intent(in)::map_strat
313 DOUBLE PRECISION,dimension(:),intent(in)::workload, memused
314 logical,intent(out)::accepted
315 integer,intent(out)::istat
316 DOUBLE PRECISION maxi,mini,mean,stddev, dpkeep102
317 integer i,nmb
318 character (len=48):: subname
319 logical alternative_criterion
320 DOUBLE PRECISION::
321 & minflops , minmem,
322 & cl_rate, dv_rate
323 istat=-1
324 if ( cv_keep(72) .EQ. 1) then
325 minflops = 2.0d0
326 minmem=50.0d0
327 cl_rate =0.8d0
328 dv_rate=0.2d0
329 else
330 IF (cv_keep(198).NE.0) THEN
331 minflops = 5.0d8
332 minmem=5.0d7
333 cl_rate =0.8d0
334 dv_rate=0.2d0
335 ELSE
336 minflops = 5.0d7
337 minmem=5.0d6
338 cl_rate =0.8d0
339 dv_rate=0.2d0
340 ENDIF
341 endif
342 dpkeep102 = dble(cv_keep(102))
343 IF (cv_keep(198).NE.0) THEN
344 IF (cv_slavef.LT.3)THEN
345 dpkeep102 = dble(150)
346 ELSEIF (cv_slavef.LT.5)THEN
347 dpkeep102 = dble(200)
348 ELSEIF (cv_slavef.LT.8)THEN
349 dpkeep102 = dble(250)
350 ELSEIF (cv_slavef.LT.32)THEN
351 dpkeep102 = dble(275)
352 ELSEIF (cv_slavef.LT.512)THEN
353 dpkeep102 = dble(300)
354 ELSEIF (cv_slavef.GE.512)THEN
355 dpkeep102 = dble(400)
356 ENDIF
357 ENDIF
358 subname='ACCEPT_L0'
359 accepted=.false.
360 alternative_criterion=.false.
361 if(map_strat.eq.cv_equilib_flops) then
362 maxi=maxval(workload)
363 mini=minval(workload)
364 if (maxi.lt.minflops) then
365 accepted=.true.
366 elseif(maxi.le.(dpkeep102/dble(100))*mini)then
367 accepted=.true.
368 endif
369 if ((.NOT.accepted).AND.(alternative_criterion)) then
370 mean=sum(workload)/
max(dble(cv_slavef),dble(1))
371 stddev=dble(0)
372 do i=1,cv_slavef
373 stddev=stddev+
374 & (abs(workload(i)-mean)*abs(workload(i)-mean))
375 enddo
376 stddev=sqrt(stddev/
max(dble(cv_slavef),dble(1)))
377 nmb=count(mask=abs(workload-mean)<stddev)
378 if((dble(nmb)/
max(dble(cv_slavef),dble(1)).gt.cl_rate)
379 & .AND.(stddev.lt.dv_rate*mean)) accepted=.true.
380 endif
381 elseif(map_strat.eq.cv_equilib_mem) then
382 maxi=maxval(memused)
383 mini=minval(memused)
384 if (maxi.lt.minmem) then
385 accepted=.true.
386 else if(cv_slavef.lt.48) then
387 if (maxi.le.dble(2)*mini) accepted=.true.
388 else if(cv_slavef.lt.128) then
389 if (maxi.le.dble(4)*mini) accepted=.true.
390 else if(cv_slavef.lt.256) then
391 if (maxi.le.dble(6)*mini) accepted=.true.
392 else if(cv_slavef.lt.512) then
393 if (maxi.le.dble(8)*mini) accepted=.true.
394 else if(cv_slavef.gt.512) then
395 if (maxi.le.dble(10)*mini) accepted=.true.
396 end if
397 endif
398 istat=0
399 return
402 & procnode,istat,respect_prop)
403 implicit none
404 integer, intent(in)::map_strat, layerL0end
405 DOUBLE PRECISION,dimension(:),intent(out)::workload, memused
406 integer, intent(out)::procnode(:),istat
407 logical, intent(in), OPTIONAL:: respect_prop
408 integer i,j,ierr, nodenumber,proc
409 DOUBLE PRECISION work,mem
410 character (len=48):: err_rep,subname
411 istat=-1
412 subname='ARRANGEL0'
413 if ((.NOT.associated(cv_tcostw)).OR.(.NOT.associated(cv_tcostm)))
414 & then
415 if(cv_lp.gt.0)
416 & write(cv_lp,*)'Error:tcost must be allocated in ',subname
417 return
418 end if
419 if((map_strat.ne.cv_equilib_flops).and.
420 & (map_strat.ne.cv_equilib_mem)) return
421 do i=1,cv_n
422 procnode(i)=cv_invalid
423 end do
424 do i=1,cv_slavef
425 workload(i)=cv_proc_workload(i)
426 memused(i)=cv_proc_memused(i)
427 end do
428 do i=cv_layerl0_start,layerl0end
429 nodenumber=cv_layerl0_array(i)
430 work=cv_tcostw(nodenumber)
431 mem=cv_tcostm(nodenumber)
432 err_rep='FIND_BEST_PROC'
433 if(present(respect_prop)) then
435 & workload,memused,proc,ierr,respect_prop)
436 else
438 & workload,memused,proc,ierr)
439 endif
440 if(ierr.eq.0) then
441 procnode(nodenumber)=proc
442 else
443 if(cv_lp.gt.0)
444 & write(cv_lp,*)'Error reported by ',err_rep,' in ',subname
445 do j=1,cv_slavef
446 workload(j)=cv_proc_workload(j)
447 memused(j)=cv_proc_memused(j)
448 end do
449 do j=1,cv_n
450 procnode(j)=cv_invalid
451 end do
452 return
453 end if
454 end do
455 istat=0
456 return
459 & istat )
460 implicit none
461 integer,intent(in)::layernmb,thislayer(:),
462 & nmb_thislayer
463 integer,intent(out)::istat
464 integer i,in,npiv,nfront,inode,inoderoot,par_nodes_in_layer,
465 & dummy,allocok
466 character (len=48):: subname
467 istat=-1
468 subname='ASSIGN_TYPES'
469 if((layernmb.lt.0).or.(layernmb.gt.cv_maxlayer)) return
470 if(cv_slavef.eq.1) then
471 if(layernmb.eq.0) then
472 do inode=1,cv_n
473 cv_nodetype(inode)=0
474 end do
475 end if
476 else if(layernmb.eq.0) then
477 do i=1,nmb_thislayer
478 inode=thislayer(i)
479 inoderoot=inode
480 if(cv_nodetype(inode).ne.cv_invalid) cycle
481 cv_nodetype(inode)=0
482 30 continue
483 in = inode
484 do while (in .ne. 0)
485 inode = in
486 do while (in .gt. 0)
487 in = cv_fils(in)
488 end do
489 if (in.lt.0) in=-in
490 end do
491 10 continue
492 if ( inode .ne. inoderoot ) then
493 cv_nodetype(inode)=-1
494 in = cv_frere(inode)
495 inode = abs(in)
496 if (in .lt. 0) then
497 go to 10
498 else
499 go to 30
500 end if
501 end if
502 end do
503 else
504 do i=1,nmb_thislayer
505 inode=thislayer(i)
506 in = inode
507 npiv = 0
508 do while (in.gt.0)
509 if (cv_blkon) then
510 npiv = npiv + cv_sizeofblocks(in)
511 else
512 npiv = npiv + 1
513 endif
514 in = cv_fils(in)
515 end do
516 nfront = cv_nfsiz(inode)
517 if(cv_nodetype(inode).ne.cv_invalid) cycle
519 & (in.ne.0)) then
520 cv_nodetype(inode)=2
521 else
522 cv_nodetype(inode)=1
523 end if
524 end do
525 end if
526 if(layernmb.gt.0) then
527 par_nodes_in_layer=0
528 do i=1,nmb_thislayer
529 inode=thislayer(i)
531 & par_nodes_in_layer=par_nodes_in_layer+1
532 enddo
533 if(par_nodes_in_layer.gt.0) then
534 allocate(
535 &cv_layer_p2node(layernmb)%t2_nodenumbers(par_nodes_in_layer),
536 &cv_layer_p2node(layernmb)%t2_cand(par_nodes_in_layer,cv_slavef+1),
537 &cv_layer_p2node(layernmb)%t2_candcostw(par_nodes_in_layer),
538 &cv_layer_p2node(layernmb)%t2_candcostm(par_nodes_in_layer),
539 & stat=allocok)
540 if (allocok.gt.0) then
541 cv_info(1) = cv_error_memalloc
542 cv_info(2) = (3+cv_slavef+1)*par_nodes_in_layer
543 istat = cv_error_memalloc
544 if(cv_lp.gt.0)
545 & write(cv_lp,*)'memory allocation error in ',subname
546 return
547 end if
548 cv_layer_p2node(layernmb)%nmb_t2s=par_nodes_in_layer
549 dummy=1
550 do i=1,nmb_thislayer
551 inode=thislayer(i)
553 cv_layer_p2node(layernmb)%t2_nodenumbers(dummy)=inode
554 cv_layer_p2node(layernmb)%t2_cand(dummy,:)=0
555 cv_layer_p2node(layernmb)%t2_candcostw(dummy)
556 & =cv_d_invalid
557 cv_layer_p2node(layernmb)%t2_candcostm(dummy)
558 & =cv_d_invalid
559 dummy=dummy+1
560 endif
561 enddo
562 else
563 nullify(cv_layer_p2node(layernmb)%t2_nodenumbers,
564 & cv_layer_p2node(layernmb)%t2_cand,
565 & cv_layer_p2node(layernmb)%t2_candcostw,
566 & cv_layer_p2node(layernmb)%t2_candcostm)
567 end if
568 endif
569 istat=0
570 return
573 implicit none
574 integer,intent(in)::procs4node(cv_size_ind_proc)
575 integer,intent(in)::procnumber
576 logical :: MUMPS_BIT_GET
577 integer pos1,pos2
578 pos1 = (procnumber-1)/cv_bitsize_of_int +1
579 pos2 = mod(procnumber-1,cv_bitsize_of_int)
580 mumps_bit_get=btest(procs4node(pos1),pos2)
581 return
584
585 implicit none
586 integer, intent(in)::inode,procnumber
587 logical :: MUMPS_BIT_GET4PROC
588 integer pos1,pos2
589 mumps_bit_get4proc=.false.
590 if((procnumber.lt.1).or.(procnumber.gt.cv_slavef)) return
591 if(.not.associated(cv_prop_map(inode)%ind_proc)) return
592 pos1 = (procnumber-1)/cv_bitsize_of_int +1
593 pos2 = mod(procnumber-1,cv_bitsize_of_int)
594 mumps_bit_get4proc=btest
595 & (cv_prop_map(inode)%ind_proc(pos1),pos2)
596 return
599 implicit none
600 integer, intent(inout)::procs4node(cv_size_ind_proc)
601 integer,intent(in)::procnumber
602 integer, intent(out)::istat
603 integer pos1,pos2
604 istat = -1
605 if((procnumber.lt.1).or.(procnumber.gt.cv_slavef)) return
606 if(cv_bitsize_of_int.le.0) return
607 pos1 = (procnumber-1)/cv_bitsize_of_int +1
608 pos2 = mod(procnumber-1,cv_bitsize_of_int)
609 procs4node(pos1)=ibset(procs4node(pos1),pos2)
610 istat = 0
611 return
614 implicit none
615 integer,intent(out)::istat
616 integer i
617 DOUBLE PRECISION :: maxcostw_root
618 istat = -1
619 if ((.NOT.associated(cv_tcostw)).OR.(.NOT.associated(cv_tcostm)))
620 & then
621 if(cv_lp.gt.0)
622 & write(cv_lp,*)
623 & 'Error: tcost must be allocated in MUMPS_CALCCOSTS'
624 return
625 end if
626 maxcostw_root = 0d0
627 do i=1,cv_n
628 if (cv_frere(i).eq.cv_n+1) then
629 cv_tcostw(i)=0.0d0
630 cv_ncostw(i)=0.0d0
631 cv_tcostm(i)=0.0d0
632 cv_ncostm(i)=0.0d0
633 elseif (cv_frere(i).eq.0) then
634 cv_depth(i)=1
636 maxcostw_root =
max(maxcostw_root,cv_tcostw(i))
637 end if
638 end do
639 istat = 0
640 mincostw = 1.0d0+maxcostw_root/(dble(cv_maxnsteps)*
641 & dble(10*cv_slavef) )
642 return
645 implicit none
646 integer,intent(in)::npiv,nfront
647 DOUBLE PRECISION,intent(out)::costw,costm
648 character (len=48):: subname
649 subname='CALCNODECOSTS'
650 if((npiv.le.1).and.(nfront.le.1)) then
651 costw = dble(0)
652 costm = dble(1)
653 else
654 if((cv_keep(494).ne.0).and.(cv_keep(471).ge.0).and.
655 & (npiv.ge.cv_keep(490)).and.(nfront.ge.cv_keep(491))) then
656 WRITE(*,*) " *** Temp internal error in MUMPS_CALCNODECOSTS:"
659 & cv_keep(471), cv_keep(472), cv_keep(475),
660 & cv_keep(488), cv_keep(50))
661 else
662 if(cv_keep(50).eq.0) then
663 costw= 2.0d0*dble(nfront)*dble(npiv)*dble(nfront-npiv-1)
664 & + dble(npiv)*dble(npiv+1)*dble(2*npiv+1)/dble(3)
665 & + dble(2*nfront-npiv-1) * dble(npiv) / dble(2)
666 costm= dble(npiv)*(dble(2*nfront)-dble(npiv))
667 else
668 costw= dble(npiv) *
669 & (dble(nfront)*dble(nfront)+dble(2*nfront) -
670 & dble(nfront+1) * dble(npiv+1) +
671 & dble(npiv+1) * dble(2*npiv+1) / dble(6))
672 costm= dble(npiv) * dble(nfront)
673 end if
674 end if
675 end if
676 if((costw.lt.0.0d0).or.(costm.lt.0.0d0)) then
677 endif
678 return
681 & K471, K472, K475, K488, SYM)
682 INTEGER, INTENT(IN) :: NPIV, NFRONT, SYM, K471, K472, K475, K488
683 DOUBLE PRECISION, INTENT(OUT) :: COSTW, COSTM
684 INTEGER :: IBCKSZ
685 DOUBLE PRECISION :: B,R,M,N
686 M = dble(npiv)
687 n = dble(nfront)
688 CALL compute_blr_vcs(k472, ibcksz, k488, npiv)
689 b = dble(ibcksz)
691 IF (k471.EQ.0) THEN
692 r = 1.0d0
693 ELSEIF (k471.EQ.1) THEN
694 r = sqrt(dble(n))
695 ELSE
696 WRITE(*,*) 'Internal error in MUMPS_CALCNODECOSTS_BLR', k471
698 ENDIF
700 IF (sym.EQ.0) THEN
701 costw = m/b * b*(b+1.0d0)*(2.0d0*b+1.0d0)/3.0d0
702 IF (k475.EQ.0) THEN
703 costw = costw + 2.0d0*m/(b*b)*(n-(m+b)/2.0d0) * b*b*b
704 ELSEIF (k475.EQ.1) THEN
705 costw = costw + m/(b*b)*(n-(m+b)/2.0d0) * b*b*(r+b)
706 ELSEIF (k475.EQ.2) THEN
707 costw = costw + m/(b*b)*(2.0d0*n-3.0d0*m-2.0d0*b) * b*b*r
708 & + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * b*b*b
709 ELSEIF (k475.EQ.3) THEN
710 costw = costw + 2.0d0*m/(b*b)*(n-(m+b)/2.0d0) * b*b*r
711 ENDIF
712 costw = costw + 2.0d0*m/(b*b)*(n-(m+b)/2.0d0) * 2.0d0*b*b*r
713 costw = costw + (4.0d0*b*r*r + 2.0d0*b*b*r) * (
714 & (n-m)*(n-m)*m/(b*b*b)
715 & + (n-m)/b*(m/b-1.0d0)*m/b
716 & + (m/b-1.0d0)*m/b*(2.0d0*m/b-1.0d0)/6.0d0
717 & )
718 costm = m*(2.0d0*n-m)/(b*b) * 2.0d0*b*r
719 ELSE
720 costw = m/b * b*(b+1.0d0)*(2.0d0*b+1.0d0)/6.0d0
721 IF (k475.EQ.0.OR.k475.EQ.1) THEN
722 costw = costw + m/(b*b)*(n-(m+b)/2.0d0) * b*b*b
723 ELSEIF (k475.EQ.2) THEN
724 costw = costw + (n-m)*m/(b*b) * b*b*r
725 & + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * b*b*b
726 ELSEIF (k475.EQ.3) THEN
727 costw = costw + m/(b*b)*(n-(m+b)/2.0d0) * b*b*r
728 ENDIF
729 costw = costw + m/(b*b)*(n-(m+b)/2.0d0) * 2.0d0*b*b*r
730 costw = costw + (4.0d0*b*r*r + 2.0d0*b*b*r) * (
731 & (n-m)*(n-m)*m/(b*b*b)/2.0d0
732 & + (n-m)/b*(m/b-1.0d0)*m/b/2.0d0
733 & + (m/b-1.0d0)*m/b*(m/b+1.0d0)/6.0d0
734 & )
735 costm = m*n/(b*b) * 2.0d0*b*r
736 ENDIF
739 & COSTW, COSTM, K471, K472, K475, K488, SYM)
740 INTEGER, INTENT(IN) :: NPIV, NFRONT, SYM, K471, K472, K475, K488
741 DOUBLE PRECISION, INTENT(OUT) :: COSTW, COSTM
742 INTEGER :: IBCKSZ
743 DOUBLE PRECISION :: B,R,M,N
744 m = dble(npiv)
745 n = dble(nfront)
746 CALL compute_blr_vcs(k472, ibcksz, k488, npiv)
747 b = dble(ibcksz)
749 IF (k471.EQ.0) THEN
750 r = 1.0d0
751 ELSEIF (k471.EQ.1) THEN
752 r = sqrt(dble(n))
753 ELSE
754 WRITE(*,*) 'Internal error in ',
755 & 'MUMPS_COSTS_BLR_T2_MASTER', k471
757 ENDIF
759 IF (sym.EQ.0) THEN
760 costw = m/b * b*(b+1.0d0)*(2.0d0*b+1.0d0)/3.0d0
761 IF (k475.EQ.0) THEN
762 costw = costw + m/(b*b)*(n-(m+b)/2.0d0) * b*b*b
763 & + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * b*b*b
764 ELSEIF (k475.EQ.1) THEN
765 costw = costw + m/(b*b)*(n-(m+b)/2.0d0) * b*b*b
766 & + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * b*b*r
767 ELSEIF (k475.EQ.2) THEN
768 costw = costw + m/(b*b)*(n-m) * b*b*r
769 & + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * b*b*(b+r)
770 ELSEIF (k475.EQ.3) THEN
771 costw = costw + m/(b*b)*(n-(m+b)/2.0d0) * b*b*r
772 & + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * b*b*r
773 ENDIF
774 costw = costw + m/(b*b)*(n-(m+b)/2.0d0) * 2.0d0*b*b*r
775 & + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * 2.0d0*b*b*r
776 costw = costw + (4.0d0*b*r*r + 2.0d0*b*b*r) * (
777 & (n-m)/b*(m/b-1.0d0)*m/b/2.0d0
778 & + (m/b-1.0d0)*m/b*(2.0d0*m/b-1.0d0)/6.0d0
779 & )
780 costm = m*n/(b*b) * 2.0d0*b*r
781 ELSE
782 costw = m/b * b*(b+1.0d0)*(2.0d0*b+1.0d0)/6.0d0
783 IF (k475.LE.2) THEN
784 costw = costw + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * b*b*b
785 ELSEIF (k475.EQ.3) THEN
786 costw = costw + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * b*b*r
787 ENDIF
788 costw = costw + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0
789 & * 2.0d0*b*b*r
790 costw = costw + (4.0d0*b*r*r + 2.0d0*b*b*r) * (
791 & (m/b-1.0d0)*m/b*(m/b+1.0d0)/6.0d0
792 & )
793 costm = m*m/(b*b) * 2.0d0*b*r
794 ENDIF
797 & NROW, COSTW, COSTM, K471, K472, K475, K488, SYM)
798 INTEGER, INTENT(IN) :: NPIV, NFRONT, SYM, K471, K472,
799 & K475, K488
800 DOUBLE PRECISION, INTENT(IN) :: NROW
801 DOUBLE PRECISION, INTENT(OUT) :: COSTW, COSTM
802 INTEGER :: IBCKSZ
803 DOUBLE PRECISION :: B,R,M,N,P
804 M = nrow
805 n = dble(nfront)
806 p = dble(npiv)
807 CALL compute_blr_vcs(k472, ibcksz, k488, npiv)
808 b = dble(ibcksz)
810 IF (k471.EQ.0) THEN
811 r = 1.0d0
812 ELSEIF (k471.EQ.1) THEN
813 r = sqrt(dble(n))
814 ELSE
815 WRITE(*,*) 'Internal error in ',
816 & 'MUMPS_COSTS_BLR_T2_SLAVE', k471
818 ENDIF
820 costw = 0.0d0
821 IF (k475.EQ.0) THEN
822 costw = costw + (m*p)/(b*b) * b*b*b
823 ELSE
824 costw = costw + (m*p)/(b*b) * b*b*r
825 ENDIF
826 costw = costw + (m*p)/(b*b) * 2.0d0*b*b*r
827 IF (sym.EQ.0) THEN
828 costw = costw + (4.0d0*b*r*r + 2.0d0*b*b*r) * (
829 & m/b*(p/b-1.0d0)*p/b/2.0d0
830 & + (n-m)*m*p/(b*b*b)
831 & )
832 ELSE
833 costw = costw + (4.0d0*b*r*r + 2.0d0*b*b*r) * (
834 & m/b*(p/b-1.0d0)*p/b/2.0d0
835 & + (n-m)*m*p/(b*b*b)/2.0d0
836 & )
837 ENDIF
838 costm = m*p/(b*b) * 2.0d0*b*r
841 implicit none
842 integer,intent(in)::layernmb,nmb_thislayer
843 integer,intent(out)::istat
844 integer in,inode,j,kmax,npiv,nfront,ncb,ncol,
845 & min_needed,max_needed,more_than_needed,total_nmb_cand,
846 & nmb_type2_thislayer,fraction,
847 & total_cand_layer,cand_strat, keep48_loc
848 DOUBLE PRECISION flop1,work_type2_thislayer,
849 & relative_weight,workmaster,nrow
850 logical force_cand
851 character (len=48):: subname
852 integer MUMPS_REG_GETKMAX, MUMPS_BLOC2_GET_NSLAVESMIN,
853 & MUMPS_BLOC2_GET_NSLAVESMAX
854 external MUMPS_REG_GETKMAX, MUMPS_BLOC2_GET_NSLAVESMIN,
855 & mumps_bloc2_get_nslavesmax
856 istat=-1
857 subname='COSTS_LAYER_T2'
858 if (cv_keep(24).lt.1) then
859 if(cv_lp.gt.0)
860 & write(cv_lp,*)'Error in ',subname,'. Wrong keep24'
861 return
862 endif
863 force_cand=(mod(cv_keep(24),2).eq.0)
864 cand_strat=cv_keep(24)/2
865 nmb_type2_thislayer=cv_layer_p2node(layernmb)%nmb_t2s
866 if (nmb_type2_thislayer.gt.0) then
867 work_type2_thislayer=0.0d0
868 do j=1,nmb_type2_thislayer
869 inode=cv_layer_p2node(layernmb)%t2_nodenumbers(j)
870 work_type2_thislayer=work_type2_thislayer+cv_ncostw(inode)
871 end do
872 if(cv_relax.le.0.0d0) then
873 if(cv_lp.gt.0)
874 & write(cv_lp,*)'Error in ',subname,'. Wrong cv_relax'
875 return
876 endif
877 total_cand_layer=int(cv_relax*dble(cv_slavef))
878 do j=1,nmb_type2_thislayer
879 inode=cv_layer_p2node(layernmb)%t2_nodenumbers(j)
880 nfront=cv_nfsiz(inode)
881 npiv=0
882 in=inode
883 do while(in.gt.0)
884 if (cv_blkon) then
885 npiv = npiv + cv_sizeofblocks(in)
886 else
887 npiv=npiv+1
888 endif
889 in=cv_fils(in)
890 end do
891 ncb=nfront-npiv
893 if (force_cand) then
894 if (cv_keep(50) == 0) then
895 keep48_loc=0
896 else
897 keep48_loc=3
898 endif
899 if (cv_keep(48).EQ.5) keep48_loc = 5
901 & cv_slavef, keep48_loc,cv_keep8(21),
902 & cv_keep(50),nfront,ncb,
903 & cv_keep(375), cv_keep(119))
904 max_needed = mumps_bloc2_get_nslavesmax(
905 & cv_slavef, keep48_loc,cv_keep8(21),
906 & cv_keep(50),nfront,ncb,
907 & cv_keep(375), cv_keep(119))
908 if(cand_strat.eq.1) then
909 more_than_needed = 0
910 elseif (cand_strat.eq.2) then
911 if(work_type2_thislayer.gt.0.0d0) then
912 relative_weight=cv_ncostw(inode)/work_type2_thislayer
913 else
914 relative_weight = 0.0d0
915 endif
916 fraction=nint(relative_weight *
917 & dble(total_cand_layer))
918 more_than_needed=
min(
max(0,cv_slavef-1-min_needed),
919 &
max(0,fraction-min_needed) )
920 elseif (cand_strat.eq.3) then
921 more_than_needed=cv_slavef-1-min_needed
922 else
923 if(cv_lp.gt.0)
924 & write(cv_lp,*)'Unknown cand. strategy in ',subname
925 return
926 endif
927 total_nmb_cand=
min(min_needed+more_than_needed,
928 & cv_slavef-1)
929 total_nmb_cand=
min(total_nmb_cand,max_needed)
930 else
931 total_nmb_cand=0
932 endif
933 cv_layer_p2node(layernmb)%t2_cand(j,cv_slavef+1)
934 & = total_nmb_cand
935 if(cv_keep(50).eq.0) then
936 flop1=dble(2*npiv)*dble(nfront)-
937 & dble(npiv+nfront)*dble(npiv+1)
938 flop1= dble(npiv)*flop1 +
939 & dble(2 * npiv-npiv-1)*dble(npiv)/dble(2)+
940 & dble(npiv)*dble(npiv+1)*dble(2*npiv+1)/dble(3)
941 else
942 flop1=dble(npiv)*
943 & ( dble(npiv)*dble(npiv)+dble(npiv)-
944 & dble(npiv*npiv+npiv+1) )+
945 & (dble(npiv)*dble(npiv+1)*dble(2*npiv+1))/dble(6)
946 endif
947 cv_ncostw(inode)=flop1
948 if(total_nmb_cand.gt.0) then
949 nrow = dble(
max(
min(dble(ncb)/dble(total_nmb_cand),
950 & dble(kmax)),
951 & dble(ncb)/dble(cv_slavef-1)))
952 elseif(cv_slavef.gt.1) then
953 nrow = dble(
max(dble(kmax),
954 & dble(ncb)/dble(cv_slavef-1)))
955 else
956 nrow = dble(ncb)
957 endif
958 if(cv_keep(50).eq.0) then
959 flop1 = dble(npiv)*dble(nrow)+
960 & dble(nrow)*dble(npiv)*dble(2*nfront-npiv-1)
961 else
962 ncol= nfront
963 flop1 = dble(npiv)*dble(nrow)*
964 & (dble(2*ncol)-dble(nrow)-dble(npiv)+dble(1))
965 workmaster = dble(npiv)*dble(npiv)*dble(npiv)/dble(3)
966 if (workmaster.gt.flop1) flop1=workmaster
967 endif
968 cv_layer_p2node(layernmb)%t2_candcostw(j)=flop1
969 if(cv_keep(50).eq.0) then
970 cv_ncostm(inode)=dble(npiv)*dble(nfront)
971 else
972 cv_ncostm(inode)=dble(npiv)*dble(npiv)
973 endif
974 if(cv_keep(50).eq.0) then
975 cv_layer_p2node(layernmb)%t2_candcostm(j)
976 & =dble(npiv)*dble(nrow)
977 else
978 cv_layer_p2node(layernmb)%t2_candcostm(j)
979 & =dble(npiv)*dble(nrow)
980 endif
981 end do
982 endif
983 istat=0
984 return
987
988 implicit none
989 integer,intent(in)::layernmb,nmb_thislayer
990 integer,intent(out)::istat
991 integer in,inode,j,jj,kmax,npiv,nfront,ncb,ncol,
992 & total_nmb_cand,nmb_type2_thislayer,
993 & total_cand_layer,npropmap,min_needed,
994 & keep48_loc
995 DOUBLE PRECISION flop1,work_type2_thislayer,
996 & relative_weight,workmaster,nrow
997 DOUBLE PRECISION save_ncostw, save_ncostm
998 LOGICAL SPLITNODE, BLRNODE
999 character (len=48):: subname
1000 integer MUMPS_REG_GETKMAX, MUMPS_BLOC2_GET_NSLAVESMIN
1001 external MUMPS_REG_GETKMAX, MUMPS_BLOC2_GET_NSLAVESMIN
1002 istat=-1
1003 splitnode=.false.
1004 blrnode=.false.
1005 save_ncostw = 1.0d0
1006 save_ncostm = 1.0d0
1007 subname='COSTS_LAYER_T2PM'
1008 if((cv_keep(24).ne.8).AND.(cv_keep(24).ne.10)
1009 & .AND.(cv_keep(24).ne.12).AND.(cv_keep(24).ne.14)
1010 & .AND.(cv_keep(24).ne.16).AND.(cv_keep(24).ne.18)) then
1011 if(cv_lp.gt.0)
1012 & write(cv_lp,*)'Error in ',subname,'. Wrong keep24'
1013 return
1014 endif
1015 nmb_type2_thislayer=cv_layer_p2node(layernmb)%nmb_t2s
1016 if (nmb_type2_thislayer.gt.0) then
1017 total_cand_layer=0
1018 work_type2_thislayer=0.0d0
1019 do j=1,nmb_type2_thislayer
1020 inode=cv_layer_p2node(layernmb)%t2_nodenumbers(j)
1021 work_type2_thislayer=work_type2_thislayer+cv_ncostw(inode)
1022 npropmap=0
1023 do jj=1,cv_slavef
1025 & npropmap=npropmap+1
1026 end do
1027 total_cand_layer=total_cand_layer+npropmap
1028 end do
1029 do j=1,nmb_type2_thislayer
1030 inode=cv_layer_p2node(layernmb)%t2_nodenumbers(j)
1031 nfront=cv_nfsiz(inode)
1032 splitnode = (abs(cv_nodetype(inode)).GT.3)
1033 IF (splitnode) THEN
1034 save_ncostw = cv_ncostw(inode)
1035 save_ncostm = cv_ncostm(inode)
1036 ENDIF
1037 npiv=0
1038 in=inode
1039 do while(in.gt.0)
1040 if (cv_blkon) then
1041 npiv = npiv + cv_sizeofblocks(in)
1042 else
1043 npiv=npiv+1
1044 endif
1045 in=cv_fils(in)
1046 end do
1047 ncb=nfront-npiv
1049 if(kmax.lt.1) then
1051 endif
1052 if (cv_keep(50) == 0) then
1053 keep48_loc=0
1054 else
1055 keep48_loc=3
1056 endif
1057 if (cv_keep(48).EQ.5) keep48_loc = 5
1059 & (cv_slavef, keep48_loc,cv_keep8(21),
1060 & cv_keep(50),nfront,ncb,
1061 & cv_keep(375), cv_keep(119))
1062 if(min_needed.lt.1) then
1063 if(cv_lp.gt.0)
1064 & write(cv_lp,*)'Error in ',subname,'.NEG min_needed'
1065 return
1066 endif
1067 if ((cv_keep(24).eq.8).OR.(cv_keep(24).eq.14).OR.
1068 & (cv_keep(24).eq.18)) then
1069 npropmap=0
1070 do jj=1,cv_slavef
1072 & npropmap=npropmap+1
1073 end do
1074 total_nmb_cand=
max(npropmap-1,min_needed)
1075 elseif(cv_keep(24).eq.10) then
1076 if(work_type2_thislayer.gt.0.0d0) then
1077 relative_weight=cv_ncostw(inode)/work_type2_thislayer
1078 else
1079 relative_weight = 0.0d0
1080 endif
1081 total_nmb_cand=nint(relative_weight *
1082 & dble(total_cand_layer))
1083 total_nmb_cand=
max(total_nmb_cand-1,min_needed)
1084 elseif((cv_keep(24).eq.12).OR.(cv_keep(24).eq.16)) then
1085 if(layernmb.lt.cv_dist_l0_mixed_strat_bound) then
1086 if(cv_mp.gt.0)then
1087 write(cv_mp,*)'Strat', cv_keep(24),
1088 & ': use 8 on layer',layernmb
1089 endif
1090 npropmap=0
1091 do jj=1,cv_slavef
1093 & npropmap=npropmap+1
1094 end do
1095 total_nmb_cand=
max(npropmap-1,min_needed)
1096 else
1097 if(cv_mp.gt.0)then
1098 write(cv_mp,*)'Strat', cv_keep(24),
1099 & ': use 10 on layer',layernmb
1100 endif
1101 if(work_type2_thislayer.gt.0.0d0) then
1102 relative_weight=cv_ncostw(inode)/work_type2_thislayer
1103 else
1104 relative_weight = 0.0d0
1105 endif
1106 total_nmb_cand=nint(relative_weight *
1107 & dble(total_cand_layer))
1108 total_nmb_cand=
max(total_nmb_cand-1,min_needed)
1109 endif
1110 else
1111 if(cv_lp.gt.0)
1112 & write(cv_lp,*)'Unknown cand. strategy in ',subname
1113 return
1114 endif
1115 total_nmb_cand=
max(total_nmb_cand,1)
1116 total_nmb_cand=
min(total_nmb_cand,cv_slavef-1)
1117 total_nmb_cand=
min(total_nmb_cand,ncb)
1118 cv_layer_p2node(layernmb)%t2_cand(j,cv_slavef+1)
1119 & = total_nmb_cand
1120 blrnode = ((cv_keep(494).ne.0).and.(cv_keep(471).ge.0) .and.
1121 & (npiv.ge.cv_keep(490)).and.(nfront.ge.cv_keep(491)))
1122 IF (blrnode) THEN
1124 & cv_ncostw(inode), cv_ncostm(inode),
1125 & cv_keep(471), cv_keep(472), cv_keep(475),
1126 & cv_keep(488), cv_keep(50))
1127 ELSE
1128 if(cv_keep(50).eq.0) then
1129 flop1=dble(2*npiv)*dble(nfront)-
1130 & dble(npiv+nfront)*dble(npiv+1)
1131 flop1= dble(npiv)*flop1 +
1132 & dble(2 * npiv-npiv-1)*dble(npiv)/dble(2)+
1133 & dble(npiv)*dble(npiv+1)*dble(2*npiv+1)/dble(3)
1134 else
1135 flop1=dble(npiv)*
1136 & ( dble(npiv)*dble(npiv)+dble(npiv)-
1137 & dble(npiv*npiv+npiv+1) )+
1138 & (dble(npiv)*dble(npiv+1)*dble(2*npiv+1))/dble(6)
1139 endif
1140 cv_ncostw(inode)=flop1
1141 ENDIF
1142 IF (splitnode) THEN
1143 cv_layer_p2node(layernmb)%t2_candcostw(j)=
1144 &
max(save_ncostw - cv_ncostw(inode), 1.0d0)
1145 ELSE
1146 if(total_nmb_cand.gt.0) then
1147 nrow = dble(
max(
min(dble(ncb)/dble(total_nmb_cand),
1148 & dble(kmax)),
1149 & dble(ncb)/dble(cv_slavef-1)))
1150 elseif(cv_slavef.gt.1) then
1151 nrow = dble(
max(dble(kmax),
1152 & dble(ncb)/dble(cv_slavef-1)))
1153 else
1154 nrow = dble(ncb)
1155 endif
1156 IF (blrnode) THEN
1158 & nrow,
1159 & cv_layer_p2node(layernmb)%t2_candcostw(j),
1160 & cv_layer_p2node(layernmb)%t2_candcostm(j),
1161 & cv_keep(471), cv_keep(472), cv_keep(475),
1162 & cv_keep(488), cv_keep(50))
1163 ELSE
1164 if(cv_keep(50).eq.0) then
1165 flop1 = dble(npiv)*dble(nrow)+
1166 & dble(nrow)*dble(npiv)*dble(2*nfront-npiv-1)
1167 else
1168 ncol= nfront
1169 flop1 = dble(npiv)*dble(nrow)*
1170 & (dble(2*ncol)-dble(nrow)-dble(npiv)+dble(1))
1171 workmaster = dble(npiv)*dble(npiv)*dble(npiv)/dble(3)
1172 if (workmaster.gt.flop1) flop1=workmaster
1173 endif
1174 cv_layer_p2node(layernmb)%t2_candcostw(j)=flop1
1175 ENDIF
1176 ENDIF
1177 IF (.NOT.blrnode) THEN
1178 if(cv_keep(50).eq.0) then
1179 cv_ncostm(inode)=dble(npiv)*dble(nfront)
1180 else
1181 cv_ncostm(inode)=dble(npiv)*dble(npiv)
1182 endif
1183 ENDIF
1184 IF (splitnode) THEN
1185 cv_layer_p2node(layernmb)%t2_candcostm(j) =
1186 &
max(save_ncostm - cv_ncostm(inode), 1.0d0)
1187 ELSEIF (.NOT.blrnode) THEN
1188 if(cv_keep(50).eq.0) then
1189 cv_layer_p2node(layernmb)%t2_candcostm(j)
1190 & =dble(npiv)*dble(nrow)
1191 else
1192 cv_layer_p2node(layernmb)%t2_candcostm(j)
1193 & =dble(npiv)*dble(nrow)
1194 endif
1195 ENDIF
1196 end do
1197 endif
1198 istat=0
1199 return
1202 & layernmb,thislayer,nmb_thislayer,
1203 & istat )
1204 implicit none
1205 integer,intent(in)::layernmb,nmb_thislayer
1206 integer,intent(in)::thislayer(:)
1207 integer,intent(out)::istat
1208 integer i,j,k1,k2,k3,ierr,inode,nfront,npiv,
1209 & npropmap, inode_tmp, allocok
1210 logical doit
1211 integer, allocatable, dimension(:) :: npivsplit
1212 integer :: lnpivsplit
1213 integer :: bsize
1214 integer :: k1_temp, npiv_beg, npiv_end
1215 character (len=48):: err_rep,subname
1216 istat=-1
1217 subname='SPLIT_DURING_MAPPING'
1218 if((layernmb.lt.0).or.(layernmb.gt.cv_maxlayer)) return
1219 if (cv_slavef.eq.1) then
1220 return
1221 endif
1222 if (cv_icntl(59) .ne. 0) then
1223 istat = 0
1224 return
1225 endif
1226 lnpivsplit = cv_keep(108)
1227 allocate(npivsplit(lnpivsplit),stat=allocok)
1228 if (allocok .NE. 0) then
1229 cv_info(1) = cv_error_memalloc
1230 cv_info(2) = lnpivsplit
1231 istat = cv_error_memalloc
1232 if(cv_lp.gt.0)
1233 & write(cv_lp,*)'memory allocation error in ',subname
1234 return
1235 endif
1236 do i=1,nmb_thislayer
1237 ierr=0
1238 inode=thislayer(i)
1239 nfront = cv_nfsiz(inode)
1240 inode_tmp=inode
1241 npiv=0
1242 do while (inode_tmp.gt.0)
1243 if (cv_blkon) then
1244 npiv = npiv + cv_sizeofblocks(inode_tmp)
1245 else
1246 npiv=npiv+1
1247 endif
1248 inode_tmp=cv_fils(inode_tmp)
1249 end do
1250 if (inode_tmp .eq. 0) cycle
1251 npropmap=0
1252 do j=1,cv_slavef
1254 npropmap=npropmap+1
1255 endif
1256 end do
1257 IF ((keep(376) .EQ.1)
1258 & ) THEN
1259 err_rep='GET_SPLIT_4_PERF'
1261 & dble(npropmap),
1262 & k1, lnpivsplit, npivsplit, n, cv_frere(1),
1263 & cv_keep(1),
1264 & cv_fils(1), cv_blkon, cv_sizeofblocks(1),
1265 & istat)
1266 k3=k1
1267 doit = .true.
1268 GOTO 200
1269 ENDIF
1270 IF ((cv_keep(79) .EQ.0).OR.(cv_keep(79).GE.5)) THEN
1271 err_rep='GET_SPLIT_INKPART'
1273 & doit,npiv,nfront,npropmap,k1,k3,
1274 & ierr)
1275 ELSE
1276 err_rep='GET_MEMSPLIT_INKPART'
1278 & doit,npiv,nfront,npropmap,k2,ierr)
1279 k1=k2
1280 k3=k2
1281 ENDIF
1282 if (ierr.eq.0) then
1283 if (lnpivsplit < k1) then
1284 write(*,*) 'error in', subname, lnpivsplit, k1, cv_keep(108)
1286 endif
1287 bsize =
max(npiv/k1,1)
1288 if (cv_blkon) then
1289 inode_tmp = inode
1290 npiv_beg = 0
1291 npiv_end = 0
1292 k1_temp = 0
1293 do while (inode_tmp.gt.0)
1294 npiv_end = npiv_end + cv_sizeofblocks(inode_tmp)
1295 if (npiv_end-npiv_beg.ge.bsize) then
1296 k1_temp = k1_temp+1
1297 npivsplit(k1_temp) = npiv_end-npiv_beg
1298 npiv_beg = npiv_end
1299 if ( ( (npiv-npiv_beg).gt.0) .and.
1300 & (npiv-npiv_beg.LT.2*bsize)
1301 & ) then
1302 k1_temp = k1_temp+1
1303 npivsplit(k1_temp) = npiv - npiv_beg
1304 exit
1305 endif
1306 endif
1307 inode_tmp=cv_fils(inode_tmp)
1308 enddo
1309 if (k1_temp.eq.0) then
1310 k1_temp = 1
1311 npivsplit(1) = npiv
1312 else
1313 if (npiv_end.gt.npiv_beg) then
1314 k1_temp = k1_temp+1
1315 npivsplit(k1_temp) = npiv_end-npiv_beg
1316 endif
1317 endif
1318 k1 = k1_temp
1319 else
1320 do j = 1, k1-1
1321 npivsplit(j)= bsize
1322 enddo
1323 npivsplit(k1) = npiv-bsize*(k1-1)
1324 endif
1325 endif
1326 200 CONTINUE
1327 if(ierr.ne.0) then
1328 if(cv_lp.gt.0)
1329 & write(cv_lp,*)'Error reported by ',
1330 & err_rep,' in ',subname
1331 istat =ierr
1332 goto 100
1333 endif
1334 if ( ( k1.le.1).or.(k3.le.1).or.(.NOT.doit) ) cycle
1335 err_rep='SPLITNODE_INKPART'
1337 & lnpivsplit, npivsplit, cv_keep(1), n, cv_fils(1),
1338 & cv_frere(1),
1339 & cv_nfsiz(1), cv_ne(1), cv_info(5),
1340 & cv_nsteps, cv_nodetype(1), ierr
1341 & , sizeofblocks, lsizeofblocks
1342 & , blkon
1343 & )
1344 if(ierr.ne.0) then
1345 if(cv_lp.gt.0)
1346 & write(cv_lp,*)'Error reported by ',err_rep,
1347 & ' in ',subname
1348 istat = ierr
1349 goto 100
1350 endif
1351 err_rep='SPLITNODE_UPDATE'
1353 & lnpivsplit, npivsplit,
1354 & ierr)
1355 if(ierr.ne.0) then
1356 if(cv_lp.gt.0)
1357 & write(cv_lp,*)'Error reported by ',err_rep,
1358 & ' in ',subname
1359 istat = ierr
1360 goto 100
1361 endif
1362 end do
1363 istat=0
1364 100 continue
1365 deallocate(npivsplit)
1366 return
1369 & doit,npiv,nfront,npropmap,k1,k3,istat)
1370 implicit none
1371 integer,intent(in)::inode
1372 logical,intent(out)::doit
1373 integer,intent(in) :: npiv, nfront, npropmap
1374 integer,intent(out) :: istat
1375 integer,intent(out) ::k1,k3
1376 integer npiv2,nfront2,npiv_son2
1377 integer ncb,kmax,keep48_loc,nslaves_max,
1378 & nslaves_estim,strat,kk
1379 DOUBLE PRECISION wk_master,wk_master2,wk_slave2
1380 integer MUMPS_REG_GETKMAX,
1383 external MUMPS_REG_GETKMAX
1384 external MUMPS_BLOC2_GET_NSLAVESMAX
1385 external MUMPS_BLOC2_GET_NSLAVESMIN
1386 doit=.false.
1387 k1=1
1388 k3 =1
1389 istat=-1
1390 doit=.true.
1391 if (cv_nodetype(inode) .gt. 0) then
1392 doit=.false.
1393 istat = 0
1394 return
1395 endif
1396 if ( (cv_frere(inode).eq.0) ) then
1397 doit=.false.
1398 istat = 0
1399 return
1400 endif
1401 npiv_son2 =
max(npiv/2,1)
1403 doit=.false.
1404 istat = 0
1405 return
1406 endif
1407 ncb = nfront - npiv
1409 if (cv_keep(50) == 0) then
1410 keep48_loc=0
1411 else
1412 keep48_loc=3
1413 endif
1414 if (cv_keep(48).EQ.5) keep48_loc = 5
1415 if(npropmap .gt. cv_keep(83)) then
1417 & cv_slavef, keep48_loc, cv_keep8(21),
1418 & cv_keep(50), nfront,
1419 &
max(
max(ncb,nfront-cv_keep(420)),1),
1420 & cv_keep(375), cv_keep(119))
1421 nslaves_estim =
min(npropmap-1,nslaves_max)
1422 nslaves_estim =
max(nslaves_estim,1)
1423 else
1425 & cv_slavef, keep48_loc, cv_keep8(21),
1426 & cv_keep(50), nfront, ncb, cv_keep(375),
1427 & cv_keep(119) )
1429 & cv_slavef, keep48_loc,cv_keep8(21),
1430 & cv_keep(50), nfront, ncb,
1431 & cv_keep(375), cv_keep(119) )
1432 nslaves_estim =
max(nslaves_estim,1)
1433 nslaves_estim =
min(nslaves_estim,nslaves_max)
1434 endif
1435 if (cv_keep(50).eq.0) then
1436 wk_master = (dble(2)/dble(3))*
1437 & dble(npiv)*dble(npiv)*dble(npiv)+
1438 & dble(npiv)*dble(npiv)*dble(nfront-npiv)
1439 else
1440 wk_master = dble(npiv)*dble(npiv)*dble(npiv)/dble(3)
1441 end if
1442 strat = cv_keep(62)
1443 doit = .true.
1444 k1 = cv_keep(82)
1445 k3 = cv_keep(82)
1446 do kk=1,cv_keep(82)-1
1447 npiv2 = npiv/kk
1448 nfront2 = nfront-npiv+npiv2
1449 if (npiv2 .le.
max(6*cv_keep(6),0).or.
1450 & (nfront2.le.cv_keep(9)) ) then
1452 exit
1453 endif
1454 wk_master2 = wk_master / dble(kk)
1455 if (cv_keep(50).eq.0) then
1456 wk_slave2 = ( dble(npiv2)*dble(nfront2-npiv2) *
1457 & dble(2*nfront2-npiv2) ) / dble(nslaves_estim)
1458 else
1459 wk_slave2 =
1460 & ( dble(npiv2)*dble(nfront2-npiv2)*dble(nfront2) )
1461 & / dble(nslaves_estim)
1462 endif
1463 if(wk_master2.le.
1464 & (1.0d0 +dble(kk*strat)/dble(100))*wk_slave2) then
1465 k1 = kk
1466 exit
1467 endif
1468 enddo
1469 do kk=1,cv_keep(82)-1
1470 npiv2 = npiv/kk
1471 nfront2 = nfront
1472 if (npiv2 .le.
max(6*cv_keep(6),0))
then
1474 exit
1475 endif
1476 wk_master2 = wk_master / dble(kk)
1477 if (cv_keep(50).eq.0) then
1478 wk_slave2 = ( dble(npiv2)*dble(nfront2-npiv2) *
1479 & dble(2*nfront2-npiv2) ) / dble(nslaves_estim)
1480 else
1481 wk_slave2 =
1482 & ( dble(npiv2)*dble(nfront2-npiv2)*dble(nfront2) )
1483 & / dble(nslaves_estim)
1484 endif
1485 if(wk_master2.le.wk_slave2) then
1486 k3 = kk
1487 exit
1488 endif
1489 enddo
1492 IF (cv_keep(79).GE.1) THEN
1493 k1=
min(k1, npropmap-1)
1494 k3=
min(k3, npropmap-1)
1495 ENDIF
1496 if(k3 .lt. k1) then
1497 k3 = k1
1498 endif
1499 istat=0
1500 return
1503 & doit,npiv,nfront,npropmap,k2,istat)
1504 implicit none
1505 integer,intent(in) :: inode
1506 logical,intent(out) :: doit
1507 integer,intent(in) :: npiv,nfront,npropmap
1508 integer,intent(out) :: istat
1509 integer,intent(out) :: k2
1510 integer npiv2,npiv_son2
1511 integer kk
1512 DOUBLE PRECISION mem_master, mem_slave
1513 doit=.false.
1514 k2=1
1515 istat=-1
1516 doit=.true.
1517 if (cv_nodetype(inode) .gt. 0) then
1518 doit=.false.
1519 istat = 0
1520 return
1521 endif
1522 if (cv_frere(inode).eq.0
1523 & ) then
1524 doit=.false.
1525 istat = 0
1526 return
1527 endif
1528 if ((nfront-npiv).lt.npropmap.OR.
1529 & (npropmap.le.0) ) then
1530 doit=.false.
1531 istat = 0
1532 return
1533 endif
1534 npiv_son2 =
max(npiv/2,1)
1536 doit=.false.
1537 istat = 0
1538 return
1539 endif
1540 doit = .true.
1541 k2 =
min(cv_keep(82),npropmap-1)
1542 do kk=1,
min(cv_keep(82)-1, npropmap-1)
1543 npiv2 = npiv/kk
1544 if(npiv2 .eq. 0) then
1546 exit
1547 endif
1548 mem_slave = dble(nfront-npiv)*dble(nfront)/
1549 & dble(npropmap-kk+1)
1550 mem_master = dble(npiv2)*dble(nfront)
1551 if(mem_master.le.
1552 & (1.0d0 +dble(cv_keep(62))/dble(100))*mem_slave) then
1553 k2 = kk
1554 exit
1555 endif
1556 enddo
1559 istat=0
1560 return
1563 & lnpivsplit, npivsplit,
1564 & istat)
1565 implicit none
1566 integer, intent(in)::nfront,npiv
1567 integer, intent(in):: k
1568 integer, intent(in)::lnpivsplit
1569 integer, intent(in)::npivsplit(lnpivsplit)
1570 integer, intent(in):: inode
1571 integer, intent(out)::istat
1572 integer lev,npiv_father,
1573 & npiv_son,nfrontk,npivk,next_father
1574 DOUBLE PRECISION:: ncostm,ncostw,ncostm_ison,ncostw_ison,
1575 & ncostm_ifather,ncostw_ifather
1576 integer::ison,ifather
1577 character (len=48):: subname
1578 istat=-1
1579 subname='SPLITNODE_UPDATE'
1580 npiv_son = npivsplit(1)
1581 ison = inode
1582 next_father = -frere(ison)
1583 ncostw=cv_ncostw(inode)
1584 ncostm=cv_ncostm(inode)
1585 nfrontk = nfront
1586 npivk = npiv
1588 & ncostw_ison,ncostm_ison)
1589 cv_ncostw(ison)=ncostw_ison
1590 cv_ncostm(ison)=ncostm_ison
1591 if(associated(cv_tcostw)) cv_tcostw(ison) = cv_tcostw(inode)
1592 & -ncostw +cv_ncostw(ison)
1593 if(associated(cv_tcostm)) cv_tcostm(ison) = cv_tcostm(inode)
1594 & -ncostm +cv_ncostm(ison)
1595 do lev = 1, k-1
1596 ifather = next_father
1597 next_father = -frere(ifather)
1598 npiv_son= abs(npivsplit(lev))
1599 npiv_father=abs(npivsplit(lev+1))
1601 & ncostw_ifather,ncostm_ifather)
1602 cv_ncostw(ifather)=ncostw_ifather
1603 cv_ncostm(ifather)=ncostm_ifather
1604 if(associated(cv_tcostw))
1605 & cv_tcostw(ifather) = cv_tcostw(ison)+cv_ncostw(ifather)
1606 if(associated(cv_tcostm))
1607 & cv_tcostm(ifather) = cv_tcostm(ison)+cv_ncostm(ifather)
1608 cv_total_split=cv_total_split+1
1609 if(lev .gt. 1) then
1611 if(ierr.ne.0) then
1612 if(cv_lp.gt.0)
1613 & write(cv_lp,*)'PROPMAP4SPLIT error in ',subname
1614 istat = ierr
1615 return
1616 endif
1617 endif
1618 nfrontk = nfrontk-npiv_son
1619 npivk = npivk - npiv_son
1620 ison = ifather
1621 enddo
1622 if (npivk .ne. npiv_father) then
1623 write(*,*) "Error 1 in MUMPS_SPLITNODE_UPDATE"
1625 endif
1627 if(ierr.ne.0) then
1628 if(cv_lp.gt.0)
1629 & write(cv_lp,*)'PROPMAP4SPLIT error in ',subname
1630 istat = ierr
1631 return
1632 endif
1633 cv_ncostw(inode) = ncostw
1634 cv_ncostm(inode) = ncostm
1635 istat = 0
1636 return
1639 implicit none
1640 integer, intent(in) :: inode
1641 logical :: MUMPS_IS_NODE_OF_TYPE2
1642 if (
1643 & (cv_nodetype(inode) .EQ. 2 ) .OR.
1644 & (cv_nodetype(inode) .EQ. tsplit_beg ) .OR.
1645 & (cv_nodetype(inode) .EQ. tsplit_mid ) .OR.
1646 & (cv_nodetype(inode) .EQ. -tsplit_mid ) .OR.
1647 & (cv_nodetype(inode) .EQ. tsplit_last) .OR.
1648 & (cv_nodetype(inode) .EQ. -tsplit_last)
1649 & ) then
1650 mumps_is_node_of_type2 = .true.
1651 else
1652 mumps_is_node_of_type2 = .false.
1653 endif
1654 return
1657 implicit none
1658 integer, intent(out)::istat
1659 integer i,in,inode
1660 character (len=48):: subname
1661 integer, external :: MUMPS_ENCODE_TPN_IPROC
1662 istat=-1
1663 subname='ENCODE_PROCNODE'
1664 do i=1,cv_nbsa
1665 inode=cv_ssarbr(i)
1666 cv_nodetype(inode)=0
1667 in=cv_fils(inode)
1668 do while (in>0)
1669 in=cv_fils(in)
1670 end do
1671 in=-in
1672 do while(in.gt.0)
1674 in=cv_frere(in)
1675 enddo
1676 enddo
1677 do i=1,cv_n
1678 if (cv_frere(i).lt.cv_n+1) then
1679 if(cv_nodetype(i).eq.cv_invalid) then
1680 if(cv_lp.gt.0)
1681 & write(cv_lp,*)'Error in ',subname
1682 return
1683 endif
1684 if (i.eq.cv_keep(38)) then
1685 if (cv_nodetype(i).ne.3) then
1686 cv_nodetype(i)=3
1687 endif
1688 endif
1690 & cv_procnode(i)-1, cv_keep(199))
1691 in=cv_fils(i)
1692 do while (in>0)
1693 cv_procnode(in)=cv_procnode(i)
1694 in=cv_fils(in)
1695 end do
1696 end if
1697 end do
1698 istat = 0
1699 return
1702 implicit none
1703 integer,intent(in)::ifather
1704 integer,intent(out)::istat
1705 integer in,son,oldl0end
1706 logical father_has_sons
1707 character (len=48):: subname
1708 istat=-1
1709 subname='FATHSON_REPLACE'
1710 father_has_sons=.true.
1711 in=ifather
1712 do while (in.gt.0)
1713 in=cv_fils(in)
1714 end do
1715 if(in.eq.0) then
1716 cv_nodelayer(ifather)=1
1717 cv_keep(262)=cv_keep(262)+1
1718 father_has_sons=.false.
1719 end if
1720 if(cv_layerl0_end-cv_layerl0_start.gt.0) then
1721 cv_layerl0_start= cv_layerl0_start+1
1722 elseif(father_has_sons) then
1723 cv_layerl0_start= cv_layerl0_start+1
1724 else
1725 istat=1
1726 cv_nodelayer(ifather)=0
1727 return
1728 endif
1729 cv_nbsa=cv_nbsa-1
1730 oldl0end = cv_layerl0_end
1731 if (father_has_sons) then
1732 son=-in
1733 son=-in
1734 10 continue
1735 cv_layerl0_end=cv_layerl0_end+1
1736 if (cv_tcostw(son).GT.mincostw)
1737 & layerl0_endforarrangel0 = layerl0_endforarrangel0+1
1738 cv_layerl0_array(cv_layerl0_end)=son
1739 cv_layerl0_sorted_costw(cv_layerl0_end)=cv_tcostw(son)
1740 cv_nbsa=cv_nbsa+1
1741 if((cv_frere(son).gt.0).and.(cv_frere(son).lt.cv_n+1)) then
1742 son=cv_frere(son)
1743 goto 10
1744 end if
1745 endif
1746 cv_costw_layer0=cv_costw_layer0 - cv_ncostw(ifather)
1747 cv_costm_layer0=cv_costm_layer0 - cv_ncostm(ifather)
1748 cv_costw_upper=cv_costw_upper + cv_ncostw(ifather)
1749 cv_costm_upper=cv_costm_upper + cv_ncostm(ifather)
1750 if(cv_layerl0_end.gt.oldl0end) then
1751 call mumps_sort_msort(ierr,cv_layerl0_end-oldl0end,
1752 & cv_layerl0_array(oldl0end+1:cv_layerl0_end),
1753 & cv_layerl0_sorted_costw(oldl0end+1:cv_layerl0_end))
1754 if(ierr.ne.0) then
1755 if(cv_lp.gt.0)
1756 & write(cv_lp,*) 'Error reported by MUMPS_SORT_MSORT in',
1757 & subname
1758 istat = ierr
1759 return
1760 endif
1761 call mumps_sort_mmerge(
1762 & cv_layerl0_start,oldl0end,oldl0end-cv_layerl0_start+1,
1763 & oldl0end+1,cv_layerl0_end,cv_layerl0_end-oldl0end,
1764 & cv_layerl0_array(1:cv_layerl0_end),
1765 & cv_layerl0_sorted_costw(1:cv_layerl0_end),ierr)
1766 if(ierr.ne.0) then
1767 if(cv_lp.gt.0)
1768 & write(cv_lp,*)
1769 & 'Error reported by MUMPS_SORT_MMERGE in',
1770 & subname
1771 istat = ierr
1772 return
1773 endif
1774 endif
1775 istat=0
1776 return
1779 & workload,memused,proc,istat,respect_prop)
1780
1781 implicit none
1782 integer, intent(in)::inode,map_strat
1783 DOUBLE PRECISION,intent(in)::work,mem
1784 DOUBLE PRECISION,dimension(:),intent(inout)::workload, memused
1785 integer,intent(out):: proc,istat
1786 logical,intent(in),OPTIONAL::respect_prop
1787 integer i
1788 logical respect_proportional
1789 DOUBLE PRECISION dummy
1790 character (len=48):: subname
1791 istat=-1
1792 respect_proportional=.false.
1793 if(present(respect_prop)) respect_proportional=respect_prop
1794 subname='FIND_BEST_PROC'
1795 proc=-1
1796 if((map_strat.ne.cv_equilib_flops).and.
1797 & (map_strat.ne.cv_equilib_mem)) return
1798 dummy=huge(dummy)
1799 do i=cv_slavef,1,-1
1800 if (
1801 & ((.NOT.respect_proportional)
1802 & .OR.
1804 & .AND.
1805 & (((workload(i).lt.dummy).AND.
1806 & (map_strat.eq.cv_equilib_flops))
1807 & .OR.
1808 & ((memused(i).lt.dummy).AND.
1809 & (map_strat.eq.cv_equilib_mem))))then
1810 if((.not.cv_constr_work).or.
1811 & (workload(i)+work.lt.cv_proc_maxwork(i))) then
1812 if((.not.cv_constr_mem).or.
1813 & (memused(i)+mem.lt.cv_proc_maxmem(i))) then
1814 proc=i
1815 if(map_strat.eq.cv_equilib_flops) then
1816 dummy=workload(i)
1817 elseif(map_strat.eq.cv_equilib_mem) then
1818 dummy=memused(i)
1819 endif
1820 end if
1821 end if
1822 end if
1823 end do
1824 if (proc.ne.-1) then
1825 workload(proc)=workload(proc)+work
1826 memused(proc)=memused(proc)+mem
1827 istat=0
1828 end if
1829 return
1832 & thislayer,nmb_thislayer,istat)
1833 implicit none
1834 integer, intent(in)::nmb
1835 integer,intent(out) :: thislayer(:)
1836 integer,intent(out) :: nmb_thislayer,istat
1837 integer i
1838 character (len=48):: subname
1839 istat=-1
1840 subname='FIND_THISLAYER'
1841 thislayer=0
1842 nmb_thislayer=0
1843 if((nmb.lt.0).or.(nmb.gt.cv_maxlayer)) return
1844 do i=1,cv_n
1845 if(cv_nodelayer(i).eq.nmb) then
1846 nmb_thislayer=nmb_thislayer+1
1847 if(nmb_thislayer.gt.cv_maxnodenmb) then
1848 if(cv_lp.gt.0)
1849 & write(cv_lp,*)'Problem with nmb_thislayer in ',subname
1850 return
1851 endif
1852 thislayer(nmb_thislayer)=i
1853 end if
1854 end do
1855 istat=0
1856 return
1859 & nmb_thislayer,cont,istat)
1860 implicit none
1861 integer,intent(in)::startlayer,nmb_thislayer
1862 integer,intent(in)::thislayer(:)
1863 logical,intent(inout)::cont
1864 integer,intent(out)::istat
1865 integer :: visited
1866 integer il,i,current,in,ifather
1867 logical father_valid,upper_layer_exists
1868 character (len=48):: subname
1869 istat=-1
1870 subname='HIGHER_LAYER'
1871 if(.NOT.cont) return
1872 if(startlayer.lt.1) return
1873 current=startlayer-1
1874 visited = -current-1
1875 upper_layer_exists=.false.
1876 if (current.eq.0) then
1877 do i=1,cv_n
1878 if (cv_nodelayer(i).ne.current) then
1879 if(cv_nodelayer(i).eq.1) then
1880 upper_layer_exists=.true.
1881 exit
1882 endif
1883 endif
1884 enddo
1885 endif
1886 do il=1,nmb_thislayer
1887 i = thislayer(il)
1888 in=i
1889 if (cv_nodetype(in).eq.tsplit_beg) then
1890 do while (cv_frere(in).lt.0)
1891 ifather = -cv_frere(in)
1892 if (abs(cv_nodetype(ifather)).eq.tsplit_mid) then
1893 in = ifather
1894 cv_nodelayer(in) = -visited-1
1895 cycle
1896 else if (abs(cv_nodetype(ifather)).eq.tsplit_last) then
1897 in = ifather
1898 cv_nodelayer(in) = current
1899 exit
1900 else
1901 write(6,*) ' Internal error 1 in MUMPS_HIGHER_LAYER'
1903 endif
1904 end do
1905 endif
1906 enddo
1907 do il=1,nmb_thislayer
1908 i = thislayer(il)
1909 if (cv_nodelayer(i).lt.current) cycle
1910 in=i
1911 if (cv_nodetype(in).eq.tsplit_beg) then
1912 cv_nodelayer(in) = visited
1913 do while (cv_frere(in).lt.0)
1914 ifather = -cv_frere(in)
1915 if (abs(cv_nodetype(ifather)).eq.tsplit_mid) then
1916 in = ifather
1917 cv_nodelayer(in) = -visited-1
1918 cycle
1919 else if (abs(cv_nodetype(ifather)).eq.tsplit_last) then
1920 in = ifather
1921 exit
1922 else
1923 write(6,*) ' Internal error 1 in MUMPS_HIGHER_LAYER',
1924 & cv_nodetype(ifather)
1926 endif
1927 end do
1928 endif
1929 if(cv_frere(in).eq.0) cycle
1930 cv_nodelayer(in) = visited
1931 father_valid=.true.
1932 do while(cv_frere(in).gt.0)
1933 if (cv_nodelayer(cv_frere(in)).gt.current) then
1934 father_valid=.false.
1935 in = cv_frere(in)
1936 cycle
1937 endif
1938 if (cv_nodelayer(cv_frere(in)).eq.visited) exit
1939 in=cv_frere(in)
1940 if (cv_nodelayer(in).eq.current) then
1941 cv_nodelayer(in) = visited
1942 endif
1943 end do
1944 if (.not.father_valid .or. cv_frere(in).gt.0) then
1945 cycle
1946 endif
1947 ifather=-cv_frere(in)
1948 if(cv_nodelayer(ifather).eq.current+1) then
1949 cycle
1950 endif
1951 in=ifather
1952 do while (cv_fils(in).gt.0)
1953 in=cv_fils(in)
1954 end do
1955 in=-cv_fils(in)
1956 if(cv_nodelayer(in).gt.current) then
1957 father_valid=.false.
1958 else
1959 father_valid=.true.
1960 do while(cv_frere(in).gt.0)
1961 in=cv_frere(in)
1962 if(cv_nodelayer(in).gt.current) then
1963 father_valid=.false.
1964 exit
1965 endif
1966 if(cv_nodelayer(in).eq.visited) then
1967 exit
1968 endif
1969 end do
1970 endif
1971 if(father_valid) then
1972 cv_nodelayer(ifather)=current+1
1973 upper_layer_exists=.true.
1974 end if
1975 end do
1976 if (upper_layer_exists) then
1977 current=current+1
1978 cv_maxlayer=current
1979 cont=.true.
1980 else
1981 cv_maxlayer=current
1982 cont=.false.
1983 endif
1984 do il=1,nmb_thislayer
1985 i = thislayer(il)
1986 if (cv_nodelayer(i).eq.visited) cv_nodelayer(i) = -visited-1
1987 enddo
1988 istat=0
1989 return
1992 & frere,fils,nfsiz,ne,keep,KEEP8,icntl,info,
1993 & procnode,ssarbr,peak,istat
1994 & , SIZEOFBLOCKS, LSIZEOFBLOCKS
1995 & )
1996 implicit none
1997 integer, intent(in)::n,slavef
1998 integer, intent(in), TARGET:: frere(n),fils(n),nfsiz(n),ne(n),
1999 & keep(500),icntl(60),info(80),
2000 & procnode(n),ssarbr(n)
2001 INTEGER(8), intent(in), TARGET:: KEEP8(150)
2002 integer,intent(out)::istat
2003 integer, intent(in) :: LSIZEOFBLOCKS
2004 integer, intent(in), TARGET :: SIZEOFBLOCKS(LSIZEOFBLOCKS)
2005 integer i,allocok,rest
2006 DOUBLE PRECISION peak
2007 character (len=48):: subname
2008 istat=-1
2009 nullify(cv_frere,cv_fils,cv_nfsiz,cv_ne,cv_keep,cv_keep8,
2010 & cv_icntl,cv_info,cv_procnode,cv_ssarbr)
2011 nullify(cv_ncostw,cv_tcostw,cv_ncostm,cv_tcostm,
2012 & cv_nodelayer,cv_nodetype,cv_depth,
2013 & cv_layerworkload,cv_layermemused,cv_prop_map)
2014 nullify(cv_sizeofblocks)
2015 cv_sizeofblocks => sizeofblocks
2016 subname='INITPART1'
2017 cv_n=n
2018 cv_slavef=slavef
2019 cv_keep=>keep
2020 cv_keep8=>keep8
2021 if(cv_keep(82) .lt. 0) then
2022 write(cv_lp,*)
2023 & 'Warning in mumps_static_mapping : splitting is set off'
2024 cv_keep(82) = 0
2025 endif
2026 if(cv_keep(83) .lt. 0) then
2027 write(cv_lp,*)
2028 & 'warning in mumps_static_mapping : keep(83) reset to 0'
2029 cv_keep(83) = 0
2030 endif
2031 if(slavef.gt.1) then
2032 cv_mixed_strat_bound =
max(cv_keep(78),1)
2033 cv_maxdepth = slavef
2034 else
2035 cv_maxdepth = 0
2036 cv_mixed_strat_bound=0
2037 endif
2038 cv_bitsize_of_int = bit_size(n)
2039 if(cv_bitsize_of_int.le.0) then
2040 if(cv_lp.gt.0)
2041 & write(cv_lp,*)'Problem with bit size in ',subname
2042 return
2043 endif
2044 rest = mod(cv_slavef,cv_bitsize_of_int)
2045 if (rest.eq.0) then
2046 cv_size_ind_proc = cv_slavef / cv_bitsize_of_int
2047 else
2048 cv_size_ind_proc = cv_slavef / cv_bitsize_of_int + 1
2049 endif
2050 allocate(cv_ncostw(n),cv_tcostw(n),cv_ncostm(n),cv_tcostm(n),
2051 & cv_nodelayer(n),cv_nodetype(n),cv_depth(n),
2052 & cv_layerworkload(slavef),cv_layermemused(slavef),
2053 & cv_prop_map(n),stat=allocok)
2054 if (allocok.gt.0) then
2055 cv_info(1) = cv_error_memalloc
2056 cv_info(2) = 8*n+2*cv_slavef
2057 istat = cv_error_memalloc
2058 if(cv_lp.gt.0)
2059 & write(cv_lp,*)'memory allocation error in ',subname
2060 return
2061 end if
2062 if(cv_keep(82) .eq. 0) then
2063 if(cv_lp.gt.0)
2064 & write(cv_lp,*)' No splitting during static mapping '
2065 endif
2066 cv_frere=>frere
2067 cv_fils=>fils
2068 cv_nfsiz=>nfsiz
2069 cv_ne=>ne
2070 cv_icntl=>icntl
2071 cv_info=>info
2072 cv_procnode=>procnode
2073 cv_ssarbr=>ssarbr
2074 cv_ssarbr=0
2075 cv_nodetype=cv_invalid
2076 cv_nsteps=keep(28)
2077 if((keep(28).gt.n).OR.(keep(28).lt.0)) then
2078 if(cv_lp.gt.0)
2079 & write(cv_lp,*)'problem with nsteps in ',subname
2080 return
2081 end if
2082 cv_costw_upper=0.0d0
2083 cv_costm_upper=0.0d0
2084 cv_costw_layer0=0.0d0
2085 cv_costm_layer0=0.0d0
2086 cv_costw_total=0.0d0
2087 cv_costm_total=0.0d0
2088 cv_nodelayer=n+2
2089 cv_depth=cv_invalid
2090 cv_l0wthresh=0.0d0
2091 cv_splitthresh=0.45d0
2092 cv_relax=dble(1) + dble(
max(0,keep(68)))/dble(100)
2093 cv_maxlayer=0
2094 cv_maxnsteps= cv_nsteps+1
2095 cv_layerworkload=dble(0)
2096 cv_layermemused=dble(0)
2097 cv_total_amalg=0
2098 cv_total_split=0
2099 cv_last_splitting%new_ison=cv_invalid
2100 cv_last_splitting%new_ifather=cv_invalid
2101 cv_last_splitting%old_keep2=cv_invalid
2102 cv_last_splitting%ncostw_oldinode=cv_d_invalid
2103 cv_last_splitting%ncostm_oldinode=cv_d_invalid
2104 cv_last_splitting%tcostw_oldinode=cv_d_invalid
2105 cv_last_splitting%tcostm_oldinode=cv_d_invalid
2106 do i=1,cv_n
2107 nullify(cv_prop_map(i)%ind_proc)
2108 end do
2109 istat=0
2110 return
2113 implicit none
2114 integer,intent(out)::istat
2115 integer i,allocok,inode,in,inoderoot,ierr,maxcut
2116 character (len=48):: subname
2117 istat=-1
2118 subname='INITPART2'
2119 if(associated(cv_layerl0_array))deallocate(cv_layerl0_array)
2120 if(associated(cv_layerl0_sorted_costw))
2121 & deallocate(cv_layerl0_sorted_costw)
2122 deallocate(cv_depth,cv_tcostw,cv_tcostm,stat=ierr)
2123 if(ierr.ne.0) then
2124 if(cv_lp.gt.0)
2125 & write(cv_lp,*)'Memory deallocation error in ',subname
2126 istat = cv_error_memdeloc
2127 return
2128 end if
2129 if(cv_maxnsteps.lt.1) then
2130 if(cv_lp.gt.0)
2131 & write(cv_lp,*)'problem with maxnsteps in ',subname
2132 return
2133 end if
2134 cv_maxnodenmb=cv_maxnsteps
2135 do i=1,cv_nbsa
2136 inode=cv_ssarbr(i)
2137 inoderoot=inode
2138 300 continue
2139 in = inode
2140 do while (in.ne.0)
2141 inode = in
2142 do while (in.gt.0)
2143 in = cv_fils(in)
2144 end do
2145 if (in.lt.0) in=-in
2146 end do
2147 100 continue
2148 if (inode.ne.inoderoot) then
2149 cv_maxnodenmb=cv_maxnodenmb-1
2150 in = cv_frere(inode)
2151 inode = abs(in)
2152 if (in.lt.0) then
2153 go to 100
2154 else
2155 go to 300
2156 end if
2157 end if
2158 end do
2159 if(cv_keep(82) .gt. 0) then
2160 maxcut =
min((cv_keep(82)-1)*cv_maxnodenmb,cv_n)
2161 cv_maxnsteps =
min(cv_maxnsteps+maxcut,cv_n)
2162 cv_maxnodenmb =
min(cv_maxnodenmb+maxcut,cv_n)
2163 endif
2164 nullify(cv_layer_p2node)
2165 if(cv_maxnodenmb.lt.0) then
2166 if(cv_lp.gt.0)
2167 & write(cv_lp,*)'problem with maxnodenmb in ',subname
2168 return
2169 elseif(cv_maxnodenmb.lt.1) then
2170 cv_maxnodenmb = 1
2171 end if
2172 allocate(cv_layer_p2node(cv_maxnodenmb),stat=allocok)
2173 if (allocok.gt.0) then
2174 cv_info(1) = cv_error_memalloc
2175 cv_info(2) = cv_maxnodenmb
2176 istat = cv_error_memalloc
2177 if(cv_lp.gt.0)
2178 & write(cv_lp,*)'memory allocation error in ',subname
2179 return
2180 end if
2181 do i=1,cv_maxnodenmb
2182 nullify(cv_layer_p2node(i)%t2_nodenumbers,
2183 & cv_layer_p2node(i)%t2_cand,
2184 & cv_layer_p2node(i)%t2_candcostw,
2185 & cv_layer_p2node(i)%t2_candcostm)
2186 cv_layer_p2node(i)%nmb_t2s=0
2187 enddo
2188 istat = 0
2191 implicit none
2192 logical::MUMPS_ISTYPE2BYSIZE
2193 integer,intent(in)::nfront,npiv
2194 mumps_istype2bysize=.false.
2195 if( (nfront - npiv > cv_keep(9))
2196 & .and. ((npiv > cv_keep(4)).or.(.true.))
2197 & .and. (cv_icntl(59).eq.0) ) mumps_istype2bysize=.true.
2198 return
2201 implicit none
2202 integer,intent(out)::istat
2203 integer i,ierr,inode
2204 logical accepted
2205 integer,parameter::map_strat=cv_equilib_flops
2206 character (len=48):: err_rep,subname
2207 logical use_geist_ng_replace, skiparrangeL0
2208 INTEGER MINSIZE_L0
2209 INTEGER CURRENT_SIZE_L0
2210 istat=-1
2211 subname='LAYERL0'
2212 accepted=.false.
2213 IF (cv_keep(72).EQ.2) THEN
2214 minsize_l0 = 6*cv_slavef
2215 ELSE
2216 IF (cv_keep(198).NE.0) THEN
2217 IF (cv_keep(198).EQ.1) THEN
2218 minsize_l0 = 3*cv_slavef
2219 ELSE
2220 minsize_l0 = 2*cv_slavef
2221 ENDIF
2222 ELSE
2223 minsize_l0 = 3*cv_slavef
2224 ENDIF
2225 ENDIF
2226 55 continue
2227 skiparrangel0 = .false.
2228 do while(.not.accepted)
2229 IF (cv_keep(198).EQ.2) THEN
2230 current_size_l0 = layerl0_endforarrangel0
2231 ELSE
2232 current_size_l0 = layerl0_endforarrangel0
2233 ENDIF
2234 IF ( ( (current_size_l0.LT.minsize_l0)
2235 & .OR. skiparrangel0
2236 & )
2237 & .AND.
2238 & (cv_layerl0_end.LT.cv_maxnsteps/2) ) THEN
2239 accepted = .false.
2240 ELSE
2241 err_rep='ARRANGEL0'
2243 & cv_layerworkload,cv_layermemused,
2244 & cv_procnode,ierr)
2245 if(ierr.ne.0) then
2246 if(cv_lp.gt.0)
2247 & write(cv_lp,*)'Error reported by ',err_rep,' in ',subname
2248 istat = ierr
2249 return
2250 end if
2251 err_rep='ACCEPT_L0'
2253 & cv_layerworkload,cv_layermemused,
2254 & accepted,ierr)
2255 if(ierr.ne.0) then
2256 if(cv_lp.gt.0)
2257 & write(cv_lp,*)'Error reported by ',err_rep,' in ',subname
2258 istat = ierr
2259 return
2260 end if
2261 ENDIF
2262 IF (cv_keep(198).EQ.0) THEN
2263 IF (cv_slavef.GT.16)
2264 & skiparrangel0 = .NOT.skiparrangel0
2265 ENDIF
2266 if (accepted.OR.(cv_costw_total.le.0.0d0)) then
2267 exit
2268 elseif(((cv_costw_layer0/cv_costw_total).gt.cv_l0wthresh) .AND.
2269 & (.true.))then
2270 err_rep='MAX_TCOST_L0'
2271 inode = cv_layerl0_array(cv_layerl0_start)
2272 use_geist_ng_replace = .true.
2273 if(use_geist_ng_replace) then
2274 err_rep='FATHSON_REPLACE'
2276 if(ierr.eq.1) then
2277 accepted=.true.
2278 elseif(ierr.ne.0) then
2279 if(cv_lp.gt.0)
2280 & write(cv_lp,*)
2281 & 'Error rep. by ',err_rep,' in ',subname
2282 istat = ierr
2283 return
2284 endif
2285 endif
2286 else
2287 accepted=.true.
2288 end if
2289 end do
2290 accepted=.true.
2291 if (accepted) then
2292 else
2293 goto 55
2294 endif
2295 err_rep='LIST2LAYER'
2297 if(ierr.ne.0) then
2298 if(cv_lp.gt.0)
2299 & write(cv_lp,*)'Error reported by ',err_rep,' in ',subname
2300 istat = ierr
2301 return
2302 end if
2303 err_rep='MAKE_PROPMAP'
2305 if(ierr.ne.0) then
2306 if(cv_lp.gt.0)
2307 & write(cv_lp,*)'Error reported by ',err_rep,' in ',subname
2308 istat = ierr
2309 return
2310 end if
2311 if ( cv_keep(75).EQ.1 ) then
2313 & cv_layerworkload,cv_layermemused,
2314 & cv_procnode,ierr, respect_prop=.true.)
2315 if(ierr.ne.0) then
2316 if(cv_lp.gt.0)
2317 & write(cv_lp,*)'Error reported by ',err_rep,' in ',subname
2318 istat = ierr
2319 return
2320 end if
2321 else if (layerl0_endforarrangel0.LT.cv_layerl0_end) THEN
2323 & cv_layerworkload,cv_layermemused,
2324 & cv_procnode,ierr)
2325 endif
2327 do i=1,cv_slavef
2328 cv_proc_workload(i)=cv_layerworkload(i)
2329 cv_proc_memused(i)=cv_layermemused(i)
2330 end do
2331 istat=0
2332 return
2335 implicit none
2336 integer, intent(out)::istat
2337 character (len=48):: subname
2338 integer i,inode
2339 istat=-1
2340 subname='LIST2LAYER'
2341 cv_dist_l0_mixed_strat_bound=0
2342 cv_nbsa=0
2343 do i=cv_layerl0_start,cv_layerl0_end
2344 inode=cv_layerl0_array(i)
2345 if(inode.gt.0) then
2346 cv_dist_l0_mixed_strat_bound=
max(cv_dist_l0_mixed_strat_bound
2347 & ,
max(cv_depth(inode)-cv_mixed_strat_bound,0))
2348 cv_nodelayer(inode)=0
2349 cv_nbsa=cv_nbsa+1
2350 cv_ssarbr(cv_nbsa)=inode
2351 endif
2352 enddo
2353 istat=0
2354 return
2357 implicit none
2358 integer,intent(out)::istat
2359 integer i,pctr,pctr2,ierr
2360 character (len=48):: subname
2361 INTEGER, ALLOCATABLE, DIMENSION(:) :: procindex
2362 INTEGER :: allocok
2363 subname = "MUMPS_MAKE_PROPMAP"
2364 istat = -1
2365 ALLOCATE(procindex(cv_size_ind_proc),stat=allocok)
2366 IF (allocok > 0) THEN
2367 cv_info(1) = cv_error_memalloc
2368 cv_info(2) = cv_maxnodenmb
2369 istat = cv_error_memalloc
2370 if(cv_lp.gt.0)
2371 & write(cv_lp,*) 'Memory allocation error in ',subname
2372 return
2373 ENDIF
2374 pctr=cv_n
2375 pctr2=cv_mixed_strat_bound
2376 do i=1,cv_slavef
2378 if(ierr.ne.0) then
2379 if(cv_lp.gt.0)write(cv_lp,*)
2380 & 'MUMPS_BIT_SET signalled error to',subname
2381 istat = ierr
2382 GOTO 999
2383 end if
2384 end do
2385 do i=1,cv_n
2386 if(cv_frere(i).eq.0) then
2387 if(.NOT.associated(cv_prop_map(i)%ind_proc)) then
2389 if(ierr.ne.0) then
2390 if(cv_lp.gt.0)
2391 & write(cv_lp,*)'PROPMAP_INIT signalled error to'
2392 & ,subname
2393 istat = ierr
2394 GOTO 999
2395 end if
2396 endif
2397 cv_prop_map(i)%ind_proc = procindex
2399 if(ierr.ne.0) then
2400 if(cv_lp.gt.0)write(cv_lp,*)
2401 & 'PROPMAP signalled error to',subname
2402 istat = ierr
2403 GOTO 999
2404 endif
2405 if((cv_keep(24).eq.16).OR.(cv_keep(24).eq.18)) then
2407 if(ierr.ne.0) then
2408 if(cv_lp.gt.0)write(cv_lp,*)
2409 & 'MOD_PROPMAP signalled error to',subname
2410 istat = ierr
2411 GOTO 999
2412 endif
2413 endif
2414 endif
2415 end do
2416 istat = 0
2417 999 CONTINUE
2418 DEALLOCATE(procindex)
2419 return
2422 & nmb_thislayer,map_strat,istat)
2423 implicit none
2424 integer, intent(in)::layernmb,thislayer(:),
2425 & nmb_thislayer,map_strat
2426 integer,intent(out)::istat
2427 integer i,inode,j,k,ierr,nmb,aux_int,nmb_cand_needed
2428 DOUBLE PRECISION aux_flop,aux_mem
2429 INTEGER, ALLOCATABLE, DIMENSION(:) :: candid, sorted_nmb
2430 DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:) ::
2431 & sorted_costw, sorted_costm, old_workload, old_memused
2432 character (len=48):: err_rep,subname
2433 logical use_propmap
2434 istat=-1
2435 subname='MAP_LAYER'
2436 if((cv_keep(24).eq.8).OR.(cv_keep(24).eq.10)
2437 & .OR.(cv_keep(24).eq.12).OR.(cv_keep(24).eq.14)
2438 & .OR.(cv_keep(24).eq.16).OR.(cv_keep(24).eq.18)) then
2439 use_propmap=.true.
2440 else
2441 use_propmap=.false.
2442 endif
2443 if((layernmb.lt.0).or.(layernmb.gt.cv_maxlayer)) return
2444 if((map_strat.ne.cv_equilib_flops).and.
2445 & (map_strat.ne.cv_equilib_mem)) return
2446 ALLOCATE(candid(cv_slavef), sorted_nmb(2*nmb_thislayer),
2447 & sorted_costw(2*nmb_thislayer), sorted_costm(2*nmb_thislayer),
2448 & old_workload(cv_slavef), old_memused(cv_slavef), stat=allocok)
2449 if (allocok.gt.0) then
2450 cv_info(1) = cv_error_memalloc
2451 cv_info(2) = 7*nmb_thislayer+2*cv_slavef
2452 istat = cv_error_memalloc
2453 if(cv_lp.gt.0)
2454 & write(cv_lp,*)'memory allocation error in ',subname
2455 goto 999
2456 end if
2457 do i=1,nmb_thislayer
2458 inode=thislayer(i)
2459 if (cv_nodetype(inode).eq.3) then
2460 cv_procnode(inode)=1
2461 exit
2462 end if
2463 end do
2464 do i=1,cv_slavef
2465 old_workload(i)=cv_layerworkload(i)
2466 old_memused(i)=cv_layermemused(i)
2467 enddo
2468 nmb=0
2469 do i=1,nmb_thislayer
2470 inode=thislayer(i)
2471 if(cv_nodetype(inode).eq.1) then
2472 nmb=nmb+1
2473 sorted_nmb(nmb)=inode
2474 sorted_costw(nmb)=cv_ncostw(inode)
2475 sorted_costm(nmb)=cv_ncostm(inode)
2477 nmb=nmb+1
2478 do j=1,cv_layer_p2node(layernmb)%nmb_t2s
2479 if(cv_layer_p2node(layernmb)%t2_nodenumbers(j).ne.inode)
2480 & then
2481 cycle
2482 else
2483 sorted_costw(nmb)=
2484 & cv_layer_p2node(layernmb)%t2_candcostw(j)
2485 sorted_costm(nmb)=
2486 & cv_layer_p2node(layernmb)%t2_candcostm(j)
2487 endif
2488 enddo
2489 if((sorted_costw(nmb).eq.cv_d_invalid).OR.
2490 & (sorted_costm(nmb).eq.cv_d_invalid)) then
2491 if(cv_lp.gt.0)
2492 & write(cv_lp,*)'Error in ',subname
2493 goto 999
2494 end if
2495 if(sorted_costw(nmb).lt.cv_ncostw(inode))then
2496 sorted_costw(nmb)=cv_ncostw(inode)
2497 sorted_costm(nmb)=cv_ncostm(inode)
2498 sorted_nmb(nmb)=inode
2499 else
2500 sorted_nmb(nmb)=-inode
2501 endif
2502 else if(cv_nodetype(inode).eq.3) then
2503 cycle
2504 else
2505 if(cv_lp.gt.0)
2506 & write(cv_lp,*)'Unknown node type. Error in ',subname
2507 goto 999
2508 end if
2509 end do
2510 if (map_strat.eq.cv_equilib_flops) then
2511 call mumps_sort_msort(ierr,nmb,sorted_nmb(1:nmb),
2512 & sorted_costw(1:nmb),sorted_costm(1:nmb))
2513 elseif(map_strat.eq.cv_equilib_mem) then
2514 call mumps_sort_msort(ierr,nmb,sorted_nmb(1:nmb),
2515 & sorted_costm(1:nmb),sorted_costw(1:nmb))
2516 endif
2517 if(ierr.ne.0) then
2518 if(cv_lp.gt.0)
2519 & write(cv_lp,*)
2520 & 'Error reported by MUMPS_SORT_MSORT in ',subname
2521 istat = ierr
2522 goto 999
2523 endif
2524 do i=1,nmb
2525 aux_int=sorted_nmb(i)
2526 aux_flop=sorted_costw(i)
2527 aux_mem=sorted_costm(i)
2528 k=1
2529 if (aux_int.lt.0) then
2530 inode=-aux_int
2531 err_rep='SORTPROCS'
2532 if(use_propmap) then
2534 & cv_proc_workload,cv_proc_memused,
2535 & inode=inode,istat=ierr)
2536 else
2538 & cv_proc_workload,cv_proc_memused,
2539 & istat=ierr)
2540 end if
2541 if(ierr.ne.0) then
2542 if(cv_lp.gt.0)
2543 & write(cv_lp,*)
2544 & 'Error reported by ',err_rep,' in ',subname
2545 istat = ierr
2546 goto 999
2547 endif
2548 nmb_cand_needed=cv_invalid
2549 do j=1,cv_layer_p2node(layernmb)%nmb_t2s
2550 if(cv_layer_p2node(layernmb)%t2_nodenumbers(j).ne.inode)
2551 & then
2552 cycle
2553 else
2554 nmb_cand_needed=
2555 & cv_layer_p2node(layernmb)%t2_cand(j,cv_slavef+1)
2556 exit
2557 endif
2558 enddo
2559 if(nmb_cand_needed.eq.cv_invalid) then
2560 if(cv_lp.gt.0)
2561 & write(cv_lp,*)'Error in ',subname
2562 goto 999
2563 endif
2564 do while((k.le.cv_slavef).and.(nmb_cand_needed.gt.0))
2565 if(((.not.cv_constr_work).or.
2566 & (cv_proc_workload(cv_proc_sorted(k))+aux_flop.lt.
2567 & cv_proc_maxwork(cv_proc_sorted(k))))
2568 & .AND.((.not.cv_constr_mem).or.
2569 & (cv_proc_memused(cv_proc_sorted(k))+aux_mem.lt.
2570 & cv_proc_maxmem(cv_proc_sorted(k))))
2571 & .AND.
2572 & (cv_layer_p2node(layernmb)%t2_cand(j,cv_proc_sorted(k)).eq.0))
2573 & then
2574 cv_proc_workload(cv_proc_sorted(k))=
2575 & cv_proc_workload(cv_proc_sorted(k))+aux_flop
2576 cv_proc_memused(cv_proc_sorted(k))=
2577 & cv_proc_memused(cv_proc_sorted(k))+aux_mem
2578 cv_layer_p2node(layernmb)%t2_cand(j,cv_proc_sorted(k))
2579 & =inode
2580 cv_layerworkload(cv_proc_sorted(k))=
2581 & cv_layerworkload(cv_proc_sorted(k))+aux_flop
2582 cv_layermemused(cv_proc_sorted(k))=
2583 & cv_layermemused(cv_proc_sorted(k))+aux_mem
2584 nmb_cand_needed=nmb_cand_needed-1
2585 k=k+1
2586 else
2587 k=k+1
2588 if(k.gt.cv_slavef) then
2589 if(cv_lp.gt.0)
2590 & write(cv_lp,*)'Error in ',subname
2591 goto 999
2592 endif
2593 end if
2594 end do
2595 if(nmb_cand_needed.gt.0) then
2596 if(cv_lp.gt.0)
2597 & write(cv_lp,*)'Error in ',subname
2598 goto 999
2599 endif
2600 aux_flop=cv_ncostw(inode)
2601 aux_mem=cv_ncostm(inode)
2602 do while(k.le.cv_slavef)
2603 if(((.not.cv_constr_work).or.
2604 & (cv_proc_workload(cv_proc_sorted(k))+aux_flop.lt.
2605 & cv_proc_maxwork(cv_proc_sorted(k))))
2606 & .AND.((.not.cv_constr_mem).or.
2607 & (cv_proc_memused(cv_proc_sorted(k))+aux_mem.lt.
2608 & cv_proc_maxmem(cv_proc_sorted(k))))
2609 & .AND.
2610 & (cv_layer_p2node(layernmb)%t2_cand(j,cv_proc_sorted(k)).eq.0))
2611 & then
2612 cv_procnode(inode)=cv_proc_sorted(k)
2613 cv_proc_workload(cv_proc_sorted(k))=
2614 & cv_proc_workload(cv_proc_sorted(k))+aux_flop
2615 cv_proc_memused(cv_proc_sorted(k))=
2616 & cv_proc_memused(cv_proc_sorted(k))+aux_mem
2617 cv_layer_p2node(layernmb)%t2_cand(j,cv_proc_sorted(k))
2618 & =-inode
2619 cv_layerworkload(cv_proc_sorted(k))=
2620 & cv_layerworkload(cv_proc_sorted(k))+aux_flop
2621 cv_layermemused(cv_proc_sorted(k))=
2622 & cv_layermemused(cv_proc_sorted(k))+aux_mem
2623 exit
2624 else
2625 k=k+1
2626 if(k.gt.cv_slavef) then
2627 if(cv_lp.gt.0)
2628 & write(cv_lp,*)'Error in ',subname
2629 goto 999
2630 endif
2631 end if
2632 end do
2633 else
2634 inode=aux_int
2635 err_rep='SORTPROCS'
2636 if(use_propmap) then
2638 & cv_proc_workload,cv_proc_memused,
2639 & inode=inode,istat=ierr)
2640 else
2642 & cv_proc_workload,cv_proc_memused,
2643 & inode,istat=ierr)
2644 endif
2645 if(ierr.ne.0) then
2646 if(cv_lp.gt.0)
2647 & write(cv_lp,*)
2648 & 'Error reported by ',err_rep,' in ',subname
2649 istat = ierr
2650 goto 999
2651 endif
2652 if (cv_nodetype(inode).eq.1) then
2653 do while(k.le.cv_slavef)
2654 if((.not.cv_constr_work).or.
2655 & (cv_proc_workload(cv_proc_sorted(k))+aux_flop.lt.
2656 & cv_proc_maxwork(cv_proc_sorted(k)))
2657 & .AND.((.not.cv_constr_mem).or.
2658 & (cv_proc_memused(cv_proc_sorted(k))+aux_mem.lt.
2659 & cv_proc_maxmem(cv_proc_sorted(k))))) then
2660 cv_procnode(inode)=cv_proc_sorted(k)
2661 cv_proc_workload(cv_proc_sorted(k))=
2662 & cv_proc_workload(cv_proc_sorted(k))+aux_flop
2663 cv_proc_memused(cv_proc_sorted(k))=
2664 & cv_proc_memused(cv_proc_sorted(k))+aux_mem
2665 cv_layerworkload(cv_proc_sorted(k))=
2666 & cv_layerworkload(cv_proc_sorted(k))+aux_flop
2667 cv_layermemused(cv_proc_sorted(k))=
2668 & cv_layermemused(cv_proc_sorted(k))+aux_mem
2669 exit
2670 else
2671 k=k+1
2672 if(k.gt.cv_slavef) then
2673 if(cv_lp.gt.0)
2674 & write(cv_lp,*)'Inconsist data in ',subname
2675 goto 999
2676 endif
2677 end if
2678 end do
2680 do j=1,cv_layer_p2node(layernmb)%nmb_t2s
2681 if(cv_layer_p2node(layernmb)%t2_nodenumbers(j).ne.
2682 & inode) then
2683 cycle
2684 else
2685 exit
2686 endif
2687 enddo
2688 do while(k.le.cv_slavef)
2689 if(((.not.cv_constr_work).or.
2690 & (cv_proc_workload(cv_proc_sorted(k))+aux_flop.lt.
2691 & cv_proc_maxwork(cv_proc_sorted(k))))
2692 & .AND.((.not.cv_constr_mem).or.
2693 & (cv_proc_memused(cv_proc_sorted(k))+aux_mem.lt.
2694 & cv_proc_maxmem(cv_proc_sorted(k))))
2695 & .AND.
2696 & (cv_layer_p2node(layernmb)%t2_cand(j,cv_proc_sorted(k)).eq.0))
2697 & then
2698 cv_procnode(inode)=cv_proc_sorted(k)
2699 cv_proc_workload(cv_proc_sorted(k))=
2700 & cv_proc_workload(cv_proc_sorted(k))+aux_flop
2701 cv_proc_memused(cv_proc_sorted(k))=
2702 & cv_proc_memused(cv_proc_sorted(k))+aux_mem
2703 cv_layer_p2node(layernmb)%t2_cand(j,cv_proc_sorted(k))
2704 & =-inode
2705 cv_layerworkload(cv_proc_sorted(k))=
2706 & cv_layerworkload(cv_proc_sorted(k))+aux_flop
2707 cv_layermemused(cv_proc_sorted(k))=
2708 & cv_layermemused(cv_proc_sorted(k))+aux_mem
2709 exit
2710 else
2711 k=k+1
2712 if(k.gt.cv_slavef) then
2713 if(cv_lp.gt.0)
2714 & write(cv_lp,*)'Error in ',subname
2715 goto 999
2716 endif
2717 end if
2718 end do
2719 nmb_cand_needed=cv_invalid
2720 do j=1,cv_layer_p2node(layernmb)%nmb_t2s
2721 if(cv_layer_p2node(layernmb)%t2_nodenumbers(j)
2722 & .ne.inode)
2723 & then
2724 cycle
2725 else
2726 nmb_cand_needed=
2727 & cv_layer_p2node(layernmb)%
2728 & t2_cand(j,cv_slavef+1)
2729 exit
2730 endif
2731 enddo
2732 if(nmb_cand_needed.eq.cv_invalid) then
2733 if(cv_lp.gt.0)
2734 & write(cv_lp,*)'Error in ',subname
2735 goto 999
2736 endif
2737 aux_flop=
2738 & cv_layer_p2node(layernmb)%t2_candcostw(j)
2739 aux_mem=
2740 & cv_layer_p2node(layernmb)%t2_candcostm(j)
2741 do while((k.le.cv_slavef).and.(nmb_cand_needed.gt.0))
2742 if(((.not.cv_constr_work).or.
2743 & (cv_proc_workload(cv_proc_sorted(k))+aux_flop.lt.
2744 & cv_proc_maxwork(cv_proc_sorted(k))))
2745 & .AND.((.not.cv_constr_mem).or.
2746 & (cv_proc_memused(cv_proc_sorted(k))+aux_mem.lt.
2747 & cv_proc_maxmem(cv_proc_sorted(k))))
2748 & .AND.
2749 & (cv_layer_p2node(layernmb)%
2750 & t2_cand(j,cv_proc_sorted(k)).eq.0))
2751 & then
2752 cv_proc_workload(cv_proc_sorted(k))=
2753 & cv_proc_workload(cv_proc_sorted(k))+aux_flop
2754 cv_proc_memused(cv_proc_sorted(k))=
2755 & cv_proc_memused(cv_proc_sorted(k))+aux_mem
2756 cv_layer_p2node(layernmb)%
2757 & t2_cand(j,cv_proc_sorted(k))
2758 & =inode
2759 cv_layerworkload(cv_proc_sorted(k))=
2760 & cv_layerworkload(cv_proc_sorted(k))+aux_flop
2761 cv_layermemused(cv_proc_sorted(k))=
2762 & cv_layermemused(cv_proc_sorted(k))+aux_mem
2763 nmb_cand_needed=nmb_cand_needed-1
2764 k=k+1
2765 else
2766 k=k+1
2767 if(k.gt.cv_slavef) then
2768 if(cv_lp.gt.0)
2769 & write(cv_lp,*)'Error in ',subname
2770 goto 999
2771 endif
2772 end if
2773 end do
2774 if(nmb_cand_needed.gt.0) then
2775 if(cv_lp.gt.0)
2776 & write(cv_lp,*)'Error in ',subname
2777 goto 999
2778 endif
2779 end if
2780 end if
2781 end do
2782 do i=1,cv_layer_p2node(layernmb)%nmb_t2s
2783 nmb_cand_needed=
2784 & cv_layer_p2node(layernmb)%t2_cand(i,cv_slavef+1)
2785 candid= cv_layer_p2node(layernmb)%t2_cand(i,1:cv_slavef)
2786 cv_layer_p2node(layernmb)%t2_cand(i,1:cv_slavef)=-1
2787 k=0
2788 do j=1,cv_slavef
2789 if(candid(j).gt.0) then
2790 k=k+1
2791 cv_layer_p2node(layernmb)%t2_cand(i,k)=j-1
2792 end if
2793 end do
2794 if (k.ne.nmb_cand_needed) then
2795 if(cv_lp.gt.0)
2796 & write(cv_lp,*)'Error in ',subname
2797 goto 999
2798 endif
2799 enddo
2800 do i=1,cv_slavef
2801 cv_layerworkload(i)=cv_layerworkload(i)-old_workload(i)
2802 cv_layermemused(i)=cv_layermemused(i)-old_memused(i)
2803 enddo
2804 istat=0
2805 999 continue
2806 DEALLOCATE(candid, sorted_nmb, sorted_costw, sorted_costm,
2807 & old_workload, old_memused)
2808 return
2811 & procnode)
2812 integer,intent(in)::inode,procnmb
2813 integer,intent(inout)::procnode(:)
2814 integer in
2815 procnode(inode)=procnmb
2816 if (cv_fils(inode).eq.0) return
2817 in=cv_fils(inode)
2818 do while(in>0)
2819 procnode(in)=procnmb
2820 in=cv_fils(in)
2821 end do
2822 in=-in
2823 do while(in>0)
2825 in=cv_frere(in)
2826 end do
2827 return
2830 implicit none
2831 integer,intent(inout)::procnode(:)
2832 integer i,inode,procnmb
2833 do i=cv_layerl0_start,cv_layerl0_end
2834 inode=cv_layerl0_array(i)
2835 if(inode.gt.0) then
2836 procnmb=procnode(inode)
2838 endif
2839 enddo
2840 return
2843 implicit none
2844 integer candid,inode,index,i,j,layernmb,master,nmbcand,swapper,
2845 & totalnmb,node_of_master,node_of_candid,node_of_swapper
2846 DOUBLE PRECISION::mastermem,slavemem,maxmem
2847 logical swapthem,cand_better_master_arch,cand_better_swapper_arch
2848 maxmem=maxval(cv_proc_memused(:))
2849 totalnmb=0
2850 do layernmb=cv_maxlayer,1,-1
2851 do i=1,cv_layer_p2node(layernmb)%nmb_t2s
2852 inode=cv_layer_p2node(layernmb)%t2_nodenumbers(i)
2853 master=cv_procnode(inode)
2854 if(ke69 .gt. 1) then
2855 allowed_nodes = .false.
2857 node_of_master = mem_distribmpi(master-1)
2858 if (node_of_master .lt. 0 ) then
2859 if(cv_mp.gt.0) write(cv_mp,*)'node_of_master_not found'
2860 endif
2861 node_of_swapper = node_of_master
2862 endif
2863 mastermem=cv_proc_memused(master)
2864 nmbcand=cv_layer_p2node(layernmb)%t2_cand(i,cv_slavef+1)
2865 swapper=master
2866 index=0
2867 do j=1,nmbcand
2868 candid=cv_layer_p2node(layernmb)%t2_cand(i,j)+1
2869 slavemem=cv_proc_memused(candid)
2870 if(ke69 .gt. 1) then
2871 node_of_candid = mem_distribmpi(candid-1)
2872 if (node_of_candid .lt. 0 ) then
2873 if(cv_mp.gt.0) write(cv_mp,*)
2874 & 'node_of_candid_not found'
2875 endif
2876 endif
2877 if(ke69 .le. 1) then
2878 if((slavemem.lt.mastermem) .and.
2879 & (slavemem.lt.cv_proc_memused(swapper))) then
2880 swapper=candid
2881 index=j
2882 endif
2883 else
2884 cand_better_master_arch = (
2885 & (
2886 & (slavemem.lt.mastermem) .or.
2887 & (.not. allowed_nodes(node_of_master))
2888 & )
2889 & .and. allowed_nodes(node_of_candid)
2890 & )
2891 cand_better_swapper_arch = (
2892 & (
2893 & (slavemem.lt.cv_proc_memused(swapper)) .or.
2894 & (.not. allowed_nodes(node_of_swapper))
2895 & )
2896 & .and. allowed_nodes(node_of_candid)
2897 & )
2898 if(cand_better_master_arch .and.
2899 & cand_better_swapper_arch ) then
2900 swapper=candid
2901 node_of_swapper = node_of_candid
2902 index=j
2903 endif
2904 endif
2905 enddo
2906 if(swapper.ne.master) then
2907 swapthem = .false.
2908 if(0.75d0*mastermem.ge.cv_proc_memused(swapper))
2909 & swapthem=.true.
2910 if(mastermem.le.mastermem-cv_ncostm(inode)
2911 & +cv_layer_p2node(layernmb)%t2_candcostm(i))
2912 & swapthem=.false.
2913 if(mastermem.le.cv_proc_memused(swapper)
2914 & +cv_ncostm(inode)
2915 & -cv_layer_p2node(layernmb)%t2_candcostm(i))
2916 & swapthem=.false.
2917 if(maxmem.le.mastermem-cv_ncostm(inode)
2918 & +cv_layer_p2node(layernmb)%t2_candcostm(i))
2919 & swapthem=.false.
2920 if(maxmem.le.cv_proc_memused(swapper)+cv_ncostm(inode)
2921 & -cv_layer_p2node(layernmb)%t2_candcostm(i))
2922 & swapthem=.false.
2923 if(ke69 .gt. 1) then
2924 if (.not. allowed_nodes(node_of_master)) then
2925 swapthem=.true.
2926 endif
2927 endif
2928 if(.NOT.swapthem) cycle
2929 cv_proc_workload(master)=cv_proc_workload(master)
2930 & -cv_ncostw(inode)
2931 & +cv_layer_p2node(layernmb)%t2_candcostw(i)
2932 cv_proc_memused(master)=cv_proc_memused(master)
2933 & -cv_ncostm(inode)
2934 & +cv_layer_p2node(layernmb)%t2_candcostm(i)
2935 cv_proc_workload(swapper)=cv_proc_workload(swapper)
2936 & +cv_ncostw(inode)
2937 & -cv_layer_p2node(layernmb)%t2_candcostw(i)
2938 cv_proc_memused(swapper)=cv_proc_memused(swapper)
2939 & +cv_ncostm(inode)
2940 & -cv_layer_p2node(layernmb)%t2_candcostm(i)
2941 cv_layer_p2node(layernmb)%t2_cand(i,index)=master-1
2942 cv_procnode(inode)=swapper
2943 maxmem=maxval(cv_proc_memused(:))
2944 totalnmb = totalnmb+1
2945 endif
2946 enddo
2947 enddo
2950 implicit none
2951 DOUBLE PRECISION,intent(in),OPTIONAL::maxwork(cv_slavef),
2952 & maxmem(cv_slavef)
2953 integer,intent(out)::istat
2954 integer i,allocok
2955 DOUBLE PRECISION dummy
2956 character (len=48):: subname
2957 istat=-1
2958 subname='PROCINIT'
2959 if(present(maxwork)) then
2960 cv_constr_work=.true.
2961 else
2962 cv_constr_work=.false.
2963 end if
2964 if(present(maxmem)) then
2965 cv_constr_mem=.true.
2966 else
2967 cv_constr_mem=.false.
2968 end if
2969 allocate(cv_proc_workload(cv_slavef),
2970 & cv_proc_maxwork(cv_slavef),
2971 & cv_proc_memused(cv_slavef),
2972 & cv_proc_maxmem(cv_slavef),
2973 & cv_proc_sorted(cv_slavef),
2974 & stat=allocok)
2975 if (allocok.gt.0) then
2976 cv_info(1) = cv_error_memalloc
2977 cv_info(2) = 2*cv_slavef
2978 istat = cv_error_memalloc
2979 if(cv_lp.gt.0)
2980 & write(cv_lp,*)'memory allocation error in ',subname
2981 return
2982 end if
2983 allocate(work_per_proc(cv_slavef),id_son(cv_slavef),stat=allocok)
2984 if (allocok.gt.0) then
2985 cv_info(1) = cv_error_memalloc
2986 cv_info(2) = 2*cv_slavef
2987 istat = cv_error_memalloc
2988 if(cv_lp.gt.0)
2989 & write(cv_lp,*)'memory allocation error in ',subname
2990 return
2991 end if
2992 do i=1,cv_slavef
2993 cv_proc_workload(i)=dble(0)
2994 if(cv_constr_work) then
2995 cv_proc_maxwork(i)=maxwork(i)
2996 else
2997 cv_proc_maxwork(i)=(huge(dummy))
2998 endif
2999 cv_proc_memused(i)=dble(0)
3000 if(cv_constr_mem) then
3001 cv_proc_maxmem(i)=maxmem(i)
3002 else
3003 cv_proc_maxmem(i)=(huge(dummy))
3004 endif
3005 end do
3006 do i=1, cv_slavef
3007 cv_proc_sorted(i)=i
3008 enddo
3009 istat=0
3010 return
3013 & (inode_entry,ctr_entry,istat)
3014 implicit none
3015 integer, intent(in)::inode_entry,ctr_entry
3016 integer, intent(inout)::istat
3017 integer::j,k,in,in1,ierr,son,nmb_procs_inode,nmb_sons_inode,
3018 & current,i
3019 INTEGER, ALLOCATABLE, DIMENSION(:) :: procs4son
3020 INTEGER :: allocok
3021 character (len=48):: subname
3022 DOUBLE PRECISION :: relative_weight,costs_sons
3023 DOUBLE PRECISION :: loc_relax
3024 INTEGER :: depth
3025 INTEGER :: inode,ctr
3026 logical force_cand
3027 DOUBLE PRECISION Y
3028 integer nmb_propmap_strict,share2,procsrest,current2
3029 integer k69onid
3030 INTEGER, ALLOCATABLE, DIMENSION(:) :: procs_inode
3031 LOGICAL UPDATE_CTR
3032 inode = inode_entry
3033 ctr = ctr_entry
3034 1234 CONTINUE
3035 if (ctr.le.0) then
3036 istat = 0
3037 return
3038 endif
3039 istat= -1
3040 if(cv_frere(inode).eq.cv_n+1) return
3041 subname='MOD_PROPMAP'
3042 if(.NOT.associated(cv_prop_map(inode)%ind_proc)) return
3043 nmb_sons_inode = 0
3044 costs_sons = dble(0)
3045 force_cand=(mod(cv_keep(24),2).eq.0)
3046 in = inode
3047 do while (cv_fils(in).gt.0)
3048 in=cv_fils(in)
3049 end do
3050 if (cv_fils(in).eq.0) then
3051 istat = 0
3052 goto 999
3053 endif
3054 in = -cv_fils(in)
3055 son=in
3056 do while(in.gt.0)
3057 nmb_sons_inode = nmb_sons_inode + 1
3058 if(cv_tcostw(in).le.0.0d0) then
3059 if(cv_lp.gt.0)
3060 & write(cv_lp,*)'Subtree costs for ',in,
3061 & ' should be positive in ',subname
3062 goto 999
3063 endif
3064 if (cv_keep(67) .ne. 1) then
3065 costs_sons = costs_sons + cv_tcostw(in)
3066 else
3067 costs_sons = costs_sons + cv_tcostm(in)
3068 end if
3069 in=cv_frere(in)
3070 enddo
3071 if(costs_sons.le.0d0) then
3072 if(cv_lp.gt.0)
3073 & write(cv_lp,*)'Error in ',subname
3074 & ,subname
3075 goto 999
3076 endif
3077 if ((cv_nodelayer(inode).eq.0).AND.
3078 & (cv_frere(inode).ne.cv_n+1)) then
3079 istat = 0
3080 goto 999
3081 endif
3082 IF (nmb_sons_inode.eq.1) THEN
3083 if(.NOT.associated(cv_prop_map(son)%ind_proc)) then
3084 WRITE(6,*) son, " cv_prop_map(son)%ind_proc not associated "
3085 endif
3086 cv_prop_map(son)%ind_proc = cv_prop_map(inode)%ind_proc
3087 inode = son
3088 GOTO 1234
3089 ENDIF
3090 ALLOCATE(procs_inode(cv_slavef),
3091 & procs4son(cv_size_ind_proc),stat=allocok)
3092 if (allocok.gt.0) then
3093 cv_info(1) = cv_error_memalloc
3094 cv_info(2) = cv_size_ind_proc + cv_slavef
3095 istat = cv_error_memalloc
3096 if(cv_lp.gt.0)
3097 & write(cv_lp,*)'memory allocation error in ',subname
3098 return
3099 end if
3100 procs_inode=-1
3101 nmb_procs_inode = 0
3102 do j=1,cv_slavef
3104 nmb_procs_inode = nmb_procs_inode + 1
3105 endif
3106 end do
3107 i=0
3108 do j=1,cv_slavef
3109 if(ke69 .gt.1) then
3110 call mumps_get_idp1_proc(j-1,
3111 & k69onid,ierr)
3112 else
3113 k69onid = j
3114 endif
3116 i = i + 1
3117 procs_inode(i)=k69onid
3118 endif
3119 end do
3120 if(i.ne.nmb_procs_inode)then
3121 if(cv_lp.gt.0)
3122 & write(cv_lp,*)'Error in ',subname
3123 & ,subname
3124 goto 999
3125 endif
3126 if(nmb_procs_inode.eq.0) then
3127 if(cv_lp.gt.0)
3128 & write(cv_lp,*)'Error in ',subname
3129 & ,subname
3130 goto 999
3131 end if
3132 depth=
max(cv_mixed_strat_bound - ctr,0)
3133 if ((cv_keep(24).eq.16).OR.(cv_keep(24).eq.18)) then
3134 if(depth.ge.cv_mixed_strat_bound) then
3135 loc_relax = dble(1)
3136 else
3137 loc_relax = dble(1) +
3138 &
max(dble(cv_keep(77))/dble(100), dble(0))
3139 endif
3140 else
3141 loc_relax = dble(1)
3142 endif
3143 in=son
3144 current = 1
3145 do while(in.gt.0)
3146 update_ctr = .true.
3147 if( ( (nmb_sons_inode.ge.nmb_procs_inode).AND.
3148 & (nmb_procs_inode.LT.4) )
3149 & .OR. ( nmb_sons_inode.EQ.1 )
3150 & ) then
3151 procs4son = cv_prop_map(inode)%ind_proc
3152 IF (nmb_sons_inode.EQ.1) update_ctr=.false.
3153 else
3154 do k=1,cv_size_ind_proc
3155 do j=0,cv_bitsize_of_int-1
3156 procs4son(k)=ibclr(procs4son(k),j)
3157 end do
3158 end do
3159 nmb_propmap_strict=0
3160 do k=1,cv_slavef
3162 nmb_propmap_strict=nmb_propmap_strict+1
3164 end if
3165 end do
3166 if(costs_sons.gt.0.0d0) then
3167 if (cv_keep(67) .ne. 1) then
3168 relative_weight=cv_tcostw(in)/costs_sons
3169 else
3170 relative_weight=cv_tcostm(in)/costs_sons
3171 endif
3172 else
3173 relative_weight=0.0d0
3174 endif
3175 current = nmb_propmap_strict
3176 share2=
3177 &
max(0,nint(relative_weight*(loc_relax-dble(1))*
3178 & dble(nmb_procs_inode)))
3179 procsrest=nmb_procs_inode - nmb_propmap_strict
3180 share2=
min(share2,procsrest)
3181 CALL random_number(y)
3182 current2=int(dble(y)*dble(procsrest))
3183 k=1
3184 i=1
3185 do while((share2.gt.0).and.(i.le.2))
3186 do j=1,nmb_procs_inode
3187 if(share2.le.0) exit
3188 k69onid = procs_inode(j)
3191 if(k.ge.current2)then
3193 if(ierr.ne.0) then
3194 if(cv_lp.gt.0)write(cv_lp,*)
3195 & 'BIT_SET signalled error to',subname
3196 istat = ierr
3197 goto 999
3198 end if
3199 share2 = share2 - 1
3200 endif
3201 k=k+1
3202 end if
3203 enddo
3204 i=i+1
3205 enddo
3206 if(share2.ne.0) then
3207 if(cv_lp.gt.0) write(cv_lp,*)
3208 & 'Error reported in ',subname
3209 goto 999
3210 end if
3211 end if
3212 ierr=0
3213 in1=in
3214 cv_prop_map(in1)%ind_proc=procs4son
3215 IF (update_ctr) THEN
3217 ELSE
3219 ENDIF
3220 if(ierr.ne.0) then
3221 if(cv_lp.gt.0) write(cv_lp,*)
3222 & 'Error reported in ',subname
3223 istat=ierr
3224 goto 999
3225 endif
3226 in=cv_frere(in)
3227 end do
3228 istat = 0
3229 999 continue
3230 if (allocated(procs_inode)) DEALLOCATE(procs_inode)
3231 if (allocated(procs4son)) DEALLOCATE(procs4son)
3232 return
3234 recursive subroutine mumps_propmap(inode_entry, ctr_entry, istat)
3235 implicit none
3236 integer, intent(in)::inode_entry,ctr_entry
3237 integer, intent(inout)::istat
3238 integer::j,k,in,in1,ierr,son,nmb_procs_inode,nmb_sons_inode,
3239 & share,current,offset,
3240 & in_tmp,nfront,npiv,ncb,
3241 & keep48_loc,min_cand_needed
3242 integer, dimension(:), allocatable :: procs4son
3243 character (len=48):: subname
3244 DOUBLE PRECISION :: relative_weight,costs_sons, shtemp
3245 DOUBLE PRECISION :: costs_sons_real
3246 DOUBLE PRECISION :: PartofaProc
3247 LOGICAL :: SkipSmallNodes
3248 parameter(partofaproc=0.01d0)
3249 DOUBLE PRECISION :: loc_relax
3250 INTEGER :: depth
3251 logical force_cand
3252 integer MUMPS_REG_GETKMAX, MUMPS_BLOC2_GET_NSLAVESMIN
3253 external MUMPS_REG_GETKMAX, MUMPS_BLOC2_GET_NSLAVESMIN
3254 DOUBLE PRECISION Y
3255 integer nmb_propmap_strict,share2,procsrest,current2
3256 integer k69onid,nb_free_procs,local_son_indice,nb_procs_for_sons,
3257 & ptr_upper_ro_procs
3258 integer :: inode, ctr
3259 INTEGER :: allocok
3260 logical upper_round_off,are_sons_treated
3261 DOUBLE PRECISION tmp_cost
3262 inode = inode_entry
3263 ctr = ctr_entry
3264 1234 CONTINUE
3265 if (ctr.le.0) then
3266 istat = 0
3267 return
3268 endif
3269 istat= -1
3270 if(cv_frere(inode).eq.cv_n+1) return
3271 subname='PROPMAP'
3272 nmb_procs_inode = 0
3273 do j=1,cv_slavef
3275 & nmb_procs_inode = nmb_procs_inode + 1
3276 end do
3277 if(nmb_procs_inode.eq.0) then
3278 if(cv_lp.gt.0)
3279 & write(cv_lp,*)'Error in ',subname
3280 & ,subname
3281 return
3282 end if
3283 if ((cv_nodelayer(inode).eq.0).AND.
3284 & (cv_frere(inode).ne.cv_n+1)) then
3285 istat = 0
3286 return
3287 endif
3288 ptr_upper_ro_procs=1
3289 work_per_proc(1:cv_slavef)=0.0d0
3290 id_son(1:cv_slavef)=0
3291 nmb_sons_inode = 0
3292 costs_sons = dble(0)
3293 force_cand=(mod(cv_keep(24),2).eq.0)
3294 min_cand_needed=0
3295 in = inode
3296 do while (cv_fils(in).gt.0)
3297 in=cv_fils(in)
3298 end do
3299 if (cv_fils(in).eq.0) then
3300 istat = 0
3301 return
3302 endif
3303 in = -cv_fils(in)
3304 son=in
3305 do while(in.gt.0)
3306 nmb_sons_inode = nmb_sons_inode + 1
3307 if(cv_tcostw(in).le.0.0d0) then
3308 if(cv_lp.gt.0)
3309 & write(cv_lp,*)'Subtree costs for ',in,
3310 & ' should be positive in ',subname
3311 return
3312 endif
3313 if (cv_keep(67) .ne. 1) then
3314 costs_sons = costs_sons + cv_tcostw(in)
3315 else
3316 costs_sons = costs_sons + cv_tcostm(in)
3317 endif
3318 in=cv_frere(in)
3319 enddo
3320 IF (nmb_sons_inode.eq.1) THEN
3321 if(.NOT.associated(cv_prop_map(son)%ind_proc)) then
3323 if(ierr.ne.0) then
3324 if(cv_lp.gt.0)
3325 & write(cv_lp,*)'PROPMAP_INIT signalled error to'
3326 & ,subname
3327 istat = ierr
3328 goto 999
3329 end if
3330 endif
3331 ctr = ctr -1
3332 cv_prop_map(son)%ind_proc = cv_prop_map(inode)%ind_proc
3333 inode = son
3334 GOTO 1234
3335 ENDIF
3336 costs_sons_real = costs_sons
3337 skipsmallnodes = .true.
3338 IF (costs_sons_real.gt.0.0d0) then
3339 in = son
3340 do while (in.gt.0)
3341 if (cv_keep(67) .ne. 1) then
3342 relative_weight=cv_tcostw(in)/costs_sons_real
3343 else
3344 relative_weight=cv_tcostm(in)/costs_sons_real
3345 endif
3346 shtemp = relative_weight*dble(nmb_procs_inode)
3347 IF (shtemp.lt.partofaproc) THEN
3348 if (cv_keep(67) .ne. 1) then
3349 costs_sons = costs_sons - cv_tcostw(in)
3350 else
3351 costs_sons = costs_sons - cv_tcostm(in)
3352 endif
3353 ENDIF
3354 in=cv_frere(in)
3355 enddo
3356 IF (costs_sons.LT. partofaproc*costs_sons_real) THEN
3357 costs_sons = costs_sons_real
3358 skipsmallnodes = .false.
3359 ENDIF
3360 ENDIF
3361 if(costs_sons.le.0.0d0) then
3362 if(cv_lp.gt.0)
3363 & write(cv_lp,*)'Error in ',subname
3364 & ,subname
3365 return
3366 endif
3367 if(cv_relax.le.0.0d0) then
3368 if(cv_lp.gt.0)
3369 & write(cv_lp,*)'Error in ',subname,'. Wrong cv_relax'
3370 return
3371 endif
3372 ALLOCATE(procs4son(cv_size_ind_proc),stat=allocok)
3373 IF (allocok .GT. 0) THEN
3374 cv_info(1) = cv_error_memalloc
3375 cv_info(2) = cv_size_ind_proc
3376 istat = cv_error_memalloc
3377 if(cv_lp.gt.0)
3378 & write(cv_lp,*)
3379 & 'Memory allocation error in ',subname
3380 return
3381 ENDIF
3382 depth=
max(cv_n - ctr,0)
3383 if(cv_keep(24).eq.8) then
3384 loc_relax = cv_relax
3385 elseif ((cv_keep(24).eq.16).OR.(cv_keep(24).eq.18)) then
3386 loc_relax = cv_relax
3387 elseif (cv_keep(24).eq.10) then
3388 loc_relax = cv_relax
3389 elseif ((cv_keep(24).eq.12).OR.(cv_keep(24).eq.14)) then
3390 if(depth.ge.cv_mixed_strat_bound) then
3391 loc_relax = cv_relax
3392 else
3393 loc_relax = cv_relax +
3394 &
max(dble(cv_keep(77))/dble(100), dble(0))
3395 endif
3396 endif
3397 in=son
3398 current = 1
3399 local_son_indice=1
3400 nb_procs_for_sons=0
3401 upper_round_off=.false.
3402 are_sons_treated=.true.
3403 do while(in.gt.0)
3404 if( (nmb_sons_inode.ge.nmb_procs_inode).AND.
3405 & (nmb_procs_inode.LT.4) ) then
3406 procs4son = cv_prop_map(inode)%ind_proc
3407 are_sons_treated=.false.
3408 nb_procs_for_sons=nmb_procs_inode
3409 nmb_propmap_strict=nmb_procs_inode
3410 elseif(nmb_procs_inode .LE. cv_keep(83)) then
3411 procs4son = cv_prop_map(inode)%ind_proc
3412 are_sons_treated=.false.
3413 nb_procs_for_sons=nmb_procs_inode
3414 nmb_propmap_strict=nmb_procs_inode
3415 else
3416 do k=1,cv_size_ind_proc
3417 do j=0,cv_bitsize_of_int-1
3418 procs4son(k)=ibclr(procs4son(k),j)
3419 end do
3420 end do
3421 if(costs_sons.gt.0.0d0) then
3422 if (cv_keep(67) .ne. 1) then
3423 relative_weight=cv_tcostw(in)/costs_sons
3424 else
3425 relative_weight=cv_tcostm(in)/costs_sons
3426 endif
3427 else
3428 relative_weight=dble(0)
3429 endif
3430 shtemp = relative_weight*dble(nmb_procs_inode)
3431 IF ( (shtemp.LT.partofaproc)
3432 & .AND. ( skipsmallnodes ) ) THEN
3433 share = 1
3434 do j=current,cv_slavef
3435 if(ke69 .gt.1) then
3436 call mumps_get_idp1_proc(j-1,k69onid,ierr)
3437 else
3438 k69onid = j
3439 endif
3442 if(ierr.ne.0) then
3443 if(cv_lp.gt.0)write(cv_lp,*)
3444 & 'BIT_SET signalled error to',subname
3445 istat = ierr
3446 goto 999
3447 end if
3448 share = share -1
3449 exit
3450 endif
3451 enddo
3452 if (share.gt.0) then
3453 do j=1,current-1
3454 if(ke69 .gt.1) then
3455 call mumps_get_idp1_proc(j-1,k69onid,ierr)
3456 else
3457 k69onid = j
3458 endif
3461 if(ierr.ne.0) then
3462 if(cv_lp.gt.0)write(cv_lp,*)
3463 & 'BIT_SET signalled error to',subname
3464 istat = ierr
3465 goto 999
3466 end if
3467 share = share -1
3468 exit
3469 endif
3470 enddo
3471 endif
3472 if(share.ne.0) then
3473 if(cv_lp.gt.0) write(cv_lp,*)
3474 & 'Error reported in ',subname
3475 goto 999
3476 end if
3477 if(.NOT.associated(cv_prop_map(in)%ind_proc)) then
3479 if(ierr.ne.0) then
3480 if(cv_lp.gt.0)
3481 & write(cv_lp,*)'PROPMAP_INIT signalled error to'
3482 & ,subname
3483 istat = ierr
3484 goto 999
3485 end if
3486 endif
3487 current = j
3488 cv_prop_map(in)%ind_proc = procs4son
3489 in = cv_frere(in)
3490 cycle
3491 ENDIF
3492 share =
max(1,nint(shtemp))
3493 if (dble(share).ge.shtemp) then
3494 upper_round_off=.true.
3495 else
3496 upper_round_off = .false.
3497 endif
3498 share=
min(share,nmb_procs_inode)
3499 nmb_propmap_strict=share
3500 nb_procs_for_sons=nb_procs_for_sons+nmb_propmap_strict
3501 offset=1
3502 do j=current,cv_slavef
3503 if(ke69 .gt.1) then
3504 call mumps_get_idp1_proc(j-1,k69onid,ierr)
3505 else
3506 k69onid = j
3507 endif
3510 if(ierr.ne.0) then
3511 if(cv_lp.gt.0)write(cv_lp,*)
3512 & 'BIT_SET signalled error to',subname
3513 istat = ierr
3514 goto 999
3515 end if
3516 share = share-1
3517 if(share.le.0) then
3518 current = j + offset
3519 if(current.gt.cv_slavef) current = 1
3520 exit
3521 end if
3522 end if
3523 end do
3524 if(share.gt.0) then
3525 do j=1,current-1
3526 if(ke69 .gt.1) then
3527 call mumps_get_idp1_proc(j-1,k69onid,ierr)
3528 else
3529 k69onid = j
3530 endif
3533 if(ierr.ne.0) then
3534 if(cv_lp.gt.0)write(cv_lp,*)
3535 & 'BIT_SET signalled error to',subname
3536 istat = ierr
3537 goto 999
3538 end if
3539 share = share-1
3540 if(share.le.0) then
3541 current = j + offset
3542 if(current.gt.cv_slavef) current = 1
3543 exit
3544 end if
3545 end if
3546 end do
3547 endif
3548 if(share.ne.0) then
3549 if(cv_lp.gt.0) write(cv_lp,*)
3550 & 'Error reported in ',subname
3551 goto 999
3552 end if
3553 if(.not.upper_round_off)then
3554 if(local_son_indice.lt.cv_slavef)then
3555 id_son(local_son_indice)=in
3556 if ( cv_keep(67) .ne. 1 ) then
3557 work_per_proc(local_son_indice)=cv_tcostw(in)/
3558 & dble(nmb_propmap_strict)
3559 else
3560 work_per_proc(local_son_indice)=cv_tcostm(in)/
3561 & dble(nmb_propmap_strict)
3562 endif
3563 local_son_indice=local_son_indice+1
3564 if(local_son_indice.eq.cv_slavef)then
3565 CALL mumps_sort_msort(ierr,cv_slavef,id_son,
3566 & work_per_proc)
3567 if(ierr.ne.0) then
3568 if(cv_lp.gt.0)
3569 & write(cv_lp,*)
3570 & 'Error reported by MUMPS_SORT_MSORT in ',subname
3571 istat = ierr
3572 goto 999
3573 endif
3574 endif
3575 else
3576 current2=cv_slavef
3577 if (cv_keep(67) .ne.1) then
3578 tmp_cost=cv_tcostw(in)/dble(nmb_propmap_strict)
3579 else
3580 tmp_cost=cv_tcostm(in)/dble(nmb_propmap_strict)
3581 endif
3582 do while(current2.ge.1)
3583 if(tmp_cost.lt.work_per_proc(current2))exit
3584 current2=current2-1
3585 enddo
3586 if(current2.ne.cv_slavef)then
3587 if(current2.eq.0)then
3588 current2=1
3589 endif
3590 do j=cv_slavef-1,current2,-1
3591 id_son(j+1)=id_son(j)
3592 work_per_proc(j+1)=work_per_proc(j)
3593 enddo
3594 id_son(current2)=in
3595 work_per_proc(current2)=tmp_cost
3596 endif
3597 endif
3598 endif
3599 upper_round_off=.false.
3600 endif
3601 if(.NOT.associated(cv_prop_map(in)%ind_proc)) then
3603 if(ierr.ne.0) then
3604 if(cv_lp.gt.0)
3605 & write(cv_lp,*)'PROPMAP_INIT signalled error to'
3606 & ,subname
3607 istat = ierr
3608 goto 999
3609 end if
3610 endif
3611 cv_prop_map(in)%ind_proc = procs4son
3612 in=cv_frere(in)
3613 end do
3614 if(are_sons_treated)then
3615 if(nb_procs_for_sons.ne.nmb_procs_inode)then
3616 do j=1,nmb_procs_inode-nb_procs_for_sons
3617 procs4son=cv_prop_map(id_son(j))%ind_proc
3618 do while(current.le.cv_slavef)
3619 if(ke69 .gt.1) then
3620 call mumps_get_idp1_proc(current-1,k69onid,ierr)
3621 else
3622 k69onid = current
3623 endif
3625 current=current+1
3626 else
3627 exit
3628 endif
3629 enddo
3631 cv_prop_map(id_son(j))%ind_proc=procs4son
3632 enddo
3633 ptr_upper_ro_procs=
min(j,nmb_procs_inode-nb_procs_for_sons)
3634 endif
3635 endif
3636 in=son
3637 current = 1
3638 do while(in.gt.0)
3639 if( (nmb_sons_inode.ge.nmb_procs_inode).AND.
3640 & (nmb_procs_inode.LT.4) ) then
3641 procs4son = cv_prop_map(inode)%ind_proc
3642 elseif(nmb_procs_inode .LE. cv_keep(83)) then
3643 procs4son = cv_prop_map(inode)%ind_proc
3644 else
3645 procs4son = cv_prop_map(in)%ind_proc
3646 in_tmp=in
3647 nfront=cv_nfsiz(in_tmp)
3648 npiv=0
3649 in_tmp=in_tmp
3650 do while(in_tmp.gt.0)
3651 if (cv_blkon) then
3652 npiv = npiv + cv_sizeofblocks(in_tmp)
3653 else
3654 npiv=npiv+1
3655 endif
3656 in_tmp=cv_fils(in_tmp)
3657 end do
3658 ncb=nfront-npiv
3659 if (force_cand) then
3660 if (cv_keep(50) == 0) then
3661 keep48_loc=0
3662 else
3663 keep48_loc=3
3664 endif
3665 if (cv_keep(48).EQ.5) keep48_loc = 5
3666 min_cand_needed=
3668 & (cv_slavef, keep48_loc,cv_keep8(21),
3669 & cv_keep(50),
3670 & nfront,ncb,
3671 & cv_keep(375), cv_keep(119))
3672 min_cand_needed=
min(cv_slavef,min_cand_needed+1)
3673 else
3674 min_cand_needed = 0
3675 endif
3676 min_cand_needed =
max(min_cand_needed, cv_keep(91))
3677 if(costs_sons.gt.0.0d0) then
3678 if (cv_keep(67) .ne.1) then
3679 relative_weight=cv_tcostw(in)/costs_sons
3680 else
3681 relative_weight=cv_tcostm(in)/costs_sons
3682 endif
3683 else
3684 relative_weight=dble(0)
3685 endif
3686 nmb_propmap_strict=0
3687 do k=1,cv_slavef
3689 nmb_propmap_strict=nmb_propmap_strict+1
3690 end if
3691 end do
3692 offset=1
3693 share2=
3694 &
max(0,nint(relative_weight*(loc_relax-dble(1))*
3695 & dble(nmb_procs_inode)))
3696 share2 =
max(share2, min_cand_needed -nmb_propmap_strict,
3697 & (cv_keep(83)/2) - nmb_propmap_strict)
3698 procsrest=nmb_procs_inode - nmb_propmap_strict
3699 share2=
min(share2,procsrest)
3700 share2 = 0
3701 CALL random_number(y)
3702 current2 =int(dble(y)*dble(procsrest))
3703 nb_free_procs=1
3704 do j=1,cv_slavef
3705 if(share2.le.0) exit
3706 if(ke69 .gt.1) then
3707 call mumps_get_idp1_proc(j-1,k69onid,ierr)
3708 else
3709 k69onid = j
3710 endif
3713 if(nb_free_procs.ge.current2)then
3715 if(ierr.ne.0) then
3716 if(cv_lp.gt.0)write(cv_lp,*)
3717 & 'BIT_SET signalled error to',subname
3718 istat = ierr
3719 goto 999
3720 end if
3721 share2 = share2 - 1
3722 endif
3723 nb_free_procs=nb_free_procs+1
3724 end if
3725 end do
3726 if(share2.gt.0) then
3727 do j=1,cv_slavef
3728 if(share2.le.0) exit
3729 if(ke69 .gt.1) then
3730 call mumps_get_idp1_proc(j-1,k69onid,ierr)
3731 else
3732 k69onid = j
3733 endif
3737 if(ierr.ne.0) then
3738 if(cv_lp.gt.0)write(cv_lp,*)
3739 & 'BIT_SET signalled error to',subname
3740 istat = ierr
3741 goto 999
3742 end if
3743 share2 = share2 - 1
3744 end if
3745 end do
3746 endif
3747 if(share2.ne.0) then
3748 if(cv_lp.gt.0) write(cv_lp,*)
3749 & 'Error reported in ',subname
3750 goto 999
3751 end if
3752 endif
3753 ierr=0
3754 in1=in
3755 cv_prop_map(in1)%ind_proc = procs4son
3756 IF (nmb_sons_inode.EQ.1) DEALLOCATE(procs4son)
3758 if(ierr.ne.0) then
3759 if(cv_lp.gt.0) write(cv_lp,*)
3760 & 'Error reported in ',subname
3761 istat=ierr
3762 goto 999
3763 endif
3764 in=cv_frere(in)
3765 end do
3766 istat = 0
3767 999 CONTINUE
3768 if (allocated(procs4son)) DEALLOCATE(procs4son)
3769 return
3772 implicit none
3773 integer, intent(in)::inode
3774 integer, intent(out)::istat
3775 integer j,k,allocok
3776 character (len=48):: subname
3777 istat = -1
3778 if(cv_frere(inode).eq.cv_n+1) return
3779 subname='PROPMAP_INIT'
3780 if(.not.associated(
3781 & cv_prop_map(inode)%ind_proc)) then
3782 allocate(cv_prop_map(inode)%ind_proc
3783 & (cv_size_ind_proc),stat=allocok)
3784 if (allocok.gt.0) then
3785 cv_info(1) = cv_error_memalloc
3786 cv_info(2) = cv_size_ind_proc
3787 istat = cv_error_memalloc
3788 if(cv_lp.gt.0)
3789 & write(cv_lp,*)
3790 & 'memory allocation error in ',subname
3791 return
3792 end if
3793 end if
3794 do k=1,cv_size_ind_proc
3795 do j=0,cv_bitsize_of_int-1
3796 cv_prop_map(inode)%ind_proc(k)=
3797 & ibclr(cv_prop_map(inode)%ind_proc(k),j)
3798 end do
3799 end do
3800 istat = 0
3801 return
3804 integer,intent(in)::inode
3805 integer,intent(out)::istat
3806 integer ierr
3807 character (len=48):: subname
3808 subname='PROPMAP_TERM'
3809 istat =-1
3810 if(associated(cv_prop_map(inode)%ind_proc)) then
3811 deallocate(cv_prop_map(inode)%ind_proc, stat=ierr)
3812 if(ierr.ne.0) then
3813 if(cv_lp.gt.0)
3814 & write(cv_lp,*)'Memory deallocation error in ', subname
3815 istat = cv_error_memdeloc
3816 return
3817 endif
3818 nullify(cv_prop_map(inode)%ind_proc)
3819 end if
3820 istat =0
3821 return
3824 implicit none
3825 integer,intent(in)::inode,ifather
3826 integer,intent(out)::istat
3827 character (len=48):: subname
3828 istat= -1
3829 subname='PROPMAP4SPLIT'
3830 if((cv_frere(inode).eq.cv_n+1).OR.(cv_frere(ifather).eq.cv_n+1)
3831 & .OR.(.NOT.associated(cv_prop_map(inode)%ind_proc))) then
3832 if(cv_lp.gt.0)
3833 & write(cv_lp,*)'tototo signalled error to'
3834 & ,subname
3835 return
3836 endif
3837 if(.NOT.associated(cv_prop_map(ifather)%ind_proc)) then
3839 if(ierr.ne.0) then
3840 if(cv_lp.gt.0)
3841 & write(cv_lp,*)'PROPMAP_INIT signalled error to '
3842 & ,subname
3843 istat = ierr
3844 return
3845 end if
3846 endif
3847 cv_prop_map(ifather)%ind_proc =
3848 & cv_prop_map(inode)%ind_proc
3849 istat=0
3850 return
3853 implicit none
3854 integer,intent(out)::istat
3855 integer i,allocok
3856 character (len=48):: subname
3857 istat=-1
3858 subname='ROOTLIST'
3859 allocate(cv_layerl0_array(cv_maxnsteps),
3860 & cv_layerl0_sorted_costw(cv_maxnsteps),stat=allocok)
3861 if (allocok.gt.0) then
3862 cv_info(1) = cv_error_memalloc
3863 cv_info(2) = 12*cv_maxnsteps
3864 istat = cv_error_memalloc
3865 if(cv_lp.gt.0)
3866 & write(cv_lp,*)
3867 & 'memory allocation error in ',subname
3868 return
3869 end if
3870 do i=1,cv_maxnsteps
3871 cv_layerl0_sorted_costw(i)=dble(0)
3872 cv_layerl0_array(i)=0
3873 end do
3874 cv_layerl0_start = 0
3875 cv_layerl0_end = 0
3876 layerl0_endforarrangel0 = 0
3877 if ((.NOT.associated(cv_tcostw)).OR.(.NOT.associated(cv_tcostm)))
3878 & then
3879 if(cv_lp.gt.0)
3880 & write(cv_lp,*)'Error:tcost must be allocated in ',subname
3881 return
3882 end if
3883 cv_nbsa=0
3884 do i=1,cv_n
3885 if (cv_frere(i).eq.0) then
3886 cv_layerl0_start=1
3887 cv_layerl0_end=cv_layerl0_end+1
3888 IF (cv_tcostw(i).GT.mincostw)
3889 & layerl0_endforarrangel0 = layerl0_endforarrangel0+1
3890 cv_layerl0_array(cv_layerl0_end)=i
3891 cv_layerl0_sorted_costw(cv_layerl0_end)=cv_tcostw(i)
3892 cv_costw_layer0=cv_costw_layer0 + cv_tcostw(i)
3893 cv_costm_layer0=cv_costm_layer0 + cv_tcostm(i)
3894 cv_nbsa=cv_nbsa+1
3895 end if
3896 end do
3897 if(cv_nbsa.eq.0) then
3898 if(cv_lp.gt.0)
3899 & write(cv_lp,*)'Error:no root nodes in ',subname
3900 return
3901 end if
3902 call mumps_sort_msort(ierr,cv_layerl0_end-cv_layerl0_start+1,
3903 & cv_layerl0_array(cv_layerl0_start:cv_layerl0_end),
3904 & cv_layerl0_sorted_costw(cv_layerl0_start:cv_layerl0_end))
3905 IF (ierr .ne.0) then
3906 if(cv_lp.gt.0)
3907 & write(cv_lp,*)
3908 & 'Error reported by MUMPS_SORT_MSORT in ',subname
3909 istat = ierr
3910 return
3911 ENDIF
3912 cv_costw_total=cv_costw_layer0
3913 cv_costm_total=cv_costm_layer0
3914 istat=0
3915 return
3918 implicit none
3919 integer,intent(out)::istat
3920 character (len=48):: subname
3921 subname='SELECT_TYPE3'
3923 & cv_keep(1), cv_frere(1), cv_nfsiz(1), istat)
3924 IF (istat .NE. 0) THEN
3925 if(cv_lp.gt.0)
3926 & write(cv_lp,*)
3927 & 'Error: Can''t select type 3 node in ',subname
3928 ELSE IF (cv_keep(38) .ne. 0) then
3929 IF(cv_nodelayer(cv_keep(38)).eq.0.and.
3930 & (cv_keep(60).EQ.0)) then
3931 cv_keep(38)=0
3932 ELSE
3933 cv_nodetype(cv_keep(38))=3
3934 ENDIF
3935 ENDIF
3936 RETURN
3939 integer,intent(out):: istat
3940 integer :: i,dummy,layernmb,allocok
3941 integer :: montype, nbcand, inode
3942 character (len=48) :: subname
3943 istat=-1
3944 subname='SETUP_CAND'
3945 cv_nb_niv2=0
3946 do i=1,cv_n
3948 end do
3949 cv_keep(56)=cv_nb_niv2
3950 nullify(cv_par2_nodes,cv_cand)
3951 if(cv_nb_niv2.GT.0) then
3952 allocate(cv_par2_nodes(cv_nb_niv2),
3953 & cv_cand(cv_nb_niv2,cv_slavef+1),stat=allocok)
3954 if (allocok.gt.0) then
3955 cv_info(1) = cv_error_memalloc
3956 cv_info(2) = cv_nb_niv2*(cv_slavef+2)
3957 istat = cv_error_memalloc
3958 if(cv_lp.gt.0)
3959 & write(cv_lp,*)
3960 & 'memory allocation error in ',subname
3961 return
3962 end if
3963 cv_par2_nodes=0
3964 cv_cand(:,:)=0
3965 dummy=1
3966 do layernmb=1,cv_maxlayer
3967 do i=1,cv_layer_p2node(layernmb)%nmb_t2s
3968 inode = cv_layer_p2node(layernmb)%t2_nodenumbers(i)
3969 cv_par2_nodes(dummy)= inode
3970 nbcand = cv_layer_p2node(layernmb)%t2_cand(i,cv_slavef+1)
3971 cv_cand(dummy,:)=cv_layer_p2node(layernmb)%t2_cand(i,:)
3972 montype= cv_nodetype(inode)
3973 if (montype.eq.tsplit_beg) then
3975 & cv_frere(1), cv_nodetype(1),
3976 & cv_par2_nodes(1), cv_procnode(1), cv_cand(1,1),
3977 & inode,
3978 & slavef, dummy, nbcand, istat)
3979 endif
3980 dummy=dummy+1
3981 enddo
3982 enddo
3983 if(dummy.ne.cv_nb_niv2+1) then
3984 if(cv_lp.gt.0)
3985 & write(cv_lp,*)'Error in ',subname,
3986 & ' : dummy =',dummy,'nbniv2 =',cv_nb_niv2
3987 return
3988 endif
3989 endif
3990 istat=0
3991 return
3994 & inode,istat)
3995 implicit none
3996 integer,intent(in)::map_strat
3997 DOUBLE PRECISION,dimension(:),intent(in)::workload, memused
3998 integer, optional::inode,istat
3999 integer i,j,aux_int,nmb_procs,pos
4000 character (len=48):: subname
4001 logical enforce_prefsort
4002 logical use_propmap
4003 logical,SAVE::init1 = .false.
4004 logical,SAVE::init2 = .false.
4005 subname='SORTPROCS'
4006 enforce_prefsort=.true.
4007 use_propmap=present(inode)
4008 if(present(istat))istat=-1
4009 if((map_strat.ne.cv_equilib_flops).and.
4010 & (map_strat.ne.cv_equilib_mem)) then
4011 if(cv_lp.gt.0)
4012 & write(cv_lp,*)'error in ',subname
4013 return
4014 endif
4015 i=0
4016 do i = 1, cv_slavef
4017 cv_proc_sorted(i)=i
4018 enddo
4019 if (.not.present(inode)) then
4020 if(.NOT.init1) then
4021 init1=.true.
4022 end if
4023 do i=1,cv_slavef-1
4024 do j=i+1,cv_slavef
4025 if(((workload(cv_proc_sorted(j)).lt.
4026 & workload(cv_proc_sorted(i))).AND.
4027 & (map_strat.eq.cv_equilib_flops))
4028 & .OR.
4029 & ((memused(cv_proc_sorted(j)).lt.
4030 & memused(cv_proc_sorted(i))).AND.
4031 & (map_strat.eq.cv_equilib_mem)))then
4032 aux_int=cv_proc_sorted(j)
4033 cv_proc_sorted(j)=cv_proc_sorted(i)
4034 cv_proc_sorted(i)=aux_int
4035 end if
4036 end do
4037 end do
4038 else if(present(inode)) then
4039 if (use_propmap) then
4040 if(.NOT.init2) then
4041 init2=.true.
4042 end if
4043 nmb_procs=0
4044 do pos=1,cv_slavef
4046 if (pos.le.nmb_procs) then
4047 exit
4048 else
4049 nmb_procs=nmb_procs+1
4050 aux_int=cv_proc_sorted(pos)
4051 cv_proc_sorted(pos)=
4052 & cv_proc_sorted(nmb_procs)
4053 cv_proc_sorted(nmb_procs)=aux_int
4054 cycle
4055 end if
4056 end if
4057 end do
4058 end if
4059 do i=1,nmb_procs-1
4060 do j=i+1,nmb_procs
4061 if(((workload(cv_proc_sorted(j)).lt.
4062 & workload(cv_proc_sorted(i))).AND.
4063 & (map_strat.eq.cv_equilib_flops))
4064 & .OR.
4065 & ((memused(cv_proc_sorted(j)).lt.
4066 & memused(cv_proc_sorted(i))).AND.
4067 & (map_strat.eq.cv_equilib_mem)))then
4068 aux_int=cv_proc_sorted(j)
4069 cv_proc_sorted(j)=cv_proc_sorted(i)
4070 cv_proc_sorted(i)=aux_int
4071 end if
4072 end do
4073 end do
4074 do i=nmb_procs+1,cv_slavef-1
4075 do j=i+1,cv_slavef
4076 if(((workload(cv_proc_sorted(j)).lt.
4077 & workload(cv_proc_sorted(i))).AND.
4078 & (map_strat.eq.cv_equilib_flops))
4079 & .OR.
4080 & ((memused(cv_proc_sorted(j)).lt.
4081 & memused(cv_proc_sorted(i))).AND.
4082 & (map_strat.eq.cv_equilib_mem)))then
4083 aux_int=cv_proc_sorted(j)
4084 cv_proc_sorted(j)=cv_proc_sorted(i)
4085 cv_proc_sorted(i)=aux_int
4086 end if
4087 end do
4088 end do
4089 if(.NOT.enforce_prefsort) then
4090 if(((2.0d0*workload(cv_proc_sorted(nmb_procs+1)).lt.
4091 & workload(cv_proc_sorted(1))).AND.
4092 & (map_strat.eq.cv_equilib_flops))
4093 & .OR.
4094 & ((2.0d0*memused(cv_proc_sorted(nmb_procs+1)).lt.
4095 & memused(cv_proc_sorted(1))).AND.
4096 & (map_strat.eq.cv_equilib_mem)))then
4097 do i=1,cv_slavef-1
4098 do j=i+1,cv_slavef
4099 if(((workload(cv_proc_sorted(j)).lt.
4100 & workload(cv_proc_sorted(i))).AND.
4101 & (map_strat.eq.cv_equilib_flops))
4102 & .OR.
4103 & ((memused(cv_proc_sorted(j)).lt.
4104 & memused(cv_proc_sorted(i))).AND.
4105 & (map_strat.eq.cv_equilib_mem)))then
4106 aux_int=cv_proc_sorted(j)
4107 cv_proc_sorted(j)=cv_proc_sorted(i)
4108 cv_proc_sorted(i)=aux_int
4109 end if
4110 end do
4111 end do
4112 endif
4113 end if
4114 endif
4115 if(present(istat))istat=0
4116 return
4119 & info,procnode,ssarbr,nbsa)
4120 implicit none
4121 integer,dimension(cv_n),intent(inout)::ne,nfsiz,frere,fils,
4122 & procnode,ssarbr
4123 integer, intent(inout):: keep(500),info(80),nbsa
4124 INTEGER(8) KEEP8(150)
4125 ne=cv_ne
4126 nfsiz=cv_nfsiz
4127 frere=cv_frere
4128 fils=cv_fils
4129 keep(2) =cv_keep(2)
4130 keep(20)=cv_keep(20)
4131 keep(28)=cv_nsteps
4132 keep(38)=cv_keep(38)
4133 keep(56)=cv_keep(56)
4134 keep(61)=cv_keep(61)
4135 info(5)=cv_info(5)
4136 info(6)=cv_nsteps
4137 procnode=cv_procnode
4138 ssarbr=cv_ssarbr
4139 nbsa=cv_nbsa
4142 implicit none
4143 integer,intent(out)::istat
4144 integer i,ierr,layernmb
4145 character (len=48):: subname
4146 istat=-1
4147 subname='TERMGLOB'
4148 nullify(cv_frere,cv_fils,cv_nfsiz,cv_ne,cv_keep,cv_keep8,
4149 & cv_icntl,cv_info,cv_procnode,cv_ssarbr)
4150 deallocate(cv_proc_workload,cv_proc_maxwork,cv_proc_memused,
4151 & cv_proc_maxmem,cv_nodetype,
4152 & cv_nodelayer,cv_proc_sorted,
4153 & cv_ncostw,cv_ncostm,
4154 & cv_layerworkload,cv_layermemused,
4155 & stat=ierr)
4156 if(ierr.ne.0) then
4157 if(cv_lp.gt.0)
4158 & write(cv_lp,*)'Memory deallocation error in ',subname
4159 istat = cv_error_memdeloc
4160 return
4161 end if
4162 deallocate(work_per_proc,id_son,stat=ierr)
4163 if(ierr.ne.0) then
4164 if(cv_lp.gt.0)
4165 & write(cv_lp,*)'Memory deallocation error in ',subname
4166 istat = cv_error_memdeloc
4167 return
4168 end if
4169 do layernmb=1,cv_maxlayer
4170 if(cv_layer_p2node(layernmb)%nmb_t2s.gt.0) then
4171 deallocate(cv_layer_p2node(layernmb)%t2_nodenumbers,
4172 & cv_layer_p2node(layernmb)%t2_cand,
4173 & cv_layer_p2node(layernmb)%t2_candcostw,
4174 & cv_layer_p2node(layernmb)%t2_candcostm,
4175 & stat=ierr)
4176 if(ierr.ne.0) then
4177 if(cv_lp.gt.0)
4178 & write(cv_lp,*)'Memory deallocation error in ',
4179 & subname
4180 istat = cv_error_memdeloc
4181 return
4182 end if
4183 endif
4184 enddo
4185 if(associated(cv_layer_p2node)) then
4186 deallocate(cv_layer_p2node,stat=ierr)
4187 if(ierr.ne.0) then
4188 if(cv_lp.gt.0)
4189 & write(cv_lp,*)'Memory deallocation error in ',subname
4190 istat = cv_error_memdeloc
4191 return
4192 end if
4193 end if
4194 do i=1,cv_n
4196 if(ierr.ne.0) then
4197 if(cv_lp.gt.0)
4198 & write(cv_lp,*)'PROPMAP_TERM signalled error in ',
4199 & subname
4200 istat = ierr
4201 return
4202 end if
4203 end do
4204 if(associated(cv_prop_map))deallocate(cv_prop_map,stat=ierr)
4205 if(ierr.ne.0) then
4206 if(cv_lp.gt.0)
4207 & write(cv_lp,*)'Memory deallocation error in ',subname
4208 istat = cv_error_memdeloc
4209 return
4210 end if
4211 istat=0
4212 return
4215 implicit none
4216 integer,intent(in)::pos
4217 integer i,nfront,npiv,nextpos
4218 if ((.NOT.associated(cv_tcostw)).OR.(.NOT.associated(cv_tcostm)))
4219 & then
4221 end if
4222 nfront=cv_nfsiz(pos)
4223 npiv=1
4224 nextpos=cv_fils(pos)
4225 do while (nextpos.gt.0)
4226 if (cv_blkon) then
4227 npiv = npiv + cv_sizeofblocks(nextpos)
4228 else
4229 npiv=npiv+1
4230 endif
4231 nextpos=cv_fils(nextpos)
4232 end do
4234 & cv_ncostw(pos), cv_ncostm(pos))
4235 cv_tcostw(pos)=cv_ncostw(pos)
4236 cv_tcostm(pos)=cv_ncostm(pos)
4237 if (cv_ne(pos).ne.0) then
4238 nextpos=cv_fils(pos)
4239 do while(nextpos.gt.0)
4240 nextpos=cv_fils(nextpos)
4241 end do
4242 nextpos=-nextpos
4243 do i=1,cv_ne(pos)
4244 cv_depth(nextpos)=cv_depth(pos)+1
4246 cv_tcostw(pos)=cv_tcostw(pos)+cv_tcostw(nextpos)
4247 cv_tcostm(pos)=cv_tcostm(pos)+cv_tcostm(nextpos)
4248 nextpos=cv_frere(nextpos)
4249 end do
4250 endif
4251 return
4254 implicit none
4255 integer, intent(in)::inode
4256 integer in
4257 cv_nodetype(inode)=-1
4258 in=cv_fils(inode)
4259 do while (in>0)
4260 in=cv_fils(in)
4261 end do
4262 in=-in
4263 do while(in.gt.0)
4265 in=cv_frere(in)
4266 enddo
4269 & maxwork,minwork,maxmem,minmem)
4270 implicit none
4271 DOUBLE PRECISION,dimension(:),intent(in)::workload,
4272 & memused
4273 DOUBLE PRECISION,intent(out)::maxwork,minwork,maxmem,minmem
4274 maxwork=maxval(workload)
4275 minwork=minval(workload, mask= workload > dble(0))
4276 maxmem=maxval(memused)
4277 minmem=minval(memused, mask= memused > dble(0))
4280 implicit none
4281 integer layernumber,nodenumber
4282 integer i
4283 integer inode
4284 integer current_max,current_proc
4285 current_max = 0
4286 score = 0
4287 allowed_nodes = .false.
4288 inode=cv_layer_p2node(layernumber)%t2_nodenumbers(nodenumber)
4289 do i=1,cv_layer_p2node(layernumber)%t2_cand(nodenumber,
4290 & cv_slavef+1)
4291 current_proc=cv_layer_p2node(layernumber)%t2_cand(nodenumber,i)
4292 if ( current_proc .ge. 0) then
4293 score(mem_distribmpi(current_proc)) =
4294 & score(mem_distribmpi(current_proc)) + 1
4295 endif
4296 enddo
4297 current_proc = cv_procnode(inode) - 1
4298 score(mem_distribmpi(current_proc)) =
4299 & score(mem_distribmpi(current_proc)) + 1
4300 do i=0,nb_arch_nodes - 1
4301 if ( score(i) .gt. current_max ) then
4302 current_max = score(i)
4303 allowed_nodes = .false.
4304 allowed_nodes(i) = .true.
4305 else
4306 if(score(i) .eq. current_max) then
4307 allowed_nodes(i) = .true.
4308 endif
4309 endif
4310 enddo
4311 return
subroutine mumps_get_flops_cost(nfront, npiv, nass, keep50, level, cost)
subroutine mumps_get_split_4_perf(inode, nfront, npiv, nproc, k, lnpivsplit, npivsplit, n, frere, keep, fils, blkon, sizeofblocks, istat)
subroutine mumps_find_thislayer(nmb, thislayer, nmb_thislayer, istat)
subroutine mumps_setup_cand_chain(n, nb_niv2, frere, nodetype, par2_nodes, procnode, cand, inode_chain, slavef, dummy, nbcand, istat)
subroutine mumps_map_layer(layernmb, thislayer, nmb_thislayer, map_strat, istat)
subroutine mumps_splitnode_intree(inode, nfront, npiv, k, lnpivsplit, npivsplit, keep, n, fils, frere, nfsiz, ne, info5_nfrmax, k28_nsteps, nodetype, istat, sizeofblocks, lsizeofblocks, blkon)
subroutine mumps_initpart2(istat)
subroutine mumps_procinit(maxwork, maxmem, istat)
subroutine mumps_setup_cand(istat)
subroutine mumps_split_during_mapping(layernmb, thislayer, nmb_thislayer, istat)
subroutine mumps_costs_layer_t2pm(layernmb, nmb_thislayer, istat)
subroutine mumps_costs_blr_t2_slave(npiv, nfront, nrow, costw, costm, k471, k472, k475, k488, sym)
subroutine mumps_find_best_proc(inode, map_strat, work, mem, workload, memused, proc, istat, respect_prop)
subroutine mumps_postprocess_mem()
subroutine mumps_assign_types(layernmb, thislayer, nmb_thislayer, istat)
subroutine mumps_store_globals(ne, nfsiz, frere, fils, keep, keep8, info, procnode, ssarbr, nbsa)
subroutine mumps_select_k38k20(n, slavef, mp, icntl13, keep, frere, nd, istat)
subroutine mumps_select_type3(istat)
subroutine mumps_splitnode_update(inode, nfront, npiv, k, lnpivsplit, npivsplit, istat)
subroutine mumps_encode_procnode(istat)
logical function mumps_bit_get(procs4node, procnumber)
subroutine mumps_initpart1(n, slavef, frere, fils, nfsiz, ne, keep, keep8, icntl, info, procnode, ssarbr, peak, istat, sizeofblocks, lsizeofblocks)
recursive subroutine mumps_treecosts(pos)
subroutine mumps_accept_l0(map_strat, workload, memused, accepted, istat)
subroutine mumps_fathson_replace(ifather, istat)
subroutine mumps_higher_layer(startlayer, thislayer, nmb_thislayer, cont, istat)
subroutine mumps_bit_set(procs4node, procnumber, istat)
logical function mumps_bit_get4proc(inode, procnumber)
subroutine mumps_rootlist(istat)
subroutine mumps_calcnodecosts(npiv, nfront, costw, costm)
subroutine mumps_propmap_init(inode, istat)
recursive subroutine mumps_typeinssarbr(inode)
subroutine mumps_mapsubtree(procnode)
subroutine mumps_get_memsplit_inkpart(inode, doit, npiv, nfront, npropmap, k2, istat)
recursive subroutine mumps_propmap(inode_entry, ctr_entry, istat)
subroutine mumps_sortprocs(map_strat, workload, memused, inode, istat)
subroutine mumps_get_split_inkpart(inode, doit, npiv, nfront, npropmap, k1, k3, istat)
subroutine mumps_calccosts(istat)
subroutine mumps_costs_layer_t2(layernmb, nmb_thislayer, istat)
subroutine mumps_costs_blr_t2_master(npiv, nfront, costw, costm, k471, k472, k475, k488, sym)
subroutine mumps_list2layer(istat)
subroutine mumps_propmap4split(inode, ifather, istat)
subroutine mumps_arrangel0(map_strat, layerl0end, workload, memused, procnode, istat, respect_prop)
logical function mumps_istype2bysize(nfront, npiv)
recursive subroutine mumps_mapbelow(inode, procnmb, procnode)
subroutine mumps_make_propmap(istat)
subroutine mumps_propmap_term(inode, istat)
logical function mumps_is_node_of_type2(inode)
subroutine mumps_termglob(istat)
recursive subroutine mumps_mod_propmap(inode_entry, ctr_entry, istat)
subroutine mumps_workmem_imbalance(workload, memused, maxwork, minwork, maxmem, minmem)
subroutine mumps_layerl0(istat)
subroutine mumps_fix_accepted_master(layernumber, nodenumber)
subroutine mumps_calcnodecosts_blr(npiv, nfront, costw, costm, k471, k472, k475, k488, sym)
integer function mumps_bloc2_get_nslavesmin(slavef, k48, k821, k50, nfront, ncb, k375, k119)
integer function mumps_bloc2_get_nslavesmax(slavef, k48, k821, k50, nfront, ncb, k375, k119)
integer function mumps_reg_getkmax(keep821, ncb)