OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
zlr_stats.F
Go to the documentation of this file.
1C
2C This file is part of MUMPS 5.5.1, released
3C on Tue Jul 12 13:17:24 UTC 2022
4C
5C
6C Copyright 1991-2022 CERFACS, CNRS, ENS Lyon, INP Toulouse, Inria,
7C Mumps Technologies, University of Bordeaux.
8C
9C This version of MUMPS is provided to you free of charge. It is
10C released under the CeCILL-C license
11C (see doc/CeCILL-C_V1-en.txt, doc/CeCILL-C_V1-fr.txt, and
12C https://cecill.info/licences/Licence_CeCILL-C_V1-en.html)
13C
! Module-scope state shared by the statistics routines that follow.
! NOTE(review): this listing drops several declaration lines (some
! statements end in a trailing comma with no visible continuation),
! so the declarations shown here are incomplete.
16 IMPLICIT NONE
17 DOUBLE PRECISION :: mry_cb_fr,
19 & mry_lu_fr,
23 INTEGER :: cnt_nodes
24 DOUBLE PRECISION :: flop_lrgain,
27 & flop_panel,
28 & flop_trsm,
38 DOUBLE PRECISION :: flop_compress,
46 DOUBLE PRECISION :: factor_processed_fraction
47 INTEGER(KIND=8) :: factor_size
48 DOUBLE PRECISION :: total_flop
! time_* variables; the ones referenced in init_stats_global are
! reset to 0.d0 there. Presumably elapsed-time accumulators - confirm
! against the callers, which are not visible here.
49 DOUBLE PRECISION :: time_update
50 DOUBLE PRECISION :: time_update_lrlr1
51 DOUBLE PRECISION :: time_update_lrlr2
52 DOUBLE PRECISION :: time_update_lrlr3
53 DOUBLE PRECISION :: time_update_frlr
54 DOUBLE PRECISION :: time_update_frfr
55 DOUBLE PRECISION :: time_compress
56 DOUBLE PRECISION :: time_midblk_compress
57 DOUBLE PRECISION :: time_frswap_compress
58 DOUBLE PRECISION :: time_cb_compress
59 DOUBLE PRECISION :: time_lr_module
60 DOUBLE PRECISION :: time_upd_nelim
61 DOUBLE PRECISION :: time_lrtrsm
62 DOUBLE PRECISION :: time_frtrsm
63 DOUBLE PRECISION :: time_panel
64 DOUBLE PRECISION :: time_fac_i
65 DOUBLE PRECISION :: time_fac_mq
66 DOUBLE PRECISION :: time_fac_sq
67 DOUBLE PRECISION :: TIME_FRFRONTS
68 DOUBLE PRECISION :: time_diagcopy
69 DOUBLE PRECISION :: time_decomp
70 DOUBLE PRECISION :: time_decomp_ucfs
71 DOUBLE PRECISION :: time_decomp_asm1
72 DOUBLE PRECISION :: time_decomp_locasm2
73 DOUBLE PRECISION :: time_decomp_maplig1
74 DOUBLE PRECISION :: time_decomp_asms2s
75 DOUBLE PRECISION :: time_decomp_asms2m
76 DOUBLE PRECISION :: time_lrana_lrgrouping
77 DOUBLE PRECISION :: time_lrana_sepgrouping
78 DOUBLE PRECISION :: time_lrana_gethalo
79 DOUBLE PRECISION :: time_lrana_kway
80 DOUBLE PRECISION :: time_lrana_gnew
81 DOUBLE PRECISION :: avg_flop_facto_lr
82 DOUBLE PRECISION :: MIN_FLOP_FACTO_LR
83 DOUBLE PRECISION :: max_flop_facto_lr
! Block-size extrema; min_blocksize_ass and min_blocksize_cb are set
! to huge(1) in init_stats_global.
85 INTEGER :: min_blocksize_ass, MAX_BLOCKSIZE_ASS
86 INTEGER :: MIN_BLOCKSIZE_CB, max_blocksize_cb
88 CONTAINS
88 CONTAINS
! Gather block-size statistics from a partition boundary array.
! The size of block i is CUT(i+1) - CUT(i). Blocks 1..NPARTSASS are
! accumulated into the *_ass locals and blocks
! NPARTSASS+1..NPARTSASS+NPARTSCB into the *_cb locals (count, running
! mean, minimum, maximum).
! NOTE(review): the statements that merge the local results into the
! module-level statistics are partly missing from this listing; only
! two continuation lines of the weighted-average updates are visible.
89 SUBROUTINE collect_blocksizes(CUT,NPARTSASS,NPARTSCB)
90 INTEGER, INTENT(IN) :: NPARTSASS, NPARTSCB
91 INTEGER, POINTER, DIMENSION(:) :: CUT
92 INTEGER :: LOC_MIN_ASS, LOC_MIN_CB, LOC_MAX_ASS, LOC_MAX_CB,
93 & LOC_TOT_ASS, LOC_TOT_CB
94 DOUBLE PRECISION :: LOC_AVG_ASS, LOC_AVG_CB
95 INTEGER :: I
96 loc_tot_ass = 0
97 loc_tot_cb = 0
98 loc_avg_ass = 0.d0
99 loc_avg_cb = 0.d0
! NOTE(review): the minimum starts from the fixed sentinel 100000, so
! if every block is larger than that the reported local minimum stays
! at 100000.
100 loc_min_ass = 100000
101 loc_min_cb = 100000
102 loc_max_ass = 0
103 loc_max_cb = 0
! First NPARTSASS blocks: incremental mean, then min/max.
104 DO i = 1,npartsass
105 loc_avg_ass = ( loc_tot_ass * loc_avg_ass
106 & + cut(i+1) - cut(i) )
107 & / (loc_tot_ass + 1)
108 loc_tot_ass = loc_tot_ass + 1
109 IF (cut(i+1) - cut(i) .LE. loc_min_ass) THEN
110 loc_min_ass = cut(i+1) - cut(i)
111 END IF
112 IF (cut(i+1) - cut(i) .GE. loc_max_ass) THEN
113 loc_max_ass = cut(i+1) - cut(i)
114 END IF
115 END DO
! Next NPARTSCB blocks: same statistics for the *_cb locals.
116 DO i = npartsass+1,npartsass+npartscb
117 loc_avg_cb = ( loc_tot_cb * loc_avg_cb
118 & + cut(i+1) - cut(i) )
119 & / (loc_tot_cb + 1)
120 loc_tot_cb = loc_tot_cb + 1
121 IF (cut(i+1) - cut(i) .LE. loc_min_cb) THEN
122 loc_min_cb = cut(i+1) - cut(i)
123 END IF
124 IF (cut(i+1) - cut(i) .GE. loc_max_cb) THEN
125 loc_max_cb = cut(i+1) - cut(i)
126 END IF
127 END DO
! Tails of two count-weighted average updates (weights
! total_nblocks_ass / total_nblocks_cb and the local counts); the
! left-hand sides and first operands are not visible in this listing.
129 & + loc_tot_ass*loc_avg_ass) / (total_nblocks_ass+loc_tot_ass)
131 & + loc_tot_cb*loc_avg_cb) / (total_nblocks_cb+loc_tot_cb)
138 END SUBROUTINE collect_blocksizes
! Record a decompression flop amount F.
! Two OpenMP atomic updates are issued: one unconditionally and one
! only when CB is true.
! NOTE(review): the statements protected by the ATOMIC directives are
! missing from this listing; presumably they add F to module-level
! decompression counters - confirm against the full source.
139 SUBROUTINE upd_flop_decompress(F, CB)
140 DOUBLE PRECISION, INTENT(IN) :: F
141 LOGICAL, INTENT(IN) :: CB
142!$OMP ATOMIC UPDATE
144!$OMP END ATOMIC
145 IF (cb) THEN
146!$OMP ATOMIC UPDATE
148!$OMP END ATOMIC
149 ENDIF
150 RETURN
151 END SUBROUTINE upd_flop_decompress
! Add the estimated cost of compressing block LR_B to flop_compress.
! With m, n, k taken from LR_B%M, LR_B%N, LR_B%K as 64-bit integers:
!   hr_cost     = k**3/3 + 4*k*m*n - (2*m+n)*k**2
!   buildq_cost = 2*k**2*m - k**3   when LR_B%ISLR, otherwise 0
! Each optional flag (REC_ACC, CB_COMPRESS, FRSWAP), when present and
! true, triggers one more atomic update with the same cost.
! NOTE(review): the left-hand sides of those three extra updates are
! missing from this listing, so their target counters cannot be
! confirmed here.
152 SUBROUTINE upd_flop_compress(LR_B, REC_ACC,
153 & CB_COMPRESS, FRSWAP)
154 TYPE(lrb_type),INTENT(IN) :: LR_B
155 INTEGER(8) :: M,N,K
156 DOUBLE PRECISION :: HR_COST,BUILDQ_COST,
157 & hr_and_buildq_cost
158 LOGICAL, OPTIONAL :: REC_ACC, CB_COMPRESS, FRSWAP
159 m = int(lr_b%M,8)
160 n = int(lr_b%N,8)
161 k = int(lr_b%K,8)
! The whole expression is integer arithmetic, so k*k*k/3_8 truncates.
162 hr_cost = dble(k*k*k/3_8 + 4_8*k*m*n - (2_8*m+n)*k*k)
163 IF (lr_b%ISLR) THEN
164 buildq_cost = dble(2_8*k*k*m - k*k*k)
165 ELSE
166 buildq_cost = 0.0d0
167 END IF
168 hr_and_buildq_cost = hr_cost + buildq_cost
169!$OMP ATOMIC UPDATE
170 flop_compress = flop_compress + hr_and_buildq_cost
171!$OMP END ATOMIC
! PRESENT() is tested first because the flags are optional.
172 IF (present(rec_acc)) THEN
173 IF (rec_acc) THEN
174!$OMP ATOMIC UPDATE
176 & hr_and_buildq_cost
177!$OMP END ATOMIC
178 ENDIF
179 ENDIF
180 IF (present(cb_compress)) THEN
181 IF (cb_compress) THEN
182!$OMP ATOMIC UPDATE
184 & hr_and_buildq_cost
185!$OMP END ATOMIC
186 ENDIF
187 ENDIF
188 IF (present(frswap)) THEN
189 IF (frswap) THEN
190!$OMP ATOMIC UPDATE
192 & hr_and_buildq_cost
193!$OMP END ATOMIC
194 ENDIF
195 ENDIF
196 RETURN
197 END SUBROUTINE upd_flop_compress
!     Add to FLOP_LRGAIN the difference between the full-rank and the
!     low-rank cost estimate of a triangular solve on block LRB.
!
!     LRB  : block descriptor; M, N, K and ISLR are read.
!     LorU : 0 selects the estimates M*N*N (full rank) and K*N*N
!            (low rank); any other value selects (M-1)*N*N and
!            (N-1)*N*K.
!
!     When LRB%ISLR is false the two costs are equal and the gain is
!     zero. Every factor is converted to DOUBLE PRECISION before it
!     is multiplied, so the products cannot overflow default INTEGER
!     arithmetic for large blocks.
      SUBROUTINE upd_flop_trsm(LRB, LorU)
      TYPE(lrb_type),INTENT(IN) :: LRB
      INTEGER,INTENT(IN) :: LorU
      DOUBLE PRECISION :: LR_COST, FR_COST, LR_GAIN
      DOUBLE PRECISION :: DM, DN
      DM = DBLE(LRB%M)
      DN = DBLE(LRB%N)
      IF (LorU.EQ.0) THEN
        FR_COST = DM*DN*DN
        IF (LRB%ISLR) THEN
!         K is only read for low-rank blocks, as in the original.
          LR_COST = DBLE(LRB%K)*DN*DN
        ELSE
          LR_COST = FR_COST
        ENDIF
      ELSE
        FR_COST = (DM-1.0D0)*(DN*DN)
        IF (LRB%ISLR) THEN
          LR_COST = (DN-1.0D0)*(DN*DBLE(LRB%K))
        ELSE
          LR_COST = FR_COST
        ENDIF
      ENDIF
      LR_GAIN = FR_COST - LR_COST
!$OMP ATOMIC UPDATE
      flop_lrgain = flop_lrgain + LR_GAIN
!$OMP END ATOMIC
      RETURN
      END SUBROUTINE upd_flop_trsm
!     Estimate the flops of one update built from blocks LRB1 and
!     LRB2 and fold the result into FLOP_COMPRESS / FLOP_LRGAIN.
!
!     LRB1, LRB2      : block descriptors (M, N, K, ISLR are read).
!     MIDBLK_COMPRESS : a value >= 1 adds a recompression cost when
!                       both blocks are low rank.
!     RANK_IN         : rank used in the recompression formulas.
!     BUILDQ          : adds the extra recompression term and selects
!                       the RANK_IN-based product costs.
!     IS_SYMDIAG      : halves the dense, outer and FR-FR costs.
!     LUA_ACTIVATED   : removes the outer-product cost from the
!                       low-rank total.
!     REC_ACC         : optional, treated as .FALSE. when absent.
!
!     Accounting: when REC_ACC is true, FLOP_COMPRESS receives the
!     low-rank cost plus the recompression cost, but only if
!     LUA_ACTIVATED is also true. Otherwise FLOP_COMPRESS receives the
!     recompression cost and FLOP_LRGAIN receives dense cost minus
!     low-rank cost.
      SUBROUTINE upd_flop_update(LRB1, LRB2,
     &     MIDBLK_COMPRESS, RANK_IN, BUILDQ,
     &     IS_SYMDIAG, LUA_ACTIVATED, REC_ACC)
      TYPE(lrb_type), INTENT(IN) :: LRB1, LRB2
      INTEGER, INTENT(IN) :: RANK_IN, MIDBLK_COMPRESS
      LOGICAL, INTENT(IN) :: BUILDQ, IS_SYMDIAG, LUA_ACTIVATED
      LOGICAL, INTENT(IN), OPTIONAL :: REC_ACC
      DOUBLE PRECISION :: ROWS1, COLS1, RK1, ROWS2, RK2, RK_MID
      DOUBLE PRECISION :: DENSE_OPS, BLR_OPS, SAVED_OPS
      DOUBLE PRECISION :: INNER_OPS, MIDDLE_OPS, OUTER_OPS
      DOUBLE PRECISION :: MIXED_OPS, FRFR_OPS
      DOUBLE PRECISION :: RECOMP_OPS, ACC_OPS
      LOGICAL :: ACCUMULATING, RECOMPRESS
!     Resolve the optional flag once.
      ACCUMULATING = .FALSE.
      IF (PRESENT(REC_ACC)) ACCUMULATING = REC_ACC
      ROWS1  = DBLE(LRB1%M)
      COLS1  = DBLE(LRB1%N)
      RK1    = DBLE(LRB1%K)
      ROWS2  = DBLE(LRB2%M)
      RK2    = DBLE(LRB2%K)
      RK_MID = DBLE(RANK_IN)
      INNER_OPS  = 0.0D0
      MIDDLE_OPS = 0.0D0
      OUTER_OPS  = 0.0D0
      MIXED_OPS  = 0.0D0
      FRFR_OPS   = 0.0D0
      RECOMP_OPS = 0.0D0
!     The dense reference cost is the same in every case.
      DENSE_OPS = 2.0D0*ROWS1*ROWS2*COLS1
      IF (LRB1%ISLR) THEN
        IF (LRB2%ISLR) THEN
!         Both blocks are low rank.
          RECOMPRESS = (MIDBLK_COMPRESS.GE.1)
          IF (RECOMPRESS) THEN
            RECOMP_OPS = RK_MID*RK_MID*RK_MID/3.0D0 +
     &                   4.0D0*RK_MID*RK1*RK2 -
     &                   (2.0D0*RK1+RK2)*RK_MID*RK_MID
            IF (BUILDQ) THEN
              RECOMP_OPS = RECOMP_OPS + 4.0D0*RK_MID*RK_MID*RK1
     &                     - RK_MID*RK_MID*RK_MID
            ENDIF
          ENDIF
          INNER_OPS = 2.0D0*RK1*RK2*COLS1
          IF (RECOMPRESS.AND.BUILDQ) THEN
            MIDDLE_OPS = 2.0D0*RK1*ROWS1*RK_MID
     &                 + 2.0D0*RK2*ROWS2*RK_MID
            OUTER_OPS  = 2.0D0*ROWS1*ROWS2*RK_MID
          ELSE IF (RK1 .GE. RK2) THEN
            MIDDLE_OPS = 2.0D0*RK1*ROWS1*RK2
            OUTER_OPS  = 2.0D0*ROWS1*ROWS2*RK2
          ELSE
            MIDDLE_OPS = 2.0D0*RK1*ROWS2*RK2
            OUTER_OPS  = 2.0D0*ROWS1*ROWS2*RK1
          ENDIF
          BLR_OPS = INNER_OPS + MIDDLE_OPS + OUTER_OPS
        ELSE
!         Only LRB1 is low rank.
          MIXED_OPS = 2.0D0*RK1*ROWS2*COLS1
          OUTER_OPS = 2.0D0*ROWS1*ROWS2*RK1
          BLR_OPS   = MIXED_OPS + OUTER_OPS
        ENDIF
      ELSE
        IF (LRB2%ISLR) THEN
!         Only LRB2 is low rank.
          MIXED_OPS = 2.0D0*ROWS1*RK2*COLS1
          OUTER_OPS = 2.0D0*ROWS1*ROWS2*RK2
          BLR_OPS   = MIXED_OPS + OUTER_OPS
        ELSE
!         Neither block is low rank: nothing is saved.
          FRFR_OPS = DENSE_OPS
          BLR_OPS  = DENSE_OPS
        ENDIF
      ENDIF
      IF (IS_SYMDIAG) THEN
        DENSE_OPS = DENSE_OPS/2.0D0
        OUTER_OPS = OUTER_OPS/2.0D0
        FRFR_OPS  = FRFR_OPS/2.0D0
!       Remove the halves that were just discarded from the total.
        BLR_OPS   = BLR_OPS - OUTER_OPS - FRFR_OPS
      ENDIF
      IF (LUA_ACTIVATED) THEN
        BLR_OPS   = BLR_OPS - OUTER_OPS
        OUTER_OPS = 0.0D0
      ENDIF
      IF (ACCUMULATING) THEN
        IF (LUA_ACTIVATED) THEN
          ACC_OPS = BLR_OPS + RECOMP_OPS
!$OMP ATOMIC UPDATE
          flop_compress = flop_compress + ACC_OPS
!$OMP END ATOMIC
        ENDIF
      ELSE
!$OMP ATOMIC UPDATE
        flop_compress = flop_compress + RECOMP_OPS
!$OMP END ATOMIC
        SAVED_OPS = DENSE_OPS - BLR_OPS
!$OMP ATOMIC UPDATE
        flop_lrgain = flop_lrgain + SAVED_OPS
!$OMP END ATOMIC
      ENDIF
      END SUBROUTINE upd_flop_update
!     Subtract 2*M*N*K (taken from block LRB) from FLOP_LRGAIN.
!
!     LRB : block descriptor; M, N and K are read.
!     NIV : accepted for interface compatibility, not used here.
      SUBROUTINE upd_flop_update_lrlr3(LRB, NIV)
      TYPE(lrb_type), INTENT(IN) :: LRB
      INTEGER, INTENT(IN) :: NIV
      DOUBLE PRECISION :: PRODUCT_OPS
!     Built up factor by factor, in the original left-to-right order.
      PRODUCT_OPS = 2.0D0*DBLE(LRB%M)
      PRODUCT_OPS = PRODUCT_OPS*DBLE(LRB%N)
      PRODUCT_OPS = PRODUCT_OPS*DBLE(LRB%K)
!$OMP ATOMIC UPDATE
      flop_lrgain = flop_lrgain - PRODUCT_OPS
!$OMP END ATOMIC
      RETURN
      END SUBROUTINE upd_flop_update_lrlr3
!     Add this process's share of a front's flop count to
!     FLOP_FRFRONTS.
!
!     The cost comes from MUMPS_GET_FLOPS_COST, called with NFRONT in
!     both the first and third argument positions and with level 3.
!     It is converted to a 64-bit integer and divided, with integer
!     truncation, by NPROW*NPCOL.
!
!     KEEP50       : forwarded unchanged to MUMPS_GET_FLOPS_COST.
!     NFRONT, NPIV : front size and pivot count forwarded to the
!                    cost routine.
!     NPROW, NPCOL : their product is the divisor.
!     MYID         : not used here.
      SUBROUTINE upd_flop_root(KEEP50, NFRONT, NPIV,
     &                         NPROW, NPCOL, MYID)
      INTEGER, INTENT(IN) :: KEEP50, NFRONT, NPIV
      INTEGER, INTENT(IN) :: NPROW, NPCOL, MYID
      INTEGER, PARAMETER :: NODE_LEVEL = 3
      DOUBLE PRECISION :: FRONT_OPS, MY_SHARE
      INTEGER(8) :: OPS_I8, GRID_SIZE
      CALL mumps_get_flops_cost(NFRONT, NPIV, NFRONT, KEEP50,
     &                          NODE_LEVEL, FRONT_OPS)
      GRID_SIZE = INT(NPROW * NPCOL, 8)
      OPS_I8    = INT(FRONT_OPS, 8)
!     Integer division: the fractional part of the share is dropped.
      MY_SHARE  = DBLE(OPS_I8 / GRID_SIZE)
!$OMP ATOMIC UPDATE
      flop_frfronts = flop_frfronts + MY_SHARE
!$OMP END ATOMIC
      RETURN
      END SUBROUTINE upd_flop_root
! Reset the module-level statistics before a new run: counters and
! timers are set to zero and the minimum block sizes to huge(1).
! The dummy argument id is declared but not referenced in the lines
! visible here.
! NOTE(review): this listing omits several lines of the routine
! (including the USE statement that provides ZMUMPS_STRUC), so some
! resets are not shown.
343 SUBROUTINE init_stats_global(id)
345 TYPE (ZMUMPS_STRUC), TARGET :: id
346 mry_lu_fr = 0.d0
347 mry_lu_lrgain = 0.d0
348 mry_cb_fr = 0.d0
349 mry_cb_lrgain = 0.d0
350 flop_facto_fr = 0.d0
351 flop_facto_lr = 0.d0
352 flop_lrgain = 0.d0
353 flop_cb_compress = 0.d0
354 flop_cb_decompress = 0.d0
355 flop_decompress = 0.d0
356 flop_update_fr = 0.d0
357 flop_update_lr = 0.d0
358 flop_update_lrlr1 = 0.d0
359 flop_update_lrlr2 = 0.d0
360 flop_update_lrlr3 = 0.d0
361 flop_update_frlr = 0.d0
362 flop_update_frfr = 0.d0
364 flop_trsm_fr = 0.d0
365 flop_trsm_lr = 0.d0
366 flop_compress = 0.d0
369 flop_panel = 0.d0
370 flop_trsm = 0.d0
371 flop_frfronts = 0.d0
374 avg_blocksize_ass = 0.d0
375 avg_blocksize_cb = 0.d0
! Minima start at the largest default integer.
376 min_blocksize_ass = huge(1)
378 min_blocksize_cb = huge(1)
380 cnt_nodes = 0
381 time_update = 0.d0
383 time_update_lrlr1 = 0.d0
384 time_update_lrlr2 = 0.d0
385 time_update_lrlr3 = 0.d0
386 time_update_frlr = 0.d0
387 time_update_frfr = 0.d0
388 time_compress = 0.d0
389 time_cb_compress = 0.d0
390 time_lr_module = 0.d0
391 time_upd_nelim = 0.d0
392 time_lrtrsm = 0.d0
393 time_frtrsm = 0.d0
394 time_panel = 0.d0
395 time_fac_i = 0.d0
396 time_fac_mq = 0.d0
397 time_fac_sq = 0.d0
398 time_frfronts = 0.d0
399 time_diagcopy = 0.d0
401 time_decomp = 0.d0
402 time_decomp_ucfs = 0.d0
403 time_decomp_asm1 = 0.d0
404 time_decomp_locasm2 = 0.d0
405 time_decomp_maplig1 = 0.d0
406 time_decomp_asms2s = 0.d0
407 time_decomp_asms2m = 0.d0
408 END SUBROUTINE init_stats_global
!     Add an entry count for one front to MRY_LU_FR.
!
!     With p = NASS - NELIM and r = NCB + NELIM:
!       SYM > 0   : p*(p+1)/2 + p*r
!       otherwise : p*p + 2*p*r
!     All arithmetic after forming p and r is in double precision.
      SUBROUTINE upd_mry_lu_fr(NASS, NCB, SYM, NELIM)
      INTEGER, INTENT(IN) :: NASS, NCB, SYM, NELIM
      DOUBLE PRECISION :: PIV, REST, ENTRIES
      PIV  = DBLE(NASS - NELIM)
      REST = DBLE(NCB + NELIM)
      IF (SYM .LE. 0) THEN
        ENTRIES = PIV*PIV
     &          + 2.0D0*PIV*REST
      ELSE
        ENTRIES = PIV*(PIV+1.D0)/2.D0
     &          + PIV*REST
      ENDIF
!$OMP ATOMIC UPDATE
      mry_lu_fr = mry_lu_fr + ENTRIES
!$OMP END ATOMIC
      RETURN
      END SUBROUTINE upd_mry_lu_fr
!     Add an entry count for an NROWS x NCOLS block to MRY_CB_FR.
!
!       SYM = 0   : NCOLS*NROWS
!       otherwise : (NCOLS-NROWS)*NROWS + NROWS*(NROWS+1)/2
!     The products are formed in double precision.
      SUBROUTINE upd_mry_cb_fr(NROWS, NCOLS, SYM)
      INTEGER, INTENT(IN) :: NROWS, NCOLS, SYM
      DOUBLE PRECISION :: ENTRIES, DROWS
      DROWS = DBLE(NROWS)
      IF (SYM.NE.0) THEN
        ENTRIES = DBLE(NCOLS-NROWS)*DROWS +
     &            DROWS*DBLE(NROWS+1)/2.D0
      ELSE
        ENTRIES = DBLE(NCOLS)*DROWS
      ENDIF
!$OMP ATOMIC UPDATE
      mry_cb_fr = mry_cb_fr + ENTRIES
!$OMP END ATOMIC
      RETURN
      END SUBROUTINE upd_mry_cb_fr
!     Add to MRY_CB_LRGAIN the value M*N - (M+N)*K computed from
!     block LRB.
!
!     LRB : block descriptor; M, N and K are read.
!
!     The expression is evaluated in double precision rather than in
!     default INTEGER, so large blocks cannot overflow the product.
!     Results are unchanged whenever the integer form did not
!     overflow.
      SUBROUTINE upd_mry_cb_lrgain(LRB
     &     )
      TYPE(lrb_type), INTENT(IN) :: LRB
      DOUBLE PRECISION :: LRGAIND
      DOUBLE PRECISION :: DM, DN, DK
      DM = DBLE(LRB%M)
      DN = DBLE(LRB%N)
      DK = DBLE(LRB%K)
      LRGAIND = DM*DN - (DM+DN)*DK
!$OMP ATOMIC UPDATE
      mry_cb_lrgain = mry_cb_lrgain + LRGAIND
!$OMP END ATOMIC
      RETURN
      END SUBROUTINE upd_mry_cb_lrgain
! Sum M*N - K*(M+N) over the blocks of BLR_PANEL(1:NBBLOCKS) whose
! ISLR flag is set, then apply the sum in one atomic update.
! NOTE(review): the statement inside the ATOMIC region is missing from
! this listing; presumably it adds mry to a module-level gain counter
! - confirm against the full source.
450 SUBROUTINE upd_mry_lu_lrgain( BLR_PANEL, NBBLOCKS
451 & )
452 INTEGER,INTENT(IN) :: NBBLOCKS
453 TYPE(LRB_TYPE), INTENT(IN) :: BLR_PANEL(:)
454 DOUBLE PRECISION :: MRY
455 INTEGER :: I
456 mry = 0.0d0
457 DO i = 1, nbblocks
458 IF (blr_panel(i)%ISLR) THEN
! NOTE(review): the expression is evaluated in integer arithmetic
! before dble(); if M, N, K are default integers a very large block
! could overflow - confirm the component kinds.
459 mry = mry + dble(blr_panel(i)%M*blr_panel(i)%N
460 & - blr_panel(i)%K*(blr_panel(i)%M + blr_panel(i)%N))
461 ENDIF
462 ENDDO
463!$OMP ATOMIC UPDATE
465!$OMP END ATOMIC
466 RETURN
467 END SUBROUTINE upd_mry_lu_lrgain
! Obtain a flop count from MUMPS_GET_FLOPS_COST for the given front
! (NFRONT, NPIV, NASS, SYM, NIV) and apply it in an atomic update.
! NOTE(review): the statement inside the ATOMIC region is missing from
! this listing, so the counter that receives flop cannot be confirmed
! here.
468 SUBROUTINE upd_flop_facto_fr( NFRONT, NASS, NPIV, SYM, NIV)
469 INTEGER,INTENT(IN) :: NFRONT, SYM, NASS, NPIV, NIV
470 DOUBLE PRECISION :: FLOP
471 CALL MUMPS_GET_FLOPS_COST(NFRONT, NPIV, NASS,
472 & sym, niv, flop)
473!$OMP ATOMIC UPDATE
475!$OMP END ATOMIC
476 END SUBROUTINE upd_flop_facto_fr
! Compute a flop estimate from NROW1, NCOL1 and NASS1 (converted to
! double precision) and apply it in an atomic update:
!   KEEP50 = 0 : nrow*nass**2 + 2*nrow*nass*(ncol-nass)
!   otherwise  : nrow*nass**2 + nrow**2*nass
!                + 2*nrow*nass*(ncol-nass-nrow)
! INODE is not referenced in the lines visible here.
! NOTE(review): the statement inside the ATOMIC region is missing from
! this listing, so the counter that receives flop cannot be confirmed
! here.
477 SUBROUTINE stats_compute_flop_slave_type2( NROW1, NCOL1,
478 & NASS1, KEEP50, INODE)
479 INTEGER,INTENT(IN) :: NROW1, NCOL1, KEEP50, NASS1, INODE
480 DOUBLE PRECISION :: NROW2, NCOL2, NASS2
481 DOUBLE PRECISION :: FLOP
482 NROW2 = dble(nrow1)
483 ncol2 = dble(ncol1)
484 nass2 = dble(nass1)
485 IF (keep50.EQ.0) THEN
486 flop = nrow2*nass2*nass2
487 & + 2.0d0*nrow2*nass2*(ncol2-nass2)
488 ELSE
489 flop =
490 & nrow2*nass2*nass2
491 & + nrow2*nass2*nrow2
492 & + 2.0d0*nrow2*nass2*(ncol2-nass2-nrow2)
493 ENDIF
494!$OMP ATOMIC UPDATE
496!$OMP END ATOMIC
497 RETURN
498 END SUBROUTINE stats_compute_flop_slave_type2
!     Add the flop count of one front to FLOP_FRFRONTS.
!
!     The count is obtained from MUMPS_GET_FLOPS_COST, to which
!     NFRONT, NPIV, NASS, SYM and NIV are forwarded unchanged.
      SUBROUTINE upd_flop_frfronts(NFRONT, NPIV, NASS, SYM,
     &                             NIV)
      INTEGER, INTENT(IN) :: NFRONT, NPIV, NASS, SYM, NIV
      DOUBLE PRECISION :: FRONT_OPS
      CALL mumps_get_flops_cost(NFRONT, NPIV, NASS, SYM,
     &                          NIV, FRONT_OPS)
!$OMP ATOMIC UPDATE
      flop_frfronts = flop_frfronts + FRONT_OPS
!$OMP END ATOMIC
      RETURN
      END SUBROUTINE upd_flop_frfronts
! Compute a flop estimate from NROW1, NCOL1 and NASS1 (converted to
! double precision) and apply it in an atomic update:
!   KEEP50 = 0 : nrow*nass**2 + 2*nrow*nass*(ncol-nass)
!   otherwise  : nrow*nass**2 + nrow**2*nass
!                + 2*nrow*nass*(ncol-nass-nrow)
! The visible computation is the same as in
! stats_compute_flop_slave_type2. INODE is not referenced in the lines
! visible here.
! NOTE(review): the statement inside the ATOMIC region is missing from
! this listing, so the counter that receives flop cannot be confirmed
! here.
510 SUBROUTINE upd_flop_frfront_slave(NROW1, NCOL1, NASS1,
511 & KEEP50, INODE)
512 INTEGER,INTENT(IN) :: NROW1, NCOL1, KEEP50, NASS1, INODE
513 DOUBLE PRECISION :: NROW2, NCOL2, NASS2
514 DOUBLE PRECISION :: FLOP
515 nrow2 = dble(nrow1)
516 ncol2 = dble(ncol1)
517 nass2 = dble(nass1)
518 IF (keep50.EQ.0) THEN
519 flop = nrow2*nass2*nass2
520 & + 2.0d0*nrow2*nass2*(ncol2-nass2)
521 ELSE
522 flop =
523 & nrow2*nass2*nass2
524 & + nrow2*nass2*nrow2
525 & + 2.0d0*nrow2*nass2*(ncol2-nass2-nrow2)
526 ENDIF
527!$OMP ATOMIC UPDATE
529!$OMP END ATOMIC
530 RETURN
531 END SUBROUTINE upd_flop_frfront_slave
! Derive global compression figures from the module-level counters.
!   NB_ENTRIES_FACTOR        (in)  reference number of factor entries
!   FLOP_NUMBER              (in)  copied into total_flop
!   NB_ENTRIES_FACTOR_withLR (out) NB_ENTRIES_FACTOR minus
!                                  int(mry_lu_lrgain,8)
!   PROKG, MPG               (in)  the warning below is written to
!                                  unit MPG only when PROKG is true
!                                  and MPG > 0
! NOTE(review): several statement lines are missing from this listing
! (the tail of the global_mry_lpro_compr expression and the left-hand
! side of the global_mry_ltot_compr computation, among others).
532 SUBROUTINE compute_global_gains(NB_ENTRIES_FACTOR,
533 & FLOP_NUMBER, NB_ENTRIES_FACTOR_withLR,
534 & PROKG, MPG)
535 INTEGER(8), INTENT(IN) :: NB_ENTRIES_FACTOR
536 INTEGER, INTENT(IN) :: MPG
537 LOGICAL, INTENT(IN) :: PROKG
538 DOUBLE PRECISION, INTENT(IN) :: FLOP_NUMBER
539 INTEGER(8), INTENT(OUT) ::
540 & NB_ENTRIES_FACTOR_withLR
! A negative entry count is only reported; execution continues.
541 IF (nb_entries_factor < 0) THEN
542 IF (prokg.AND.mpg.GT.0) THEN
543 WRITE(mpg,*) "NEGATIVE NUMBER OF ENTRIES IN FACTOR"
544 WRITE(mpg,*) "===> OVERFLOW ?"
545 END IF
546 END IF
! Guard against dividing by a zero mry_lu_fr.
547 IF (mry_lu_fr .EQ. 0) THEN
548 global_mry_lpro_compr = 100.0d0
549 ELSE
550 global_mry_lpro_compr = 100.0d0 *
552 ENDIF
! NOTE(review): this overwrites the counter mry_cb_fr itself with
! 100.0d0 when it is zero - confirm that this is intended.
553 IF (mry_cb_fr .EQ. 0) THEN
554 mry_cb_fr = 100.0d0
555 END IF
556 nb_entries_factor_withlr = nb_entries_factor -
557 & int(mry_lu_lrgain,8)
! Guard against dividing by a zero entry count.
558 IF (nb_entries_factor.EQ.0) THEN
560 global_mry_ltot_compr = 100.0d0
561 ELSE
562 factor_processed_fraction = 100.0d0 *
563 & mry_lu_fr/dble(nb_entries_factor)
565 & 100.0d0*mry_lu_lrgain/dble(nb_entries_factor)
566 ENDIF
567 total_flop = flop_number
570 RETURN
571 END SUBROUTINE compute_global_gains
! Store BLR operation-count figures in DKEEP and, when printing is
! enabled, write the BLR statistics report to unit MPG.
!   DKEEP(55) = total_flop                     DKEEP(60) = 100
!   DKEEP(56) = flop_facto_lr + flop_frfronts
!   DKEEP(61) = DKEEP(56) as a percentage of total_flop
! These DKEEP entries are written whether or not printing is enabled.
! In the lines visible here only DKEEP, ICNTL36, K8110, K849, MPG and
! PROKG are referenced; the other dummy arguments are not.
! NOTE(review): three statement lines are missing from this listing
! (the value printed after "Fraction of factors in BLR fronts" and
! the values printed on the RINFOG(14) line).
572 SUBROUTINE saveandwrite_gains(LOCAL, K489, DKEEP, N,
573 & ICNTL36,
574 & DEPTH, BCKSZ, NASSMIN, NFRONTMIN, SYM, K486,
575 & K472, K475, K478, K480, K481, K483, K484,
576 & K8110, K849,
577 & NBTREENODES, NPROCS, MPG, PROKG)
578 INTEGER, INTENT(IN) :: LOCAL,K489,DEPTH, N,
579 & ICNTL36, BCKSZ,NASSMIN,
580 & nfrontmin, k486, nbtreenodes, mpg,
581 & k472, k475, k478, k480, k481, k483, k484,
582 & sym, nprocs
583 INTEGER(8), INTENT(IN) :: K8110, K849
584 LOGICAL, INTENT(IN) :: PROKG
585 DOUBLE PRECISION :: DKEEP(230)
586 LOGICAL PROK
! NOTE(review): printing is enabled for MPG >= 0 here, whereas
! compute_global_gains requires MPG > 0 - confirm which is intended.
587 prok = (prokg.AND.(mpg.GE.0))
588 IF (prok) THEN
589 WRITE(mpg,'(/A,A)')
590 & '-------------- Beginning of BLR statistics -------------------',
591 & '--------------'
592 WRITE(mpg,'(A,I2)')
593 & ' ICNTL(36) BLR variant = ', icntl36
594 WRITE(mpg,'(A,ES8.1)')
595 & ' CNTL(7) Dropping parameter controlling accuracy = ',
596 & dkeep(8)
597 WRITE(mpg,'(A)')
598 & ' Statistics after BLR factorization :'
599 WRITE(mpg,'(A,I8)')
600 & ' Number of BLR fronts = ',
601 & cnt_nodes
602 ENDIF
603 IF (prok) WRITE(mpg,'(A,F8.1,A)')
604 & ' Fraction of factors in BLR fronts =',
606 IF (prok) THEN
607 WRITE(mpg,'(A)')
608 & ' Statistics on the number of entries in factors :'
609 WRITE(mpg,'(A,ES10.3,A,F5.1,A)')
610 & ' INFOG(29) Theoretical nb of entries in factors ='
611 & ,dble(k8110),' (100.0%)'
! max(k8110,1_8) keeps the percentage finite when K8110 is zero.
612 WRITE(mpg,'(A,ES10.3,A,F5.1,A)')
613 & ' INFOG(35) Effective nb of entries (% of INFOG(29)) ='
614 & ,dble(k849),' ('
615 & ,dble(100)*(dble(k849)/dble(max(k8110,1_8)))
616 & ,'%)'
617 ENDIF
618 IF (prok) WRITE(mpg,'(A)')
619 & ' Statistics on operation counts (OPC):'
! Clamp total_flop to at least epsilon so the divisions below are
! never by zero.
620 total_flop = max(total_flop,epsilon(1.0d0))
621 dkeep(55)=dble(total_flop)
622 dkeep(60)=dble(100)
623 dkeep(56)=dble(flop_facto_lr+flop_frfronts)
624 dkeep(61)=dble(100*(flop_facto_lr+flop_frfronts)/total_flop)
625 IF (prok) THEN
626 WRITE(mpg,'(A,ES10.3,A,F5.1,A)')
627 & ' RINFOG(3) Total theoretical operations counts ='
628 & ,total_flop,' (',100*total_flop/total_flop,'%)'
629 WRITE(mpg,'(A,ES10.3,A,F5.1,A)')
630 & ' RINFOG(14) Total effective OPC (% of RINFOG(3)) ='
633 &,'%)'
634 ENDIF
635 IF (prok) WRITE(mpg,'(A,A)')
636 & '-------------- End of BLR statistics -------------------------',
637 & '--------------'
638 RETURN
639 END SUBROUTINE saveandwrite_gains
640 END MODULE zmumps_lr_stats
subroutine mumps_get_flops_cost(nfront, npiv, nass, keep50, level, cost)
Definition estim_flops.F:74
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
integer min_blocksize_ass
Definition zlr_stats.F:85
integer max_blocksize_ass
Definition zlr_stats.F:85
double precision flop_facto_lr
Definition zlr_stats.F:24
double precision time_decomp_ucfs
Definition zlr_stats.F:70
double precision time_lrana_sepgrouping
Definition zlr_stats.F:77
double precision time_update
Definition zlr_stats.F:49
subroutine upd_flop_frfront_slave(nrow1, ncol1, nass1, keep50, inode)
Definition zlr_stats.F:512
double precision time_update_frfr
Definition zlr_stats.F:54
double precision time_frfronts
Definition zlr_stats.F:67
double precision global_mry_lpro_compr
Definition zlr_stats.F:17
double precision time_update_lrlr2
Definition zlr_stats.F:51
subroutine upd_flop_compress(lr_b, rec_acc, cb_compress, frswap)
Definition zlr_stats.F:154
double precision time_upd_nelim
Definition zlr_stats.F:60
subroutine upd_mry_cb_lrgain(lrb)
Definition zlr_stats.F:442
double precision flop_trsm
Definition zlr_stats.F:24
double precision flop_lrgain
Definition zlr_stats.F:24
double precision time_frtrsm
Definition zlr_stats.F:62
double precision flop_panel
Definition zlr_stats.F:24
double precision time_decomp_maplig1
Definition zlr_stats.F:73
double precision time_decomp_asms2s
Definition zlr_stats.F:74
double precision flop_update_frfr
Definition zlr_stats.F:24
double precision time_lrana_lrgrouping
Definition zlr_stats.F:76
integer max_blocksize_cb
Definition zlr_stats.F:86
double precision time_frswap_compress
Definition zlr_stats.F:57
double precision time_panel
Definition zlr_stats.F:63
integer total_nblocks_ass
Definition zlr_stats.F:84
double precision flop_decompress
Definition zlr_stats.F:38
double precision flop_cb_compress
Definition zlr_stats.F:38
integer cnt_nodes
Definition zlr_stats.F:23
double precision time_update_lrlr1
Definition zlr_stats.F:50
double precision time_cb_compress
Definition zlr_stats.F:58
double precision time_decomp
Definition zlr_stats.F:69
double precision time_decomp_asm1
Definition zlr_stats.F:71
double precision flop_facto_fr
Definition zlr_stats.F:24
double precision mry_lu_lrgain
Definition zlr_stats.F:17
double precision time_decomp_locasm2
Definition zlr_stats.F:72
subroutine stats_compute_flop_slave_type2(nrow1, ncol1, nass1, keep50, inode)
Definition zlr_stats.F:479
double precision time_lrana_gethalo
Definition zlr_stats.F:78
subroutine upd_mry_lu_lrgain(blr_panel, nbblocks)
Definition zlr_stats.F:452
double precision time_compress
Definition zlr_stats.F:55
double precision flop_update_lrlr2
Definition zlr_stats.F:24
double precision time_midblk_compress
Definition zlr_stats.F:56
double precision mry_lu_fr
Definition zlr_stats.F:17
double precision flop_update_lrlr1
Definition zlr_stats.F:24
double precision time_update_frlr
Definition zlr_stats.F:53
double precision avg_blocksize_ass
Definition zlr_stats.F:87
subroutine upd_mry_cb_fr(nrows, ncols, sym)
Definition zlr_stats.F:427
subroutine collect_blocksizes(cut, npartsass, npartscb)
Definition zlr_stats.F:90
double precision time_decomp_asms2m
Definition zlr_stats.F:75
double precision mry_cb_fr
Definition zlr_stats.F:17
double precision mry_cb_lrgain
Definition zlr_stats.F:17
double precision flop_accum_compress
Definition zlr_stats.F:38
subroutine upd_flop_decompress(f, cb)
Definition zlr_stats.F:140
double precision flop_frswap_compress
Definition zlr_stats.F:38
double precision global_mry_ltot_compr
Definition zlr_stats.F:17
double precision time_update_lrlr3
Definition zlr_stats.F:52
double precision max_flop_facto_lr
Definition zlr_stats.F:83
double precision flop_trsm_fr
Definition zlr_stats.F:24
double precision time_fac_sq
Definition zlr_stats.F:66
subroutine upd_flop_update_lrlr3(lrb, niv)
Definition zlr_stats.F:320
double precision total_flop
Definition zlr_stats.F:48
subroutine compute_global_gains(nb_entries_factor, flop_number, nb_entries_factor_withlr, prokg, mpg)
Definition zlr_stats.F:535
double precision avg_flop_facto_lr
Definition zlr_stats.F:81
double precision time_lrtrsm
Definition zlr_stats.F:61
subroutine saveandwrite_gains(local, k489, dkeep, n, icntl36, depth, bcksz, nassmin, nfrontmin, sym, k486, k472, k475, k478, k480, k481, k483, k484, k8110, k849, nbtreenodes, nprocs, mpg, prokg)
Definition zlr_stats.F:578
double precision flop_cb_decompress
Definition zlr_stats.F:38
double precision flop_update_lr
Definition zlr_stats.F:24
subroutine upd_flop_facto_fr(nfront, nass, npiv, sym, niv)
Definition zlr_stats.F:469
double precision flop_update_frlr
Definition zlr_stats.F:24
double precision flop_midblk_compress
Definition zlr_stats.F:38
double precision time_fac_i
Definition zlr_stats.F:64
double precision flop_frfronts
Definition zlr_stats.F:38
double precision time_lrana_gnew
Definition zlr_stats.F:80
double precision time_diagcopy
Definition zlr_stats.F:68
subroutine upd_flop_trsm(lrb, loru)
Definition zlr_stats.F:199
subroutine upd_flop_frfronts(nfront, npiv, nass, sym, niv)
Definition zlr_stats.F:501
subroutine upd_flop_update(lrb1, lrb2, midblk_compress, rank_in, buildq, is_symdiag, lua_activated, rec_acc)
Definition zlr_stats.F:226
double precision time_fac_mq
Definition zlr_stats.F:65
double precision avg_blocksize_cb
Definition zlr_stats.F:87
double precision flop_trsm_lr
Definition zlr_stats.F:24
subroutine upd_mry_lu_fr(nass, ncb, sym, nelim)
Definition zlr_stats.F:410
double precision time_lr_module
Definition zlr_stats.F:59
double precision factor_processed_fraction
Definition zlr_stats.F:46
double precision flop_update_lrlr3
Definition zlr_stats.F:24
integer total_nblocks_cb
Definition zlr_stats.F:84
integer(kind=8) factor_size
Definition zlr_stats.F:47
double precision time_lrana_kway
Definition zlr_stats.F:79
double precision flop_update_fr
Definition zlr_stats.F:24
subroutine init_stats_global(id)
Definition zlr_stats.F:344
integer min_blocksize_cb
Definition zlr_stats.F:86
subroutine upd_flop_root(keep50, nfront, npiv, nprow, npcol, myid)
Definition zlr_stats.F:331
double precision flop_compress
Definition zlr_stats.F:38