OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pcpbdriver.f
Go to the documentation of this file.
1 PROGRAM pcpbdriver
2*
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* November 15, 1997
8*
9* Purpose
10* =======
11*
12* PCPBDRIVER is a test program for the
13* ScaLAPACK Band Cholesky routines corresponding to the options
14* indicated by CPB. This test driver performs an
15* A = L*L**H factorization
16* and solves a linear system with the factors for 1 or more RHS.
17*
18* The program must be driven by a short data file.
19* Here's an example file:
20*'ScaLAPACK, Version 1.2, banded linear systems input file'
21*'PVM.'
22*'' output file name (if any)
23*6 device out
24*'L' define Lower or Upper
25*9 number of problem sizes
26*1 5 17 28 37 121 200 1023 2048 3073 values of N
27*6 number of bandwidths
28*1 2 4 10 31 64 values of BW
29*1 number of NB's
30*-1 3 4 5 values of NB (-1 for automatic choice)
31*1 number of NRHS's (must be 1)
32*8 values of NRHS
33*1 number of NBRHS's (ignored)
34*1 values of NBRHS (ignored)
35*6 number of process grids
36*1 2 3 4 5 7 8 15 26 47 64 values of "Number of Process Columns"
37*3.0 threshold
38*
39* Internal Parameters
40* ===================
41*
42* TOTMEM INTEGER, default = 6200000.
43* TOTMEM is a machine-specific parameter indicating the
44* maximum amount of available memory in bytes.
45* The user should customize TOTMEM to his platform. Remember
46* to leave room in memory for the operating system, the BLACS
47* buffer, etc. For example, on a system with 8 MB of memory
48* per process (e.g., one processor on an Intel iPSC/860), the
49* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
50* code, BLACS buffer, etc). However, for PVM, we usually set
51* TOTMEM = 2000000. Some experimenting with the maximum value
52* of TOTMEM may be required.
53*
54* INTGSZ INTEGER, default = 4 bytes.
55* CPLXSZ INTEGER, default = 8 bytes.
56* INTGSZ and CPLXSZ indicate the length in bytes on the
57* given platform for an integer and a single precision
58* complex.
59* MEM DOUBLE PRECISION array, dimension ( TOTMEM/CPLXSZ )
60* All arrays used by ScaLAPACK routines are allocated from
61* this array and referenced by pointers. The integer IPB,
62* for example, is a pointer to the starting element of MEM for
63* the solution vector(s) B.
64*
65* =====================================================================
66*
67* Code Developer: Andrew J. Cleary, University of Tennessee.
68* Current address: Lawrence Livermore National Labs.
69* This version released: August, 2001.
70*
71* =====================================================================
72*
73* .. Parameters ..
74 INTEGER totmem
75 parameter( totmem = 3000000 )
76 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77 $ lld_, mb_, m_, NB_, n_, rsrc_
78 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
81*
82 REAL zero
83 INTEGER cplxsz, memsiz, ntests
84 COMPLEX padval
85 parameter( cplxsz = 8,
86 $ memsiz = totmem / cplxsz, ntests = 20,
87 $ padval = ( -9923.0e+0, -9923.0e+0 ),
88 $ zero = 0.0e+0 )
89 INTEGER int_one
90 parameter( int_one = 1 )
91* ..
92* .. Local Scalars ..
93 LOGICAL check
94 CHARACTER uplo
95 CHARACTER*6 passed
96 CHARACTER*80 outfile
97 INTEGER bw, bw_num, FILLIN_SIZE, free_ptr, h, hh, i,
98 $ iam, iaseed, ibseed, ictxt, ictxtb, ierr_temp,
99 $ imidpad, info, ipa, ipb, ipostpad, iprepad,
100 $ ipw, ipw_size, ipw_solve, ipw_solve_size,
101 $ ip_driver_w, ip_fillin, j, k, kfail, kpass,
102 $ kskip, ktests, mycol, myrhs_size, myrow, n, nb,
103 $ nbw, ngrids, nmat, nnb, nnbr, nnr, nout, np,
104 $ npcol, nprocs, nprocs_real, nprow, nq, nrhs,
105 $ n_first, n_last, worksiz
106 REAL anorm, sresid, thresh
107 DOUBLE PRECISION nops, nops2, tmflops, tmflops2
108* ..
109* .. Local Arrays ..
110 INTEGER bwval( ntests ), desca( 7 ), desca2d( dlen_ ),
111 $ descb( 7 ), descb2d( dlen_ ), ierr( 1 ),
112 $ nbrval( ntests ), NBVAL( ntests ),
113 $ nrval( ntests ), nval( ntests ),
114 $ pval( ntests ), qval( ntests )
115 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
116 COMPLEX mem( memsiz )
117* ..
118* .. External Subroutines ..
119 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
121 $ blacs_pinfo, descinit, igsum2d, pcbmatgen,
125* ..
126* .. External Functions ..
127 INTEGER numroc
128 LOGICAL lsame
129 REAL pclange
130 EXTERNAL lsame, numroc, pclange
131* ..
132* .. Intrinsic Functions ..
133 INTRINSIC dble, max, min, mod
134* ..
135* .. Data Statements ..
136 DATA kfail, kpass, kskip, ktests / 4*0 /
137* ..
138*
139*
140*
141* .. Executable Statements ..
142*
143* Get starting information
144*
145 CALL blacs_pinfo( iam, nprocs )
146 iaseed = 100
147 ibseed = 200
148*
149 CALL pcpbinfo( outfile, nout, uplo, nmat, nval, ntests, nbw,
150 $ bwval, ntests, nnb, nbval, ntests, nnr, nrval,
151 $ ntests, nnbr, nbrval, ntests, ngrids, pval, ntests,
152 $ qval, ntests, thresh, mem, iam, nprocs )
153*
154 check = ( thresh.GE.0.0e+0 )
155*
156* Print headings
157*
158 IF( iam.EQ.0 ) THEN
159 WRITE( nout, fmt = * )
160 WRITE( nout, fmt = 9995 )
161 WRITE( nout, fmt = 9994 )
162 WRITE( nout, fmt = * )
163 END IF
164*
165* Loop over different process grids
166*
167 DO 60 i = 1, ngrids
168*
169 nprow = pval( i )
170 npcol = qval( i )
171*
172* Make sure grid information is correct
173*
174 ierr( 1 ) = 0
175 IF( nprow.LT.1 ) THEN
176 IF( iam.EQ.0 )
177 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
178 ierr( 1 ) = 1
179 ELSE IF( npcol.LT.1 ) THEN
180 IF( iam.EQ.0 )
181 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', NPCOL
182 IERR( 1 ) = 1
183.GT. ELSE IF( NPROW*NPCOLNPROCS ) THEN
184.EQ. IF( IAM0 )
185 $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS
186 IERR( 1 ) = 1
187 END IF
188*
189.GT. IF( IERR( 1 )0 ) THEN
190.EQ. IF( IAM0 )
191 $ WRITE( NOUT, FMT = 9997 ) 'grid'
192 KSKIP = KSKIP + 1
193 GO TO 50
194 END IF
195*
196* Define process grid
197*
198 CALL BLACS_GET( -1, 0, ICTXT )
199 CALL BLACS_GRIDINIT( ICTXT, 'row-major', NPROW, NPCOL )
200*
201*
202* Define transpose process grid
203*
204 CALL BLACS_GET( -1, 0, ICTXTB )
205 CALL BLACS_GRIDINIT( ICTXTB, 'column-major', NPCOL, NPROW )
206*
207* Go to bottom of process grid loop if this case doesn't use my
208* process
209*
210 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
211*
212.LT..OR..LT. IF( MYROW0 MYCOL0 ) THEN
213 GO TO 50
214 ENDIF
215*
216 DO 40 J = 1, NMAT
217*
218 IERR( 1 ) = 0
219*
220 N = NVAL( J )
221*
222* Make sure matrix information is correct
223*
224.LT. IF( N1 ) THEN
225.EQ. IF( IAM0 )
226 $ WRITE( NOUT, FMT = 9999 ) 'matrix', 'n', N
227 IERR( 1 ) = 1
228 END IF
229*
230* Check all processes for an error
231*
232 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1,
233 $ -1, 0 )
234*
235.GT. IF( IERR( 1 )0 ) THEN
236.EQ. IF( IAM0 )
237 $ WRITE( NOUT, FMT = 9997 ) 'size'
238 KSKIP = KSKIP + 1
239 GO TO 40
240 END IF
241*
242*
243 DO 45 BW_NUM = 1, NBW
244*
245 IERR( 1 ) = 0
246*
247 BW = BWVAL( BW_NUM )
248.LT. IF( BW0 ) THEN
249.EQ. IF( IAM0 )
250 $ WRITE( NOUT, FMT = 9999 ) 'band', 'bw', BW
251 IERR( 1 ) = 1
252 END IF
253*
254.GT. IF( BWN-1 ) THEN
255 IERR( 1 ) = 1
256 END IF
257*
258* Check all processes for an error
259*
260 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1,
261 $ -1, 0 )
262*
263.GT. IF( IERR( 1 )0 ) THEN
264 KSKIP = KSKIP + 1
265 GO TO 45
266 END IF
267*
268 DO 30 K = 1, NNB
269*
270 IERR( 1 ) = 0
271*
272 NB = NBVAL( K )
273.LT. IF( NB0 ) THEN
274 NB =( (N-(NPCOL-1)*BW-1)/NPCOL + 1 )
275 $ + BW
276 NB = MAX( NB, 2*BW )
277 NB = MIN( N, NB )
278 END IF
279*
280* Make sure NB is legal
281*
282 IERR( 1 ) = 0
283.LT. IF( NBMIN( 2*BW, N ) ) THEN
284 IERR( 1 ) = 1
285 ENDIF
286*
287* Check all processes for an error
288*
289 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1,
290 $ -1, 0 )
291*
292.GT. IF( IERR( 1 )0 ) THEN
293 KSKIP = KSKIP + 1
294 GO TO 30
295 END IF
296*
297* Padding constants
298*
299 NP = NUMROC( (BW+1), (BW+1),
300 $ MYROW, 0, NPROW )
301 NQ = NUMROC( N, NB, MYCOL, 0, NPCOL )
302*
303 IF( CHECK ) THEN
304 IPREPAD = ((BW+1)+10)
305 IMIDPAD = 10
306 IPOSTPAD = ((BW+1)+10)
307 ELSE
308 IPREPAD = 0
309 IMIDPAD = 0
310 IPOSTPAD = 0
311 END IF
312*
313* Initialize the array descriptor for the matrix A
314*
315 CALL DESCINIT( DESCA2D, (BW+1), N,
316 $ (BW+1), NB, 0, 0,
317 $ ICTXT,((BW+1)+10), IERR( 1 ) )
318*
319* Convert this to 1D descriptor
320*
321 DESCA( 1 ) = 501
322 DESCA( 3 ) = N
323 DESCA( 4 ) = NB
324 DESCA( 5 ) = 0
325 DESCA( 2 ) = ICTXT
326 DESCA( 6 ) = ((BW+1)+10)
327 DESCA( 7 ) = 0
328*
329 IERR_TEMP = IERR( 1 )
330 IERR( 1 ) = 0
331 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP )
332*
333* Check all processes for an error
334*
335 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1, -1, 0 )
336*
337.LT. IF( IERR( 1 )0 ) THEN
338.EQ. IF( IAM0 )
339 $ WRITE( NOUT, FMT = 9997 ) 'descriptor'
340 KSKIP = KSKIP + 1
341 GO TO 30
342 END IF
343*
344* Assign pointers into MEM for SCALAPACK arrays, A is
345* allocated starting at position MEM( IPREPAD+1 )
346*
347 FREE_PTR = 1
348 IPB = 0
349*
350* Save room for prepadding
351 FREE_PTR = FREE_PTR + IPREPAD
352*
353 IPA = FREE_PTR
354 FREE_PTR = FREE_PTR + DESCA2D( LLD_ )*
355 $ DESCA2D( NB_ )
356 $ + IPOSTPAD
357*
358* Add memory for fillin
359* Fillin space needs to store:
360* Fillin spike:
361* Contribution to previous proc's diagonal block of
362* reduced system:
363* Off-diagonal block of reduced system:
364* Diagonal block of reduced system:
365*
366 FILLIN_SIZE =
367 $ (NB+2*BW)*BW
368*
369* Claim memory for fillin
370*
371 FREE_PTR = FREE_PTR + IPREPAD
372 IP_FILLIN = FREE_PTR
373 FREE_PTR = FREE_PTR + FILLIN_SIZE
374*
375* Workspace needed by computational routines:
376*
377 IPW_SIZE = 0
378*
379* factorization:
380*
381 IPW_SIZE = BW*BW
382*
383* Claim memory for IPW
384*
385 IPW = FREE_PTR
386 FREE_PTR = FREE_PTR + IPW_SIZE
387*
388* Check for adequate memory for problem size
389*
390 IERR( 1 ) = 0
391.GT. IF( FREE_PTRMEMSIZ ) THEN
392.EQ. IF( IAM0 )
393 $ WRITE( NOUT, FMT = 9996 )
394 $ 'divide and conquer factorization',
395 $ (FREE_PTR )*CPLXSZ
396 IERR( 1 ) = 1
397 END IF
398*
399* Check all processes for an error
400*
401 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR,
402 $ 1, -1, 0 )
403*
404.GT. IF( IERR( 1 )0 ) THEN
405.EQ. IF( IAM0 )
406 $ WRITE( NOUT, FMT = 9997 ) 'memory'
407 KSKIP = KSKIP + 1
408 GO TO 30
409 END IF
410*
411* Worksize needed for LAPRNT
412 WORKSIZ = MAX( ((BW+1)+10), NB )
413*
414 IF( CHECK ) THEN
415*
416* Calculate the amount of workspace required by
417* the checking routines.
418*
419* PCLANGE
420 WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) )
421*
422* PCPBLASCHK
423 WORKSIZ = MAX( WORKSIZ,
424 $ MAX(5,MAX(BW*(BW+2),NB))+2*NB )
425 END IF
426*
427 FREE_PTR = FREE_PTR + IPREPAD
428 IP_DRIVER_W = FREE_PTR
429 FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD
430*
431*
432* Check for adequate memory for problem size
433*
434 IERR( 1 ) = 0
435.GT. IF( FREE_PTRMEMSIZ ) THEN
436.EQ. IF( IAM0 )
437 $ WRITE( NOUT, FMT = 9996 ) 'factorization',
438 $ ( FREE_PTR )*CPLXSZ
439 IERR( 1 ) = 1
440 END IF
441*
442* Check all processes for an error
443*
444 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR,
445 $ 1, -1, 0 )
446*
447.GT. IF( IERR( 1 )0 ) THEN
448.EQ. IF( IAM0 )
449 $ WRITE( NOUT, FMT = 9997 ) 'memory'
450 KSKIP = KSKIP + 1
451 GO TO 30
452 END IF
453*
454 CALL PCBMATGEN( ICTXT, UPLO, 'b', BW, BW, N, (BW+1), NB,
455 $ MEM( IPA ), ((BW+1)+10), 0, 0, IASEED,
456 $ MYROW, MYCOL, NPROW, NPCOL )
457*
458 CALL PCFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ),
459 $ ((BW+1)+10), IPREPAD, IPOSTPAD,
460 $ PADVAL )
461*
462 CALL PCFILLPAD( ICTXT, WORKSIZ, 1,
463 $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ,
464 $ IPREPAD, IPOSTPAD, PADVAL )
465*
466* Calculate norm of A for residual error-checking
467*
468 IF( CHECK ) THEN
469*
470 ANORM = PCLANGE( '1', (BW+1),
471 $ N, MEM( IPA ), 1, 1,
472 $ DESCA2D, MEM( IP_DRIVER_W ) )
473 CALL PCCHEKPAD( ICTXT, 'pclange', NP, NQ,
474 $ MEM( IPA-IPREPAD ), ((BW+1)+10),
475 $ IPREPAD, IPOSTPAD, PADVAL )
476 CALL PCCHEKPAD( ICTXT, 'pclange',
477 $ WORKSIZ, 1,
478 $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ,
479 $ IPREPAD, IPOSTPAD, PADVAL )
480 END IF
481*
482*
483 CALL SLBOOT()
484 CALL BLACS_BARRIER( ICTXT, 'all' )
485*
486* Perform factorization
487*
488 CALL SLTIMER( 1 )
489*
490 CALL PCPBTRF( UPLO, N, BW, MEM( IPA ), 1, DESCA,
491 $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ),
492 $ IPW_SIZE, INFO )
493*
494 CALL SLTIMER( 1 )
495*
496.NE. IF( INFO0 ) THEN
497.EQ. IF( IAM0 ) THEN
498 WRITE( NOUT, FMT = * ) 'pcpbtrf info=', INFO
499 ENDIF
500 KFAIL = KFAIL + 1
501 GO TO 30
502 END IF
503*
504 IF( CHECK ) THEN
505*
506* Check for memory overwrite in factorization
507*
508 CALL PCCHEKPAD( ICTXT, 'pcpbtrf', NP,
509 $ NQ, MEM( IPA-IPREPAD ), ((BW+1)+10),
510 $ IPREPAD, IPOSTPAD, PADVAL )
511 END IF
512*
513*
514* Loop over the different values for NRHS
515*
516 DO 20 HH = 1, NNR
517*
518 IERR( 1 ) = 0
519*
520 NRHS = NRVAL( HH )
521*
522* Initialize Array Descriptor for rhs
523*
524 CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0,
525 $ ICTXTB, NB+10, IERR( 1 ) )
526*
527* Convert this to 1D descriptor
528*
529 DESCB( 1 ) = 502
530 DESCB( 3 ) = N
531 DESCB( 4 ) = NB
532 DESCB( 5 ) = 0
533 DESCB( 2 ) = ICTXT
534 DESCB( 6 ) = DESCB2D( LLD_ )
535 DESCB( 7 ) = 0
536*
537* reset free_ptr to reuse space for right hand sides
538*
539.GT. IF( IPB 0 ) THEN
540 FREE_PTR = IPB
541 ENDIF
542*
543 FREE_PTR = FREE_PTR + IPREPAD
544 IPB = FREE_PTR
545 FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ )
546 $ + IPOSTPAD
547*
548* Allocate workspace for workspace in TRS routine:
549*
550 IPW_SOLVE_SIZE = (BW*NRHS)
551*
552 IPW_SOLVE = FREE_PTR
553 FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE
554*
555 IERR( 1 ) = 0
556.GT. IF( FREE_PTRMEMSIZ ) THEN
557.EQ. IF( IAM0 )
558 $ WRITE( NOUT, FMT = 9996 )'solve',
559 $ ( FREE_PTR )*CPLXSZ
560 IERR( 1 ) = 1
561 END IF
562*
563* Check all processes for an error
564*
565 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1,
566 $ IERR, 1, -1, 0 )
567*
568.GT. IF( IERR( 1 )0 ) THEN
569.EQ. IF( IAM0 )
570 $ WRITE( NOUT, FMT = 9997 ) 'memory'
571 KSKIP = KSKIP + 1
572 GO TO 15
573 END IF
574*
575 MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL )
576*
577* Generate RHS
578*
579 CALL PCMATGEN(ICTXTB, 'no', 'no',
580 $ DESCB2D( M_ ), DESCB2D( N_ ),
581 $ DESCB2D( MB_ ), DESCB2D( NB_ ),
582 $ MEM( IPB ),
583 $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ),
584 $ DESCB2D( CSRC_ ),
585 $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL,
586 $ MYROW, NPCOL, NPROW )
587*
588 IF( CHECK ) THEN
589 CALL PCFILLPAD( ICTXTB, NB, NRHS,
590 $ MEM( IPB-IPREPAD ),
591 $ DESCB2D( LLD_ ),
592 $ IPREPAD, IPOSTPAD,
593 $ PADVAL )
594 CALL PCFILLPAD( ICTXT, WORKSIZ, 1,
595 $ MEM( IP_DRIVER_W-IPREPAD ),
596 $ WORKSIZ, IPREPAD,
597 $ IPOSTPAD, PADVAL )
598 END IF
599*
600*
601 CALL BLACS_BARRIER( ICTXT, 'all')
602 CALL SLTIMER( 2 )
603*
604* Solve linear system via factorization
605*
606 CALL PCPBTRS( UPLO, N, BW, NRHS, MEM( IPA ), 1,
607 $ DESCA, MEM( IPB ), 1, DESCB,
608 $ MEM( IP_FILLIN ), FILLIN_SIZE,
609 $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE,
610 $ INFO )
611*
612 CALL SLTIMER( 2 )
613*
614.NE. IF( INFO0 ) THEN
615.EQ. IF( IAM0 )
616 $ WRITE( NOUT, FMT = * ) 'pcpbtrs info=', INFO
617 KFAIL = KFAIL + 1
618 PASSED = 'failed'
619 GO TO 20
620 END IF
621*
622 IF( CHECK ) THEN
623*
624* check for memory overwrite
625*
626 CALL PCCHEKPAD( ICTXT, 'pcpbtrs-work',
627 $ WORKSIZ, 1,
628 $ MEM( IP_DRIVER_W-IPREPAD ),
629 $ WORKSIZ, IPREPAD,
630 $ IPOSTPAD, PADVAL )
631*
632* check the solution to rhs
633*
634 SRESID = ZERO
635*
636 CALL PCPBLASCHK( 'h', UPLO, N, BW, BW, NRHS,
637 $ MEM( IPB ), 1, 1, DESCB2D,
638 $ IASEED, MEM( IPA ), 1, 1, DESCA2D,
639 $ IBSEED, ANORM, SRESID,
640 $ MEM( IP_DRIVER_W ), WORKSIZ )
641*
642.EQ. IF( IAM0 ) THEN
643.GT. IF( SRESIDTHRESH )
644 $ WRITE( NOUT, FMT = 9985 ) SRESID
645 END IF
646*
647* The second test is a NaN trap
648*
649.LE..AND. IF( ( SRESIDTHRESH )
650.EQ. $ ( (SRESID-SRESID)0.0E+0 ) ) THEN
651 KPASS = KPASS + 1
652 PASSED = 'passed'
653 ELSE
654 KFAIL = KFAIL + 1
655 PASSED = 'failed'
656 END IF
657*
658 END IF
659*
660 15 CONTINUE
661* Skipped tests jump to here to print out "SKIPPED"
662*
663* Gather maximum of all CPU and WALL clock timings
664*
665 CALL SLCOMBINE( ICTXT, 'all', '>', 'w', 2, 1,
666 $ WTIME )
667 CALL SLCOMBINE( ICTXT, 'all', '>', 'c', 2, 1,
668 $ CTIME )
669*
670* Print results
671*
672.EQ..AND..EQ. IF( MYROW0 MYCOL0 ) THEN
673*
674 NOPS = 0
675 NOPS2 = 0
676*
677 N_FIRST = NB
678 NPROCS_REAL = ( N-1 )/NB + 1
679 N_LAST = MOD( N-1, NB ) + 1
680*
681*
682 NOPS = NOPS + DBLE(BW)*( -2.D0 / 3.D0+DBLE(BW)*
683 $ ( -1.D0+DBLE(BW)*( -1.D0 / 3.D0 ) ) ) +
684 $ DBLE(N)*( 1.D0+DBLE(BW)*( 3.D0 /
685 $ 2.D0+DBLE(BW)*( 1.D0 / 2.D0 ) ) )
686 NOPS = NOPS + DBLE(BW)*( -1.D0 / 6.D0+DBLE(BW)
687 $ *( -1.D0 /2.D0+DBLE(BW)
688 $ *( -1.D0 / 3.D0 ) ) ) +
689 $ DBLE(N)*( DBLE(BW) /
690 $ 2.D0*( 1.D0+DBLE(BW) ) )
691*
692 NOPS = NOPS +
693 $ DBLE(NRHS)*( ( 2*DBLE(N)-DBLE(BW) )*
694 $ ( DBLE(BW)+1.D0 ) )+ DBLE(NRHS)*
695 $ ( DBLE(BW)*( 2*DBLE(N)-
696 $ ( DBLE(BW)+1.D0 ) ) )
697*
698*
699* Second calc to represent actual hardware speed
700*
701* NB bw^2 flops for LLt factorization in 1st proc
702*
703 NOPS2 = ( (DBLE(N_FIRST))* DBLE(BW)**2 )
704*
705.GT. IF ( NPROCS_REAL 1) THEN
706* 4 NB bw^2 flops for LLt factorization and
707* spike calc in last processor
708*
709 NOPS2 = NOPS2 +
710 $ 4*( (DBLE(N_LAST)*DBLE(BW)**2) )
711 ENDIF
712*
713.GT. IF ( NPROCS_REAL 2) THEN
714* 4 NB bw^2 flops for LLt factorization and
715* spike calc in other processors
716*
717 NOPS2 = NOPS2 + (NPROCS_REAL-2)*
718 $ 4*( (DBLE(NB)*DBLE(BW)**2) )
719 ENDIF
720*
721* Reduced system
722*
723 NOPS2 = NOPS2 +
724 $ ( NPROCS_REAL-1 ) * ( BW*BW*BW/3 )
725.GT. IF( NPROCS_REAL 1 ) THEN
726 NOPS2 = NOPS2 +
727 $ ( NPROCS_REAL-2 ) * ( 2 * BW*BW*BW )
728 ENDIF
729*
730*
731* nrhs * 4 n_first*bw flops for LLt solve in proc 1.
732*
733 NOPS2 = NOPS2 +
734 $ ( 4.0D+0*(DBLE(N_FIRST)*DBLE(BW))*DBLE(NRHS) )
735*
736.GT. IF ( NPROCS_REAL 1 ) THEN
737*
738* 2*nrhs*4 n_last*bw flops for LLt solve in last.
739*
740 NOPS2 = NOPS2 +
741 $ 2*( 4.0D+0*(DBLE(N_LAST)*DBLE(BW))*DBLE(NRHS) )
742 ENDIF
743*
744.GT. IF ( NPROCS_REAL 2 ) THEN
745*
746* 2 * nrhs * 4 NB*bw flops for LLt solve in others.
747*
748 NOPS2 = NOPS2 +
749 $ ( NPROCS_REAL-2)*2*
750 $ ( 4.0D+0*(DBLE(NB)*DBLE(BW))*DBLE(NRHS) )
751 ENDIF
752*
753* Reduced system
754*
755 NOPS2 = NOPS2 +
756 $ NRHS*( NPROCS_REAL-1 ) * ( BW*BW )
757.GT. IF( NPROCS_REAL 1 ) THEN
758 NOPS2 = NOPS2 +
759 $ NRHS*( NPROCS_REAL-2 ) * ( 3 * BW*BW )
760 ENDIF
761*
762*
763* Multiply by 4 to get complex count
764*
765 NOPS2 = NOPS2 * DBLE(4)
766*
767* Calculate total megaflops - factorization and/or
768* solve -- for WALL and CPU time, and print output
769*
770* Print WALL time if machine supports it
771*
772.GT. IF( WTIME( 1 ) + WTIME( 2 ) 0.0D+0 ) THEN
773 TMFLOPS = NOPS /
774 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
775 ELSE
776 TMFLOPS = 0.0D+0
777 END IF
778*
779.GT. IF( WTIME( 1 )+WTIME( 2 )0.0D+0 ) THEN
780 TMFLOPS2 = NOPS2 /
781 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
782 ELSE
783 TMFLOPS2 = 0.0D+0
784 END IF
785*
786.GE. IF( WTIME( 2 )0.0D+0 )
787 $ WRITE( NOUT, FMT = 9993 ) 'wall', UPLO,
788 $ N,
789 $ BW,
790 $ NB, NRHS, NPROW, NPCOL,
791 $ WTIME( 1 ), WTIME( 2 ), TMFLOPS,
792 $ TMFLOPS2, PASSED
793*
794* Print CPU time if machine supports it
795*
796.GT. IF( CTIME( 1 )+CTIME( 2 )0.0D+0 ) THEN
797 TMFLOPS = NOPS /
798 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
799 ELSE
800 TMFLOPS = 0.0D+0
801 END IF
802*
803.GT. IF( CTIME( 1 )+CTIME( 2 )0.0D+0 ) THEN
804 TMFLOPS2 = NOPS2 /
805 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
806 ELSE
807 TMFLOPS2 = 0.0D+0
808 END IF
809*
810.GE. IF( CTIME( 2 )0.0D+0 )
811 $ WRITE( NOUT, FMT = 9993 ) 'cpu ', UPLO,
812 $ N,
813 $ BW,
814 $ NB, NRHS, NPROW, NPCOL,
815 $ CTIME( 1 ), CTIME( 2 ), TMFLOPS,
816 $ TMFLOPS2, PASSED
817*
818 END IF
819 20 CONTINUE
820*
821*
822 30 CONTINUE
823* NNB loop
824*
825 45 CONTINUE
826* BW[] loop
827*
828 40 CONTINUE
829* NMAT loop
830*
831 CALL BLACS_GRIDEXIT( ICTXT )
832 CALL BLACS_GRIDEXIT( ICTXTB )
833*
834 50 CONTINUE
835* NGRIDS DROPOUT
836 60 CONTINUE
837* NGRIDS loop
838*
839* Print ending messages and close output file
840*
841.EQ. IF( IAM0 ) THEN
842 KTESTS = KPASS + KFAIL + KSKIP
843 WRITE( NOUT, FMT = * )
844 WRITE( NOUT, FMT = 9992 ) KTESTS
845 IF( CHECK ) THEN
846 WRITE( NOUT, FMT = 9991 ) KPASS
847 WRITE( NOUT, FMT = 9989 ) KFAIL
848 ELSE
849 WRITE( NOUT, FMT = 9990 ) KPASS
850 END IF
851 WRITE( NOUT, FMT = 9988 ) KSKIP
852 WRITE( NOUT, FMT = * )
853 WRITE( NOUT, FMT = * )
854 WRITE( NOUT, FMT = 9987 )
855.NE..AND..NE. IF( NOUT6 NOUT0 )
856 $ CLOSE ( NOUT )
857 END IF
858*
859 CALL BLACS_EXIT( 0 )
860*
861 9999 FORMAT( 'illegal ', A6, ': ', A5, ' = ', I3,
862 $ '; it should be at least 1' )
863 9998 FORMAT( 'illegal grid: nprow*npcol = ', I4, '. it can be at most',
864 $ I4 )
865 9997 FORMAT( 'bad ', A6, ' parameters: going on to next test case.' )
866 9996 FORMAT( 'unable to perform ', A, ': need totmem of at least',
867 $ I11 )
868 9995 FORMAT( 'time ul n bw nb nrhs p q l*u time ',
869 $ 'slv time mflops mflop2 check' )
870 9994 FORMAT( '---- -- ------ --- ---- ----- -- ---- -------- ',
871 $ '-------- ------ ------ ------' )
872 9993 FORMAT( A4, 2X, A1, 1X, I6, 1X, I3, 1X, I4, 1X,
873 $ I5, 1X, I2, 1X,
874 $ I4, 1X, F8.3, F9.4, F9.2, F9.2, 1X, A6 )
875 9992 FORMAT( 'finished ', I6, ' tests, with the following results:' )
876 9991 FORMAT( I5, ' tests completed and passed residual checks.' )
877 9990 FORMAT( I5, ' tests completed without checking.' )
878 9989 FORMAT( I5, ' tests completed and failed residual checks.' )
879 9988 FORMAT( I5, ' tests skipped because of illegal input values.' )
880 9987 FORMAT( 'END OF TESTS.' )
881 9986 FORMAT( '||A - ', A4, '|| / (||A|| * N * eps) = ', G25.7 )
882 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 )
883*
884 STOP
885*
886* End of PCPBTRS_DRIVER
887*
888 END
889*
subroutine pcmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pcmatgen.f:4
end diagonal values have been computed in the(sparse) matrix id.SOL
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine blacs_gridinit(cntxt, c, nprow, npcol)
Definition mpi.f:745
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition mpi.f:777
real function pclange(norm, m, n, a, ia, ja, desca, work)
Definition mpi.f:1275
subroutine blacs_gridexit(cntxt)
Definition mpi.f:762
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine pcbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
Definition pcbmatgen.f:5
subroutine pcchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pcchekpad.f:3
subroutine pcfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pcfillpad.f:2
program pcpbdriver
Definition pcpbdriver.f:1
subroutine pcpbinfo(summry, nout, uplo, nmat, nval, ldnval, nbw, bwval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pcpbinfo.f:6
subroutine pcpblaschk(symm, uplo, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
Definition pcpblaschk.f:4
subroutine pcpbtrf(uplo, n, bw, a, ja, desca, af, laf, work, lwork, info)
Definition pcpbtrf.f:3
subroutine pcpbtrs(uplo, n, bw, nrhs, a, ja, desca, b, ib, descb, af, laf, work, lwork, info)
Definition pcpbtrs.f:3
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267