OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pzludriver.f
Go to the documentation of this file.
1 PROGRAM pzludriver
2*
3* -- ScaLAPACK testing driver (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 1, 1997
7*
8* Purpose
9* ========
10*
11* PZLUDRIVER is the main test program for the COMPLEX*16
12* SCALAPACK LU routines. This test driver performs an LU factorization
13* and solve. If the input matrix is non-square, only the factorization
14* is performed. Condition estimation and iterative refinement are
15* optionally performed.
16*
17* The program must be driven by a short data file. An annotated
18* example of a data file can be obtained by deleting the first 3
19* characters from the following 18 lines:
20* 'SCALAPACK, Version 2.0, LU factorization input file'
21* 'Intel iPSC/860 hypercube, gamma model.'
22* 'LU.out' output file name (if any)
23* 6 device out
24* 1 number of problems sizes
25* 31 201 values of M
26* 31 201 values of N
27* 1 number of NB's
28* 2 values of NB
29* 1 number of NRHS's
30* 1 values of NRHS
31* 1 number of NBRHS's
32* 1 values of NBRHS
33* 1 number of process grids (ordered pairs of P & Q)
34* 2 1 4 2 3 8 values of P
35* 2 4 1 3 2 1 values of Q
36* 1.0 threshold
37* T (T or F) Test Cond. Est. and Iter. Ref. Routines
38*
39*
40* Internal Parameters
41* ===================
42*
43* TOTMEM INTEGER, default = 2000000
44* TOTMEM is a machine-specific parameter indicating the
45* maximum amount of available memory in bytes.
46* The user should customize TOTMEM to his platform. Remember
47* to leave room in memory for the operating system, the BLACS
48* buffer, etc. For example, on a system with 8 MB of memory
49* per process (e.g., one processor on an Intel iPSC/860), the
50* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
51* code, BLACS buffer, etc). However, for PVM, we usually set
52* TOTMEM = 2000000. Some experimenting with the maximum value
53* of TOTMEM may be required.
54*
55* INTGSZ INTEGER, default = 4 bytes.
56* ZPLXSZ INTEGER, default = 16 bytes.
57* INTGSZ and ZPLXSZ indicate the length in bytes on the
58* given platform for an integer and a double precision
59* complex.
60* MEM COMPLEX*16 array, dimension ( TOTMEM / ZPLXSZ )
61*
62* All arrays used by SCALAPACK routines are allocated from
63* this array and referenced by pointers. The integer IPA,
64* for example, is a pointer to the starting element of MEM for
65* the matrix A.
66*
67* =====================================================================
68*
69* .. Parameters ..
70 INTEGER block_cyclic_2d, csrc_, ctxt_, DLEN_, dtype_,
71 $ lld_, mb_, m_, nb_, n_, rsrc_
72 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
73 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
74 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
75 INTEGER intgsz, dblesz, memsiz, ntests, totmem, zplxsz
76 DOUBLE PRECISION zero
77 COMPLEX*16 padval
78 parameter( intgsz = 4, dblesz = 8, totmem = 8000000,
79 $ zplxsz = 16, memsiz = totmem / zplxsz,
80 $ ntests = 20,
81 $ padval = ( -9923.0d+0, -9923.0d+0 ),
82 $ zero = 0.0d+0 )
83* ..
84* .. Local Scalars ..
85 LOGICAL check, est
86 CHARACTER*6 passed
87 CHARACTER*80 outfile
88 INTEGER hh, i, iam, iaseed, ibseed, ictxt, imidpad,
89 $ info, ipa, ipa0, ipb, ipb0, ipberr, ipferr,
90 $ ipostpad, ippiv, iprepad, ipw, ipw2, j, k,
91 $ kfail, kk, kpass, kskip, ktests, lcm, lcmq,
92 $ lipiv, lrwork, lwork, lw2, m, maxmn,
93 $ minmn, mp, mycol, myrhs, myrow, n, nb, nbrhs,
94 $ ngrids, nmat, nnb, NNBR, nnr, NOUT, np, npcol,
95 $ nprocs, NPROW, nq, nrhs, worksiz
96 REAL thresh
97 DOUBLE PRECISION anorm, anorm1, fresid, NOPS, rcond,
98 $ sresid, sresid2, tmflops
99* ..
100* .. Local Arrays ..
101 INTEGER desca( dlen_ ), descb( dlen_ ), ierr( 1 ),
102 $ mval( ntests ), nbrval( ntests ),
103 $ NBVAL( ntests ), nrval( ntests ),
104 $ nval( ntests ), pval( ntests ),
105 $ qval( ntests )
106 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
107 COMPLEX*16 mem( memsiz )
108* ..
109* .. External Subroutines ..
110 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
112 $ blacs_pinfo, descinit, igsum2d, pzchekpad,
117* ..
118* .. External Functions ..
119 INTEGER iceil, ilcm, numroc
120 DOUBLE PRECISION pzlange
121 EXTERNAL iceil, ilcm, numroc, pzlange
122* ..
123* .. Intrinsic Functions ..
124 INTRINSIC dble, max, min
125* ..
126* .. Data Statements ..
127 DATA kfail, kpass, kskip, ktests / 4*0 /
128* ..
129* .. Executable Statements ..
130*
131* Get starting information
132*
133 CALL blacs_pinfo( iam, nprocs )
134 iaseed = 100
135 ibseed = 200
136 CALL pzluinfo( outfile, nout, nmat, mval, nval, ntests, nnb,
137 $ nbval, ntests, nnr, nrval, ntests, nnbr, nbrval,
138 $ ntests, ngrids, pval, ntests, qval, ntests, thresh,
139 $ est, mem, iam, nprocs )
140 check = ( thresh.GE.0.0e+0 )
141*
142* Print headings
143*
144 IF( iam.EQ.0 ) THEN
145 WRITE( nout, fmt = * )
146 WRITE( nout, fmt = 9995 )
147 WRITE( nout, fmt = 9994 )
148 WRITE( nout, fmt = * )
149 END IF
150*
151* Loop over different process grids
152*
153 DO 50 i = 1, ngrids
154*
155 nprow = pval( i )
156 npcol = qval( i )
157*
158* Make sure grid information is correct
159*
160 ierr( 1 ) = 0
161 IF( nprow.LT.1 ) THEN
162 IF( iam.EQ.0 )
163 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
164 ierr( 1 ) = 1
165 ELSE IF( npcol.LT.1 ) THEN
166 IF( iam.EQ.0 )
167 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
168 ierr( 1 ) = 1
169 ELSE IF( nprow*npcol.GT.nprocs ) THEN
170 IF( iam.EQ.0 )
171 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
172 ierr( 1 ) = 1
173 END IF
174*
175 IF( ierr( 1 ).GT.0 ) THEN
176 IF( iam.EQ.0 )
177 $ WRITE( nout, fmt = 9997 ) 'grid'
178 kskip = kskip + 1
179 GO TO 50
180 END IF
181*
182* Define process grid
183*
184 CALL blacs_get( -1, 0, ictxt )
185 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
186 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
187*
188* Go to bottom of process grid loop if this case doesn't use my
189* process
190*
191 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
192 $ GO TO 50
193*
194 DO 40 j = 1, nmat
195*
196 m = mval( j )
197 n = nval( j )
198*
199* Make sure matrix information is correct
200*
201 ierr( 1 ) = 0
202 IF( m.LT.1 ) THEN
203 IF( iam.EQ.0 )
204 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
205 ierr( 1 ) = 1
206 ELSE IF( n.LT.1 ) THEN
207 IF( iam.EQ.0 )
208 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
209 ierr( 1 ) = 1
210 END IF
211*
212* Check all processes for an error
213*
214 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
215*
216 IF( ierr( 1 ).GT.0 ) THEN
217 IF( iam.EQ.0 )
218 $ WRITE( nout, fmt = 9997 ) 'matrix'
219 kskip = kskip + 1
220 GO TO 40
221 END IF
222*
223 DO 30 k = 1, nnb
224*
225 nb = nbval( k )
226*
227* Make sure nb is legal
228*
229 ierr( 1 ) = 0
230 IF( nb.LT.1 ) THEN
231 ierr( 1 ) = 1
232 IF( iam.EQ.0 )
233 $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
234 END IF
235*
236* Check all processes for an error
237*
238 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
239*
240 IF( ierr( 1 ).GT.0 ) THEN
241 IF( iam.EQ.0 )
242 $ WRITE( nout, fmt = 9997 ) 'NB'
243 kskip = kskip + 1
244 GO TO 30
245 END IF
246*
247* Padding constants
248*
249 mp = numroc( m, nb, myrow, 0, nprow )
250 np = numroc( n, nb, myrow, 0, nprow )
251 nq = numroc( n, nb, mycol, 0, npcol )
252 IF( check ) THEN
253 iprepad = max( nb, mp )
254 imidpad = nb
255 ipostpad = max( nb, nq )
256 ELSE
257 iprepad = 0
258 imidpad = 0
259 ipostpad = 0
260 END IF
261*
262* Initialize the array descriptor for the matrix A
263*
264 CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
265 $ max( 1, mp )+imidpad, ierr( 1 ) )
266*
267* Check all processes for an error
268*
269 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
270*
271 IF( ierr( 1 ).LT.0 ) THEN
272 IF( iam.EQ.0 )
273 $ WRITE( nout, fmt = 9997 ) 'descriptor'
274 kskip = kskip + 1
275 GO TO 30
276 END IF
277*
278* Assign pointers into MEM for SCALAPACK arrays, A is
279* allocated starting at position MEM( IPREPAD+1 )
280*
281 ipa = iprepad+1
282 IF( est .AND. m.EQ.n ) THEN
283 ipa0 = ipa + desca( lld_ )*nq + ipostpad + iprepad
284 ippiv = ipa0 + desca( lld_ )*nq + ipostpad + iprepad
285 ELSE
286 ippiv = ipa + desca( lld_ )*nq + ipostpad + iprepad
287 END IF
288 lipiv = iceil( intgsz*( mp+nb ), zplxsz )
289 ipw = ippiv + lipiv + ipostpad + iprepad
290*
291 IF( check ) THEN
292*
293* Calculate the amount of workspace required by the
294* checking routines PZLANGE, PZGETRRV, and
295* PZLAFCHK
296*
297 worksiz = max( 2, nq )
298*
299 worksiz = max( worksiz, mp*desca( nb_ )+
300 $ nq*desca( mb_ ) )
301*
302 worksiz = max( worksiz, mp * desca( nb_ ) )
303*
304 worksiz = worksiz + ipostpad
305*
306 ELSE
307*
308 worksiz = ipostpad
309*
310 END IF
311*
312* Check for adequate memory for problem size
313*
314 ierr( 1 ) = 0
315 IF( ipw+worksiz.GT.memsiz ) THEN
316 IF( iam.EQ.0 )
317 $ WRITE( nout, fmt = 9996 ) 'factorization',
318 $ ( ipw+worksiz )*zplxsz
319 ierr( 1 ) = 1
320 END IF
321*
322* Check all processes for an error
323*
324 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
325*
326 IF( ierr( 1 ).GT.0 ) THEN
327 IF( iam.EQ.0 )
328 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
329 kskip = kskip + 1
330 GO TO 30
331 END IF
332*
333* Generate matrix A of Ax = b
334*
335 CALL pzmatgen( ictxt, 'No transpose', 'No transpose',
336 $ desca( m_ ), desca( n_ ), desca( mb_ ),
337 $ desca( nb_ ), mem( ipa ), desca( lld_ ),
338 $ desca( rsrc_ ), desca( csrc_ ), iaseed, 0,
339 $ mp, 0, nq, myrow, mycol, nprow, npcol )
340*
341* Calculate inf-norm of A for residual error-checking
342*
343 IF( check ) THEN
344 CALL pzfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
345 $ desca( lld_ ), iprepad, ipostpad,
346 $ padval )
347 CALL pzfillpad( ictxt, lipiv, 1, mem( ippiv-iprepad ),
348 $ lipiv, iprepad, ipostpad, padval )
349 CALL pzfillpad( ictxt, worksiz-ipostpad, 1,
350 $ mem( ipw-iprepad ), worksiz-ipostpad,
351 $ iprepad, ipostpad, padval )
352 anorm = pzlange( 'I', m, n, mem( ipa ), 1, 1, desca,
353 $ mem( ipw ) )
354 anorm1 = pzlange( '1', m, n, mem( ipa ), 1, 1, desca,
355 $ mem( ipw ) )
356 CALL pzchekpad( ictxt, 'PZLANGE', mp, nq,
357 $ mem( ipa-iprepad ), desca( lld_ ),
358 $ iprepad, ipostpad, padval )
359 CALL pzchekpad( ictxt, 'PZLANGE', worksiz-ipostpad,
360 $ 1, mem( ipw-iprepad ),
361 $ worksiz-ipostpad, iprepad, ipostpad,
362 $ padval )
363 END IF
364*
365 IF( est .AND. m.EQ.n ) THEN
366 CALL pzmatgen( ictxt, 'No transpose', 'No transpose',
367 $ desca( m_ ), desca( n_ ), desca( mb_ ),
368 $ desca( nb_ ), mem( ipa0 ),
369 $ desca( lld_ ), desca( rsrc_ ),
370 $ desca( csrc_ ), iaseed, 0, mp, 0, nq,
371 $ myrow, mycol, nprow, npcol )
372 IF( check )
373 $ CALL pzfillpad( ictxt, mp, nq, mem( ipa0-iprepad ),
374 $ desca( lld_ ), iprepad, ipostpad,
375 $ padval )
376 END IF
377*
378 CALL slboot()
379 CALL blacs_barrier( ictxt, 'All' )
380 CALL sltimer( 1 )
381*
382* Perform LU factorization
383*
384 CALL pzgetrf( m, n, mem( ipa ), 1, 1, desca,
385 $ mem( ippiv ), info )
386*
387 CALL sltimer( 1 )
388*
389 IF( info.NE.0 ) THEN
390 IF( iam.EQ.0 )
391 $ WRITE( nout, fmt = * ) 'PZGETRF INFO=', info
392 kfail = kfail + 1
393 rcond = zero
394 GO TO 30
395 END IF
396*
397 IF( check ) THEN
398*
399* Check for memory overwrite in LU factorization
400*
401 CALL pzchekpad( ictxt, 'PZGETRF', mp, nq,
402 $ mem( ipa-iprepad ), desca( lld_ ),
403 $ iprepad, ipostpad, padval )
404 CALL pzchekpad( ictxt, 'PZGETRF', lipiv, 1,
405 $ mem( ippiv-iprepad ), lipiv, iprepad,
406 $ ipostpad, padval )
407 END IF
408*
409 IF( m.NE.n ) THEN
410*
411* For non-square matrices, factorization only
412*
413 nrhs = 0
414 nbrhs = 0
415*
416 IF( check ) THEN
417*
418* Compute FRESID = ||A - P*L*U|| / (||A|| * N * eps)
419*
420 CALL pzgetrrv( m, n, mem( ipa ), 1, 1, desca,
421 $ mem( ippiv ), mem( ipw ) )
422 CALL pzlafchk( 'No', 'No', m, n, mem( ipa ), 1, 1,
423 $ desca, iaseed, anorm, fresid,
424 $ mem( ipw ) )
425*
426* Check for memory overwrite
427*
428 CALL pzchekpad( ictxt, 'PZGETRRV', mp, nq,
429 $ mem( ipa-iprepad ), desca( lld_ ),
430 $ iprepad, ipostpad, padval )
431 CALL pzchekpad( ictxt, 'PZGETRRV', lipiv, 1,
432 $ mem( ippiv-iprepad ), lipiv,
433 $ iprepad, ipostpad, padval )
434 CALL pzchekpad( ictxt, 'PZGETRRV',
435 $ worksiz-ipostpad, 1,
436 $ mem( ipw-iprepad ),
437 $ worksiz-ipostpad, iprepad,
438 $ ipostpad, padval )
439*
440* Test residual and detect NaN result
441*
442 IF( ( fresid.LE.thresh ) .AND.
443 $ ( (fresid-fresid).EQ.0.0d+0 ) ) THEN
444 kpass = kpass + 1
445 passed = 'PASSED'
446 ELSE
447 kfail = kfail + 1
448 passed = 'failed'
449.EQ..AND..EQ. IF( MYROW0 MYCOL0 )
450 $ WRITE( NOUT, FMT = 9986 ) FRESID
451 END IF
452*
453 ELSE
454*
455* Don't perform the checking, only timing
456*
457 KPASS = KPASS + 1
458 FRESID = FRESID - FRESID
459 PASSED = 'bypass'
460*
461 END IF
462*
463* Gather maximum of all CPU and WALL clock timings
464*
465 CALL SLCOMBINE( ICTXT, 'all', '>', 'w', 1, 1,
466 $ WTIME )
467 CALL SLCOMBINE( ICTXT, 'all', '>', 'c', 1, 1,
468 $ CTIME )
469*
470* Print results
471*
472.EQ..AND..EQ. IF( MYROW0 MYCOL0 ) THEN
473*
474 MAXMN = MAX( M, N )
475 MINMN = MIN( M, N )
476*
477* 4 M N^2 - 4/3 N^3 + 2 M N - 3 N^2 flops for LU
478* factorization M >= N
479*
480 NOPS = 4.0D+0*DBLE(MAXMN)*(DBLE(MINMN)**2) -
481 $ (4.0D+0 / 3.0D+0)*( DBLE( MINMN )**3 ) +
482 $ (2.0D+0)*DBLE( MAXMN )*DBLE( MINMN ) -
483 $ (3.0D+0)*( DBLE( MINMN )**2 )
484*
485* Calculate total megaflops -- factorization only,
486* -- for WALL and CPU time, and print output
487*
488* Print WALL time if machine supports it
489*
490.GT. IF( WTIME( 1 )0.0D+0 ) THEN
491 TMFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 )
492 ELSE
493 TMFLOPS = 0.0D+0
494 END IF
495*
496 WTIME( 2 ) = 0.0D+0
497.GE. IF( WTIME( 1 )0.0D+0 )
498 $ WRITE( NOUT, FMT = 9993 ) 'wall', M, N, NB,
499 $ NRHS, NBRHS, NPROW, NPCOL, WTIME( 1 ),
500 $ WTIME( 2 ), TMFLOPS, PASSED
501*
502* Print CPU time if machine supports it
503*
504.GT. IF( CTIME( 1 )0.0D+0 ) THEN
505 TMFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 )
506 ELSE
507 TMFLOPS = 0.0D+0
508 END IF
509*
510 CTIME( 2 ) = 0.0D+0
511.GE. IF( CTIME( 1 )0.0D+0 )
512 $ WRITE( NOUT, FMT = 9993 ) 'cpu ', M, N, NB,
513 $ NRHS, NBRHS, NPROW, NPCOL, CTIME( 1 ),
514 $ CTIME( 2 ), TMFLOPS, PASSED
515 END IF
516*
517 ELSE
518*
519* If M = N
520*
521 IF( EST ) THEN
522*
523* Calculate workspace required for PZGECON
524*
525 LWORK = MAX( 1, 2*NP ) +
526 $ MAX( 2, DESCA( NB_ )*
527 $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ),
528 $ NQ + DESCA( NB_ )*
529 $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) )
530 IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD
531 LRWORK = MAX( 1, 2*NQ )
532 LW2 = ICEIL( LRWORK*DBLESZ, ZPLXSZ ) + IPOSTPAD
533*
534 IERR( 1 ) = 0
535.GT. IF( IPW2+LW2MEMSIZ ) THEN
536.EQ. IF( IAM0 )
537 $ WRITE( NOUT, FMT = 9996 )'cond est',
538 $ ( IPW2+LW2 )*ZPLXSZ
539 IERR( 1 ) = 1
540 END IF
541*
542* Check all processes for an error
543*
544 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1,
545 $ -1, 0 )
546*
547.GT. IF( IERR( 1 )0 ) THEN
548.EQ. IF( IAM0 )
549 $ WRITE( NOUT, FMT = 9997 ) 'memory'
550 KSKIP = KSKIP + 1
551 GO TO 30
552 END IF
553*
554 IF( CHECK ) THEN
555 CALL PZFILLPAD( ICTXT, LWORK, 1,
556 $ MEM( IPW-IPREPAD ), LWORK,
557 $ IPREPAD, IPOSTPAD, PADVAL )
558 CALL PZFILLPAD( ICTXT, LW2-IPOSTPAD, 1,
559 $ MEM( IPW2-IPREPAD ),
560 $ LW2-IPOSTPAD, IPREPAD,
561 $ IPOSTPAD, PADVAL )
562 END IF
563*
564* Compute condition number of the matrix
565*
566 CALL PZGECON( '1', N, MEM( IPA ), 1, 1, DESCA,
567 $ ANORM1, RCOND, MEM( IPW ), LWORK,
568 $ MEM( IPW2 ), LRWORK, INFO )
569*
570 IF( CHECK ) THEN
571 CALL PZCHEKPAD( ICTXT, 'pzgecon', NP, NQ,
572 $ MEM( IPA-IPREPAD ),
573 $ DESCA( LLD_ ), IPREPAD,
574 $ IPOSTPAD, PADVAL )
575 CALL PZCHEKPAD( ICTXT, 'pzgecon', LWORK, 1,
576 $ MEM( IPW-IPREPAD ), LWORK,
577 $ IPREPAD, IPOSTPAD, PADVAL )
578 CALL PZCHEKPAD( ICTXT, 'pzgecon',
579 $ LW2-IPOSTPAD, 1,
580 $ MEM( IPW2-IPREPAD ),
581 $ LW2-IPOSTPAD, IPREPAD,
582 $ IPOSTPAD, PADVAL )
583 END IF
584 END IF
585*
586* Loop over the different values for NRHS
587*
588 DO 20 HH = 1, NNR
589*
590 NRHS = NRVAL( HH )
591*
592 DO 10 KK = 1, NNBR
593*
594 NBRHS = NBRVAL( KK )
595*
596* Initialize Array Descriptor for rhs
597*
598 CALL DESCINIT( DESCB, N, NRHS, NB, NBRHS, 0, 0,
599 $ ICTXT, MAX( 1, NP )+IMIDPAD,
600 $ IERR( 1 ) )
601*
602* Check all processes for an error
603*
604 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1,
605 $ -1, 0 )
606*
607.LT. IF( IERR( 1 )0 ) THEN
608.EQ. IF( IAM0 )
609 $ WRITE( NOUT, FMT = 9997 ) 'descriptor'
610 KSKIP = KSKIP + 1
611 GO TO 10
612 END IF
613*
614* move IPW to allow room for RHS
615*
616 MYRHS = NUMROC( DESCB( N_ ), DESCB( NB_ ),
617 $ MYCOL, DESCB( CSRC_ ), NPCOL )
618 IPB = IPW
619*
620 IF( EST ) THEN
621 IPB0 = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD +
622 $ IPREPAD
623 IPFERR = IPB0 + DESCB( LLD_ )*MYRHS +
624 $ IPOSTPAD + IPREPAD
625 IPBERR = MYRHS + IPFERR + IPOSTPAD + IPREPAD
626 IPW = MYRHS + IPBERR + IPOSTPAD + IPREPAD
627 ELSE
628 IPW = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD +
629 $ IPREPAD
630 END IF
631*
632* Set worksiz: routines requiring most workspace
633* is PZLASCHK
634*
635 IF( CHECK ) THEN
636 LCM = ILCM( NPROW, NPCOL )
637 LCMQ = LCM / NPCOL
638 WORKSIZ = MAX( WORKSIZ-IPOSTPAD,
639 $ NQ * NBRHS + NP * NBRHS +
640 $ MAX( MAX( NQ*NB, 2*NBRHS ),
641 $ NBRHS * NUMROC( NUMROC(N,NB,0,0,NPCOL),NB,
642 $ 0,0,LCMQ ) ) )
643 WORKSIZ = IPOSTPAD + WORKSIZ
644 ELSE
645 WORKSIZ = IPOSTPAD
646 END IF
647*
648 IERR( 1 ) = 0
649.GT. IF( IPW+WORKSIZMEMSIZ ) THEN
650.EQ. IF( IAM0 )
651 $ WRITE( NOUT, FMT = 9996 )'solve',
652 $ ( IPW+WORKSIZ )*ZPLXSZ
653 IERR( 1 ) = 1
654 END IF
655*
656* Check all processes for an error
657*
658 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1,
659 $ -1, 0 )
660*
661.GT. IF( IERR( 1 )0 ) THEN
662.EQ. IF( IAM0 )
663 $ WRITE( NOUT, FMT = 9997 ) 'memory'
664 KSKIP = KSKIP + 1
665 GO TO 10
666 END IF
667*
668* Generate RHS
669*
670 CALL PZMATGEN( ICTXT, 'no', 'no', DESCB( M_ ),
671 $ DESCB( N_ ), DESCB( MB_ ),
672 $ DESCB( NB_ ), MEM( IPB ),
673 $ DESCB( LLD_ ), DESCB( RSRC_ ),
674 $ DESCB( CSRC_ ), IBSEED, 0, NP, 0,
675 $ MYRHS, MYROW, MYCOL, NPROW,
676 $ NPCOL )
677*
678 IF( CHECK )
679 $ CALL PZFILLPAD( ICTXT, NP, MYRHS,
680 $ MEM( IPB-IPREPAD ),
681 $ DESCB( LLD_ ), IPREPAD,
682 $ IPOSTPAD, PADVAL )
683*
684 IF( EST ) THEN
685 CALL PZMATGEN( ICTXT, 'no', 'no',
686 $ DESCB( M_ ), DESCB( N_ ),
687 $ DESCB( MB_ ), DESCB( NB_ ),
688 $ MEM( IPB0 ), DESCB( LLD_ ),
689 $ DESCB( RSRC_ ),
690 $ DESCB( CSRC_ ), IBSEED, 0, NP,
691 $ 0, MYRHS, MYROW, MYCOL, NPROW,
692 $ NPCOL )
693 IF( CHECK ) THEN
694 CALL PZFILLPAD( ICTXT, NP, MYRHS,
695 $ MEM( IPB0-IPREPAD ),
696 $ DESCB( LLD_ ), IPREPAD,
697 $ IPOSTPAD, PADVAL )
698 CALL PZFILLPAD( ICTXT, 1, MYRHS,
699 $ MEM( IPFERR-IPREPAD ), 1,
700 $ IPREPAD, IPOSTPAD,
701 $ PADVAL )
702 CALL PZFILLPAD( ICTXT, 1, MYRHS,
703 $ MEM( IPBERR-IPREPAD ), 1,
704 $ IPREPAD, IPOSTPAD,
705 $ PADVAL )
706 END IF
707 END IF
708*
709 CALL BLACS_BARRIER( ICTXT, 'all' )
710 CALL SLTIMER( 2 )
711*
712* Solve linear sytem via LU factorization
713*
714 CALL PZGETRS( 'no', N, NRHS, MEM( IPA ), 1, 1,
715 $ DESCA, MEM( IPPIV ), MEM( IPB ),
716 $ 1, 1, DESCB, INFO )
717*
718 CALL SLTIMER( 2 )
719*
720 IF( CHECK ) THEN
721*
722* check for memory overwrite
723*
724 CALL PZCHEKPAD( ICTXT, 'pzgetrs', NP, NQ,
725 $ MEM( IPA-IPREPAD ),
726 $ DESCA( LLD_ ), IPREPAD,
727 $ IPOSTPAD, PADVAL )
728 CALL PZCHEKPAD( ICTXT, 'pzgetrs', LIPIV, 1,
729 $ MEM( IPPIV-IPREPAD ), LIPIV,
730 $ IPREPAD, IPOSTPAD, PADVAL )
731 CALL PZCHEKPAD( ICTXT, 'pzgetrs', NP,
732 $ MYRHS, MEM( IPB-IPREPAD ),
733 $ DESCB( LLD_ ), IPREPAD,
734 $ IPOSTPAD, PADVAL )
735*
736 CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD,
737 $ 1, MEM( IPW-IPREPAD ),
738 $ WORKSIZ-IPOSTPAD, IPREPAD,
739 $ IPOSTPAD, PADVAL )
740*
741* check the solution to rhs
742*
743 CALL PZLASCHK( 'no', 'n', N, NRHS,
744 $ MEM( IPB ), 1, 1, DESCB,
745 $ IASEED, 1, 1, DESCA, IBSEED,
746 $ ANORM, SRESID, MEM( IPW ) )
747*
748.EQ..AND..GT. IF( IAM0 SRESIDTHRESH )
749 $ WRITE( NOUT, FMT = 9985 ) SRESID
750*
751* check for memory overwrite
752*
753 CALL PZCHEKPAD( ICTXT, 'pzlaschk', NP,
754 $ MYRHS, MEM( IPB-IPREPAD ),
755 $ DESCB( LLD_ ), IPREPAD,
756 $ IPOSTPAD, PADVAL )
757 CALL PZCHEKPAD( ICTXT, 'pzlaschk',
758 $ WORKSIZ-IPOSTPAD, 1,
759 $ MEM( IPW-IPREPAD ),
760 $ WORKSIZ-IPOSTPAD,
761 $ IPREPAD, IPOSTPAD, PADVAL )
762*
763* The second test is a NaN trap
764*
765.LE..AND. IF( SRESIDTHRESH
766.EQ. $ ( SRESID-SRESID )0.0D+0 ) THEN
767 KPASS = KPASS + 1
768 PASSED = 'passed'
769 ELSE
770 KFAIL = KFAIL + 1
771 PASSED = 'failed'
772 END IF
773 ELSE
774 KPASS = KPASS + 1
775 SRESID = SRESID - SRESID
776 PASSED = 'bypass'
777 END IF
778*
779 IF( EST ) THEN
780*
781* Calculate workspace required for PZGERFS
782*
783 LWORK = MAX( 1, 2*NP )
784 IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD
785 LRWORK = MAX( 1, NP )
786 LW2 = ICEIL( LRWORK*DBLESZ, ZPLXSZ ) +
787 $ IPOSTPAD
788*
789 IERR( 1 ) = 0
790.GT. IF( IPW2+LW2MEMSIZ ) THEN
791.EQ. IF( IAM0 )
792 $ WRITE( NOUT, FMT = 9996 )
793 $ 'iter ref', ( IPW2+LW2 )*ZPLXSZ
794 IERR( 1 ) = 1
795 END IF
796*
797* Check all processes for an error
798*
799 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1,
800 $ IERR, 1, -1, 0 )
801*
802.GT. IF( IERR( 1 )0 ) THEN
803.EQ. IF( IAM0 )
804 $ WRITE( NOUT, FMT = 9997 )
805 $ 'memory'
806 KSKIP = KSKIP + 1
807 GO TO 10
808 END IF
809*
810 IF( CHECK ) THEN
811 CALL PZFILLPAD( ICTXT, LWORK, 1,
812 $ MEM( IPW-IPREPAD ),
813 $ LWORK, IPREPAD, IPOSTPAD,
814 $ PADVAL )
815 CALL PZFILLPAD( ICTXT, LW2-IPOSTPAD, 1,
816 $ MEM( IPW2-IPREPAD ),
817 $ LW2-IPOSTPAD, IPREPAD,
818 $ IPOSTPAD, PADVAL )
819 END IF
820*
821* Use iterative refinement to improve the
822* computed solution
823*
824 CALL PZGERFS( 'no', N, NRHS, MEM( IPA0 ), 1,
825 $ 1, DESCA, MEM( IPA ), 1, 1,
826 $ DESCA, MEM( IPPIV ),
827 $ MEM( IPB0 ), 1, 1, DESCB,
828 $ MEM( IPB ), 1, 1, DESCB,
829 $ MEM( IPFERR ), MEM( IPBERR ),
830 $ MEM( IPW ), LWORK, MEM( IPW2 ),
831 $ LRWORK, INFO )
832*
833 IF( CHECK ) THEN
834 CALL PZCHEKPAD( ICTXT, 'pzgerfs', NP,
835 $ NQ, MEM( IPA0-IPREPAD ),
836 $ DESCA( LLD_ ), IPREPAD,
837 $ IPOSTPAD, PADVAL )
838 CALL PZCHEKPAD( ICTXT, 'pzgerfs', NP,
839 $ NQ, MEM( IPA-IPREPAD ),
840 $ DESCA( LLD_ ), IPREPAD,
841 $ IPOSTPAD, PADVAL )
842 CALL PZCHEKPAD( ICTXT, 'pzgerfs', LIPIV,
843 $ 1, MEM( IPPIV-IPREPAD ),
844 $ LIPIV, IPREPAD,
845 $ IPOSTPAD, PADVAL )
846 CALL PZCHEKPAD( ICTXT, 'pzgerfs', NP,
847 $ MYRHS, MEM( IPB-IPREPAD ),
848 $ DESCB( LLD_ ), IPREPAD,
849 $ IPOSTPAD, PADVAL )
850 CALL PZCHEKPAD( ICTXT, 'pzgerfs', NP,
851 $ MYRHS,
852 $ MEM( IPB0-IPREPAD ),
853 $ DESCB( LLD_ ), IPREPAD,
854 $ IPOSTPAD, PADVAL )
855 CALL PZCHEKPAD( ICTXT, 'pzgerfs', 1,
856 $ MYRHS,
857 $ MEM( IPFERR-IPREPAD ), 1,
858 $ IPREPAD, IPOSTPAD,
859 $ PADVAL )
860 CALL PZCHEKPAD( ICTXT, 'pzgerfs', 1,
861 $ MYRHS,
862 $ MEM( IPBERR-IPREPAD ), 1,
863 $ IPREPAD, IPOSTPAD,
864 $ PADVAL )
865 CALL PZCHEKPAD( ICTXT, 'pzgerfs', LWORK,
866 $ 1, MEM( IPW-IPREPAD ),
867 $ LWORK, IPREPAD, IPOSTPAD,
868 $ PADVAL )
869 CALL PZCHEKPAD( ICTXT, 'pzgerfs',
870 $ LW2-IPOSTPAD, 1,
871 $ MEM( IPW2-IPREPAD ),
872 $ LW2-IPOSTPAD, IPREPAD,
873 $ IPOSTPAD, PADVAL )
874*
875 CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD,
876 $ 1, MEM( IPW-IPREPAD ),
877 $ WORKSIZ-IPOSTPAD, IPREPAD,
878 $ IPOSTPAD, PADVAL )
879*
880* check the solution to rhs
881*
882 CALL PZLASCHK( 'no', 'n', N, NRHS,
883 $ MEM( IPB ), 1, 1, DESCB,
884 $ IASEED, 1, 1, DESCA,
885 $ IBSEED, ANORM, SRESID2,
886 $ MEM( IPW ) )
887*
888.EQ..AND..GT. IF( IAM0 SRESID2THRESH )
889 $ WRITE( NOUT, FMT = 9985 ) SRESID2
890*
891* check for memory overwrite
892*
893 CALL PZCHEKPAD( ICTXT, 'pzlaschk', NP,
894 $ MYRHS, MEM( IPB-IPREPAD ),
895 $ DESCB( LLD_ ), IPREPAD,
896 $ IPOSTPAD, PADVAL )
897 CALL PZCHEKPAD( ICTXT, 'pzlaschk',
898 $ WORKSIZ-IPOSTPAD, 1,
899 $ MEM( IPW-IPREPAD ),
900 $ WORKSIZ-IPOSTPAD, IPREPAD,
901 $ IPOSTPAD, PADVAL )
902 END IF
903 END IF
904*
905* Gather max. of all CPU and WALL clock timings
906*
907 CALL SLCOMBINE( ICTXT, 'all', '>', 'w', 2, 1,
908 $ WTIME )
909 CALL SLCOMBINE( ICTXT, 'all', '>', 'c', 2, 1,
910 $ CTIME )
911*
912* Print results
913*
914.EQ..AND..EQ. IF( MYROW0 MYCOL0 ) THEN
915*
916* 8/3 N^3 - N^2 flops for LU factorization
917*
918 NOPS = (8.0D+0/3.0D+0)*( DBLE(N)**3 ) -
919 $ DBLE(N)**2
920*
921* nrhs * 8 N^2 flops for LU solve.
922*
923 NOPS = NOPS + 8.0D+0*(DBLE(N)**2)*DBLE(NRHS)
924*
925* Calculate total megaflops -- factorization
926* and solve -- for WALL and CPU time, and print
927* output
928*
929* Print WALL time if machine supports it
930*
931.GT. IF( WTIME( 1 ) + WTIME( 2 ) 0.0D+0 )
932 $ THEN
933 TMFLOPS = NOPS /
934 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
935 ELSE
936 TMFLOPS = 0.0D+0
937 END IF
938*
939* Print WALL time if supported
940*
941.GE. IF( WTIME( 2 )0.0D+0 )
942 $ WRITE( NOUT, FMT = 9993 ) 'wall', M, N,
943 $ NB, NRHS, NBRHS, NPROW, NPCOL,
944 $ WTIME( 1 ), WTIME( 2 ), TMFLOPS,
945 $ PASSED
946*
947* Print CPU time if supported
948*
949.GT. IF( CTIME( 1 )+CTIME( 2 )0.0D+0 )
950 $ THEN
951 TMFLOPS = NOPS /
952 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
953 ELSE
954 TMFLOPS = 0.0D+0
955 END IF
956*
957.GE. IF( CTIME( 2 )0.0D+0 )
958 $ WRITE( NOUT, FMT = 9993 ) 'cpu ', M, N,
959 $ NB, NRHS, NBRHS, NPROW, NPCOL,
960 $ CTIME( 1 ), CTIME( 2 ), TMFLOPS,
961 $ PASSED
962 END IF
963 10 CONTINUE
964 20 CONTINUE
965*
966.AND..GT. IF( CHECK( SRESIDTHRESH ) ) THEN
967*
968* Compute fresid = ||A - P*L*U|| / (||A|| * N * eps)
969*
970 CALL PZGETRRV( M, N, MEM( IPA ), 1, 1, DESCA,
971 $ MEM( IPPIV ), MEM( IPW ) )
972 CALL PZLAFCHK( 'no', 'no', M, N, MEM( IPA ), 1,
973 $ 1, DESCA, IASEED, ANORM, FRESID,
974 $ MEM( IPW ) )
975*
976* Check for memory overwrite
977*
978 CALL PZCHEKPAD( ICTXT, 'pzgetrrv', NP, NQ,
979 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ),
980 $ IPREPAD, IPOSTPAD, PADVAL )
981 CALL PZCHEKPAD( ICTXT, 'pzgetrrv', LIPIV,
982 $ 1, MEM( IPPIV-IPREPAD ), LIPIV,
983 $ IPREPAD, IPOSTPAD, PADVAL )
984 CALL PZCHEKPAD( ICTXT, 'pzgetrrv',
985 $ WORKSIZ-IPOSTPAD, 1,
986 $ MEM( IPW-IPREPAD ),
987 $ WORKSIZ-IPOSTPAD, IPREPAD,
988 $ IPOSTPAD, PADVAL )
989*
990.EQ..AND..EQ. IF( MYROW0 MYCOL0 )
991 $ WRITE( NOUT, FMT = 9986 ) FRESID
992 END IF
993 END IF
994 30 CONTINUE
995 40 CONTINUE
996 CALL BLACS_GRIDEXIT( ICTXT )
997 50 CONTINUE
998*
999* Print ending messages and close output file
1000*
1001 60 CONTINUE
1002.EQ. IF( IAM0 ) THEN
1003 KTESTS = KPASS + KFAIL + KSKIP
1004 WRITE( NOUT, FMT = * )
1005 WRITE( NOUT, FMT = 9992 ) KTESTS
1006 IF( CHECK ) THEN
1007 WRITE( NOUT, FMT = 9991 ) KPASS
1008 WRITE( NOUT, FMT = 9989 ) KFAIL
1009 ELSE
1010 WRITE( NOUT, FMT = 9990 ) KPASS
1011 END IF
1012 WRITE( NOUT, FMT = 9988 ) KSKIP
1013 WRITE( NOUT, FMT = * )
1014 WRITE( NOUT, FMT = * )
1015 WRITE( NOUT, FMT = 9987 )
1016.NE..AND..NE. IF( NOUT6 NOUT0 )
1017 $ CLOSE( NOUT )
1018 END IF
1019*
1020 CALL BLACS_EXIT( 0 )
1021*
1022 9999 FORMAT( 'illegal ', A6, ': ', A5, ' = ', I3,
1023 $ '; it should be at least 1' )
1024 9998 FORMAT( 'illegal grid: nprow*npcol = ', I4, '. it can be at most',
1025 $ I4 )
1026 9997 FORMAT( 'bad ', A6, ' parameters: going on to next test case.' )
1027 9996 FORMAT( 'unable to perform ', A, ': need totmem of at least',
1028 $ I11 )
1029 9995 FORMAT( 'time m n nb nrhs nbrhs p q lu time ',
1030 $ 'sol time mflops check' )
1031 9994 FORMAT( '---- ----- ----- --- ---- ----- ---- ---- -------- ',
1032 $ '-------- -------- ------' )
1033 9993 FORMAT( A4, 1X, I5, 1X, I5, 1X, I3, 1X, I5, 1X, I4, 1X, I4, 1X,
1034 $ I4, 1X, F8.2, 1X, F8.2, 1X, F8.2, 1X, A6 )
1035 9992 FORMAT( 'finished ', I6, ' tests, with the following results:' )
1036 9991 FORMAT( I5, ' tests completed and passed residual checks.' )
1037 9990 FORMAT( I5, ' tests completed without checking.' )
1038 9989 FORMAT( I5, ' tests completed and failed residual checks.' )
1039 9988 FORMAT( I5, ' tests skipped because of illegal input values.' )
1040 9987 FORMAT( 'END OF TESTS.' )
1041 9986 FORMAT( '||A - P*L*U|| / (||A|| * N * eps) = ', G25.7 )
1042 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 )
1043*
1044 STOP
1045*
1046* End of PZLUDRIVER
1047*
1048 END
subroutine pzlafchk(aform, diag, m, n, a, ia, ja, desca, iaseed, anorm, fresid, work)
Definition pzlafchk.f:3
subroutine pzmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pzmatgen.f:4
end diagonal values have been computed in the(sparse) matrix id.SOL
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function ilcm(m, n)
Definition ilcm.f:2
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine blacs_gridinit(cntxt, c, nprow, npcol)
Definition mpi.f:745
subroutine pzgetrf(m, n, a, ia, ja, desca, ipiv, info)
Definition mpi.f:846
subroutine pzgetrs(trans, n, nrhs, a, ia, ja, desca, ipiv, b, ib, jb, descb, info)
Definition mpi.f:1146
double precision function pzlange(norm, m, n, a, ia, ja, desca, work)
Definition mpi.f:1287
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition mpi.f:777
subroutine pzgecon(norm, n, a, ia, ja, desca, anorm, rcond, work, lwork, iwork, liwork, info)
Definition mpi.f:1337
subroutine blacs_gridexit(cntxt)
Definition mpi.f:762
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine pzchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pzchekpad.f:3
subroutine pzfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pzfillpad.f:2
subroutine pzgerfs(trans, n, nrhs, a, ia, ja, desca, af, iaf, jaf, descaf, ipiv, b, ib, jb, descb, x, ix, jx, descx, ferr, berr, work, lwork, rwork, lrwork, info)
Definition pzgerfs.f:5
subroutine pzgetrrv(m, n, a, ia, ja, desca, ipiv, work)
Definition pzgetrrv.f:2
subroutine pzlaschk(symm, diag, n, nrhs, x, ix, jx, descx, iaseed, ia, ja, desca, ibseed, anorm, resid, work)
Definition pzlaschk.f:4
program pzludriver
Definition pzludriver.f:1
subroutine pzluinfo(summry, nout, nmat, mval, nval, ldnval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, est, work, iam, nprocs)
Definition pzluinfo.f:5
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267