OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
psludriver.f
Go to the documentation of this file.
1 PROGRAM psludriver
2*
3* -- ScaLAPACK testing driver (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 1, 1997
7*
8* Purpose
9* ========
10*
11* PSLUDRIVER is the main test program for the REAL
12* SCALAPACK LU routines. This test driver performs an LU factorization
13* and solve. If the input matrix is non-square, only the factorization
14* is performed. Condition estimation and iterative refinement are
15* optionally performed.
16*
17* The program must be driven by a short data file. An annotated
18* example of a data file can be obtained by deleting the first 3
19* characters from the following 18 lines:
20* 'SCALAPACK, Version 2.0, LU factorization input file'
21* 'Intel iPSC/860 hypercube, gamma model.'
22* 'LU.out' output file name (if any)
23* 6 device out
24* 1 number of problems sizes
25* 31 201 values of M
26* 31 201 values of N
27* 1 number of NB's
28* 2 values of NB
29* 1 number of NRHS's
30* 1 values of NRHS
31* 1 number of NBRHS's
32* 1 values of NBRHS
33* 1 number of process grids (ordered pairs of P & Q)
34* 2 1 4 2 3 8 values of P
35* 2 4 1 3 2 1 values of Q
36* 1.0 threshold
37* T (T or F) Test Cond. Est. and Iter. Ref. Routines
38*
39*
40* Internal Parameters
41* ===================
42*
43* TOTMEM INTEGER, default = 2000000
44* TOTMEM is a machine-specific parameter indicating the
45* maximum amount of available memory in bytes.
46* The user should customize TOTMEM to his platform. Remember
47* to leave room in memory for the operating system, the BLACS
48* buffer, etc. For example, on a system with 8 MB of memory
49* per process (e.g., one processor on an Intel iPSC/860), the
50* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
51* code, BLACS buffer, etc). However, for PVM, we usually set
52* TOTMEM = 2000000. Some experimenting with the maximum value
53* of TOTMEM may be required.
54*
55* INTGSZ INTEGER, default = 4 bytes.
56* REALSZ INTEGER, default = 4 bytes.
57* INTGSZ and REALSZ indicate the length in bytes on the
58* given platform for an integer and a single precision real.
59* MEM REAL array, dimension ( TOTMEM / REALSZ )
60*
61* All arrays used by SCALAPACK routines are allocated from
62* this array and referenced by pointers. The integer IPA,
63* for example, is a pointer to the starting element of MEM for
64* the matrix A.
65*
66* =====================================================================
67*
68* .. Parameters ..
69 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
70 $ LLD_, mb_, M_, nb_, n_, RSRC_
71 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
72 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
73 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
74 INTEGER intgsz, memsiz, ntests, realsz, totmem
75 REAL padval, zero
76 parameter( intgsz = 4, realsz = 4, totmem = 2000000,
77 $ memsiz = totmem / realsz, ntests = 20,
78 $ padval = -9923.0e+0, zero = 0.0e+0 )
79* ..
80* .. Local Scalars ..
81 LOGICAL CHECK, est
82 CHARACTER*6 passed
83 CHARACTER*80 outfile
84 INTEGER hh, i, iam, iaseed, ibseed, ictxt, imidpad,
85 $ info, ipa, ipa0, ipb, ipb0, ipberr, ipferr,
86 $ ipostpad, ippiv, iprepad, ipw, ipw2, j, k,
87 $ kfail, kk, kpass, kskip, ktests, lcm, lcmq,
88 $ lipiv, liwork, lwork, lw2, m, maxmn,
89 $ minmn, mp, mycol, myrhs, myrow, n, nb, nbrhs,
90 $ ngrids, nmat, nnb, nnbr, nnr, nout, np, npcol,
91 $ nprocs, nprow, nq, nrhs, worksiz
92 REAL anorm, anorm1, fresid, rcond, sresid, sresid2,
93 $ thresh
94 DOUBLE PRECISION nops, tmflops
95* ..
96* .. Local Arrays ..
97 INTEGER desca( dlen_ ), descb( dlen_ ), ierr( 1 ),
98 $ mval( ntests ), nbrval( ntests ),
99 $ nbval( ntests ), nrval( ntests ),
100 $ nval( ntests ), pval( ntests ),
101 $ qval( ntests )
102 REAL mem( memsiz )
103 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
104* ..
105* .. External Subroutines ..
106 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
108 $ blacs_pinfo, descinit, igsum2d, pschekpad,
113* ..
114* .. External Functions ..
115 INTEGER iceil, ilcm, numroc
116 REAL pslange
117 EXTERNAL iceil, ilcm, numroc, pslange
118* ..
119* .. Intrinsic Functions ..
120 INTRINSIC dble, max, min
121* ..
122* .. Data Statements ..
123 DATA kfail, kpass, kskip, ktests / 4*0 /
124* ..
125* .. Executable Statements ..
126*
127* Get starting information
128*
129 CALL blacs_pinfo( iam, nprocs )
130 iaseed = 100
131 ibseed = 200
132 CALL psluinfo( outfile, nout, nmat, mval, nval, ntests, nnb,
133 $ nbval, ntests, nnr, nrval, ntests, nnbr, nbrval,
134 $ ntests, ngrids, pval, ntests, qval, ntests, thresh,
135 $ est, mem, iam, nprocs )
136 check = ( thresh.GE.0.0e+0 )
137*
138* Print headings
139*
140 IF( iam.EQ.0 ) THEN
141 WRITE( nout, fmt = * )
142 WRITE( nout, fmt = 9995 )
143 WRITE( nout, fmt = 9994 )
144 WRITE( nout, fmt = * )
145 END IF
146*
147* Loop over different process grids
148*
149 DO 50 i = 1, ngrids
150*
151 nprow = pval( i )
152 npcol = qval( i )
153*
154* Make sure grid information is correct
155*
156 ierr( 1 ) = 0
157 IF( nprow.LT.1 ) THEN
158 IF( iam.EQ.0 )
159 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', NPROW
160 IERR( 1 ) = 1
161.LT. ELSE IF( NPCOL1 ) THEN
162.EQ. IF( IAM0 )
163 $ WRITE( NOUT, FMT = 9999 ) 'grid', 'npcol', NPCOL
164 IERR( 1 ) = 1
165.GT. ELSE IF( NPROW*NPCOLNPROCS ) THEN
166.EQ. IF( IAM0 )
167 $ WRITE( NOUT, FMT = 9998 ) NPROW*NPCOL, NPROCS
168 IERR( 1 ) = 1
169 END IF
170*
171.GT. IF( IERR( 1 )0 ) THEN
172.EQ. IF( IAM0 )
173 $ WRITE( NOUT, FMT = 9997 ) 'grid'
174 KSKIP = KSKIP + 1
175 GO TO 50
176 END IF
177*
178* Define process grid
179*
180 CALL BLACS_GET( -1, 0, ICTXT )
181 CALL BLACS_GRIDINIT( ICTXT, 'row-major', NPROW, NPCOL )
182 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
183*
184* Go to bottom of process grid loop if this case doesn't use my
185* process
186*
187.GE..OR..GE. IF( MYROWNPROW MYCOLNPCOL )
188 $ GO TO 50
189*
190 DO 40 J = 1, NMAT
191*
192 M = MVAL( J )
193 N = NVAL( J )
194*
195* Make sure matrix information is correct
196*
197 IERR( 1 ) = 0
198.LT. IF( M1 ) THEN
199.EQ. IF( IAM0 )
200 $ WRITE( NOUT, FMT = 9999 ) 'matrix', 'm', M
201 IERR( 1 ) = 1
202.LT. ELSE IF( N1 ) THEN
203.EQ. IF( IAM0 )
204 $ WRITE( NOUT, FMT = 9999 ) 'matrix', 'n', N
205 IERR( 1 ) = 1
206 END IF
207*
208* Check all processes for an error
209*
210 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1, -1, 0 )
211*
212.GT. IF( IERR( 1 )0 ) THEN
213.EQ. IF( IAM0 )
214 $ WRITE( NOUT, FMT = 9997 ) 'matrix'
215 KSKIP = KSKIP + 1
216 GO TO 40
217 END IF
218*
219 DO 30 K = 1, NNB
220*
221 NB = NBVAL( K )
222*
223* Make sure nb is legal
224*
225 IERR( 1 ) = 0
226.LT. IF( NB1 ) THEN
227 IERR( 1 ) = 1
228.EQ. IF( IAM0 )
229 $ WRITE( NOUT, FMT = 9999 ) 'nb', 'nb', NB
230 END IF
231*
232* Check all processes for an error
233*
234 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, ierr, 1, -1, 0 )
235*
236 IF( ierr( 1 ).GT.0 ) THEN
237 IF( iam.EQ.0 )
238 $ WRITE( nout, fmt = 9997 ) 'NB'
239 kskip = kskip + 1
240 GO TO 30
241 END IF
242*
243* Padding constants
244*
245 mp = numroc( m, nb, myrow, 0, nprow )
246 np = numroc( n, nb, myrow, 0, nprow )
247 nq = numroc( n, nb, mycol, 0, npcol )
248 IF( check ) THEN
249 iprepad = max( nb, mp )
250 imidpad = nb
251 ipostpad = max( nb, nq )
252 ELSE
253 iprepad = 0
254 imidpad = 0
255 ipostpad = 0
256 END IF
257*
258* Initialize the array descriptor for the matrix A
259*
260 CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
261 $ max( 1, mp )+imidpad, ierr( 1 ) )
262*
263* Check all processes for an error
264*
265 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
266*
267 IF( ierr( 1 ).LT.0 ) THEN
268 IF( iam.EQ.0 )
269 $ WRITE( nout, fmt = 9997 ) 'descriptor'
270 kskip = kskip + 1
271 GO TO 30
272 END IF
273*
274* Assign pointers into MEM for SCALAPACK arrays, A is
275* allocated starting at position MEM( IPREPAD+1 )
276*
277 ipa = iprepad+1
278 IF( est .AND. m.EQ.n ) THEN
279 ipa0 = ipa + desca( lld_ )*nq + ipostpad + iprepad
280 ippiv = ipa0 + desca( lld_ )*nq + ipostpad + iprepad
281 ELSE
282 ippiv = ipa + desca( lld_ )*nq + ipostpad + iprepad
283 END IF
284 lipiv = iceil( intgsz*( mp+nb ), realsz )
285 ipw = ippiv + lipiv + ipostpad + iprepad
286*
287 IF( check ) THEN
288*
289* Calculate the amount of workspace required by the
290* checking routines PSLANGE, PSGETRRV, and
291* PSLAFCHK
292*
293 worksiz = max( 2, nq )
294*
295 worksiz = max( worksiz, mp*desca( nb_ )+
296 $ nq*desca( mb_ ) )
297*
298 worksiz = max( worksiz, mp * desca( nb_ ) )
299*
300 worksiz = worksiz + ipostpad
301*
302 ELSE
303*
304 worksiz = ipostpad
305*
306 END IF
307*
308* Check for adequate memory for problem size
309*
310 ierr( 1 ) = 0
311 IF( ipw+worksiz.GT.memsiz ) THEN
312 IF( iam.EQ.0 )
313 $ WRITE( nout, fmt = 9996 ) 'factorization',
314 $ ( ipw+worksiz )*realsz
315 ierr( 1 ) = 1
316 END IF
317*
318* Check all processes for an error
319*
320 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
321*
322 IF( ierr( 1 ).GT.0 ) THEN
323 IF( iam.EQ.0 )
324 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
325 kskip = kskip + 1
326 GO TO 30
327 END IF
328*
329* Generate matrix A of Ax = b
330*
331 CALL psmatgen( ictxt, 'No transpose', 'No transpose',
332 $ desca( m_ ), desca( n_ ), desca( mb_ ),
333 $ desca( nb_ ), mem( ipa ), desca( lld_ ),
334 $ desca( rsrc_ ), desca( csrc_ ), iaseed, 0,
335 $ mp, 0, nq, myrow, mycol, nprow, npcol )
336*
337* Calculate inf-norm of A for residual error-checking
338*
339 IF( check ) THEN
340 CALL psfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
341 $ desca( lld_ ), iprepad, ipostpad,
342 $ padval )
343 CALL psfillpad( ictxt, lipiv, 1, mem( ippiv-iprepad ),
344 $ lipiv, iprepad, ipostpad, padval )
345 CALL psfillpad( ictxt, worksiz-ipostpad, 1,
346 $ mem( ipw-iprepad ), worksiz-ipostpad,
347 $ iprepad, ipostpad, padval )
348 anorm = pslange( 'I', m, n, mem( ipa ), 1, 1, desca,
349 $ mem( ipw ) )
350 anorm1 = pslange( '1', m, n, mem( ipa ), 1, 1, desca,
351 $ mem( ipw ) )
352 CALL pschekpad( ictxt, 'PSLANGE', mp, nq,
353 $ mem( ipa-iprepad ), desca( lld_ ),
354 $ iprepad, ipostpad, padval )
355 CALL pschekpad( ictxt, 'PSLANGE', worksiz-ipostpad,
356 $ 1, mem( ipw-iprepad ),
357 $ worksiz-ipostpad, iprepad, ipostpad,
358 $ padval )
359 END IF
360*
361 IF( est .AND. m.EQ.n ) THEN
362 CALL psmatgen( ictxt, 'No transpose', 'No transpose',
363 $ desca( m_ ), desca( n_ ), desca( mb_ ),
364 $ desca( nb_ ), mem( ipa0 ),
365 $ desca( lld_ ), desca( rsrc_ ),
366 $ desca( csrc_ ), iaseed, 0, mp, 0, nq,
367 $ myrow, mycol, nprow, npcol )
368 IF( check )
369 $ CALL psfillpad( ictxt, mp, nq, mem( ipa0-iprepad ),
370 $ desca( lld_ ), iprepad, ipostpad,
371 $ padval )
372 END IF
373*
374 CALL slboot()
375 CALL blacs_barrier( ictxt, 'All' )
376 CALL sltimer( 1 )
377*
378* Perform LU factorization
379*
380 CALL psgetrf( m, n, mem( ipa ), 1, 1, desca,
381 $ mem( ippiv ), info )
382*
383 CALL sltimer( 1 )
384*
385 IF( info.NE.0 ) THEN
386 IF( iam.EQ.0 )
387 $ WRITE( nout, fmt = * ) 'PSGETRF INFO=', info
388 kfail = kfail + 1
389 rcond = zero
390 GO TO 30
391 END IF
392*
393 IF( check ) THEN
394*
395* Check for memory overwrite in LU factorization
396*
397 CALL pschekpad( ictxt, 'PSGETRF', mp, nq,
398 $ mem( ipa-iprepad ), desca( lld_ ),
399 $ iprepad, ipostpad, padval )
400 CALL pschekpad( ictxt, 'PSGETRF', lipiv, 1,
401 $ mem( ippiv-iprepad ), lipiv, iprepad,
402 $ ipostpad, padval )
403 END IF
404*
405 IF( m.NE.n ) THEN
406*
407* For non-square matrices, factorization only
408*
409 nrhs = 0
410 nbrhs = 0
411*
412 IF( check ) THEN
413*
414* Compute FRESID = ||A - P*L*U|| / (||A|| * N * eps)
415*
416 CALL psgetrrv( m, n, mem( ipa ), 1, 1, desca,
417 $ mem( ippiv ), mem( ipw ) )
418 CALL pslafchk( 'No', 'No', m, n, mem( ipa ), 1, 1,
419 $ desca, iaseed, anorm, fresid,
420 $ mem( ipw ) )
421*
422* Check for memory overwrite
423*
424 CALL pschekpad( ictxt, 'PSGETRRV', mp, nq,
425 $ mem( ipa-iprepad ), desca( lld_ ),
426 $ iprepad, ipostpad, padval )
427 CALL pschekpad( ictxt, 'PSGETRRV', lipiv, 1,
428 $ mem( ippiv-iprepad ), lipiv,
429 $ iprepad, ipostpad, padval )
430 CALL pschekpad( ictxt, 'PSGETRRV',
431 $ worksiz-ipostpad, 1,
432 $ mem( ipw-iprepad ),
433 $ worksiz-ipostpad, iprepad,
434 $ ipostpad, padval )
435*
436* Test residual and detect NaN result
437*
438 IF( ( fresid.LE.thresh ) .AND.
439 $ ( (fresid-fresid).EQ.0.0e+0 ) ) THEN
440 kpass = kpass + 1
441 passed = 'PASSED'
442 ELSE
443 kfail = kfail + 1
444 passed = 'FAILED'
445 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
446 $ WRITE( nout, fmt = 9986 ) fresid
447 END IF
448*
449 ELSE
450*
451* Don't perform the checking, only timing
452*
453 kpass = kpass + 1
454 fresid = fresid - fresid
455 passed = 'BYPASS'
456*
457 END IF
458*
459* Gather maximum of all CPU and WALL clock timings
460*
461 CALL slcombine( ictxt, 'all', '>', 'w', 1, 1,
462 $ WTIME )
463 CALL SLCOMBINE( ICTXT, 'all', '>', 'c', 1, 1,
464 $ CTIME )
465*
466* Print results
467*
468.EQ..AND..EQ. IF( MYROW0 MYCOL0 ) THEN
469*
470 MAXMN = MAX( M, N )
471 MINMN = MIN( M, N )
472*
473* M N^2 - 1/3 N^3 - 1/2 N^2 flops for LU
474* factorization when M >= N
475*
476 NOPS = DBLE( MAXMN )*( DBLE( MINMN )**2 ) -
477 $ (1.0D+0 / 3.0D+0)*( DBLE( MINMN )**3 ) -
478 $ (1.0D+0 / 2.0D+0)*( DBLE( MINMN )**2 )
479*
480* Calculate total megaflops -- factorization only,
481* -- for WALL and CPU time, and print output
482*
483* Print WALL time if machine supports it
484*
485.GT. IF( WTIME( 1 )0.0D+0 ) THEN
486 TMFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 )
487 ELSE
488 TMFLOPS = 0.0D+0
489 END IF
490*
491 WTIME( 2 ) = 0.0D+0
492.GE. IF( WTIME( 1 )0.0D+0 )
493 $ WRITE( NOUT, FMT = 9993 ) 'wall', M, N, NB,
494 $ NRHS, NBRHS, NPROW, NPCOL, WTIME( 1 ),
495 $ WTIME( 2 ), TMFLOPS, PASSED
496*
497* Print CPU time if machine supports it
498*
499.GT. IF( CTIME( 1 )0.0D+0 ) THEN
500 TMFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 )
501 ELSE
502 TMFLOPS = 0.0D+0
503 END IF
504*
505 CTIME( 2 ) = 0.0D+0
506.GE. IF( CTIME( 1 )0.0D+0 )
507 $ WRITE( NOUT, FMT = 9993 ) 'cpu ', M, N, NB,
508 $ NRHS, NBRHS, NPROW, NPCOL, CTIME( 1 ),
509 $ CTIME( 2 ), TMFLOPS, PASSED
510 END IF
511*
512 ELSE
513*
514* If M = N
515*
516 IF( EST ) THEN
517*
518* Calculate workspace required for PSGECON
519*
520 LWORK = MAX( 1, 2*NP ) + MAX( 1, 2*NQ ) +
521 $ MAX( 2, DESCA( NB_ )*
522 $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ),
523 $ NQ + DESCA( NB_ )*
524 $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) )
525 IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD
526 LIWORK = MAX( 1, NP )
527 LW2 = ICEIL( LIWORK*INTGSZ, REALSZ ) + IPOSTPAD
528*
529 IERR( 1 ) = 0
530.GT. IF( IPW2+LW2MEMSIZ ) THEN
531.EQ. IF( IAM0 )
532 $ WRITE( NOUT, FMT = 9996 )'cond est',
533 $ ( IPW2+LW2 )*REALSZ
534 IERR( 1 ) = 1
535 END IF
536*
537* Check all processes for an error
538*
539 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1,
540 $ -1, 0 )
541*
542.GT. IF( IERR( 1 )0 ) THEN
543.EQ. IF( IAM0 )
544 $ WRITE( NOUT, FMT = 9997 ) 'memory'
545 KSKIP = KSKIP + 1
546 GO TO 30
547 END IF
548*
549 IF( CHECK ) THEN
550 CALL PSFILLPAD( ICTXT, LWORK, 1,
551 $ MEM( IPW-IPREPAD ), LWORK,
552 $ IPREPAD, IPOSTPAD, PADVAL )
553 CALL PSFILLPAD( ICTXT, LW2-IPOSTPAD, 1,
554 $ MEM( IPW2-IPREPAD ),
555 $ LW2-IPOSTPAD, IPREPAD,
556 $ IPOSTPAD, PADVAL )
557 END IF
558*
559* Compute condition number of the matrix
560*
561 CALL PSGECON( '1', N, MEM( IPA ), 1, 1, DESCA,
562 $ ANORM1, RCOND, MEM( IPW ), LWORK,
563 $ MEM( IPW2 ), LIWORK, INFO )
564*
565 IF( CHECK ) THEN
566 CALL PSCHEKPAD( ICTXT, 'psgecon', NP, NQ,
567 $ MEM( IPA-IPREPAD ),
568 $ DESCA( LLD_ ), IPREPAD,
569 $ IPOSTPAD, PADVAL )
570 CALL PSCHEKPAD( ICTXT, 'psgecon', LWORK, 1,
571 $ MEM( IPW-IPREPAD ), LWORK,
572 $ IPREPAD, IPOSTPAD, PADVAL )
573 CALL PSCHEKPAD( ICTXT, 'psgecon',
574 $ LW2-IPOSTPAD, 1,
575 $ MEM( IPW2-IPREPAD ),
576 $ LW2-IPOSTPAD, IPREPAD,
577 $ IPOSTPAD, PADVAL )
578 END IF
579 END IF
580*
581* Loop over the different values for NRHS
582*
583 DO 20 HH = 1, NNR
584*
585 NRHS = NRVAL( HH )
586*
587 DO 10 KK = 1, NNBR
588*
589 NBRHS = NBRVAL( KK )
590*
591* Initialize Array Descriptor for rhs
592*
593 CALL DESCINIT( DESCB, N, NRHS, NB, NBRHS, 0, 0,
594 $ ICTXT, MAX( 1, NP )+IMIDPAD,
595 $ IERR( 1 ) )
596*
597* Check all processes for an error
598*
599 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1,
600 $ -1, 0 )
601*
602.LT. IF( IERR( 1 )0 ) THEN
603.EQ. IF( IAM0 )
604 $ WRITE( NOUT, FMT = 9997 ) 'descriptor'
605 KSKIP = KSKIP + 1
606 GO TO 10
607 END IF
608*
609* move IPW to allow room for RHS
610*
611 MYRHS = NUMROC( DESCB( N_ ), DESCB( NB_ ),
612 $ MYCOL, DESCB( CSRC_ ), NPCOL )
613 IPB = IPW
614*
615 IF( EST ) THEN
616 IPB0 = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD +
617 $ IPREPAD
618 IPFERR = IPB0 + DESCB( LLD_ )*MYRHS +
619 $ IPOSTPAD + IPREPAD
620 IPBERR = MYRHS + IPFERR + IPOSTPAD + IPREPAD
621 IPW = MYRHS + IPBERR + IPOSTPAD + IPREPAD
622 ELSE
623 IPW = IPB + DESCB( LLD_ )*MYRHS + IPOSTPAD +
624 $ IPREPAD
625 END IF
626*
627* Set worksiz: routines requiring most workspace
628* is PSLASCHK
629*
630 IF( CHECK ) THEN
631 LCM = ILCM( NPROW, NPCOL )
632 LCMQ = LCM / NPCOL
633 WORKSIZ = MAX( WORKSIZ-IPOSTPAD,
634 $ NQ * NBRHS + NP * NBRHS +
635 $ MAX( MAX( NQ*NB, 2*NBRHS ),
636 $ NBRHS * NUMROC( NUMROC(N,NB,0,0,NPCOL),NB,
637 $ 0,0,LCMQ ) ) )
638 WORKSIZ = IPOSTPAD + WORKSIZ
639 ELSE
640 WORKSIZ = IPOSTPAD
641 END IF
642*
643 IERR( 1 ) = 0
644.GT. IF( IPW+WORKSIZMEMSIZ ) THEN
645.EQ. IF( IAM0 )
646 $ WRITE( NOUT, FMT = 9996 )'solve',
647 $ ( IPW+WORKSIZ )*REALSZ
648 IERR( 1 ) = 1
649 END IF
650*
651* Check all processes for an error
652*
653 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1,
654 $ -1, 0 )
655*
656.GT. IF( IERR( 1 )0 ) THEN
657.EQ. IF( IAM0 )
658 $ WRITE( NOUT, FMT = 9997 ) 'memory'
659 KSKIP = KSKIP + 1
660 GO TO 10
661 END IF
662*
663* Generate RHS
664*
665 CALL PSMATGEN( ICTXT, 'no', 'no', DESCB( M_ ),
666 $ DESCB( N_ ), DESCB( MB_ ),
667 $ DESCB( NB_ ), MEM( IPB ),
668 $ DESCB( LLD_ ), DESCB( RSRC_ ),
669 $ DESCB( CSRC_ ), IBSEED, 0, NP, 0,
670 $ MYRHS, MYROW, MYCOL, NPROW,
671 $ NPCOL )
672*
673 IF( CHECK )
674 $ CALL PSFILLPAD( ICTXT, NP, MYRHS,
675 $ MEM( IPB-IPREPAD ),
676 $ DESCB( LLD_ ), IPREPAD,
677 $ IPOSTPAD, PADVAL )
678*
679 IF( EST ) THEN
680 CALL PSMATGEN( ICTXT, 'no', 'no',
681 $ DESCB( M_ ), DESCB( N_ ),
682 $ DESCB( MB_ ), DESCB( NB_ ),
683 $ MEM( IPB0 ), DESCB( LLD_ ),
684 $ DESCB( RSRC_ ),
685 $ DESCB( CSRC_ ), IBSEED, 0, NP,
686 $ 0, MYRHS, MYROW, MYCOL, NPROW,
687 $ NPCOL )
688 IF( CHECK ) THEN
689 CALL PSFILLPAD( ICTXT, NP, MYRHS,
690 $ MEM( IPB0-IPREPAD ),
691 $ DESCB( LLD_ ), IPREPAD,
692 $ IPOSTPAD, PADVAL )
693 CALL PSFILLPAD( ICTXT, 1, MYRHS,
694 $ MEM( IPFERR-IPREPAD ), 1,
695 $ IPREPAD, IPOSTPAD,
696 $ PADVAL )
697 CALL PSFILLPAD( ICTXT, 1, MYRHS,
698 $ MEM( IPBERR-IPREPAD ), 1,
699 $ IPREPAD, IPOSTPAD,
700 $ PADVAL )
701 END IF
702 END IF
703*
704 CALL BLACS_BARRIER( ICTXT, 'all' )
705 CALL SLTIMER( 2 )
706*
707* Solve linear sytem via LU factorization
708*
709 CALL PSGETRS( 'no', N, NRHS, MEM( IPA ), 1, 1,
710 $ DESCA, MEM( IPPIV ), MEM( IPB ),
711 $ 1, 1, DESCB, INFO )
712*
713 CALL SLTIMER( 2 )
714*
715 IF( CHECK ) THEN
716*
717* check for memory overwrite
718*
719 CALL PSCHEKPAD( ICTXT, 'psgetrs', NP, NQ,
720 $ MEM( IPA-IPREPAD ),
721 $ DESCA( LLD_ ), IPREPAD,
722 $ IPOSTPAD, PADVAL )
723 CALL PSCHEKPAD( ICTXT, 'psgetrs', LIPIV, 1,
724 $ MEM( IPPIV-IPREPAD ), LIPIV,
725 $ IPREPAD, IPOSTPAD, PADVAL )
726 CALL PSCHEKPAD( ICTXT, 'psgetrs', NP,
727 $ MYRHS, MEM( IPB-IPREPAD ),
728 $ DESCB( LLD_ ), IPREPAD,
729 $ IPOSTPAD, PADVAL )
730*
731 CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD,
732 $ 1, MEM( IPW-IPREPAD ),
733 $ WORKSIZ-IPOSTPAD, IPREPAD,
734 $ IPOSTPAD, PADVAL )
735*
736* check the solution to rhs
737*
738 CALL PSLASCHK( 'no', 'n', N, NRHS,
739 $ MEM( IPB ), 1, 1, DESCB,
740 $ IASEED, 1, 1, DESCA, IBSEED,
741 $ ANORM, SRESID, MEM( IPW ) )
742*
743.EQ..AND..GT. IF( IAM0 SRESIDTHRESH )
744 $ WRITE( NOUT, FMT = 9985 ) SRESID
745*
746* check for memory overwrite
747*
748 CALL PSCHEKPAD( ICTXT, 'pslaschk', NP,
749 $ MYRHS, MEM( IPB-IPREPAD ),
750 $ DESCB( LLD_ ), IPREPAD,
751 $ IPOSTPAD, PADVAL )
752 CALL PSCHEKPAD( ICTXT, 'pslaschk',
753 $ WORKSIZ-IPOSTPAD, 1,
754 $ MEM( IPW-IPREPAD ),
755 $ WORKSIZ-IPOSTPAD,
756 $ IPREPAD, IPOSTPAD, PADVAL )
757*
758* The second test is a NaN trap
759*
760.LE..AND. IF( SRESIDTHRESH
761.EQ. $ ( SRESID-SRESID )0.0E+0 ) THEN
762 KPASS = KPASS + 1
763 PASSED = 'passed'
764 ELSE
765 KFAIL = KFAIL + 1
766 PASSED = 'failed'
767 END IF
768 ELSE
769 KPASS = KPASS + 1
770 SRESID = SRESID - SRESID
771 PASSED = 'bypass'
772 END IF
773*
774 IF( EST ) THEN
775*
776* Calculate workspace required for PSGERFS
777*
778 LWORK = MAX( 1, 3*NP )
779 IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD
780 LIWORK = MAX( 1, NP )
781 LW2 = ICEIL( LIWORK*INTGSZ, REALSZ ) +
782 $ IPOSTPAD
783*
784 IERR( 1 ) = 0
785.GT. IF( IPW2+LW2MEMSIZ ) THEN
786.EQ. IF( IAM0 )
787 $ WRITE( NOUT, FMT = 9996 )
788 $ 'iter ref', ( IPW2+LW2 )*REALSZ
789 IERR( 1 ) = 1
790 END IF
791*
792* Check all processes for an error
793*
794 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1,
795 $ IERR, 1, -1, 0 )
796*
797.GT. IF( IERR( 1 )0 ) THEN
798.EQ. IF( IAM0 )
799 $ WRITE( NOUT, FMT = 9997 )
800 $ 'memory'
801 KSKIP = KSKIP + 1
802 GO TO 10
803 END IF
804*
805 IF( CHECK ) THEN
806 CALL PSFILLPAD( ICTXT, LWORK, 1,
807 $ MEM( IPW-IPREPAD ),
808 $ LWORK, IPREPAD, IPOSTPAD,
809 $ PADVAL )
810 CALL PSFILLPAD( ICTXT, LW2-IPOSTPAD, 1,
811 $ MEM( IPW2-IPREPAD ),
812 $ LW2-IPOSTPAD, IPREPAD,
813 $ IPOSTPAD, PADVAL )
814 END IF
815*
816* Use iterative refinement to improve the
817* computed solution
818*
819 CALL PSGERFS( 'no', N, NRHS, MEM( IPA0 ), 1,
820 $ 1, DESCA, MEM( IPA ), 1, 1,
821 $ DESCA, MEM( IPPIV ),
822 $ MEM( IPB0 ), 1, 1, DESCB,
823 $ MEM( IPB ), 1, 1, DESCB,
824 $ MEM( IPFERR ), MEM( IPBERR ),
825 $ MEM( IPW ), LWORK, MEM( IPW2 ),
826 $ LIWORK, INFO )
827*
828 IF( CHECK ) THEN
829 CALL PSCHEKPAD( ICTXT, 'psgerfs', NP,
830 $ NQ, MEM( IPA0-IPREPAD ),
831 $ DESCA( LLD_ ), IPREPAD,
832 $ IPOSTPAD, PADVAL )
833 CALL PSCHEKPAD( ICTXT, 'psgerfs', NP,
834 $ NQ, MEM( IPA-IPREPAD ),
835 $ DESCA( LLD_ ), IPREPAD,
836 $ IPOSTPAD, PADVAL )
837 CALL PSCHEKPAD( ICTXT, 'psgerfs', LIPIV,
838 $ 1, MEM( IPPIV-IPREPAD ),
839 $ LIPIV, IPREPAD,
840 $ IPOSTPAD, PADVAL )
841 CALL PSCHEKPAD( ICTXT, 'psgerfs', NP,
842 $ MYRHS, MEM( IPB-IPREPAD ),
843 $ DESCB( LLD_ ), IPREPAD,
844 $ IPOSTPAD, PADVAL )
845 CALL PSCHEKPAD( ICTXT, 'psgerfs', NP,
846 $ MYRHS,
847 $ MEM( IPB0-IPREPAD ),
848 $ DESCB( LLD_ ), IPREPAD,
849 $ IPOSTPAD, PADVAL )
850 CALL PSCHEKPAD( ICTXT, 'psgerfs', 1,
851 $ MYRHS,
852 $ MEM( IPFERR-IPREPAD ), 1,
853 $ IPREPAD, IPOSTPAD,
854 $ PADVAL )
855 CALL PSCHEKPAD( ICTXT, 'psgerfs', 1,
856 $ MYRHS,
857 $ MEM( IPBERR-IPREPAD ), 1,
858 $ IPREPAD, IPOSTPAD,
859 $ PADVAL )
860 CALL PSCHEKPAD( ICTXT, 'psgerfs', LWORK,
861 $ 1, MEM( IPW-IPREPAD ),
862 $ LWORK, IPREPAD, IPOSTPAD,
863 $ PADVAL )
864 CALL PSCHEKPAD( ICTXT, 'psgerfs',
865 $ LW2-IPOSTPAD, 1,
866 $ MEM( IPW2-IPREPAD ),
867 $ LW2-IPOSTPAD, IPREPAD,
868 $ IPOSTPAD, PADVAL )
869*
870 CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD,
871 $ 1, MEM( IPW-IPREPAD ),
872 $ WORKSIZ-IPOSTPAD, IPREPAD,
873 $ IPOSTPAD, PADVAL )
874*
875* check the solution to rhs
876*
877 CALL PSLASCHK( 'no', 'n', N, NRHS,
878 $ MEM( IPB ), 1, 1, DESCB,
879 $ IASEED, 1, 1, DESCA,
880 $ IBSEED, ANORM, SRESID2,
881 $ MEM( IPW ) )
882*
883.EQ..AND..GT. IF( IAM0 SRESID2THRESH )
884 $ WRITE( NOUT, FMT = 9985 ) SRESID2
885*
886* check for memory overwrite
887*
888 CALL PSCHEKPAD( ICTXT, 'pslaschk', NP,
889 $ MYRHS, MEM( IPB-IPREPAD ),
890 $ DESCB( LLD_ ), IPREPAD,
891 $ IPOSTPAD, PADVAL )
892 CALL PSCHEKPAD( ICTXT, 'pslaschk',
893 $ WORKSIZ-IPOSTPAD, 1,
894 $ MEM( IPW-IPREPAD ),
895 $ WORKSIZ-IPOSTPAD, IPREPAD,
896 $ IPOSTPAD, PADVAL )
897 END IF
898 END IF
899*
900* Gather max. of all CPU and WALL clock timings
901*
902 CALL SLCOMBINE( ICTXT, 'all', '>', 'w', 2, 1,
903 $ WTIME )
904 CALL SLCOMBINE( ICTXT, 'all', '>', 'c', 2, 1,
905 $ CTIME )
906*
907* Print results
908*
909.EQ..AND..EQ. IF( MYROW0 MYCOL0 ) THEN
910*
911* 2/3 N^3 - 1/2 N^2 flops for LU factorization
912*
913 NOPS = (2.0D+0/3.0D+0)*( DBLE(N)**3 ) -
914 $ (1.0D+0/2.0D+0)*( DBLE(N)**2 )
915*
916* nrhs * 2 N^2 flops for LU solve.
917*
918 NOPS = NOPS + 2.0D+0*(DBLE(N)**2)*DBLE(NRHS)
919*
920* Calculate total megaflops -- factorization
921* and solve -- for WALL and CPU time, and print
922* output
923*
924* Print WALL time if machine supports it
925*
926.GT. IF( WTIME( 1 ) + WTIME( 2 ) 0.0D+0 )
927 $ THEN
928 TMFLOPS = NOPS /
929 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
930 ELSE
931 TMFLOPS = 0.0D+0
932 END IF
933*
934* Print WALL time if supported
935*
936.GE. IF( WTIME( 2 )0.0D+0 )
937 $ WRITE( NOUT, FMT = 9993 ) 'wall', M, N,
938 $ NB, NRHS, NBRHS, NPROW, NPCOL,
939 $ WTIME( 1 ), WTIME( 2 ), TMFLOPS,
940 $ PASSED
941*
942* Print CPU time if supported
943*
944.GT. IF( CTIME( 1 )+CTIME( 2 )0.0D+0 )
945 $ THEN
946 TMFLOPS = NOPS /
947 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
948 ELSE
949 TMFLOPS = 0.0D+0
950 END IF
951*
952.GE. IF( CTIME( 2 )0.0D+0 )
953 $ WRITE( NOUT, FMT = 9993 ) 'cpu ', M, N,
954 $ NB, NRHS, NBRHS, NPROW, NPCOL,
955 $ CTIME( 1 ), CTIME( 2 ), TMFLOPS,
956 $ PASSED
957 END IF
958 10 CONTINUE
959 20 CONTINUE
960*
961.AND..GT. IF( CHECK( SRESIDTHRESH ) ) THEN
962*
963* Compute fresid = ||A - P*L*U|| / (||A|| * N * eps)
964*
965 CALL PSGETRRV( M, N, MEM( IPA ), 1, 1, DESCA,
966 $ MEM( IPPIV ), MEM( IPW ) )
967 CALL PSLAFCHK( 'no', 'no', M, N, MEM( IPA ), 1,
968 $ 1, DESCA, IASEED, ANORM, FRESID,
969 $ MEM( IPW ) )
970*
971* Check for memory overwrite
972*
973 CALL PSCHEKPAD( ICTXT, 'psgetrrv', NP, NQ,
974 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ),
975 $ IPREPAD, IPOSTPAD, PADVAL )
976 CALL PSCHEKPAD( ICTXT, 'psgetrrv', LIPIV,
977 $ 1, MEM( IPPIV-IPREPAD ), LIPIV,
978 $ IPREPAD, IPOSTPAD, PADVAL )
979 CALL PSCHEKPAD( ICTXT, 'psgetrrv',
980 $ WORKSIZ-IPOSTPAD, 1,
981 $ MEM( IPW-IPREPAD ),
982 $ WORKSIZ-IPOSTPAD, IPREPAD,
983 $ IPOSTPAD, PADVAL )
984*
985.EQ..AND..EQ. IF( MYROW0 MYCOL0 )
986 $ WRITE( NOUT, FMT = 9986 ) FRESID
987 END IF
988 END IF
989 30 CONTINUE
990 40 CONTINUE
991 CALL BLACS_GRIDEXIT( ICTXT )
992 50 CONTINUE
993*
994* Print ending messages and close output file
995*
996 60 CONTINUE
997.EQ. IF( IAM0 ) THEN
998 KTESTS = KPASS + KFAIL + KSKIP
999 WRITE( NOUT, FMT = * )
1000 WRITE( NOUT, FMT = 9992 ) KTESTS
1001 IF( CHECK ) THEN
1002 WRITE( NOUT, FMT = 9991 ) KPASS
1003 WRITE( NOUT, FMT = 9989 ) KFAIL
1004 ELSE
1005 WRITE( NOUT, FMT = 9990 ) KPASS
1006 END IF
1007 WRITE( NOUT, FMT = 9988 ) KSKIP
1008 WRITE( NOUT, FMT = * )
1009 WRITE( NOUT, FMT = * )
1010 WRITE( NOUT, FMT = 9987 )
1011.NE..AND..NE. IF( NOUT6 NOUT0 )
1012 $ CLOSE( NOUT )
1013 END IF
1014*
1015 CALL BLACS_EXIT( 0 )
1016*
1017 9999 FORMAT( 'illegal ', A6, ': ', A5, ' = ', i3,
1018 $ '; It should be at least 1' )
1019 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
1020 $ i4 )
1021 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
1022 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
1023 $ i11 )
1024 9995 FORMAT( 'TIME M N NB NRHS NBRHS P Q LU Time ',
1025 $ 'Sol Time MFLOPS CHECK' )
1026 9994 FORMAT( '---- ----- ----- --- ---- ----- ---- ---- -------- ',
1027 $ '-------- -------- ------' )
1028 9993 FORMAT( a4, 1x, i5, 1x, i5, 1x, i3, 1x, i5, 1x, i4, 1x, i4, 1x,
1029 $ i4, 1x, f8.2, 1x, f8.2, 1x, f8.2, 1x, a6 )
1030 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
1031 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
1032 9990 FORMAT( i5, ' tests completed without checking.' )
1033 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
1034 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
1035 9987 FORMAT( 'END OF TESTS.' )
1036 9986 FORMAT( '||A - P*L*U|| / (||A|| * N * eps) = ', g25.7 )
1037 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
1038*
1039 stop
1040*
1041* End of PSLUDRIVER
1042*
1043 END
subroutine pslafchk(aform, diag, m, n, a, ia, ja, desca, iaseed, anorm, fresid, work)
Definition pslafchk.f:3
subroutine psmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition psmatgen.f:4
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function ilcm(m, n)
Definition ilcm.f:2
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine blacs_gridinit(cntxt, c, nprow, npcol)
Definition mpi.f:745
subroutine psgetrs(trans, n, nrhs, a, ia, ja, desca, ipiv, b, ib, jb, descb, info)
Definition mpi.f:1159
real function pslange(norm, m, n, a, ia, ja, desca, work)
Definition mpi.f:1299
subroutine psgetrf(m, n, a, ia, ja, desca, ipiv, info)
Definition mpi.f:881
subroutine psgecon(norm, n, a, ia, ja, desca, anorm, rcond, work, lwork, iwork, liwork, info)
Definition mpi.f:1352
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition mpi.f:777
subroutine blacs_gridexit(cntxt)
Definition mpi.f:762
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine pschekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pschekpad.f:3
subroutine psfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition psfillpad.f:2
subroutine psgerfs(trans, n, nrhs, a, ia, ja, desca, af, iaf, jaf, descaf, ipiv, b, ib, jb, descb, x, ix, jx, descx, ferr, berr, work, lwork, iwork, liwork, info)
Definition psgerfs.f:5
subroutine psgetrrv(m, n, a, ia, ja, desca, ipiv, work)
Definition psgetrrv.f:2
subroutine pslaschk(symm, diag, n, nrhs, x, ix, jx, descx, iaseed, ia, ja, desca, ibseed, anorm, resid, work)
Definition pslaschk.f:4
program psludriver
Definition psludriver.f:1
subroutine psluinfo(summry, nout, nmat, mval, nval, ldnval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, est, work, iam, nprocs)
Definition psluinfo.f:5
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267