OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pdludriver.f
Go to the documentation of this file.
1 PROGRAM pdludriver
2*
3* -- ScaLAPACK testing driver (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 1, 1997
7*
8* Purpose
9* ========
10*
11* PDLUDRIVER is the main test program for the DOUBLE PRECISION
12* SCALAPACK LU routines. This test driver performs an LU factorization
13* and solve. If the input matrix is non-square, only the factorization
14* is performed. Condition estimation and iterative refinement are
15* optionally performed.
16*
17* The program must be driven by a short data file. An annotated
18* example of a data file can be obtained by deleting the first 3
19* characters from the following 18 lines:
20* 'SCALAPACK, Version 2.0, LU factorization input file'
21* 'Intel iPSC/860 hypercube, gamma model.'
22* 'LU.out' output file name (if any)
23* 6 device out
24* 1 number of problems sizes
25* 31 201 values of M
26* 31 201 values of N
27* 1 number of NB's
28* 2 values of NB
29* 1 number of NRHS's
30* 1 values of NRHS
31* 1 number of NBRHS's
32* 1 values of NBRHS
33* 1 number of process grids (ordered pairs of P & Q)
34* 2 1 4 2 3 8 values of P
35* 2 4 1 3 2 1 values of Q
36* 1.0 threshold
37* T (T or F) Test Cond. Est. and Iter. Ref. Routines
38*
39*
40* Internal Parameters
41* ===================
42*
43* TOTMEM INTEGER, default = 2000000
44* TOTMEM is a machine-specific parameter indicating the
45* maximum amount of available memory in bytes.
46* The user should customize TOTMEM to his platform. Remember
47* to leave room in memory for the operating system, the BLACS
48* buffer, etc. For example, on a system with 8 MB of memory
49* per process (e.g., one processor on an Intel iPSC/860), the
50* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
51* code, BLACS buffer, etc). However, for PVM, we usually set
52* TOTMEM = 2000000. Some experimenting with the maximum value
53* of TOTMEM may be required.
54*
55* INTGSZ INTEGER, default = 4 bytes.
56* DBLESZ INTEGER, default = 8 bytes.
57* INTGSZ and DBLESZ indicate the length in bytes on the
58* given platform for an integer and a double precision real.
59* MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ )
60*
61* All arrays used by SCALAPACK routines are allocated from
62* this array and referenced by pointers. The integer IPA,
63* for example, is a pointer to the starting element of MEM for
64* the matrix A.
65*
66* =====================================================================
67*
68* .. Parameters ..
69 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
70 $ lld_, mb_, m_, nb_, n_, rsrc_
71 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
72 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
73 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
74 INTEGER dblesz, intgsz, memsiz, ntests, totmem
75 DOUBLE PRECISION padval, zero
76 parameter( dblesz = 8, intgsz = 4, totmem = 4000000,
77 $ memsiz = totmem / dblesz, ntests = 20,
78 $ padval = -9923.0d+0, zero = 0.0d+0 )
79* ..
80* .. Local Scalars ..
81 LOGICAL check, est
82 CHARACTER*6 passed
83 CHARACTER*80 outfile
84 INTEGER hh, i, iam, iaseed, ibseed, ictxt, imidpad,
85 $ info, ipa, ipa0, ipb, ipb0, ipberr, ipferr,
86 $ ipostpad, ippiv, iprepad, ipw, ipw2, j, k,
87 $ kfail, kk, kpass, kskip, ktests, lcm, lcmq,
88 $ lipiv, liwork, lwork, lw2, m, maxmn,
89 $ minmn, mp, mycol, myrhs, myrow, n, nb, nbrhs,
90 $ ngrids, nmat, nnb, nnbr, nnr, nout, np, npcol,
91 $ nprocs, nprow, nq, nrhs, worksiz
92 REAL thresh
93 DOUBLE PRECISION anorm, anorm1, fresid, nops, rcond,
94 $ sresid, sresid2, tmflops
95* ..
96* .. Local Arrays ..
97 INTEGER desca( dlen_ ), descb( dlen_ ), ierr( 1 ),
98 $ mval( ntests ), nbrval( ntests ),
99 $ nbval( ntests ), nrval( ntests ),
100 $ nval( ntests ), pval( ntests ),
101 $ qval( ntests )
102 DOUBLE PRECISION ctime( 2 ), mem( memsiz ), wtime( 2 )
103* ..
104* .. External Subroutines ..
105 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
107 $ blacs_pinfo, descinit, igsum2d, pdchekpad,
112* ..
113* .. External Functions ..
114 INTEGER iceil, ilcm, numroc
115 DOUBLE PRECISION pdlange
116 EXTERNAL iceil, ilcm, numroc, pdlange
117* ..
118* .. Intrinsic Functions ..
119 INTRINSIC dble, max, min
120* ..
121* .. Data Statements ..
122 DATA kfail, kpass, kskip, ktests / 4*0 /
123* ..
124* .. Executable Statements ..
125*
126* Get starting information
127*
128 CALL blacs_pinfo( iam, nprocs )
129 iaseed = 100
130 ibseed = 200
131 CALL pdluinfo( outfile, nout, nmat, mval, nval, ntests, nnb,
132 $ nbval, ntests, nnr, nrval, ntests, nnbr, nbrval,
133 $ ntests, ngrids, pval, ntests, qval, ntests, thresh,
134 $ est, mem, iam, nprocs )
135 check = ( thresh.GE.0.0e+0 )
136*
137* Print headings
138*
139 IF( iam.EQ.0 ) THEN
140 WRITE( nout, fmt = * )
141 WRITE( nout, fmt = 9995 )
142 WRITE( nout, fmt = 9994 )
143 WRITE( nout, fmt = * )
144 END IF
145*
146* Loop over different process grids
147*
148 DO 50 i = 1, ngrids
149*
150 nprow = pval( i )
151 npcol = qval( i )
152*
153* Make sure grid information is correct
154*
155 ierr( 1 ) = 0
156 IF( nprow.LT.1 ) THEN
157 IF( iam.EQ.0 )
158 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
159 ierr( 1 ) = 1
160 ELSE IF( npcol.LT.1 ) THEN
161 IF( iam.EQ.0 )
162 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
163 ierr( 1 ) = 1
164 ELSE IF( nprow*npcol.GT.nprocs ) THEN
165 IF( iam.EQ.0 )
166 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
167 ierr( 1 ) = 1
168 END IF
169*
170 IF( ierr( 1 ).GT.0 ) THEN
171 IF( iam.EQ.0 )
172 $ WRITE( nout, fmt = 9997 ) 'grid'
173 kskip = kskip + 1
174 GO TO 50
175 END IF
176*
177* Define process grid
178*
179 CALL blacs_get( -1, 0, ictxt )
180 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
181 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
182*
183* Go to bottom of process grid loop if this case doesn't use my
184* process
185*
186 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
187 $ GO TO 50
188*
189 DO 40 j = 1, nmat
190*
191 m = mval( j )
192 n = nval( j )
193*
194* Make sure matrix information is correct
195*
196 ierr( 1 ) = 0
197 IF( m.LT.1 ) THEN
198 IF( iam.EQ.0 )
199 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
200 ierr( 1 ) = 1
201 ELSE IF( n.LT.1 ) THEN
202 IF( iam.EQ.0 )
203 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
204 ierr( 1 ) = 1
205 END IF
206*
207* Check all processes for an error
208*
209 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
210*
211 IF( ierr( 1 ).GT.0 ) THEN
212 IF( iam.EQ.0 )
213 $ WRITE( nout, fmt = 9997 ) 'matrix'
214 kskip = kskip + 1
215 GO TO 40
216 END IF
217*
218 DO 30 k = 1, nnb
219*
220 nb = nbval( k )
221*
222* Make sure nb is legal
223*
224 ierr( 1 ) = 0
225 IF( nb.LT.1 ) THEN
226 ierr( 1 ) = 1
227 IF( iam.EQ.0 )
228 $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
229 END IF
230*
231* Check all processes for an error
232*
233 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
234*
235 IF( ierr( 1 ).GT.0 ) THEN
236 IF( iam.EQ.0 )
237 $ WRITE( nout, fmt = 9997 ) 'NB'
238 kskip = kskip + 1
239 GO TO 30
240 END IF
241*
242* Padding constants
243*
244 mp = numroc( m, nb, myrow, 0, nprow )
245 np = numroc( n, nb, myrow, 0, nprow )
246 nq = numroc( n, nb, mycol, 0, npcol )
247 IF( check ) THEN
248 iprepad = max( nb, mp )
249 imidpad = nb
250 ipostpad = max( nb, nq )
251 ELSE
252 iprepad = 0
253 imidpad = 0
254 ipostpad = 0
255 END IF
256*
257* Initialize the array descriptor for the matrix A
258*
259 CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
260 $ max( 1, mp )+imidpad, ierr( 1 ) )
261*
262* Check all processes for an error
263*
264 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
265*
266 IF( ierr( 1 ).LT.0 ) THEN
267 IF( iam.EQ.0 )
268 $ WRITE( nout, fmt = 9997 ) 'descriptor'
269 kskip = kskip + 1
270 GO TO 30
271 END IF
272*
273* Assign pointers into MEM for SCALAPACK arrays, A is
274* allocated starting at position MEM( IPREPAD+1 )
275*
276 ipa = iprepad+1
277 IF( est .AND. m.EQ.n ) THEN
278 ipa0 = ipa + desca( lld_ )*nq + ipostpad + iprepad
279 ippiv = ipa0 + desca( lld_ )*nq + ipostpad + iprepad
280 ELSE
281 ippiv = ipa + desca( lld_ )*nq + ipostpad + iprepad
282 END IF
283 lipiv = iceil( intgsz*( mp+nb ), dblesz )
284 ipw = ippiv + lipiv + ipostpad + iprepad
285*
286 IF( check ) THEN
287*
288* Calculate the amount of workspace required by the
289* checking routines PDLANGE, PDGETRRV, and
290* PDLAFCHK
291*
292 worksiz = max( 2, nq )
293*
294 worksiz = max( worksiz, mp*desca( nb_ )+
295 $ nq*desca( mb_ ) )
296*
297 worksiz = max( worksiz, mp * desca( nb_ ) )
298*
299 worksiz = worksiz + ipostpad
300*
301 ELSE
302*
303 worksiz = ipostpad
304*
305 END IF
306*
307* Check for adequate memory for problem size
308*
309 ierr( 1 ) = 0
310 IF( ipw+worksiz.GT.memsiz ) THEN
311 IF( iam.EQ.0 )
312 $ WRITE( nout, fmt = 9996 ) 'factorization',
313 $ ( ipw+worksiz )*dblesz
314 ierr( 1 ) = 1
315 END IF
316*
317* Check all processes for an error
318*
319 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
320*
321 IF( ierr( 1 ).GT.0 ) THEN
322 IF( iam.EQ.0 )
323 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
324 kskip = kskip + 1
325 GO TO 30
326 END IF
327*
328* Generate matrix A of Ax = b
329*
330 CALL pdmatgen( ictxt, 'No transpose', 'No transpose',
331 $ desca( m_ ), desca( n_ ), desca( mb_ ),
332 $ desca( nb_ ), mem( ipa ), desca( lld_ ),
333 $ desca( rsrc_ ), desca( csrc_ ), iaseed, 0,
334 $ mp, 0, nq, myrow, mycol, nprow, npcol )
335*
336* Calculate inf-norm of A for residual error-checking
337*
338 IF( check ) THEN
339 CALL pdfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
340 $ desca( lld_ ), iprepad, ipostpad,
341 $ padval )
342 CALL pdfillpad( ictxt, lipiv, 1, mem( ippiv-iprepad ),
343 $ lipiv, iprepad, ipostpad, padval )
344 CALL pdfillpad( ictxt, worksiz-ipostpad, 1,
345 $ mem( ipw-iprepad ), worksiz-ipostpad,
346 $ iprepad, ipostpad, padval )
347 anorm = pdlange( 'I', m, n, mem( ipa ), 1, 1, desca,
348 $ mem( ipw ) )
349 anorm1 = pdlange( '1', M, N, MEM( IPA ), 1, 1, DESCA,
350 $ MEM( IPW ) )
351 CALL PDCHEKPAD( ICTXT, 'pdlange', MP, NQ,
352 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ),
353 $ IPREPAD, IPOSTPAD, PADVAL )
354 CALL PDCHEKPAD( ICTXT, 'pdlange', WORKSIZ-IPOSTPAD,
355 $ 1, MEM( IPW-IPREPAD ),
356 $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD,
357 $ PADVAL )
358 END IF
359*
360.AND..EQ. IF( EST MN ) THEN
361 CALL PDMATGEN( ICTXT, 'no transpose', 'no transpose',
362 $ DESCA( M_ ), DESCA( N_ ), DESCA( MB_ ),
363 $ DESCA( NB_ ), MEM( IPA0 ),
364 $ DESCA( LLD_ ), DESCA( RSRC_ ),
365 $ DESCA( CSRC_ ), IASEED, 0, MP, 0, NQ,
366 $ MYROW, MYCOL, NPROW, NPCOL )
367 IF( CHECK )
368 $ CALL PDFILLPAD( ICTXT, MP, NQ, MEM( IPA0-IPREPAD ),
369 $ DESCA( LLD_ ), IPREPAD, IPOSTPAD,
370 $ PADVAL )
371 END IF
372*
373 CALL SLBOOT()
374 CALL BLACS_BARRIER( ICTXT, 'all' )
375 CALL SLTIMER( 1 )
376*
377* Perform LU factorization
378*
379 CALL PDGETRF( M, N, MEM( IPA ), 1, 1, DESCA,
380 $ MEM( IPPIV ), INFO )
381*
382 CALL SLTIMER( 1 )
383*
384.NE. IF( INFO0 ) THEN
385.EQ. IF( IAM0 )
386 $ WRITE( NOUT, FMT = * ) 'pdgetrf info=', INFO
387 KFAIL = KFAIL + 1
388 RCOND = ZERO
389 GO TO 30
390 END IF
391*
392 IF( CHECK ) THEN
393*
394* Check for memory overwrite in LU factorization
395*
396 CALL PDCHEKPAD( ICTXT, 'pdgetrf', MP, NQ,
397 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ),
398 $ IPREPAD, IPOSTPAD, PADVAL )
399 CALL PDCHEKPAD( ICTXT, 'pdgetrf', LIPIV, 1,
400 $ MEM( IPPIV-IPREPAD ), LIPIV, IPREPAD,
401 $ IPOSTPAD, PADVAL )
402 END IF
403*
404.NE. IF( MN ) THEN
405*
406* For non-square matrices, factorization only
407*
408 NRHS = 0
409 NBRHS = 0
410*
411 IF( CHECK ) THEN
412*
413* Compute FRESID = ||A - P*L*U|| / (||A|| * N * eps)
414*
415 CALL PDGETRRV( M, N, MEM( IPA ), 1, 1, DESCA,
416 $ MEM( IPPIV ), MEM( IPW ) )
417 CALL PDLAFCHK( 'no', 'no', M, N, MEM( IPA ), 1, 1,
418 $ DESCA, IASEED, ANORM, FRESID,
419 $ MEM( IPW ) )
420*
421* Check for memory overwrite
422*
423 CALL PDCHEKPAD( ICTXT, 'pdgetrrv', MP, NQ,
424 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ),
425 $ IPREPAD, IPOSTPAD, PADVAL )
426 CALL PDCHEKPAD( ICTXT, 'pdgetrrv', LIPIV, 1,
427 $ MEM( IPPIV-IPREPAD ), LIPIV,
428 $ IPREPAD, IPOSTPAD, PADVAL )
429 CALL PDCHEKPAD( ICTXT, 'pdgetrrv',
430 $ WORKSIZ-IPOSTPAD, 1,
431 $ MEM( IPW-IPREPAD ),
432 $ WORKSIZ-IPOSTPAD, IPREPAD,
433 $ IPOSTPAD, PADVAL )
434*
435* Test residual and detect NaN result
436*
437.LE..AND. IF( ( FRESIDTHRESH )
438.EQ. $ ( (FRESID-FRESID)0.0D+0 ) ) THEN
439 KPASS = KPASS + 1
440 PASSED = 'passed'
441 ELSE
442 KFAIL = KFAIL + 1
443 PASSED = 'failed'
444.EQ..AND..EQ. IF( MYROW0 MYCOL0 )
445 $ WRITE( NOUT, FMT = 9986 ) FRESID
446 END IF
447*
448 ELSE
449*
450* Don't perform the checking, only timing
451*
452 KPASS = KPASS + 1
453 FRESID = FRESID - FRESID
454 PASSED = 'bypass'
455*
456 END IF
457*
458* Gather maximum of all CPU and WALL clock timings
459*
460 CALL SLCOMBINE( ICTXT, 'all', '>', 'w', 1, 1,
461 $ WTIME )
462 CALL SLCOMBINE( ICTXT, 'all', '>', 'c', 1, 1,
463 $ CTIME )
464*
465* Print results
466*
467.EQ..AND..EQ. IF( MYROW0 MYCOL0 ) THEN
468*
469 MAXMN = MAX( M, N )
470 MINMN = MIN( M, N )
471*
472* M N^2 - 1/3 N^3 - 1/2 N^2 flops for LU
473* factorization when M >= N
474*
475 NOPS = DBLE( MAXMN )*( DBLE( MINMN )**2 ) -
476 $ (1.0D+0 / 3.0D+0)*( DBLE( MINMN )**3 ) -
477 $ (1.0D+0 / 2.0D+0)*( DBLE( MINMN )**2 )
478*
479* Calculate total megaflops -- factorization only,
480* -- for WALL and CPU time, and print output
481*
482* Print WALL time if machine supports it
483*
484.GT. IF( WTIME( 1 )0.0D+0 ) THEN
485 TMFLOPS = NOPS / ( WTIME( 1 ) * 1.0D+6 )
486 ELSE
487 TMFLOPS = 0.0D+0
488 END IF
489*
490 WTIME( 2 ) = 0.0D+0
491.GE. IF( WTIME( 1 )0.0D+0 )
492 $ WRITE( NOUT, FMT = 9993 ) 'wall', M, N, NB,
493 $ NRHS, NBRHS, NPROW, NPCOL, WTIME( 1 ),
494 $ WTIME( 2 ), TMFLOPS, PASSED
495*
496* Print CPU time if machine supports it
497*
498.GT. IF( CTIME( 1 )0.0D+0 ) THEN
499 TMFLOPS = NOPS / ( CTIME( 1 ) * 1.0D+6 )
500 ELSE
501 TMFLOPS = 0.0D+0
502 END IF
503*
504 CTIME( 2 ) = 0.0D+0
505.GE. IF( CTIME( 1 )0.0D+0 )
506 $ WRITE( NOUT, FMT = 9993 ) 'cpu ', M, N, NB,
507 $ NRHS, NBRHS, NPROW, NPCOL, CTIME( 1 ),
508 $ CTIME( 2 ), TMFLOPS, PASSED
509 END IF
510*
511 ELSE
512*
513* If M = N
514*
515 IF( EST ) THEN
516*
517* Calculate workspace required for PDGECON
518*
519 LWORK = MAX( 1, 2*NP ) + MAX( 1, 2*NQ ) +
520 $ MAX( 2, DESCA( NB_ )*
521 $ MAX( 1, ICEIL( NPROW-1, NPCOL ) ),
522 $ NQ + DESCA( NB_ )*
523 $ MAX( 1, ICEIL( NPCOL-1, NPROW ) ) )
524 IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD
525 LIWORK = MAX( 1, NP )
526 LW2 = ICEIL( LIWORK*INTGSZ, DBLESZ ) + IPOSTPAD
527*
528 IERR( 1 ) = 0
529.GT. IF( IPW2+LW2MEMSIZ ) THEN
530.EQ. IF( IAM0 )
531 $ WRITE( NOUT, FMT = 9996 )'cond est',
532 $ ( IPW2+LW2 )*DBLESZ
533 IERR( 1 ) = 1
534 END IF
535*
536* Check all processes for an error
537*
538 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1,
539 $ -1, 0 )
540*
541.GT. IF( IERR( 1 )0 ) THEN
542.EQ. IF( IAM0 )
543 $ WRITE( NOUT, FMT = 9997 ) 'memory'
544 KSKIP = KSKIP + 1
545 GO TO 30
546 END IF
547*
548 IF( CHECK ) THEN
549 CALL PDFILLPAD( ICTXT, LWORK, 1,
550 $ MEM( IPW-IPREPAD ), LWORK,
551 $ IPREPAD, IPOSTPAD, PADVAL )
552 CALL PDFILLPAD( ICTXT, LW2-IPOSTPAD, 1,
553 $ MEM( IPW2-IPREPAD ),
554 $ LW2-IPOSTPAD, IPREPAD,
555 $ IPOSTPAD, PADVAL )
556 END IF
557*
558* Compute condition number of the matrix
559*
560 CALL PDGECON( '1', N, MEM( IPA ), 1, 1, DESCA,
561 $ ANORM1, RCOND, MEM( IPW ), LWORK,
562 $ MEM( IPW2 ), LIWORK, INFO )
563*
564 IF( CHECK ) THEN
565 CALL PDCHEKPAD( ICTXT, 'pdgecon', NP, NQ,
566 $ MEM( IPA-IPREPAD ),
567 $ DESCA( LLD_ ), IPREPAD,
568 $ IPOSTPAD, PADVAL )
569 CALL PDCHEKPAD( ICTXT, 'pdgecon', LWORK, 1,
570 $ MEM( IPW-IPREPAD ), LWORK,
571 $ IPREPAD, IPOSTPAD, PADVAL )
572 CALL PDCHEKPAD( ICTXT, 'pdgecon',
573 $ LW2-IPOSTPAD, 1,
574 $ MEM( IPW2-IPREPAD ),
575 $ LW2-IPOSTPAD, IPREPAD,
576 $ IPOSTPAD, PADVAL )
577 END IF
578 END IF
579*
580* Loop over the different values for NRHS
581*
582 DO 20 HH = 1, NNR
583*
584 NRHS = NRVAL( HH )
585*
586 DO 10 KK = 1, NNBR
587*
588 NBRHS = NBRVAL( KK )
589*
590* Initialize Array Descriptor for rhs
591*
592 CALL DESCINIT( DESCB, N, NRHS, NB, NBRHS, 0, 0,
593 $ ICTXT, MAX( 1, NP )+IMIDPAD,
594 $ IERR( 1 ) )
595*
596* Check all processes for an error
597*
598 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, ierr, 1,
599 $ -1, 0 )
600*
601 IF( ierr( 1 ).LT.0 ) THEN
602 IF( iam.EQ.0 )
603 $ WRITE( nout, fmt = 9997 ) 'descriptor'
604 kskip = kskip + 1
605 GO TO 10
606 END IF
607*
608* move IPW to allow room for RHS
609*
610 myrhs = numroc( descb( n_ ), descb( nb_ ),
611 $ mycol, descb( csrc_ ), npcol )
612 ipb = ipw
613*
614 IF( est ) THEN
615 ipb0 = ipb + descb( lld_ )*myrhs + ipostpad +
616 $ iprepad
617 ipferr = ipb0 + descb( lld_ )*myrhs +
618 $ ipostpad + iprepad
619 ipberr = myrhs + ipferr + ipostpad + iprepad
620 ipw = myrhs + ipberr + ipostpad + iprepad
621 ELSE
622 ipw = ipb + descb( lld_ )*myrhs + ipostpad +
623 $ iprepad
624 END IF
625*
626* Set worksiz: routines requiring most workspace
627* is PDLASCHK
628*
629 IF( check ) THEN
630 lcm = ilcm( nprow, npcol )
631 lcmq = lcm / npcol
632 worksiz = max( worksiz-ipostpad,
633 $ nq * nbrhs + np * nbrhs +
634 $ max( max( nq*nb, 2*nbrhs ),
635 $ nbrhs * numroc( numroc(n,nb,0,0,npcol),nb,
636 $ 0,0,lcmq ) ) )
637 worksiz = ipostpad + worksiz
638 ELSE
639 worksiz = ipostpad
640 END IF
641*
642 ierr( 1 ) = 0
643 IF( ipw+worksiz.GT.memsiz ) THEN
644 IF( iam.EQ.0 )
645 $ WRITE( nout, fmt = 9996 )'solve',
646 $ ( IPW+WORKSIZ )*DBLESZ
647 IERR( 1 ) = 1
648 END IF
649*
650* Check all processes for an error
651*
652 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1,
653 $ -1, 0 )
654*
655.GT. IF( IERR( 1 )0 ) THEN
656.EQ. IF( IAM0 )
657 $ WRITE( NOUT, FMT = 9997 ) 'memory'
658 KSKIP = KSKIP + 1
659 GO TO 10
660 END IF
661*
662* Generate RHS
663*
664 CALL PDMATGEN( ICTXT, 'no', 'no', DESCB( M_ ),
665 $ DESCB( N_ ), DESCB( MB_ ),
666 $ DESCB( NB_ ), MEM( IPB ),
667 $ DESCB( LLD_ ), DESCB( RSRC_ ),
668 $ DESCB( CSRC_ ), IBSEED, 0, NP, 0,
669 $ MYRHS, MYROW, MYCOL, NPROW,
670 $ NPCOL )
671*
672 IF( CHECK )
673 $ CALL PDFILLPAD( ICTXT, NP, MYRHS,
674 $ MEM( IPB-IPREPAD ),
675 $ DESCB( LLD_ ), IPREPAD,
676 $ IPOSTPAD, PADVAL )
677*
678 IF( EST ) THEN
679 CALL PDMATGEN( ICTXT, 'no', 'no',
680 $ DESCB( M_ ), DESCB( N_ ),
681 $ DESCB( MB_ ), DESCB( NB_ ),
682 $ MEM( IPB0 ), DESCB( LLD_ ),
683 $ DESCB( RSRC_ ),
684 $ DESCB( CSRC_ ), IBSEED, 0, NP,
685 $ 0, MYRHS, MYROW, MYCOL, NPROW,
686 $ NPCOL )
687 IF( CHECK ) THEN
688 CALL PDFILLPAD( ICTXT, NP, MYRHS,
689 $ MEM( IPB0-IPREPAD ),
690 $ DESCB( LLD_ ), IPREPAD,
691 $ IPOSTPAD, PADVAL )
692 CALL PDFILLPAD( ICTXT, 1, MYRHS,
693 $ MEM( IPFERR-IPREPAD ), 1,
694 $ IPREPAD, IPOSTPAD,
695 $ PADVAL )
696 CALL PDFILLPAD( ICTXT, 1, MYRHS,
697 $ MEM( IPBERR-IPREPAD ), 1,
698 $ IPREPAD, IPOSTPAD,
699 $ PADVAL )
700 END IF
701 END IF
702*
703 CALL BLACS_BARRIER( ICTXT, 'all' )
704 CALL SLTIMER( 2 )
705*
706* Solve linear sytem via LU factorization
707*
708 CALL PDGETRS( 'no', N, NRHS, MEM( IPA ), 1, 1,
709 $ DESCA, MEM( IPPIV ), MEM( IPB ),
710 $ 1, 1, DESCB, INFO )
711*
712 CALL SLTIMER( 2 )
713*
714 IF( CHECK ) THEN
715*
716* check for memory overwrite
717*
718 CALL PDCHEKPAD( ICTXT, 'pdgetrs', NP, NQ,
719 $ MEM( IPA-IPREPAD ),
720 $ DESCA( LLD_ ), IPREPAD,
721 $ IPOSTPAD, PADVAL )
722 CALL PDCHEKPAD( ICTXT, 'pdgetrs', LIPIV, 1,
723 $ MEM( IPPIV-IPREPAD ), LIPIV,
724 $ IPREPAD, IPOSTPAD, PADVAL )
725 CALL PDCHEKPAD( ICTXT, 'pdgetrs', NP,
726 $ MYRHS, MEM( IPB-IPREPAD ),
727 $ DESCB( LLD_ ), IPREPAD,
728 $ IPOSTPAD, PADVAL )
729*
730 CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD,
731 $ 1, MEM( IPW-IPREPAD ),
732 $ WORKSIZ-IPOSTPAD, IPREPAD,
733 $ IPOSTPAD, PADVAL )
734*
735* check the solution to rhs
736*
737 CALL PDLASCHK( 'no', 'n', N, NRHS,
738 $ MEM( IPB ), 1, 1, DESCB,
739 $ IASEED, 1, 1, DESCA, IBSEED,
740 $ ANORM, SRESID, MEM( IPW ) )
741*
742.EQ..AND..GT. IF( IAM0 SRESIDTHRESH )
743 $ WRITE( NOUT, FMT = 9985 ) SRESID
744*
745* check for memory overwrite
746*
747 CALL PDCHEKPAD( ICTXT, 'pdlaschk', NP,
748 $ MYRHS, MEM( IPB-IPREPAD ),
749 $ DESCB( LLD_ ), IPREPAD,
750 $ IPOSTPAD, PADVAL )
751 CALL PDCHEKPAD( ICTXT, 'pdlaschk',
752 $ WORKSIZ-IPOSTPAD, 1,
753 $ MEM( IPW-IPREPAD ),
754 $ WORKSIZ-IPOSTPAD,
755 $ IPREPAD, IPOSTPAD, PADVAL )
756*
757* The second test is a NaN trap
758*
759.LE..AND. IF( SRESIDTHRESH
760.EQ. $ ( SRESID-SRESID )0.0D+0 ) THEN
761 KPASS = KPASS + 1
762 PASSED = 'passed'
763 ELSE
764 KFAIL = KFAIL + 1
765 PASSED = 'failed'
766 END IF
767 ELSE
768 KPASS = KPASS + 1
769 SRESID = SRESID - SRESID
770 PASSED = 'bypass'
771 END IF
772*
773 IF( EST ) THEN
774*
775* Calculate workspace required for PDGERFS
776*
777 LWORK = MAX( 1, 3*NP )
778 IPW2 = IPW + LWORK + IPOSTPAD + IPREPAD
779 LIWORK = MAX( 1, NP )
780 LW2 = ICEIL( LIWORK*INTGSZ, DBLESZ ) +
781 $ IPOSTPAD
782*
783 IERR( 1 ) = 0
784.GT. IF( IPW2+LW2MEMSIZ ) THEN
785.EQ. IF( IAM0 )
786 $ WRITE( NOUT, FMT = 9996 )
787 $ 'iter ref', ( IPW2+LW2 )*DBLESZ
788 IERR( 1 ) = 1
789 END IF
790*
791* Check all processes for an error
792*
793 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1,
794 $ IERR, 1, -1, 0 )
795*
796.GT. IF( IERR( 1 )0 ) THEN
797.EQ. IF( IAM0 )
798 $ WRITE( NOUT, FMT = 9997 )
799 $ 'memory'
800 KSKIP = KSKIP + 1
801 GO TO 10
802 END IF
803*
804 IF( CHECK ) THEN
805 CALL PDFILLPAD( ICTXT, LWORK, 1,
806 $ MEM( IPW-IPREPAD ),
807 $ LWORK, IPREPAD, IPOSTPAD,
808 $ PADVAL )
809 CALL PDFILLPAD( ICTXT, LW2-IPOSTPAD, 1,
810 $ MEM( IPW2-IPREPAD ),
811 $ LW2-IPOSTPAD, IPREPAD,
812 $ IPOSTPAD, PADVAL )
813 END IF
814*
815* Use iterative refinement to improve the
816* computed solution
817*
818 CALL PDGERFS( 'no', N, NRHS, MEM( IPA0 ), 1,
819 $ 1, DESCA, MEM( IPA ), 1, 1,
820 $ DESCA, MEM( IPPIV ),
821 $ MEM( IPB0 ), 1, 1, DESCB,
822 $ MEM( IPB ), 1, 1, DESCB,
823 $ MEM( IPFERR ), MEM( IPBERR ),
824 $ MEM( IPW ), LWORK, MEM( IPW2 ),
825 $ LIWORK, INFO )
826*
827 IF( CHECK ) THEN
828 CALL PDCHEKPAD( ICTXT, 'pdgerfs', NP,
829 $ NQ, MEM( IPA0-IPREPAD ),
830 $ DESCA( LLD_ ), IPREPAD,
831 $ IPOSTPAD, PADVAL )
832 CALL PDCHEKPAD( ICTXT, 'pdgerfs', NP,
833 $ NQ, MEM( IPA-IPREPAD ),
834 $ DESCA( LLD_ ), IPREPAD,
835 $ IPOSTPAD, PADVAL )
836 CALL PDCHEKPAD( ICTXT, 'pdgerfs', LIPIV,
837 $ 1, MEM( IPPIV-IPREPAD ),
838 $ LIPIV, IPREPAD,
839 $ IPOSTPAD, PADVAL )
840 CALL PDCHEKPAD( ICTXT, 'pdgerfs', NP,
841 $ MYRHS, MEM( IPB-IPREPAD ),
842 $ DESCB( LLD_ ), IPREPAD,
843 $ IPOSTPAD, PADVAL )
844 CALL PDCHEKPAD( ICTXT, 'pdgerfs', NP,
845 $ MYRHS,
846 $ MEM( IPB0-IPREPAD ),
847 $ DESCB( LLD_ ), IPREPAD,
848 $ IPOSTPAD, PADVAL )
849 CALL PDCHEKPAD( ICTXT, 'pdgerfs', 1,
850 $ MYRHS,
851 $ MEM( IPFERR-IPREPAD ), 1,
852 $ IPREPAD, IPOSTPAD,
853 $ PADVAL )
854 CALL PDCHEKPAD( ICTXT, 'pdgerfs', 1,
855 $ MYRHS,
856 $ MEM( IPBERR-IPREPAD ), 1,
857 $ IPREPAD, IPOSTPAD,
858 $ PADVAL )
859 CALL PDCHEKPAD( ICTXT, 'pdgerfs', LWORK,
860 $ 1, MEM( IPW-IPREPAD ),
861 $ LWORK, IPREPAD, IPOSTPAD,
862 $ PADVAL )
863 CALL PDCHEKPAD( ICTXT, 'pdgerfs',
864 $ LW2-IPOSTPAD, 1,
865 $ MEM( IPW2-IPREPAD ),
866 $ LW2-IPOSTPAD, IPREPAD,
867 $ IPOSTPAD, PADVAL )
868*
869 CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD,
870 $ 1, MEM( IPW-IPREPAD ),
871 $ WORKSIZ-IPOSTPAD, IPREPAD,
872 $ IPOSTPAD, PADVAL )
873*
874* check the solution to rhs
875*
876 CALL PDLASCHK( 'no', 'n', N, NRHS,
877 $ MEM( IPB ), 1, 1, DESCB,
878 $ IASEED, 1, 1, DESCA,
879 $ IBSEED, ANORM, SRESID2,
880 $ MEM( IPW ) )
881*
882.EQ..AND..GT. IF( IAM0 SRESID2THRESH )
883 $ WRITE( NOUT, FMT = 9985 ) SRESID2
884*
885* check for memory overwrite
886*
887 CALL PDCHEKPAD( ICTXT, 'pdlaschk', NP,
888 $ MYRHS, MEM( IPB-IPREPAD ),
889 $ DESCB( LLD_ ), IPREPAD,
890 $ IPOSTPAD, PADVAL )
891 CALL PDCHEKPAD( ICTXT, 'pdlaschk',
892 $ WORKSIZ-IPOSTPAD, 1,
893 $ MEM( IPW-IPREPAD ),
894 $ WORKSIZ-IPOSTPAD, IPREPAD,
895 $ IPOSTPAD, PADVAL )
896 END IF
897 END IF
898*
899* Gather max. of all CPU and WALL clock timings
900*
901 CALL SLCOMBINE( ICTXT, 'all', '>', 'w', 2, 1,
902 $ WTIME )
903 CALL SLCOMBINE( ICTXT, 'all', '>', 'c', 2, 1,
904 $ CTIME )
905*
906* Print results
907*
908.EQ..AND..EQ. IF( MYROW0 MYCOL0 ) THEN
909*
910* 2/3 N^3 - 1/2 N^2 flops for LU factorization
911*
912 NOPS = (2.0D+0/3.0D+0)*( DBLE(N)**3 ) -
913 $ (1.0D+0/2.0D+0)*( DBLE(N)**2 )
914*
915* nrhs * 2 N^2 flops for LU solve.
916*
917 NOPS = NOPS + 2.0D+0*(DBLE(N)**2)*DBLE(NRHS)
918*
919* Calculate total megaflops -- factorization
920* and solve -- for WALL and CPU time, and print
921* output
922*
923* Print WALL time if machine supports it
924*
925.GT. IF( WTIME( 1 ) + WTIME( 2 ) 0.0D+0 )
926 $ THEN
927 TMFLOPS = NOPS /
928 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
929 ELSE
930 TMFLOPS = 0.0D+0
931 END IF
932*
933* Print WALL time if supported
934*
935.GE. IF( WTIME( 2 )0.0D+0 )
936 $ WRITE( NOUT, FMT = 9993 ) 'wall', M, N,
937 $ NB, NRHS, NBRHS, NPROW, NPCOL,
938 $ WTIME( 1 ), WTIME( 2 ), TMFLOPS,
939 $ PASSED
940*
941* Print CPU time if supported
942*
943.GT. IF( CTIME( 1 )+CTIME( 2 )0.0D+0 )
944 $ THEN
945 TMFLOPS = NOPS /
946 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
947 ELSE
948 TMFLOPS = 0.0D+0
949 END IF
950*
951.GE. IF( CTIME( 2 )0.0D+0 )
952 $ WRITE( NOUT, FMT = 9993 ) 'cpu ', M, N,
953 $ NB, NRHS, NBRHS, NPROW, NPCOL,
954 $ CTIME( 1 ), CTIME( 2 ), TMFLOPS,
955 $ PASSED
956 END IF
957 10 CONTINUE
958 20 CONTINUE
959*
960.AND..GT. IF( CHECK( SRESIDTHRESH ) ) THEN
961*
962* Compute fresid = ||A - P*L*U|| / (||A|| * N * eps)
963*
964 CALL PDGETRRV( M, N, MEM( IPA ), 1, 1, DESCA,
965 $ MEM( IPPIV ), MEM( IPW ) )
966 CALL PDLAFCHK( 'no', 'no', M, N, MEM( IPA ), 1,
967 $ 1, DESCA, IASEED, ANORM, FRESID,
968 $ MEM( IPW ) )
969*
970* Check for memory overwrite
971*
972 CALL PDCHEKPAD( ICTXT, 'pdgetrrv', NP, NQ,
973 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ),
974 $ IPREPAD, IPOSTPAD, PADVAL )
975 CALL PDCHEKPAD( ICTXT, 'pdgetrrv', LIPIV,
976 $ 1, MEM( IPPIV-IPREPAD ), LIPIV,
977 $ IPREPAD, IPOSTPAD, PADVAL )
978 CALL PDCHEKPAD( ICTXT, 'pdgetrrv',
979 $ WORKSIZ-IPOSTPAD, 1,
980 $ MEM( IPW-IPREPAD ),
981 $ WORKSIZ-IPOSTPAD, IPREPAD,
982 $ IPOSTPAD, PADVAL )
983*
984.EQ..AND..EQ. IF( MYROW0 MYCOL0 )
985 $ WRITE( NOUT, FMT = 9986 ) FRESID
986 END IF
987 END IF
988 30 CONTINUE
989 40 CONTINUE
990 CALL BLACS_GRIDEXIT( ICTXT )
991 50 CONTINUE
992*
993* Print ending messages and close output file
994*
995 60 CONTINUE
996.EQ. IF( IAM0 ) THEN
997 KTESTS = KPASS + KFAIL + KSKIP
998 WRITE( NOUT, FMT = * )
999 WRITE( NOUT, FMT = 9992 ) KTESTS
1000 IF( CHECK ) THEN
1001 WRITE( NOUT, FMT = 9991 ) KPASS
1002 WRITE( NOUT, FMT = 9989 ) KFAIL
1003 ELSE
1004 WRITE( NOUT, FMT = 9990 ) KPASS
1005 END IF
1006 WRITE( NOUT, FMT = 9988 ) KSKIP
1007 WRITE( NOUT, FMT = * )
1008 WRITE( NOUT, FMT = * )
1009 WRITE( NOUT, FMT = 9987 )
1010.NE..AND..NE. IF( NOUT6 NOUT0 )
1011 $ CLOSE( NOUT )
1012 END IF
1013*
1014 CALL BLACS_EXIT( 0 )
1015*
1016 9999 FORMAT( 'illegal ', A6, ': ', A5, ' = ', I3,
1017 $ '; it should be at least 1' )
1018 9998 FORMAT( 'illegal grid: nprow*npcol = ', I4, '. it can be at most',
1019 $ I4 )
1020 9997 FORMAT( 'bad ', A6, ' parameters: going on to next test case.' )
1021 9996 FORMAT( 'unable to perform ', A, ': need totmem of at least',
1022 $ I11 )
1023 9995 FORMAT( 'time m n nb nrhs nbrhs p q lu time ',
1024 $ 'sol time mflops check' )
1025 9994 FORMAT( '---- ----- ----- --- ---- ----- ---- ---- -------- ',
1026 $ '-------- -------- ------' )
1027 9993 FORMAT( A4, 1X, I5, 1X, I5, 1X, I3, 1X, I5, 1X, I4, 1X, I4, 1X,
1028 $ I4, 1X, F8.2, 1X, F8.2, 1X, F8.2, 1X, A6 )
1029 9992 FORMAT( 'finished ', I6, ' tests, with the following results:' )
1030 9991 FORMAT( I5, ' tests completed and passed residual checks.' )
1031 9990 FORMAT( I5, ' tests completed without checking.' )
1032 9989 FORMAT( I5, ' tests completed and failed residual checks.' )
1033 9988 FORMAT( I5, ' tests skipped because of illegal input values.' )
1034 9987 FORMAT( 'END OF TESTS.' )
1035 9986 FORMAT( '||A - P*L*U|| / (||A|| * N * eps) = ', G25.7 )
1036 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N) ', F25.7 )
1037*
1038 STOP
1039*
1040* End of PDLUDRIVER
1041*
1042 END
subroutine pdlafchk(aform, diag, m, n, a, ia, ja, desca, iaseed, anorm, fresid, work)
Definition pdlafchk.f:3
subroutine pdmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pdmatgen.f:4
end diagonal values have been computed in the(sparse) matrix id.SOL
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function ilcm(m, n)
Definition ilcm.f:2
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine pdgetrs(trans, n, nrhs, a, ia, ja, desca, ipiv, b, ib, jb, descb, info)
Definition mpi.f:1171
subroutine blacs_gridinit(cntxt, c, nprow, npcol)
Definition mpi.f:745
subroutine pdgetrf(m, n, a, ia, ja, desca, ipiv, info)
Definition mpi.f:914
subroutine pdgecon(norm, n, a, ia, ja, desca, anorm, rcond, work, lwork, iwork, liwork, info)
Definition mpi.f:1366
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition mpi.f:777
subroutine blacs_gridexit(cntxt)
Definition mpi.f:762
double precision function pdlange(norm, m, n, a, ia, ja, desca, work)
Definition mpi.f:1311
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine pdchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pdchekpad.f:3
subroutine pdfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pdfillpad.f:2
subroutine pdgerfs(trans, n, nrhs, a, ia, ja, desca, af, iaf, jaf, descaf, ipiv, b, ib, jb, descb, x, ix, jx, descx, ferr, berr, work, lwork, iwork, liwork, info)
Definition pdgerfs.f:5
subroutine pdgetrrv(m, n, a, ia, ja, desca, ipiv, work)
Definition pdgetrrv.f:2
subroutine pdlaschk(symm, diag, n, nrhs, x, ix, jx, descx, iaseed, ia, ja, desca, ibseed, anorm, resid, work)
Definition pdlaschk.f:4
program pdludriver
Definition pdludriver.f:1
subroutine pdluinfo(summry, nout, nmat, mval, nval, ldnval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, est, work, iam, nprocs)
Definition pdluinfo.f:5
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267