OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
psinvdriver.f
Go to the documentation of this file.
1 PROGRAM psinvdriver
2*
3* -- ScaLAPACK testing driver (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 1, 1997
7*
8* Purpose
9* =======
10*
11* PSINVDRIVER is the main test program for the REAL
12* SCALAPACK matrix inversion routines. This test driver computes the
13* inverse of different kind of matrix and tests the results.
14*
15* The program must be driven by a short data file. An annotated example
16* of a data file can be obtained by deleting the first 3 characters
17* from the following 14 lines:
18* 'ScaLAPACK Matrix Inversion Testing input file'
19* 'PVM machine.'
20* 'INV.out' output file name (if any)
21* 6 device out
22* 5 number of matrix types (next line)
23* 'GEN' 'UTR' 'LTR' 'UPD' LPD' GEN, UTR, LTR, UPD, LPD
24* 4 number of problems sizes
25* 1000 2000 3000 4000 values of N
26* 3 number of NB's
27* 4 30 35 values of NB
28* 2 number of process grids (ordered P & Q)
29* 4 2 values of P
30* 4 4 values of Q
31* 1.0 threshold
32*
33* Internal Parameters
34* ===================
35*
36* TOTMEM INTEGER, default = 2000000
37* TOTMEM is a machine-specific parameter indicating the
38* maximum amount of available memory in bytes.
39* The user should customize TOTMEM to his platform. Remember
40* to leave room in memory for the operating system, the BLACS
41* buffer, etc. For example, on a system with 8 MB of memory
42* per process (e.g., one processor on an Intel iPSC/860), the
43* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
44* code, BLACS buffer, etc). However, for PVM, we usually set
45* TOTMEM = 2000000. Some experimenting with the maximum value
46* of TOTMEM may be required.
47*
48* INTGSZ INTEGER, default = 4 bytes.
49* REALSZ INTEGER, default = 4 bytes.
50* INTGSZ and REALSZ indicate the length in bytes on the
51* given platform for an integer and a single precision real.
52* MEM REAL array, dimension ( TOTMEM / REALSZ )
53*
54* All arrays used by SCALAPACK routines are allocated from
55* this array and referenced by pointers. The integer IPA,
56* for example, is a pointer to the starting element of MEM for
57* the matrix A.
58*
59* =====================================================================
60*
61* .. Parameters ..
62 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
63 $ lld_, mb_, m_, nb_, n_, rsrc_
64 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
65 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
66 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
67 INTEGER intgsz, memsiz, ntests, realsz, totmem
68 REAL padval, zero
69 parameter( intgsz = 4, realsz = 4, totmem = 2000000,
70 $ memsiz = totmem / realsz, ntests = 20,
71 $ padval = -9923.0e+0, zero = 0.0e+0 )
72* ..
73* .. Local Scalars ..
74 CHARACTER uplo
75 CHARACTER*3 mtyp
76 CHARACTER*6 passed
77 CHARACTER*80 outfile
78 LOGICAL check
79 INTEGER i, iam, iaseed, ictxt, imidpad, info, ipa,
80 $ ippiv, iprepad, ipostpad, ipiw, ipw, itemp, j,
81 $ k, ktests, kpass, kfail, kskip, l, lcm, lipiv,
82 $ liwork, lwork, mycol, myrow, n, nb, ngrids,
83 $ nmat, nmtyp, nnb, nout, np, npcol, nprocs,
84 $ nprow, nq, workiinv, workinv, worksiz
85 REAL anorm, fresid, rcond, thresh
86 DOUBLE PRECISION nops, tmflops
87* ..
88* .. Local Arrays ..
89 CHARACTER*3 mattyp( ntests )
90 INTEGER desca( dlen_ ), ierr( 1 ), nbval( ntests ),
91 $ nval( ntests ), pval( ntests ),
92 $ qval( ntests )
93 REAL mem( memsiz )
94 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
95* ..
96* .. External Subroutines ..
97 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
99 $ blacs_pinfo, descinit, igsum2d, pschekpad,
104* ..
105* .. External Functions ..
106 LOGICAL lsamen
107 INTEGER iceil, ilcm, numroc
108 REAL pslange, pslansy, pslantr
109 EXTERNAL iceil, ilcm, lsamen, numroc, pslange,
111* ..
112* .. Intrinsic Functions ..
113 INTRINSIC dble, max
114* ..
115* .. Data Statements ..
116 DATA ktests, kpass, kfail, kskip /4*0/
117* ..
118* .. Executable Statements ..
119*
120* Get starting information
121*
122 CALL blacs_pinfo( iam, nprocs )
123 iaseed = 100
124 CALL psinvinfo( outfile, nout, nmtyp, mattyp, ntests, nmat, nval,
125 $ ntests, nnb, nbval, ntests, ngrids, pval, ntests,
126 $ qval, ntests, thresh, mem, iam, nprocs )
127 check = ( thresh.GE.0.0e+0 )
128*
129* Loop over the different matrix types
130*
131 DO 40 i = 1, nmtyp
132*
133 mtyp = mattyp( i )
134*
135* Print headings
136*
137 IF( iam.EQ.0 ) THEN
138 WRITE( nout, fmt = * )
139 IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
140 WRITE( nout, fmt = 9986 )
141 $ 'A is a general matrix.'
142 ELSE IF( lsamen( 3, mtyp, 'UTR' ) ) THEN
143 WRITE( nout, fmt = 9986 )
144 $ 'A is an upper triangular matrix.'
145 ELSE IF( lsamen( 3, mtyp, 'LTR' ) ) THEN
146 WRITE( nout, fmt = 9986 )
147 $ 'A is a lower triangular matrix.'
148 ELSE IF( lsamen( 3, mtyp, 'UPD' ) ) THEN
149 WRITE( nout, fmt = 9986 )
150 $ 'A is a symmetric positive definite matrix.'
151 WRITE( nout, fmt = 9986 )
152 $ 'Only the upper triangular part will be '//
153 $ 'referenced.'
154 ELSE IF( lsamen( 3, mtyp, 'LPD' ) ) THEN
155 WRITE( nout, fmt = 9986 )
156 $ 'A is a symmetric positive definite matrix.'
157 WRITE( nout, fmt = 9986 )
158 $ 'Only the lower triangular part will be '//
159 $ 'referenced.'
160 END IF
161 WRITE( nout, fmt = * )
162 WRITE( nout, fmt = 9995 )
163 WRITE( nout, fmt = 9994 )
164 WRITE( nout, fmt = * )
165 END IF
166*
167* Loop over different process grids
168*
169 DO 30 j = 1, ngrids
170*
171 nprow = pval( j )
172 npcol = qval( j )
173*
174* Make sure grid information is correct
175*
176 ierr( 1 ) = 0
177 IF( nprow.LT.1 ) THEN
178 IF( iam.EQ.0 )
179 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
180 ierr( 1 ) = 1
181 ELSE IF( npcol.LT.1 ) THEN
182 IF( iam.EQ.0 )
183 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
184 ierr( 1 ) = 1
185 ELSE IF( nprow*npcol.GT.nprocs ) THEN
186 IF( iam.EQ.0 )
187 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
188 ierr( 1 ) = 1
189 END IF
190*
191 IF( ierr( 1 ).GT.0 ) THEN
192 IF( iam.EQ.0 )
193 $ WRITE( nout, fmt = 9997 ) 'grid'
194 kskip = kskip + 1
195 GO TO 30
196 END IF
197*
198* Define process grid
199*
200 CALL blacs_get( -1, 0, ictxt )
201 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
202 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
203*
204* Go to bottom of loop if this case doesn't use my process
205*
206 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
207 $ GO TO 30
208*
209 DO 20 k = 1, nmat
210*
211 n = nval( k )
212*
213* Make sure matrix information is correct
214*
215 ierr( 1 ) = 0
216 IF( n.LT.1 ) THEN
217 IF( iam.EQ.0 )
218 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
219 ierr( 1 ) = 1
220 END IF
221*
222* Make sure no one had error
223*
224 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
225*
226 IF( ierr( 1 ).GT.0 ) THEN
227 IF( iam.EQ.0 )
228 $ WRITE( nout, fmt = 9997 ) 'matrix'
229 kskip = kskip + 1
230 GO TO 20
231 END IF
232*
233* Loop over different blocking sizes
234*
235 DO 10 l = 1, nnb
236*
237 nb = nbval( l )
238*
239* Make sure nb is legal
240*
241 ierr( 1 ) = 0
242 IF( nb.LT.1 ) THEN
243 ierr( 1 ) = 1
244 IF( iam.EQ.0 )
245 $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
246 END IF
247*
248* Check all processes for an error
249*
250 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
251 $ 0 )
252*
253 IF( ierr( 1 ).GT.0 ) THEN
254 IF( iam.EQ.0 )
255 $ WRITE( nout, fmt = 9997 ) 'NB'
256 kskip = kskip + 1
257 GO TO 10
258 END IF
259*
260* Padding constants
261*
262 np = numroc( n, nb, myrow, 0, nprow )
263 nq = numroc( n, nb, mycol, 0, npcol )
264 IF( check ) THEN
265 iprepad = max( nb, np )
266 imidpad = nb
267 ipostpad = max( nb, nq )
268 ELSE
269 iprepad = 0
270 imidpad = 0
271 ipostpad = 0
272 END IF
273*
274* Initialize the array descriptor for the matrix A
275*
276 CALL descinit( desca, n, n, nb, nb, 0, 0, ictxt,
277 $ max( 1, np ) + imidpad, ierr( 1 ) )
278*
279* Check all processes for an error
280*
281 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
282 $ 0 )
283*
284 IF( ierr( 1 ).LT.0 ) THEN
285 IF( iam.EQ.0 )
286 $ WRITE( nout, fmt = 9997 ) 'descriptor'
287 kskip = kskip + 1
288 GO TO 10
289 END IF
290*
291* Assign pointers into MEM for ScaLAPACK arrays, A is
292* allocated starting at position MEM( IPREPAD+1 )
293*
294 ipa = iprepad+1
295*
296 lcm = ilcm( nprow, npcol )
297 IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
298*
299* Pivots are needed by LU factorization
300*
301 ippiv = ipa + desca( lld_ ) * nq + ipostpad +
302 $ iprepad
303 lipiv = iceil( intgsz * ( np + nb ), realsz )
304 ipw = ippiv + lipiv + ipostpad + iprepad
305*
306 lwork = max( 1, np * desca( nb_ ) )
307 workinv = lwork + ipostpad
308*
309* Figure the amount of workspace required by the
310* general matrix inversion
311*
312 IF( nprow.EQ.npcol ) THEN
313 liwork = nq + desca( nb_ )
314 ELSE
315*
316* change the integer workspace needed for PDGETRI
317* LIWORK = MAX( DESCA( NB_ ), DESCA( MB_ ) *
318* $ ICEIL( ICEIL( DESCA( LLD_ ),
319* $ DESCA( MB_ ) ), LCM / NPROW ) )
320* $ + NQ
321 liwork = numroc( desca( m_ ) +
322 $ desca( mb_ ) * nprow
323 $ + mod( 1 - 1, desca( mb_ ) ), desca ( nb_ ),
324 $ mycol, desca( csrc_ ), npcol ) +
325 $ max( desca( mb_ ) * iceil ( iceil(
326 $ numroc( desca( m_ ) + desca( mb_ ) * nprow,
327 $ desca( mb_ ), myrow, desca( rsrc_ ), nprow ),
328 $ desca( mb_ ) ), lcm / nprow ), desca( nb_ ) )
329*
330 END IF
331 workiinv = iceil( liwork*intgsz, realsz ) +
332 $ ipostpad
333 ipiw = ipw + workinv + iprepad
334 worksiz = workinv + iprepad + workiinv
335*
336 ELSE
337*
338* No pivots or workspace needed for triangular or
339* symmetric positive definite matrices.
340*
341 ipw = ipa + desca( lld_ ) * nq + ipostpad + iprepad
342 worksiz = 1 + ipostpad
343*
344 END IF
345*
346 IF( check ) THEN
347*
348* Figure amount of work space for the norm
349* computations
350*
351 IF( lsamen( 3, mtyp, 'GEN' ).OR.
352 $ lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
353 itemp = nq
354 ELSE
355 itemp = 2 * nq + np
356 IF( nprow.NE.npcol ) THEN
357 itemp = itemp +
358 $ nb * iceil( iceil( np, nb ),
359 $ lcm / nprow )
360 END IF
361 END IF
362 worksiz = max( worksiz-ipostpad, itemp )
363*
364* Figure the amount of workspace required by the
365* checking routine
366*
367 worksiz = max( worksiz, 2 * nb * max( 1, np ) ) +
368 $ ipostpad
369*
370 END IF
371*
372* Check for adequate memory for problem size
373*
374 ierr( 1 ) = 0
375 IF( ipw+worksiz.GT.memsiz ) THEN
376 IF( iam.EQ.0 )
377 $ WRITE( nout, fmt = 9996 ) 'inversion',
378 $ ( ipw + worksiz ) * realsz
379 ierr( 1 ) = 1
380 END IF
381*
382* Check all processes for an error
383*
384 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
385 $ 0 )
386*
387 IF( ierr( 1 ).GT.0 ) THEN
388 IF( iam.EQ.0 )
389 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
390 kskip = kskip + 1
391 GO TO 10
392 END IF
393*
394 IF( lsamen( 3, mtyp, 'gen.OR.' )
395 $ LSAMEN( 2, MTYP( 2:3 ), 'tr' ) ) THEN
396*
397* Generate a general diagonally dominant matrix A
398*
399 CALL PSMATGEN( ICTXT, 'n', 'd', DESCA( M_ ),
400 $ DESCA( N_ ), DESCA( MB_ ),
401 $ DESCA( NB_ ), MEM( IPA ),
402 $ DESCA( LLD_ ), DESCA( RSRC_ ),
403 $ DESCA( CSRC_ ), IASEED, 0, NP, 0,
404 $ NQ, MYROW, MYCOL, NPROW, NPCOL )
405*
406 ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'pd' ) ) THEN
407*
408* Generate a symmetric positive definite matrix
409*
410 CALL PSMATGEN( ICTXT, 's', 'd', DESCA( M_ ),
411 $ DESCA( N_ ), DESCA( MB_ ),
412 $ DESCA( NB_ ), MEM( IPA ),
413 $ DESCA( LLD_ ), DESCA( RSRC_ ),
414 $ DESCA( CSRC_ ), IASEED, 0, NP, 0,
415 $ NQ, MYROW, MYCOL, NPROW, NPCOL )
416*
417 END IF
418*
419* Zeros not-referenced part of A, if any.
420*
421 IF( LSAMEN( 1, MTYP, 'u' ) ) THEN
422*
423 UPLO = 'u'
424 CALL PSLASET( 'lower', N-1, N-1, ZERO, ZERO,
425 $ MEM( IPA ), 2, 1, DESCA )
426*
427 ELSE IF( LSAMEN( 1, MTYP, 'l' ) ) THEN
428*
429 UPLO = 'l'
430 CALL PSLASET( 'upper', N-1, N-1, ZERO, ZERO,
431 $ MEM( IPA ), 1, 2, DESCA )
432*
433 ELSE
434*
435 UPLO = 'g'
436*
437 END IF
438*
439* Need 1-norm of A for checking
440*
441 IF( CHECK ) THEN
442*
443 CALL PSFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ),
444 $ DESCA( LLD_ ), IPREPAD, IPOSTPAD,
445 $ PADVAL )
446 CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1,
447 $ MEM( IPW-IPREPAD ),
448 $ WORKSIZ-IPOSTPAD, IPREPAD,
449 $ IPOSTPAD, PADVAL )
450*
451 IF( LSAMEN( 3, MTYP, 'gen' ) ) THEN
452*
453 CALL PSFILLPAD( ICTXT, LIPIV, 1,
454 $ MEM( IPPIV-IPREPAD ), LIPIV,
455 $ IPREPAD, IPOSTPAD, PADVAL )
456 ANORM = PSLANGE( '1', N, N, MEM( IPA ), 1, 1,
457 $ DESCA, MEM( IPW ) )
458 CALL PSCHEKPAD( ICTXT, 'pslange', NP, NQ,
459 $ MEM( IPA-IPREPAD ),
460 $ DESCA( LLD_ ),
461 $ IPREPAD, IPOSTPAD, PADVAL )
462 CALL PSCHEKPAD( ICTXT, 'pslange',
463 $ WORKSIZ-IPOSTPAD, 1,
464 $ MEM( IPW-IPREPAD ),
465 $ WORKSIZ-IPOSTPAD,
466 $ IPREPAD, IPOSTPAD, PADVAL )
467 CALL PSFILLPAD( ICTXT, WORKINV-IPOSTPAD, 1,
468 $ MEM( IPW-IPREPAD ),
469 $ WORKINV-IPOSTPAD,
470 $ IPREPAD, IPOSTPAD, PADVAL )
471 CALL PSFILLPAD( ICTXT, WORKIINV-IPOSTPAD, 1,
472 $ MEM( IPIW-IPREPAD ),
473 $ WORKIINV-IPOSTPAD, IPREPAD,
474 $ IPOSTPAD, PADVAL )
475 ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'tr' ) ) THEN
476*
477 ANORM = PSLANTR( '1', UPLO, 'non unit', N, N,
478 $ MEM( IPA ), 1, 1, DESCA,
479 $ MEM( IPW ) )
480 CALL PSCHEKPAD( ICTXT, 'pslantr', NP, NQ,
481 $ MEM( IPA-IPREPAD ),
482 $ DESCA( LLD_ ),
483 $ IPREPAD, IPOSTPAD, PADVAL )
484 CALL PSCHEKPAD( ICTXT, 'pslantr',
485 $ WORKSIZ-IPOSTPAD, 1,
486 $ MEM( IPW-IPREPAD ),
487 $ WORKSIZ-IPOSTPAD,
488 $ IPREPAD, IPOSTPAD, PADVAL )
489*
490 ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'pd' ) ) THEN
491*
492 ANORM = PSLANSY( '1', UPLO, N, MEM( IPA ), 1, 1,
493 $ DESCA, MEM( IPW ) )
494 CALL PSCHEKPAD( ICTXT, 'pslansy', NP, NQ,
495 $ MEM( IPA-IPREPAD ),
496 $ DESCA( LLD_ ),
497 $ IPREPAD, IPOSTPAD, PADVAL )
498 CALL PSCHEKPAD( ICTXT, 'pslansy',
499 $ WORKSIZ-IPOSTPAD, 1,
500 $ MEM( IPW-IPREPAD ),
501 $ WORKSIZ-IPOSTPAD,
502 $ IPREPAD, IPOSTPAD, PADVAL )
503*
504 ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'sy' ) ) THEN
505*
506 CALL PSFILLPAD( ICTXT, LIPIV, 1,
507 $ MEM( IPPIV-IPREPAD ), LIPIV,
508 $ IPREPAD, IPOSTPAD, PADVAL )
509 ANORM = PSLANSY( '1', UPLO, N, MEM( IPA ), 1, 1,
510 $ DESCA, MEM( IPW ) )
511 CALL PSCHEKPAD( ICTXT, 'pslansy', np, nq,
512 $ mem( ipa-iprepad ),
513 $ desca( lld_ ),
514 $ iprepad, ipostpad, padval )
515 CALL pschekpad( ictxt, 'PSLANSY',
516 $ worksiz-ipostpad, 1,
517 $ mem( ipw-iprepad ),
518 $ worksiz-ipostpad,
519 $ iprepad,ipostpad, padval )
520*
521 END IF
522*
523 END IF
524*
525 CALL slboot()
526 CALL blacs_barrier( ictxt, 'All' )
527*
528 IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
529*
530* Perform LU factorization
531*
532 CALL sltimer( 1 )
533 CALL psgetrf( n, n, mem( ipa ), 1, 1, desca,
534 $ mem( ippiv ), info )
535 CALL sltimer( 1 )
536*
537 IF( check ) THEN
538*
539* Check for memory overwrite
540*
541 CALL pschekpad( ictxt, 'PSGETRF', np, nq,
542 $ mem( ipa-iprepad ),
543 $ desca( lld_ ),
544 $ iprepad, ipostpad, padval )
545 CALL pschekpad( ictxt, 'PSGETRF', lipiv, 1,
546 $ mem( ippiv-iprepad ), lipiv,
547 $ iprepad, ipostpad, padval )
548 END IF
549*
550* Perform the general matrix inversion
551*
552 CALL sltimer( 2 )
553 CALL psgetri( n, mem( ipa ), 1, 1, desca,
554 $ mem( ippiv ), mem( ipw ), lwork,
555 $ mem( ipiw ), liwork, info )
556 CALL sltimer( 2 )
557*
558 IF( check ) THEN
559*
560* Check for memory overwrite
561*
562 CALL pschekpad( ictxt, 'PSGETRI', np, nq,
563 $ mem( ipa-iprepad ),
564 $ desca( lld_ ),
565 $ iprepad, ipostpad, padval )
566 CALL pschekpad( ictxt, 'PSGETRI', lipiv, 1,
567 $ mem( ippiv-iprepad ), lipiv,
568 $ iprepad, ipostpad, padval )
569 CALL pschekpad( ictxt, 'PSGETRI',
570 $ workiinv-ipostpad, 1,
571 $ mem( ipiw-iprepad ),
572 $ workiinv-ipostpad,
573 $ iprepad, ipostpad, padval )
574 CALL pschekpad( ictxt, 'PSGETRI',
575 $ workinv-ipostpad, 1,
576 $ mem( ipw-iprepad ),
577 $ workinv-ipostpad,
578 $ iprepad, ipostpad, padval )
579 END IF
580*
581 ELSE IF( lsamen( 2, mtyp( 2:3 ), 'TR' ) ) THEN
582*
583* Perform the general matrix inversion
584*
585 CALL sltimer( 2 )
586 CALL pstrtri( uplo, 'Non unit', n, mem( ipa ), 1,
587 $ 1, desca, info )
588 CALL sltimer( 2 )
589*
590 IF( check ) THEN
591*
592* Check for memory overwrite
593*
594 CALL pschekpad( ictxt, 'PSTRTRI', np, nq,
595 $ mem( ipa-iprepad ),
596 $ desca( lld_ ),
597 $ iprepad, ipostpad, padval )
598 END IF
599*
600 ELSE IF( lsamen( 2, mtyp( 2:3 ), 'pd' ) ) THEN
601*
602* Perform Cholesky factorization
603*
604 CALL SLTIMER( 1 )
605 CALL PSPOTRF( UPLO, N, MEM( IPA ), 1, 1, DESCA,
606 $ INFO )
607 CALL SLTIMER( 1 )
608*
609 IF( CHECK ) THEN
610*
611* Check for memory overwrite
612*
613 CALL PSCHEKPAD( ICTXT, 'pspotrf', NP, NQ,
614 $ MEM( IPA-IPREPAD ),
615 $ DESCA( LLD_ ),
616 $ IPREPAD, IPOSTPAD, PADVAL )
617 END IF
618*
619* Perform the symmetric positive definite matrix
620* inversion
621*
622 CALL SLTIMER( 2 )
623 CALL PSPOTRI( UPLO, N, MEM( IPA ), 1, 1, DESCA,
624 $ INFO )
625 CALL SLTIMER( 2 )
626*
627 IF( CHECK ) THEN
628*
629* Check for memory overwrite
630*
631 CALL PSCHEKPAD( ICTXT, 'pspotri', NP, NQ,
632 $ MEM( IPA-IPREPAD ),
633 $ DESCA( LLD_ ),
634 $ IPREPAD, IPOSTPAD, PADVAL )
635 END IF
636*
637 END IF
638*
639 IF( CHECK ) THEN
640*
641 CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1,
642 $ MEM( IPW-IPREPAD ),
643 $ WORKSIZ-IPOSTPAD, IPREPAD,
644 $ IPOSTPAD, PADVAL )
645*
646* Compute fresid = || inv(A)*A-I ||
647*
648 CALL PSINVCHK( MTYP, N, MEM( IPA ), 1, 1, DESCA,
649 $ IASEED, ANORM, FRESID, RCOND,
650 $ MEM( IPW ) )
651*
652* Check for memory overwrite
653*
654 CALL PSCHEKPAD( ICTXT, 'psinvchk', NP, NQ,
655 $ MEM( IPA-IPREPAD ),
656 $ DESCA( LLD_ ),
657 $ IPREPAD, IPOSTPAD, PADVAL )
658 CALL PSCHEKPAD( ICTXT, 'psinvchk',
659 $ WORKSIZ-IPOSTPAD, 1,
660 $ MEM( IPW-IPREPAD ),
661 $ WORKSIZ-IPOSTPAD, IPREPAD,
662 $ IPOSTPAD, PADVAL )
663*
664* Test residual and detect NaN result
665*
666.LE..AND..EQ..AND. IF( FRESIDTHRESH INFO0
667.EQ. $ ( (FRESID-FRESID) 0.0E+0 ) ) THEN
668 KPASS = KPASS + 1
669 PASSED = 'passed'
670 ELSE
671 KFAIL = KFAIL + 1
672.GT. IF( INFO0 ) THEN
673 PASSED = 'singul'
674 ELSE
675 PASSED = 'failed'
676 END IF
677 END IF
678*
679 ELSE
680*
681* Don't perform the checking, only the timing
682* operation
683*
684 KPASS = KPASS + 1
685 FRESID = FRESID - FRESID
686 PASSED = 'bypass'
687*
688 END IF
689*
690* Gather maximum of all CPU and WALL clock timings
691*
692 CALL SLCOMBINE( ICTXT, 'all', '>', 'w', 2, 1, WTIME )
693 CALL SLCOMBINE( ICTXT, 'all', '>', 'c', 2, 1, CTIME )
694*
695* Print results
696*
697.EQ..AND..EQ. IF( MYROW0 MYCOL0 ) THEN
698*
699 IF( LSAMEN( 3, MTYP, 'gen' ) ) THEN
700*
701* 2/3 N^3 - 1/2 N^2 flops for LU factorization
702*
703 NOPS = ( 2.0D+0 / 3.0D+0 )*( DBLE( N )**3 ) -
704 $ ( 1.0D+0 / 2.0D+0 )*( DBLE( N )**2 )
705*
706* 4/3 N^3 - N^2 flops for inversion
707*
708 NOPS = NOPS +
709 $ ( 4.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) -
710 $ DBLE( N )**2
711*
712 ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'tr' ) ) THEN
713*
714* 1/3 N^3 + 2/3 N flops for triangular inversion
715*
716 CTIME(1) = 0.0D+0
717 WTIME(1) = 0.0D+0
718 NOPS = ( 1.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) +
719 $ ( 2.0D+0 / 3.0D+0 ) * ( DBLE( N ) )
720*
721 ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'pd' ) ) THEN
722*
723* 1/3 N^3 + 1/2 N^2 flops for Cholesky
724* factorization
725*
726 NOPS = ( 1.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) +
727 $ ( 1.0D+0 / 2.0D+0 ) * ( DBLE( N )**2 )
728*
729* 2/3 N^3 + 1/2 N^2 flops for Cholesky inversion
730*
731 NOPS = NOPS +
732 $ ( 2.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) +
733 $ ( 1.0D+0 / 2.0D+0 ) * ( DBLE( N )**2 )
734*
735 END IF
736*
737* Figure total megaflops -- factorization and
738* inversion, for WALL and CPU time, and print
739* output.
740*
741* Print WALL time if machine supports it
742*
743.GT. IF( WTIME( 1 ) + WTIME( 2 ) 0.0D+0 ) THEN
744 TMFLOPS = NOPS /
745 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
746 ELSE
747 TMFLOPS = 0.0D+0
748 END IF
749*
750.GE. IF( WTIME( 2 ) 0.0D+0 )
751 $ WRITE( NOUT, FMT = 9993 ) 'wall', N, NB, NPROW,
752 $ NPCOL, WTIME( 1 ), WTIME( 2 ), TMFLOPS,
753 $ RCOND, FRESID, PASSED
754*
755* Print CPU time if machine supports it
756*
757.GT. IF( CTIME( 1 ) + CTIME( 2 ) 0.0D+0 ) THEN
758 TMFLOPS = NOPS /
759 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
760 ELSE
761 TMFLOPS = 0.0D+0
762 END IF
763*
764.GE. IF( CTIME( 2 ) 0.0D+0 )
765 $ WRITE( NOUT, FMT = 9993 ) 'cpu ', N, NB, NPROW,
766 $ NPCOL, CTIME( 1 ), CTIME( 2 ), TMFLOPS,
767 $ RCOND, FRESID, PASSED
768 END IF
769*
770 10 CONTINUE
771*
772 20 CONTINUE
773*
774 CALL BLACS_GRIDEXIT( ICTXT )
775*
776 30 CONTINUE
777*
778 40 CONTINUE
779*
780* Print out ending messages and close output file
781*
782.EQ. IF( IAM0 ) THEN
783 KTESTS = KPASS + KFAIL + KSKIP
784 WRITE( NOUT, FMT = * )
785 WRITE( NOUT, FMT = 9992 ) KTESTS
786 IF( CHECK ) THEN
787 WRITE( NOUT, FMT = 9991 ) KPASS
788 WRITE( NOUT, FMT = 9989 ) KFAIL
789 ELSE
790 WRITE( NOUT, FMT = 9990 ) KPASS
791 END IF
792 WRITE( NOUT, FMT = 9988 ) KSKIP
793 WRITE( NOUT, FMT = * )
794 WRITE( NOUT, FMT = * )
795 WRITE( NOUT, FMT = 9987 )
796.NE..AND..NE. IF( NOUT6 NOUT0 )
797 $ CLOSE ( NOUT )
798 END IF
799*
800 CALL BLACS_EXIT( 0 )
801*
802 9999 FORMAT( 'illegal ', A6, ': ', A5, ' = ', I3,
803 $ '; it should be at least 1' )
804 9998 FORMAT( 'illegal grid: nprow*npcol = ', I4, '. it can be at most',
805 $ I4 )
806 9997 FORMAT( 'bad ', A6, ' parameters: going on to next test case.' )
807 9996 FORMAT( 'unable to perform ', A, ': need totmem of at least',
808 $ I11 )
809 9995 FORMAT( 'time n nb p q fct time inv time ',
810 $ ' mflops cond resid check' )
811 9994 FORMAT( '---- ----- --- ----- ----- -------- -------- ',
812 $ '----------- ------- ------- ------' )
813 9993 FORMAT( A4, 1X, I5, 1X, I3, 1X, I5, 1X, I5, 1X, F8.2, 1X, F8.2,
814 $ 1X, F11.2, 1X, F7.1, 1X, F7.2, 1X, A6 )
815 9992 FORMAT( 'finished ', I6, ' tests, with the following results:' )
816 9991 FORMAT( I5, ' tests completed and passed residual checks.' )
817 9990 FORMAT( I5, ' tests completed without checking.' )
818 9989 FORMAT( I5, ' tests completed and failed residual checks.' )
819 9988 FORMAT( I5, ' tests skipped because of illegal input values.' )
820 9987 FORMAT( 'END OF TESTS.' )
821 9986 FORMAT( A )
822*
823 STOP
824*
825* End of PSINVDRIVER
826*
827 END
subroutine psmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition psmatgen.f:4
end diagonal values have been computed in the(sparse) matrix id.SOL
logical function lsamen(n, ca, cb)
LSAMEN
Definition lsamen.f:74
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function ilcm(m, n)
Definition ilcm.f:2
#define max(a, b)
Definition macros.h:21
subroutine blacs_gridinit(cntxt, c, nprow, npcol)
Definition mpi.f:745
subroutine pspotrf(uplo, n, a, ia, ja, desca, info)
Definition mpi.f:870
real function pslange(norm, m, n, a, ia, ja, desca, work)
Definition mpi.f:1299
subroutine psgetrf(m, n, a, ia, ja, desca, ipiv, info)
Definition mpi.f:881
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition mpi.f:777
subroutine blacs_gridexit(cntxt)
Definition mpi.f:762
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine pslaset(uplo, m, n, alpha, beta, a, ia, ja, desca)
Definition psblastst.f:6863
subroutine pschekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pschekpad.f:3
subroutine psfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition psfillpad.f:2
subroutine psgetri(n, a, ia, ja, desca, ipiv, work, lwork, iwork, liwork, info)
Definition psgetri.f:3
subroutine psinvchk(mattyp, n, a, ia, ja, desca, iaseed, anorm, fresid, rcond, work)
Definition psinvchk.f:3
program psinvdriver
Definition psinvdriver.f:1
subroutine psinvinfo(summry, nout, nmtyp, mattyp, ldmtyp, nmat, nval, ldnval, nnb, nbval, ldnbval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition psinvinfo.f:5
real function pslansy(norm, uplo, n, a, ia, ja, desca, work)
Definition pslansy.f:3
real function pslantr(norm, uplo, diag, m, n, a, ia, ja, desca, work)
Definition pslantr.f:3
subroutine pspotri(uplo, n, a, ia, ja, desca, info)
Definition pspotri.f:2
subroutine pstrtri(uplo, diag, n, a, ia, ja, desca, info)
Definition pstrtri.f:2
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267