OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pslsdriver.f
Go to the documentation of this file.
1 PROGRAM pslsdriver
2*
3* -- ScaLAPACK routine (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 28, 2001
7*
8* Purpose
9* =======
10*
11* PSLSDRIVER is the main test program for the REAL
12* SCALAPACK (full rank) Least Squares routines. This test driver solves
13* full-rank least square problems.
14*
15* The program must be driven by a short data file. An annotated
16* example of a data file can be obtained by deleting the first 3
17* characters from the following 17 lines:
18* 'ScaLapack LS solve input file'
19* 'Intel iPSC/860 hypercube, gamma model.'
20* 'LS.out' output file name (if any)
21* 6 device out
22* 4 number of problems sizes
23* 55 17 31 201 values of M
24* 5 71 31 201 values of N
25* 3 number of NB's
26* 2 3 5 values of NB
27* 3 number of NRHS's
28* 2 3 5 values of NRHS
29* 2 number of NBRHS's
30* 1 2 values of NBRHS
31* 7 number of process grids (ordered P & Q)
32* 1 2 1 4 2 3 8 values of P
33* 7 2 4 1 3 2 1 values of Q
34* 3.0 threshold
35*
36* Internal Parameters
37* ===================
38*
39* TOTMEM INTEGER, default = 6200000.
40* TOTMEM is a machine-specific parameter indicating the
41* maximum amount of available memory in bytes.
42* The user should customize TOTMEM to his platform. Remember
43* to leave room in memory for the operating system, the BLACS
44* buffer, etc. For example, on a system with 8 MB of memory
45* per process (e.g., one processor on an Intel iPSC/860), the
46* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
47* code, BLACS buffer, etc). However, for PVM, we usually set
48* TOTMEM = 2000000. Some experimenting with the maximum value
49* of TOTMEM may be required.
50* INTGSZ INTEGER, default = 4 bytes.
51* REALSZ INTEGER, default = 4 bytes.
52* INTGSZ and REALSZ indicate the length in bytes on the
53* given platform for an integer and a single precision real.
54* MEM REAL array, dimension ( TOTMEM / REALSZ )
55* All arrays used by SCALAPACK routines are allocated from
56* this array and referenced by pointers. The integer IPA,
57* for example, is a pointer to the starting element of MEM for
58* the matrix A.
59*
60* =====================================================================
61*
62* .. Parameters ..
63 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
64 $ lld_, mb_, m_, nb_, n_, rsrc_
65 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
66 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
67 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
68 INTEGER memsiz, ntests, realsz, totmem
69 REAL padval
70 REAL one, zero
71 parameter( realsz = 4, totmem = 2000000,
72 $ memsiz = totmem / realsz, ntests = 20,
73 $ padval = -9923.0e+0 )
74 parameter( one = 1.0e+0, zero = 0.0e+0 )
75* ..
76* .. Local Scalars ..
77 LOGICAL check, tpsd
78 CHARACTER trans
79 CHARACTER*6 passed
80 CHARACTER*80 outfile
81 INTEGER hh, i, iam, iaseed, ibseed, ictxt, ii, imidpad,
82 $ INFO, ipa, ipb, ipostpad, iprepad, ipw, ipw2,
83 $ ipx, iscale, ITRAN, itype, J, jj, k, kfail, kk,
84 $ kpass, kskip, ktests, lcm, lcmp, ltau, lwf,
85 $ lwork, lws, m, mnp, mnrhsp, mp, mq, mycol,
86 $ myrow, n, nb, nbrhs, ncols, ngrids, nmat, nnb,
87 $ nnbr, nnr, nnrhsq, nout, np, npcol, nprocs,
88 $ nprow, nrows, nq, nrhs, nrhsp, nrhsq, worksiz
89 REAL anorm, bnorm, sresid, thresh
90 DOUBLE PRECISION addfac, adds, mulfac, mults, nops, tmflops
91* ..
92* .. Local Arrays ..
93 INTEGER desca( dlen_ ), DESCB( dlen_ ), descw( lld_ ),
94 $ DESCX( dlen_ ), ierr( 2 ), mval( NTESTS ),
95 $ nbrval( ntests ), nbval( ntests ),
96 $ nrval( ntests ), nval( ntests ),
97 $ pval( ntests ), qval( ntests )
98 REAL mem( memsiz ), result( 2 )
99 DOUBLE PRECISION ctime( 1 ), wtime( 1 )
100* ..
101* .. External Subroutines ..
102 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
104 $ blacs_pinfo, descinit, igsum2d, pschekpad,
105 $ psfillpad, psgels, psgemm, pslacpy,
108* ..
109* .. External Functions ..
110 LOGICAL lsame
111 INTEGER iceil, ilcm, numroc
112 REAL pslange, psqrt14, psqrt17
113 EXTERNAL iceil, ilcm, lsame, numroc, pslange,
115* ..
116* .. Intrinsic Functions ..
117 INTRINSIC max, min
118* ..
119* .. Data Statements ..
120 DATA ktests, kpass, kfail, kskip / 4*0 /
121* ..
122* .. Executable Statements ..
123*
124* Get starting information
125*
126 CALL blacs_pinfo( iam, nprocs )
127*
128 iaseed = 100
129 ibseed = 200
130 CALL pslsinfo( outfile, nout, nmat, mval, ntests, nval,
131 $ ntests, nnb, nbval, ntests, nnr, nrval, ntests,
132 $ nnbr, nbrval, ntests, ngrids, pval, ntests, qval,
133 $ ntests, thresh, mem, iam, nprocs )
134 check = ( thresh.GE.0.0e+0 )
135*
136* Print headings
137*
138 IF( iam.EQ.0 ) THEN
139 WRITE( nout, fmt = * )
140 WRITE( nout, fmt = 9995 )
141 WRITE( nout, fmt = 9994 )
142 WRITE( nout, fmt = * )
143 END IF
144*
145* Loop over different process grids
146*
147 DO 90 i = 1, ngrids
148*
149 nprow = pval( i )
150 npcol = qval( i )
151*
152* Make sure grid information is correct
153*
154 ierr( 1 ) = 0
155 IF( nprow.LT.1 ) THEN
156 IF( iam.EQ.0 )
157 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
158 ierr( 1 ) = 1
159 ELSE IF( npcol.LT.1 ) THEN
160 IF( iam.EQ.0 )
161 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
162 ierr( 1 ) = 1
163 ELSE IF( nprow*npcol.GT.nprocs ) THEN
164 IF( iam.EQ.0 )
165 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
166 ierr( 1 ) = 1
167 END IF
168*
169 IF( ierr( 1 ).GT.0 ) THEN
170 IF( iam.EQ.0 )
171 $ WRITE( nout, fmt = 9997 ) 'grid'
172 kskip = kskip + 1
173 GO TO 90
174 END IF
175*
176* Define process grid
177*
178 CALL blacs_get( -1, 0, ictxt )
179 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
180 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
181*
182* Go to bottom of loop if this case doesn't use my process
183*
184 IF( ( myrow.GE.nprow ).OR.( mycol.GE.npcol ) )
185 $ GO TO 90
186*
187 DO 80 j = 1, nmat
188*
189 m = mval( j )
190 n = nval( j )
191*
192* Make sure matrix information is correct
193*
194 ierr( 1 ) = 0
195 IF( m.LT.1 ) THEN
196 IF( iam.EQ.0 )
197 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
198 ierr( 1 ) = 1
199 ELSE IF( n.LT.1 ) THEN
200 IF( iam.EQ.0 )
201 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
202 ierr( 1 ) = 1
203 END IF
204*
205* Make sure no one had error
206*
207 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
208*
209 IF( ierr( 1 ).GT.0 ) THEN
210 IF( iam.EQ.0 )
211 $ WRITE( nout, fmt = 9997 ) 'matrix'
212 kskip = kskip + 1
213 GO TO 80
214 END IF
215*
216* Loop over different blocking sizes
217*
218 DO 70 k = 1, nnb
219*
220 nb = nbval( k )
221*
222* Make sure nb is legal
223*
224 ierr( 1 ) = 0
225 IF( nb.LT.1 ) THEN
226 ierr( 1 ) = 1
227 IF( iam.EQ.0 )
228 $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
229 END IF
230*
231* Check all processes for an error
232*
233 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
234*
235 IF( ierr( 1 ).GT.0 ) THEN
236 IF( iam.EQ.0 )
237 $ WRITE( nout, fmt = 9997 ) 'NB'
238 kskip = kskip + 1
239 GO TO 70
240 END IF
241*
242* Padding constants
243*
244 mp = numroc( m, nb, myrow, 0, nprow )
245 mq = numroc( m, nb, mycol, 0, npcol )
246 np = numroc( n, nb, myrow, 0, nprow )
247 mnp = max( mp, np )
248 nq = numroc( n, nb, mycol, 0, npcol )
249*
250 IF( check ) THEN
251 iprepad = max( nb, mp )
252 imidpad = nb
253 ipostpad = max( nb, nq )
254 ELSE
255 iprepad = 0
256 imidpad = 0
257 ipostpad = 0
258 END IF
259*
260* Initialize the array descriptor for the matrix A
261*
262 CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
263 $ max( 1, mp ) + imidpad, ierr( 1 ) )
264*
265* Check all processes for an error
266*
267 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
268*
269 IF( ierr( 1 ).LT.0 ) THEN
270 IF( iam.EQ.0 )
271 $ WRITE( nout, fmt = 9997 ) 'descriptor'
272 kskip = kskip + 1
273 GO TO 70
274 END IF
275*
276 DO 60 iscale = 1, 3
277*
278 itype = iscale
279*
280* Assign pointers into MEM for SCALAPACK arrays, A is
281* allocated starting at position MEM( IPREPAD+1 )
282*
283 ipa = iprepad + 1
284 ipx = ipa + desca( lld_ )*nq + ipostpad + iprepad
285 ipw = ipx
286*
287 worksiz = nq + ipostpad
288*
289* Check for adequate memory for problem size
290*
291 ierr( 1 ) = 0
292 IF( ( ipw+worksiz ).GT.memsiz ) THEN
293 IF( iam.EQ.0 )
294 $ WRITE( nout, fmt = 9996 ) 'MEMORY',
295 $ ( ipx+worksiz )*realsz
296 ierr( 1 ) = 1
297 END IF
298*
299* Check all processes for an error
300*
301 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
302 $ 0 )
303*
304 IF( ierr( 1 ).GT.0 ) THEN
305 IF( iam.EQ.0 )
306 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
307 kskip = kskip + 1
308 GO TO 70
309 END IF
310*
311 IF( check ) THEN
312 CALL psfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
313 $ desca( lld_ ), iprepad, ipostpad,
314 $ padval )
315 CALL psfillpad( ictxt, worksiz-ipostpad, 1,
316 $ mem( ipw-iprepad ),
317 $ worksiz-ipostpad, iprepad,
318 $ ipostpad, padval )
319 END IF
320*
321* Generate the matrix A and calculate its 1-norm
322*
323 CALL psqrt13( iscale, m, n, mem( ipa ), 1, 1,
324 $ desca, anorm, iaseed, mem( ipw ) )
325*
326 IF( check ) THEN
327 CALL pschekpad( ictxt, 'PSQRT13', mp, nq,
328 $ mem( ipa-iprepad ), desca( lld_ ),
329 $ iprepad, ipostpad, padval )
330 CALL pschekpad( ictxt, 'PSQRT13',
331 $ worksiz-ipostpad, 1,
332 $ mem( ipw-iprepad ),
333 $ worksiz-ipostpad, iprepad,
334 $ ipostpad, padval )
335 END IF
336*
337 DO 50 itran = 1, 2
338*
339 IF( itran.EQ.1 ) THEN
340 nrows = m
341 ncols = n
342 trans = 'N'
343 tpsd = .false.
344 ELSE
345 nrows = n
346 ncols = m
347 trans = 't'
348 TPSD = .TRUE.
349 END IF
350*
351* Loop over the different values for NRHS
352*
353 DO 40 HH = 1, NNR
354*
355 NRHS = NRVAL( HH )
356*
357 DO 30 KK = 1, NNBR
358*
359 NBRHS = NBRVAL( KK )
360*
361 NRHSP = NUMROC( NRHS, NBRHS, MYROW, 0,
362 $ NPROW )
363 NRHSQ = NUMROC( NRHS, NBRHS, MYCOL, 0,
364 $ NPCOL )
365*
366* Define Array descriptor for rhs MAX(M,N)xNRHS
367*
368 CALL DESCINIT( DESCX, MAX( M, N ), NRHS, NB,
369 $ NBRHS, 0, 0, ICTXT,
370 $ MAX( 1, MNP ) + IMIDPAD,
371 $ IERR( 1 ) )
372 IF( TPSD ) THEN
373 CALL DESCINIT( DESCW, M, NRHS, NB, NBRHS,
374 $ 0, 0, ICTXT, MAX( 1, MP ) +
375 $ IMIDPAD, IERR( 2 ) )
376 ELSE
377 CALL DESCINIT( DESCW, N, NRHS, NB, NBRHS,
378 $ 0, 0, ICTXT, MAX( 1, NP ) +
379 $ IMIDPAD, IERR( 2 ) )
380 END IF
381*
382* Check all processes for an error
383*
384 CALL IGSUM2D( ICTXT, 'all', ' ', 2, 1, IERR,
385 $ 2, -1, 0 )
386*
387.LT..OR..LT. IF( IERR( 1 )0 IERR( 2 )0 ) THEN
388.EQ. IF( IAM0 )
389 $ WRITE( NOUT, FMT = 9997 ) 'descriptor'
390 KSKIP = KSKIP + 1
391 GO TO 30
392 END IF
393*
394* Check for enough memory
395*
396 IPX = IPA + DESCA( LLD_ )*NQ + IPOSTPAD +
397 $ IPREPAD
398 IPW = IPX + DESCX( LLD_ )*NRHSQ + IPOSTPAD +
399 $ IPREPAD
400 WORKSIZ = DESCW( LLD_ )*NRHSQ + IPOSTPAD
401*
402 IERR( 1 ) = 0
403.GT. IF( IPW+WORKSIZMEMSIZ ) THEN
404.EQ. IF( IAM0 )
405 $ WRITE( NOUT, FMT = 9996 ) 'generation',
406 $ ( IPW+WORKSIZ )*REALSZ
407 IERR( 1 ) = 1
408 END IF
409*
410* Check all processes for an error
411*
412 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR,
413 $ 1, -1, 0 )
414*
415.GT. IF( IERR( 1 )0 ) THEN
416.EQ. IF( IAM0 )
417 $ WRITE( NOUT, FMT = 9997 ) 'memory'
418 KSKIP = KSKIP + 1
419 GO TO 30
420 END IF
421*
422* Generate RHS
423*
424 IF( TPSD ) THEN
425 CALL PSMATGEN( ICTXT, 'no', 'no',
426 $ DESCW( M_ ), DESCW( N_ ),
427 $ DESCW( MB_ ), DESCW( NB_ ),
428 $ MEM( IPW ), DESCW( LLD_ ),
429 $ DESCW( RSRC_ ),
430 $ DESCW( CSRC_ ), IBSEED, 0,
431 $ MP, 0, NRHSQ, MYROW, MYCOL,
432 $ NPROW, NPCOL )
433 ELSE
434 CALL PSMATGEN( ICTXT, 'no', 'no',
435 $ DESCW( M_ ), DESCW( N_ ),
436 $ DESCW( MB_ ), DESCW( NB_ ),
437 $ MEM( IPW ), DESCW( LLD_ ),
438 $ DESCW( RSRC_ ),
439 $ DESCW( CSRC_ ), IBSEED, 0,
440 $ NP, 0, NRHSQ, MYROW, MYCOL,
441 $ NPROW, NPCOL )
442 END IF
443*
444 IF( CHECK ) THEN
445 CALL PSFILLPAD( ICTXT, MNP, NRHSQ,
446 $ MEM( IPX-IPREPAD ),
447 $ DESCX( LLD_ ), IPREPAD,
448 $ IPOSTPAD, PADVAL )
449 IF( TPSD ) THEN
450 CALL PSFILLPAD( ICTXT, MP, NRHSQ,
451 $ MEM( IPW-IPREPAD ),
452 $ DESCW( LLD_ ), IPREPAD,
453 $ IPOSTPAD, PADVAL )
454 ELSE
455 CALL PSFILLPAD( ICTXT, NP, NRHSQ,
456 $ MEM( IPW-IPREPAD ),
457 $ DESCW( LLD_ ), IPREPAD,
458 $ IPOSTPAD, PADVAL )
459 END IF
460 END IF
461*
462 DO 10 JJ = 1, NRHS
463 CALL PSNRM2( NCOLS, BNORM, MEM( IPW ), 1,
464 $ JJ, DESCW, 1 )
465.GT. IF( BNORMZERO )
466 $ CALL PSSCAL( NCOLS, ONE / BNORM,
467 $ MEM( IPW ), 1, JJ, DESCW,
468 $ 1 )
469 10 CONTINUE
470*
471 CALL PSGEMM( TRANS, 'n', NROWS, NRHS, NCOLS,
472 $ ONE, MEM( IPA ), 1, 1, DESCA,
473 $ MEM( IPW ), 1, 1, DESCW, ZERO,
474 $ MEM( IPX ), 1, 1, DESCX )
475*
476 IF( CHECK ) THEN
477*
478* check for memory overwrite
479*
480 CALL PSCHEKPAD( ICTXT, 'generation', MP,
481 $ NQ, MEM( IPA-IPREPAD ),
482 $ DESCA( LLD_ ), IPREPAD,
483 $ IPOSTPAD, PADVAL )
484 CALL PSCHEKPAD( ICTXT, 'generation', MNP,
485 $ NRHSQ, MEM( IPX-IPREPAD ),
486 $ DESCX( LLD_ ), IPREPAD,
487 $ IPOSTPAD, PADVAL )
488 IF( TPSD ) THEN
489 CALL PSCHEKPAD( ICTXT, 'generation',
490 $ MP, NRHSQ,
491 $ MEM( IPW-IPREPAD ),
492 $ DESCW( LLD_ ), IPREPAD,
493 $ IPOSTPAD, PADVAL )
494 ELSE
495 CALL PSCHEKPAD( ICTXT, 'generation',
496 $ NP, NRHSQ,
497 $ MEM( IPW-IPREPAD ),
498 $ DESCW( LLD_ ), IPREPAD,
499 $ IPOSTPAD, PADVAL )
500 END IF
501*
502* Allocate space for copy of rhs
503*
504 IPB = IPW
505*
506 IF( TPSD ) THEN
507 CALL DESCINIT( DESCB, N, NRHS, NB,
508 $ NBRHS, 0, 0, ICTXT,
509 $ MAX( 1, NP ) + IMIDPAD,
510 $ IERR( 1 ) )
511 ELSE
512 CALL DESCINIT( DESCB, M, NRHS, NB,
513 $ NBRHS, 0, 0, ICTXT,
514 $ MAX( 1, MP ) + IMIDPAD,
515 $ IERR( 1 ) )
516 END IF
517*
518* Check all processes for an error
519*
520 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1,
521 $ IERR, 1, -1, 0 )
522*
523.LT. IF( IERR( 1 )0 ) THEN
524.EQ. IF( IAM0 )
525 $ WRITE( NOUT, FMT = 9997 )
526 $ 'descriptor'
527 KSKIP = KSKIP + 1
528 GO TO 30
529 END IF
530*
531 IPW = IPB + DESCB( LLD_ )*NRHSQ +
532 $ IPOSTPAD + IPREPAD
533*
534 END IF
535*
536* Calculate the amount of workspace for PSGELS
537*
538.GE. IF( MN ) THEN
539 LTAU = NUMROC( MIN(M,N), NB, MYCOL, 0,
540 $ NPCOL )
541 LWF = NB * ( MP + NQ + NB )
542 LWS = MAX( ( NB*( NB - 1 ) ) / 2,
543 $ ( MP + NRHSQ ) * NB ) + NB*NB
544 ELSE
545 LCM = ILCM( NPROW, NPCOL )
546 LCMP = LCM / NPROW
547 LTAU = NUMROC( MIN(M,N), NB, MYROW, 0,
548 $ NPROW )
549 LWF = NB * ( MP + NQ + NB )
550 LWS = MAX( ( NB*( NB - 1 ) ) / 2, ( NP +
551 $ MAX( NQ + NUMROC( NUMROC( N, NB, 0,
552 $ 0, NPROW ), NB, 0, 0, LCMP ),
553 $ NRHSQ ) ) * NB ) + NB*NB
554 END IF
555*
556 LWORK = LTAU + MAX( LWF, LWS )
557 WORKSIZ = LWORK + IPOSTPAD
558*
559* Check for adequate memory for problem size
560*
561 IERR( 1 ) = 0
562.GT. IF( IPW+WORKSIZMEMSIZ ) THEN
563.EQ. IF( IAM0 )
564 $ WRITE( NOUT, FMT = 9996 ) 'solve',
565 $ ( IPW+WORKSIZ )*REALSZ
566 IERR( 1 ) = 1
567 END IF
568*
569* Check all processes for an error
570*
571 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR,
572 $ 1, -1, 0 )
573*
574.GT. IF( IERR( 1 )0 ) THEN
575.EQ. IF( IAM0 )
576 $ WRITE( NOUT, FMT = 9997 ) 'memory'
577 KSKIP = KSKIP + 1
578 GO TO 30
579 END IF
580*
581 IF( CHECK ) THEN
582*
583* Make the copy of the right hand side
584*
585 CALL PSLACPY( 'all', NROWS, NRHS,
586 $ MEM( IPX ), 1, 1, DESCX,
587 $ MEM( IPB ), 1, 1, DESCB )
588*
589 IF( TPSD ) THEN
590 CALL PSFILLPAD( ICTXT, NP, NRHSQ,
591 $ MEM( IPB-IPREPAD ),
592 $ DESCB( LLD_ ), IPREPAD,
593 $ IPOSTPAD, PADVAL )
594 ELSE
595 CALL PSFILLPAD( ICTXT, MP, NRHSQ,
596 $ MEM( IPB-IPREPAD ),
597 $ DESCB( LLD_ ), IPREPAD,
598 $ IPOSTPAD, PADVAL )
599 END IF
600 CALL PSFILLPAD( ICTXT, LWORK, 1,
601 $ MEM( IPW-IPREPAD ),
602 $ LWORK, IPREPAD,
603 $ IPOSTPAD, PADVAL )
604 END IF
605*
606 CALL SLBOOT( )
607 CALL BLACS_BARRIER( ICTXT, 'all' )
608 CALL SLTIMER( 1 )
609*
610* Solve the LS or overdetermined system
611*
612 CALL PSGELS( TRANS, M, N, NRHS, MEM( IPA ),
613 $ 1, 1, DESCA, MEM( IPX ), 1, 1,
614 $ DESCX, MEM( IPW ), LWORK, INFO )
615*
616 CALL SLTIMER( 1 )
617*
618 IF( CHECK ) THEN
619*
620* check for memory overwrite
621*
622 CALL PSCHEKPAD( ICTXT, 'psgels', MP,
623 $ NQ, MEM( IPA-IPREPAD ),
624 $ DESCA( LLD_ ), IPREPAD,
625 $ IPOSTPAD, PADVAL )
626 CALL PSCHEKPAD( ICTXT, 'psgels', MNP,
627 $ NRHSQ, MEM( IPX-IPREPAD ),
628 $ DESCX( LLD_ ), IPREPAD,
629 $ IPOSTPAD, PADVAL )
630 CALL PSCHEKPAD( ICTXT, 'psgels', LWORK,
631 $ 1, MEM( IPW-IPREPAD ),
632 $ LWORK, IPREPAD,
633 $ IPOSTPAD, PADVAL )
634 END IF
635*
636* Regenerate A in place for testing and next
637* iteration
638*
639 CALL PSQRT13( ISCALE, M, N, MEM( IPA ), 1, 1,
640 $ DESCA, ANORM, IASEED,
641 $ MEM( IPW ) )
642*
643* check the solution to rhs
644*
645 IF( CHECK ) THEN
646*
647* Am I going to call PSQRT17 ?
648*
649.GE..AND..NOT..OR. IF( ( MN ( TPSD ) )
650.LT..AND. $ ( MN TPSD ) ) THEN
651*
652* Call PSQRT17 first, A, X, and B remain
653* unchanged. Solving LS system
654*
655* Check amount of memory for PSQRT17
656*
657 IF( TPSD ) THEN
658 WORKSIZ = NP*NRHSQ + NRHSP*MQ
659 IPW2 = IPW + WORKSIZ
660 WORKSIZ = WORKSIZ +
661 $ MAX( NQ, MAX( MQ, NRHSQ ) ) +
662 $ IPOSTPAD
663 ELSE
664 WORKSIZ = MP*NRHSQ + NRHSP*NQ
665 IPW2 = IPW + WORKSIZ
666 WORKSIZ = WORKSIZ +
667 $ MAX( NQ, NRHSQ ) +
668 $ IPOSTPAD
669 END IF
670*
671* Check for adequate memory for problem
672* size
673*
674 IERR( 1 ) = 0
675.GT. IF( ( IPW+WORKSIZ )MEMSIZ ) THEN
676.EQ. IF( IAM0 )
677 $ WRITE( NOUT, FMT = 9996 )
678 $ 'memory', ( IPW+WORKSIZ )*REALSZ
679 IERR( 1 ) = 1
680 END IF
681*
682* Check all processes for an error
683*
684 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1,
685 $ IERR, 1, -1, 0 )
686*
687.GT. IF( IERR( 1 )0 ) THEN
688.EQ. IF( IAM0 )
689 $ WRITE( NOUT, FMT = 9997 )
690 $ 'memory'
691 KSKIP = KSKIP + 1
692 GO TO 30
693 END IF
694*
695 CALL PSFILLPAD( ICTXT,
696 $ WORKSIZ-IPOSTPAD, 1,
697 $ MEM( IPW-IPREPAD ),
698 $ WORKSIZ-IPOSTPAD,
699 $ IPREPAD, IPOSTPAD,
700 $ PADVAL )
701*
702 RESULT( 2 ) = PSQRT17( TRANS, 1, M, N,
703 $ NRHS,
704 $ MEM( IPA ),
705 $ 1, 1, DESCA,
706 $ MEM( IPX ), 1,
707 $ 1, DESCX,
708 $ MEM( IPB ),
709 $ 1, 1, DESCB,
710 $ MEM( IPW ),
711 $ MEM( IPW2 ) )
712 SRESID = RESULT( 2 )
713*
714 CALL PSCHEKPAD( ICTXT, 'psqrt17',
715 $ MP, NQ,
716 $ MEM( IPA-IPREPAD ),
717 $ DESCA( LLD_ ),
718 $ IPREPAD, IPOSTPAD,
719 $ PADVAL )
720 CALL PSCHEKPAD( ICTXT, 'psqrt17',
721 $ MNP, NRHSQ,
722 $ MEM( IPX-IPREPAD ),
723 $ DESCX( LLD_ ), IPREPAD,
724 $ IPOSTPAD, PADVAL )
725 IF( TPSD ) THEN
726 CALL PSCHEKPAD( ICTXT, 'psqrt17',
727 $ NP, NRHSQ,
728 $ MEM( IPB-IPREPAD ),
729 $ DESCB( LLD_ ),
730 $ IPREPAD, IPOSTPAD,
731 $ PADVAL )
732 ELSE
733 CALL PSCHEKPAD( ICTXT, 'psqrt17',
734 $ MP, NRHSQ,
735 $ MEM( IPB-IPREPAD ),
736 $ DESCB( LLD_ ),
737 $ IPREPAD, IPOSTPAD,
738 $ PADVAL )
739 END IF
740 CALL PSCHEKPAD( ICTXT, 'psqrt17',
741 $ WORKSIZ-IPOSTPAD, 1,
742 $ MEM( IPW-IPREPAD ),
743 $ WORKSIZ-IPOSTPAD,
744 $ IPREPAD, IPOSTPAD,
745 $ PADVAL )
746 END IF
747*
748* Call PSQRT16, B will be destroyed.
749*
750 IF( TPSD ) THEN
751 WORKSIZ = MP + IPOSTPAD
752 ELSE
753 WORKSIZ = NQ + IPOSTPAD
754 END IF
755*
756* Check for adequate memory for problem size
757*
758 IERR( 1 ) = 0
759.GT. IF( ( IPW+WORKSIZ )MEMSIZ ) THEN
760.EQ. IF( IAM0 )
761 $ WRITE( NOUT, FMT = 9996 ) 'memory',
762 $ ( IPW+WORKSIZ )*REALSZ
763 IERR( 1 ) = 1
764 END IF
765*
766* Check all processes for an error
767*
768 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1,
769 $ IERR, 1, -1, 0 )
770*
771.GT. IF( IERR( 1 )0 ) THEN
772.EQ. IF( IAM0 )
773 $ WRITE( NOUT, FMT = 9997 ) 'memory'
774 KSKIP = KSKIP + 1
775 GO TO 30
776 END IF
777*
778 CALL PSFILLPAD( ICTXT,
779 $ WORKSIZ-IPOSTPAD, 1,
780 $ MEM( IPW-IPREPAD ),
781 $ WORKSIZ-IPOSTPAD,
782 $ IPREPAD, IPOSTPAD,
783 $ PADVAL )
784*
785 CALL PSQRT16( TRANS, M, N, NRHS,
786 $ MEM( IPA ), 1, 1, DESCA,
787 $ MEM( IPX ), 1, 1, DESCX,
788 $ MEM( IPB ), 1, 1, DESCB,
789 $ MEM( IPW ), RESULT( 1 ) )
790*
791 CALL PSCHEKPAD( ICTXT, 'psqrt16',
792 $ mp, nq,
793 $ mem( ipa-iprepad ),
794 $ desca( lld_ ),
795 $ iprepad, ipostpad,
796 $ padval )
797 CALL pschekpad( ictxt, 'PSQRT16',
798 $ mnp, nrhsq,
799 $ mem( ipx-iprepad ),
800 $ descx( lld_ ), iprepad,
801 $ ipostpad, padval )
802 IF( tpsd ) THEN
803 CALL pschekpad( ictxt, 'PSQRT16',
804 $ np, nrhsq,
805 $ mem( ipb-iprepad ),
806 $ descb( lld_ ),
807 $ iprepad, ipostpad,
808 $ padval )
809 ELSE
810 CALL pschekpad( ictxt, 'PSQRT16',
811 $ mp, nrhsq,
812 $ mem( ipb-iprepad ),
813 $ descb( lld_ ),
814 $ iprepad, ipostpad,
815 $ padval )
816 END IF
817 CALL pschekpad( ictxt, 'PSQRT16',
818 $ worksiz-ipostpad, 1,
819 $ mem( ipw-iprepad ),
820 $ worksiz-ipostpad,
821 $ iprepad, ipostpad,
822 $ padval )
823*
824* Call PSQRT14
825*
826 IF( ( m.GE.n .AND. tpsd ) .OR.
827 $ ( m.LT.n .AND. ( .NOT.tpsd ) ) ) THEN
828*
829 ipw = ipb
830*
831 IF( tpsd ) THEN
832*
833 nnrhsq = numroc( n+nrhs, nb, mycol,
834 $ 0, npcol )
835 ltau = numroc( min( m, n+nrhs ), nb,
836 $ mycol, 0, npcol )
837 lwf = nb * ( nb + mp + nnrhsq )
838 worksiz = mp * nnrhsq + ltau + lwf +
839 $ ipostpad
840*
841 ELSE
842*
843 mnrhsp = numroc( m+nrhs, nb, myrow,
844 $ 0, nprow )
845 ltau = numroc( min( m+nrhs, n ), nb,
846 $ myrow, 0, nprow )
847 lwf = nb * ( nb + mnrhsp + nq )
848 worksiz = mnrhsp * nq + ltau + lwf +
849 $ ipostpad
850*
851 END IF
852*
853* Check for adequate memory for problem
854* size
855*
856 ierr( 1 ) = 0
857 IF( ( ipw+worksiz ).GT.memsiz ) THEN
858 IF( iam.EQ.0 )
859 $ WRITE( nout, fmt = 9996 )
860 $ 'MEMORY', ( ipw+worksiz )*realsz
861 ierr( 1 ) = 1
862 END IF
863*
864* Check all processes for an error
865*
866 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
867 $ ierr, 1, -1, 0 )
868*
869 IF( ierr( 1 ).GT.0 ) THEN
870 IF( iam.EQ.0 )
871 $ WRITE( nout, fmt = 9997 )
872 $ 'MEMORY'
873 kskip = kskip + 1
874 GO TO 30
875 END IF
876*
877 CALL psfillpad( ictxt,
878 $ worksiz-ipostpad, 1,
879 $ mem( ipw-iprepad ),
880 $ worksiz-ipostpad,
881 $ iprepad, ipostpad,
882 $ padval )
883*
884* Solve underdetermined system
885*
886 result( 2 ) = psqrt14( trans, m, n,
887 $ nrhs,
888 $ mem( ipa ), 1,
889 $ 1, desca,
890 $ mem( ipx ),
891 $ 1, 1, descx,
892 $ mem( ipw ) )
893 sresid = result( 2 )
894*
895 CALL pschekpad( ictxt, 'PSQRT14',
896 $ mp, nq,
897 $ mem( ipa-iprepad ),
898 $ desca( lld_ ),
899 $ iprepad, ipostpad,
900 $ padval )
901 CALL pschekpad( ictxt, 'PSQRT14',
902 $ mnp, nrhsq,
903 $ mem( ipx-iprepad ),
904 $ descx( lld_ ), iprepad,
905 $ ipostpad, padval )
906 CALL pschekpad( ictxt, 'PSQRT14',
907 $ worksiz-ipostpad, 1,
908 $ mem( ipw-iprepad ),
909 $ worksiz-ipostpad,
910 $ iprepad, ipostpad,
911 $ padval )
912 END IF
913*
914* Print information about the tests that
915* did not pass the threshold.
916*
917 passed = 'PASSED'
918 DO 20 ii = 1, 2
919 IF( ( result( ii ).GE.thresh ) .AND.
920 $ ( result( ii )-result( ii ).EQ.0.0e+0
921 $ ) ) THEN
922 IF( iam.EQ.0 )
923 $ WRITE( nout, fmt = 9986 )trans,
924 $ m, n, nrhs, nb, itype, ii,
925 $ result( ii )
926 kfail = kfail + 1
927 passed = 'FAILED'
928 ELSE
929 kpass = kpass + 1
930 END IF
931 20 CONTINUE
932*
933 ELSE
934*
935* By-pass the solve check
936*
937 kpass = kpass + 1
938 sresid = sresid - sresid
939 passed = 'BYPASS'
940*
941 END IF
942*
943* Gather maximum of all CPU and WALL clock
944* timings
945*
946 CALL slcombine( ictxt, 'All', '>', 'W', 1, 1,
947 $ wtime )
948 CALL slcombine( ictxt, 'All', '>', 'C', 1, 1,
949 $ ctime )
950*
951* Print results
952*
953 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
954 addfac = 1
955 mulfac = 1
956 IF( m.GE.n ) THEN
957*
958* NOPS = SOPLA( 'SGEQRF', M, N, 0, 0,
959* NB ) + SOPLA( 'SORMQR', M, NRHS, N,
960* 0, NB )
961*
962 mults = n*( ( ( 23.d0 / 6.d0 )+m+n /
963 $ 2.d0 )+ n*( m-n / 3.d0 ) ) +
964 $ n*nrhs*( 2.d0*m+2.d0-n )
965 adds = n*( ( 5.d0 / 6.d0 )+n*
966 $ ( 1.d0 / 2.d0+( m-n / 3.d0 ) ) )
967 $ + n*nrhs*( 2.d0*m+1.d0-n )
968 ELSE
969*
970* NOPS = SOPLA( 'SGELQF', M, N, 0, 0,
971* NB ) + SOPLA( 'SORMLQ', M,
972* NRHS, N, 0, NB )
973*
974 mults = m*( ( ( 29.d0 / 6.d0 )+2.d0*n-m
975 $ / 2.d0 )+m*( n-m / 3.d0 ) )
976 $ + n*nrhs*( 2.d0*m+2.d0-n )
977 adds = m*( ( 5.d0 / 6.d0 )+m / 2.d0+m*
978 $ ( n-m / 3.d0 ) )
979 $ + n*nrhs*( 2.d0*m+1.d0-n )
980 END IF
981 nops = addfac*adds + mulfac*mults
982*
983* Calculate total megaflops, for WALL and
984* CPU time, and print output
985*
986* Print WALL time if machine supports it
987*
988 IF( wtime( 1 ).GT.0.0d+0 ) THEN
989 tmflops = nops / ( wtime( 1 )*1.0d+6 )
990 ELSE
991 tmflops = 0.0d+0
992 END IF
993*
994 IF( wtime( 1 ).GE.0.0d+0 )
995 $ WRITE( nout, fmt = 9993 )
996 $ 'WALL', trans, m, n, nb, nrhs,
997 $ nbrhs, nprow, npcol, wtime( 1 ),
998 $ tmflops, passed
999*
1000* Print CPU time if machine supports it
1001*
1002 IF( ctime( 1 ).GT.0.0d+0 ) THEN
1003 tmflops = nops / ( ctime( 1 )*1.0d+6 )
1004 ELSE
1005 tmflops = 0.0d+0
1006 END IF
1007*
1008 IF( ctime( 1 ).GE.0.0d+0 )
1009 $ WRITE( nout, fmt = 9993 )
1010 $ 'CPU ', trans, m, n, nb, nrhs,
1011 $ nbrhs, nprow, npcol, ctime( 1 ),
1012 $ tmflops, passed
1013 END IF
1014 30 CONTINUE
1015 40 CONTINUE
1016 50 CONTINUE
1017 60 CONTINUE
1018 70 CONTINUE
1019 80 CONTINUE
1020 CALL blacs_gridexit( ictxt )
1021 90 CONTINUE
1022*
1023* Print out ending messages and close output file
1024*
1025 IF( iam.EQ.0 ) THEN
1026 ktests = kpass + kfail + kskip
1027 WRITE( nout, fmt = * )
1028 WRITE( nout, fmt = 9992 ) ktests
1029 IF( check ) THEN
1030 WRITE( nout, fmt = 9991 ) kpass
1031 WRITE( nout, fmt = 9989 ) kfail
1032 ELSE
1033 WRITE( nout, fmt = 9990 ) kpass
1034 END IF
1035 WRITE( nout, fmt = 9988 ) kskip
1036 WRITE( nout, fmt = * )
1037 WRITE( nout, fmt = * )
1038 WRITE( nout, fmt = 9987 )
1039 IF( nout.NE.6 .AND. nout.NE.0 )
1040 $ CLOSE ( nout )
1041 END IF
1042*
1043 CALL blacs_exit( 0 )
1044*
1045 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
1046 $ '; It should be at least 1' )
1047 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
1048 $ i4 )
1049 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
1050 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
1051 $ i11 )
1052 9995 FORMAT( 'Time TRANS M N NB NRHS NBRHS P Q ',
1053 $ 'LS Time MFLOPS CHECK' )
1054 9994 FORMAT( '---- ----- ------ ------ --- ----- ----- ----- ----- ',
1055 $ '--------- -------- ------' )
1056 9993 FORMAT( a4, 3x, a1, 3x, i6, 1x, i6, 1x, i3, 1x, i5, 1x, i5, 1x,
1057 $ i5, 1x, i5, 1x, f9.2, 1x, f8.2, 1x, a6 )
1058 9992 FORMAT( 'Finished', i6, ' tests, with the following results:' )
1059 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
1060 9990 FORMAT( i5, ' tests completed without checking.' )
1061 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
1062 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
1063 9987 FORMAT( 'END OF TESTS.' )
1064 9986 FORMAT( ' TRANS=''', a1, ''', M=', i5, ', N=', i5, ', NRHS=', i4,
1065 $ ', NB=', i4, ', type', i2, ', test(', I2, ')=', G12.5 )
1066*
1067 STOP
1068*
1069* End of PSLSDRIVER
1070*
1071 END
subroutine psmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition psmatgen.f:4
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function ilcm(m, n)
Definition ilcm.f:2
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine blacs_gridinit(cntxt, c, nprow, npcol)
Definition mpi.f:745
subroutine psnrm2(n, norm2, x, ix, jx, descx, incx)
Definition mpi.f:1254
real function pslange(norm, m, n, a, ia, ja, desca, work)
Definition mpi.f:1299
subroutine psscal(n, alpha, x, ix, jx, descx, incx)
Definition mpi.f:989
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition mpi.f:777
subroutine blacs_gridexit(cntxt)
Definition mpi.f:762
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine pschekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pschekpad.f:3
subroutine psfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition psfillpad.f:2
subroutine psgels(trans, m, n, nrhs, a, ia, ja, desca, b, ib, jb, descb, work, lwork, info)
Definition psgels.f:3
subroutine pslacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
Definition pslacpy.f:3
program pslsdriver
Definition pslsdriver.f:1
subroutine pslsinfo(summry, nout, nmat, mval, ldmval, nval, ldnval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pslsinfo.f:6
subroutine psqrt13(scale, m, n, a, ia, ja, desca, norma, iseed, work)
Definition psqrt13.f:3
real function psqrt14(trans, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, work)
Definition psqrt14.f:3
subroutine psqrt16(trans, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, b, ib, jb, descb, rwork, resid)
Definition psqrt16.f:3
real function psqrt17(trans, iresid, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, b, ib, jb, descb, work, rwork)
Definition psqrt17.f:5
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267