OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pslsdriver.f
Go to the documentation of this file.
1 PROGRAM pslsdriver
2*
3* -- ScaLAPACK routine (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 28, 2001
7*
8* Purpose
9* =======
10*
11* PSLSDRIVER is the main test program for the REAL
12* SCALAPACK (full rank) Least Squares routines. This test driver solves
13* full-rank least square problems.
14*
15* The program must be driven by a short data file. An annotated
16* example of a data file can be obtained by deleting the first 3
17* characters from the following 17 lines:
18* 'ScaLapack LS solve input file'
19* 'Intel iPSC/860 hypercube, gamma model.'
20* 'LS.out' output file name (if any)
21* 6 device out
22* 4 number of problems sizes
23* 55 17 31 201 values of M
24* 5 71 31 201 values of N
25* 3 number of NB's
26* 2 3 5 values of NB
27* 3 number of NRHS's
28* 2 3 5 values of NRHS
29* 2 number of NBRHS's
30* 1 2 values of NBRHS
31* 7 number of process grids (ordered P & Q)
32* 1 2 1 4 2 3 8 values of P
33* 7 2 4 1 3 2 1 values of Q
34* 3.0 threshold
35*
36* Internal Parameters
37* ===================
38*
39* TOTMEM INTEGER, default = 6200000.
40* TOTMEM is a machine-specific parameter indicating the
41* maximum amount of available memory in bytes.
42* The user should customize TOTMEM to his platform. Remember
43* to leave room in memory for the operating system, the BLACS
44* buffer, etc. For example, on a system with 8 MB of memory
45* per process (e.g., one processor on an Intel iPSC/860), the
46* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
47* code, BLACS buffer, etc). However, for PVM, we usually set
48* TOTMEM = 2000000. Some experimenting with the maximum value
49* of TOTMEM may be required.
50* INTGSZ INTEGER, default = 4 bytes.
51* REALSZ INTEGER, default = 4 bytes.
52* INTGSZ and REALSZ indicate the length in bytes on the
53* given platform for an integer and a single precision real.
54* MEM REAL array, dimension ( TOTMEM / REALSZ )
55* All arrays used by SCALAPACK routines are allocated from
56* this array and referenced by pointers. The integer IPA,
57* for example, is a pointer to the starting element of MEM for
58* the matrix A.
59*
60* =====================================================================
61*
62* .. Parameters ..
63 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
64 $ lld_, mb_, m_, nb_, n_, rsrc_
65 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
66 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
67 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
68 INTEGER memsiz, ntests, realsz, TOTMEM
69 REAL padval
70 REAL one, zero
71 parameter( realsz = 4, totmem = 2000000,
72 $ memsiz = totmem / realsz, ntests = 20,
73 $ padval = -9923.0e+0 )
74 parameter( one = 1.0e+0, zero = 0.0e+0 )
75* ..
76* .. Local Scalars ..
77 LOGICAL check, tpsd
78 CHARACTER trans
79 CHARACTER*6 passed
80 CHARACTER*80 outfile
81 INTEGER hh, I, iam, iaseed, ibseed, ictxt, ii, imidpad,
82 $ info, ipa, ipb, ipostpad, iprepad, ipw, ipw2,
83 $ ipx, iscale, itran, itype, j, jj, k, KFAIL, kk,
84 $ kpass, kskip, ktests, lcm, lcmp, ltau, lwf,
85 $ lwork, lws, m, mnp, mnrhsp, mp, mq, mycol,
86 $ myrow, n, nb, nbrhs, ncols, ngrids, nmat, nnb,
87 $ nnbr, nnr, nnrhsq, nout, np, npcol, nprocs,
88 $ nprow, nrows, nq, NRHS, nrhsp, nrhsq, worksiz
89 REAL anorm, bnorm, sresid, thresh
90 DOUBLE PRECISION ADDFAC, adds, mulfac, mults, nops, tmflops
91* ..
92* .. Local Arrays ..
93 INTEGER desca( dlen_ ), descb( dlen_ ), descw( lld_ ),
94 $ descx( dlen_ ), ierr( 2 ), mval( ntests ),
95 $ nbrval( ntests ), nbval( ntests ),
96 $ nrval( ntests ), nval( ntests ),
97 $ pval( ntests ), qval( ntests )
98 REAL mem( memsiz ), result( 2 )
99 DOUBLE PRECISION ctime( 1 ), wtime( 1 )
100* ..
101* .. External Subroutines ..
102 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
104 $ blacs_pinfo, descinit, igsum2d, pschekpad,
105 $ psfillpad, psgels, psgemm, pslacpy,
108* ..
109* .. External Functions ..
110 LOGICAL lsame
111 INTEGER iceil, ilcm, numroc
112 REAL pslange, psqrt14, psqrt17
113 EXTERNAL iceil, ilcm, lsame, numroc, pslange,
115* ..
116* .. Intrinsic Functions ..
117 INTRINSIC max, min
118* ..
119* .. Data Statements ..
120 DATA ktests, kpass, kfail, kskip / 4*0 /
121* ..
122* .. Executable Statements ..
123*
124* Get starting information
125*
126 CALL blacs_pinfo( iam, nprocs )
127*
128 iaseed = 100
129 ibseed = 200
130 CALL pslsinfo( outfile, nout, nmat, mval, ntests, nval,
131 $ ntests, nnb, nbval, ntests, nnr, nrval, ntests,
132 $ nnbr, nbrval, ntests, ngrids, pval, ntests, qval,
133 $ ntests, thresh, mem, iam, nprocs )
134 check = ( thresh.GE.0.0e+0 )
135*
136* Print headings
137*
138 IF( iam.EQ.0 ) THEN
139 WRITE( nout, fmt = * )
140 WRITE( nout, fmt = 9995 )
141 WRITE( nout, fmt = 9994 )
142 WRITE( nout, fmt = * )
143 END IF
144*
145* Loop over different process grids
146*
147 DO 90 i = 1, ngrids
148*
149 nprow = pval( i )
150 npcol = qval( i )
151*
152* Make sure grid information is correct
153*
154 ierr( 1 ) = 0
155 IF( nprow.LT.1 ) THEN
156 IF( iam.EQ.0 )
157 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
158 ierr( 1 ) = 1
159 ELSE IF( npcol.LT.1 ) THEN
160 IF( iam.EQ.0 )
161 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
162 ierr( 1 ) = 1
163 ELSE IF( nprow*npcol.GT.nprocs ) THEN
164 IF( iam.EQ.0 )
165 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
166 ierr( 1 ) = 1
167 END IF
168*
169 IF( ierr( 1 ).GT.0 ) THEN
170 IF( iam.EQ.0 )
171 $ WRITE( nout, fmt = 9997 ) 'grid'
172 KSKIP = KSKIP + 1
173 GO TO 90
174 END IF
175*
176* Define process grid
177*
178 CALL BLACS_GET( -1, 0, ICTXT )
179 CALL BLACS_GRIDINIT( ICTXT, 'row-major', NPROW, NPCOL )
180 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
181*
182* Go to bottom of loop if this case doesn't use my process
183*
184.GE..OR..GE. IF( ( MYROWNPROW )( MYCOLNPCOL ) )
185 $ GO TO 90
186*
187 DO 80 J = 1, NMAT
188*
189 M = MVAL( J )
190 N = NVAL( J )
191*
192* Make sure matrix information is correct
193*
194 IERR( 1 ) = 0
195.LT. IF( M1 ) THEN
196.EQ. IF( IAM0 )
197 $ WRITE( NOUT, FMT = 9999 ) 'matrix', 'm', M
198 IERR( 1 ) = 1
199.LT. ELSE IF( N1 ) THEN
200.EQ. IF( IAM0 )
201 $ WRITE( NOUT, FMT = 9999 ) 'matrix', 'n', N
202 IERR( 1 ) = 1
203 END IF
204*
205* Make sure no one had error
206*
207 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1, -1, 0 )
208*
209.GT. IF( IERR( 1 )0 ) THEN
210.EQ. IF( IAM0 )
211 $ WRITE( NOUT, FMT = 9997 ) 'matrix'
212 KSKIP = KSKIP + 1
213 GO TO 80
214 END IF
215*
216* Loop over different blocking sizes
217*
218 DO 70 K = 1, NNB
219*
220 NB = NBVAL( K )
221*
222* Make sure nb is legal
223*
224 IERR( 1 ) = 0
225.LT. IF( NB1 ) THEN
226 IERR( 1 ) = 1
227.EQ. IF( IAM0 )
228 $ WRITE( NOUT, FMT = 9999 ) 'nb', 'nb', NB
229 END IF
230*
231* Check all processes for an error
232*
233 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1, -1, 0 )
234*
235.GT. IF( IERR( 1 )0 ) THEN
236.EQ. IF( IAM0 )
237 $ WRITE( NOUT, FMT = 9997 ) 'nb'
238 KSKIP = KSKIP + 1
239 GO TO 70
240 END IF
241*
242* Padding constants
243*
244 MP = NUMROC( M, NB, MYROW, 0, NPROW )
245 MQ = NUMROC( M, NB, MYCOL, 0, NPCOL )
246 NP = NUMROC( N, NB, MYROW, 0, NPROW )
247 MNP = MAX( MP, NP )
248 NQ = NUMROC( N, NB, MYCOL, 0, NPCOL )
249*
250 IF( CHECK ) THEN
251 IPREPAD = MAX( NB, MP )
252 IMIDPAD = NB
253 IPOSTPAD = MAX( NB, NQ )
254 ELSE
255 IPREPAD = 0
256 IMIDPAD = 0
257 IPOSTPAD = 0
258 END IF
259*
260* Initialize the array descriptor for the matrix A
261*
262 CALL DESCINIT( DESCA, M, N, NB, NB, 0, 0, ICTXT,
263 $ MAX( 1, MP ) + IMIDPAD, IERR( 1 ) )
264*
265* Check all processes for an error
266*
267 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1, -1, 0 )
268*
269.LT. IF( IERR( 1 )0 ) THEN
270.EQ. IF( IAM0 )
271 $ WRITE( NOUT, FMT = 9997 ) 'descriptor'
272 KSKIP = KSKIP + 1
273 GO TO 70
274 END IF
275*
276 DO 60 ISCALE = 1, 3
277*
278 ITYPE = ISCALE
279*
280* Assign pointers into MEM for SCALAPACK arrays, A is
281* allocated starting at position MEM( IPREPAD+1 )
282*
283 IPA = IPREPAD + 1
284 IPX = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD
285 IPW = IPX
286*
287 WORKSIZ = NQ + IPOSTPAD
288*
289* Check for adequate memory for problem size
290*
291 IERR( 1 ) = 0
292.GT. IF( ( IPW+WORKSIZ )MEMSIZ ) THEN
293.EQ. IF( IAM0 )
294 $ WRITE( NOUT, FMT = 9996 ) 'memory',
295 $ ( IPX+WORKSIZ )*REALSZ
296 IERR( 1 ) = 1
297 END IF
298*
299* Check all processes for an error
300*
301 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1, -1,
302 $ 0 )
303*
304.GT. IF( IERR( 1 )0 ) THEN
305.EQ. IF( IAM0 )
306 $ WRITE( NOUT, FMT = 9997 ) 'memory'
307 KSKIP = KSKIP + 1
308 GO TO 70
309 END IF
310*
311 IF( CHECK ) THEN
312 CALL PSFILLPAD( ICTXT, MP, NQ, MEM( IPA-IPREPAD ),
313 $ DESCA( LLD_ ), IPREPAD, IPOSTPAD,
314 $ PADVAL )
315 CALL PSFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1,
316 $ MEM( IPW-IPREPAD ),
317 $ WORKSIZ-IPOSTPAD, IPREPAD,
318 $ IPOSTPAD, PADVAL )
319 END IF
320*
321* Generate the matrix A and calculate its 1-norm
322*
323 CALL PSQRT13( ISCALE, M, N, MEM( IPA ), 1, 1,
324 $ DESCA, ANORM, IASEED, MEM( IPW ) )
325*
326 IF( CHECK ) THEN
327 CALL PSCHEKPAD( ICTXT, 'psqrt13', MP, NQ,
328 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ),
329 $ IPREPAD, IPOSTPAD, PADVAL )
330 CALL PSCHEKPAD( ICTXT, 'psqrt13',
331 $ WORKSIZ-IPOSTPAD, 1,
332 $ MEM( IPW-IPREPAD ),
333 $ WORKSIZ-IPOSTPAD, IPREPAD,
334 $ IPOSTPAD, PADVAL )
335 END IF
336*
337 DO 50 ITRAN = 1, 2
338*
339.EQ. IF( ITRAN1 ) THEN
340 NROWS = M
341 NCOLS = N
342 TRANS = 'n'
343 TPSD = .FALSE.
344 ELSE
345 NROWS = N
346 NCOLS = M
347 TRANS = 't'
348 TPSD = .TRUE.
349 END IF
350*
351* Loop over the different values for NRHS
352*
353 DO 40 HH = 1, NNR
354*
355 NRHS = NRVAL( HH )
356*
357 DO 30 KK = 1, NNBR
358*
359 NBRHS = NBRVAL( KK )
360*
361 NRHSP = NUMROC( NRHS, NBRHS, MYROW, 0,
362 $ NPROW )
363 NRHSQ = NUMROC( NRHS, NBRHS, MYCOL, 0,
364 $ NPCOL )
365*
366* Define Array descriptor for rhs MAX(M,N)xNRHS
367*
368 CALL DESCINIT( DESCX, MAX( M, N ), NRHS, NB,
369 $ NBRHS, 0, 0, ICTXT,
370 $ MAX( 1, MNP ) + IMIDPAD,
371 $ IERR( 1 ) )
372 IF( TPSD ) THEN
373 CALL DESCINIT( DESCW, M, NRHS, NB, NBRHS,
374 $ 0, 0, ICTXT, MAX( 1, MP ) +
375 $ IMIDPAD, IERR( 2 ) )
376 ELSE
377 CALL DESCINIT( DESCW, N, NRHS, NB, NBRHS,
378 $ 0, 0, ICTXT, MAX( 1, NP ) +
379 $ IMIDPAD, IERR( 2 ) )
380 END IF
381*
382* Check all processes for an error
383*
384 CALL IGSUM2D( ICTXT, 'all', ' ', 2, 1, IERR,
385 $ 2, -1, 0 )
386*
387.LT..OR..LT. IF( IERR( 1 )0 IERR( 2 )0 ) THEN
388.EQ. IF( IAM0 )
389 $ WRITE( NOUT, FMT = 9997 ) 'descriptor'
390 KSKIP = KSKIP + 1
391 GO TO 30
392 END IF
393*
394* Check for enough memory
395*
396 IPX = IPA + DESCA( LLD_ )*NQ + IPOSTPAD +
397 $ IPREPAD
398 IPW = IPX + DESCX( LLD_ )*NRHSQ + IPOSTPAD +
399 $ IPREPAD
400 WORKSIZ = DESCW( LLD_ )*NRHSQ + IPOSTPAD
401*
402 IERR( 1 ) = 0
403.GT. IF( IPW+WORKSIZMEMSIZ ) THEN
404.EQ. IF( IAM0 )
405 $ WRITE( NOUT, FMT = 9996 ) 'generation',
406 $ ( IPW+WORKSIZ )*REALSZ
407 IERR( 1 ) = 1
408 END IF
409*
410* Check all processes for an error
411*
412 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR,
413 $ 1, -1, 0 )
414*
415.GT. IF( IERR( 1 )0 ) THEN
416.EQ. IF( IAM0 )
417 $ WRITE( NOUT, FMT = 9997 ) 'memory'
418 KSKIP = KSKIP + 1
419 GO TO 30
420 END IF
421*
422* Generate RHS
423*
424 IF( TPSD ) THEN
425 CALL PSMATGEN( ICTXT, 'no', 'no',
426 $ DESCW( M_ ), DESCW( N_ ),
427 $ DESCW( MB_ ), DESCW( NB_ ),
428 $ MEM( IPW ), DESCW( LLD_ ),
429 $ DESCW( RSRC_ ),
430 $ DESCW( CSRC_ ), IBSEED, 0,
431 $ MP, 0, NRHSQ, MYROW, MYCOL,
432 $ NPROW, NPCOL )
433 ELSE
434 CALL PSMATGEN( ICTXT, 'no', 'no',
435 $ DESCW( M_ ), DESCW( N_ ),
436 $ DESCW( MB_ ), DESCW( NB_ ),
437 $ MEM( IPW ), DESCW( LLD_ ),
438 $ DESCW( RSRC_ ),
439 $ DESCW( CSRC_ ), IBSEED, 0,
440 $ NP, 0, NRHSQ, MYROW, MYCOL,
441 $ NPROW, NPCOL )
442 END IF
443*
444 IF( CHECK ) THEN
445 CALL PSFILLPAD( ICTXT, MNP, NRHSQ,
446 $ MEM( IPX-IPREPAD ),
447 $ DESCX( LLD_ ), IPREPAD,
448 $ IPOSTPAD, PADVAL )
449 IF( TPSD ) THEN
450 CALL PSFILLPAD( ICTXT, MP, NRHSQ,
451 $ MEM( IPW-IPREPAD ),
452 $ DESCW( LLD_ ), IPREPAD,
453 $ IPOSTPAD, PADVAL )
454 ELSE
455 CALL PSFILLPAD( ICTXT, NP, NRHSQ,
456 $ MEM( IPW-IPREPAD ),
457 $ DESCW( LLD_ ), IPREPAD,
458 $ IPOSTPAD, PADVAL )
459 END IF
460 END IF
461*
462 DO 10 JJ = 1, NRHS
463 CALL PSNRM2( NCOLS, BNORM, MEM( IPW ), 1,
464 $ JJ, DESCW, 1 )
465.GT. IF( BNORMZERO )
466 $ CALL PSSCAL( NCOLS, ONE / BNORM,
467 $ MEM( IPW ), 1, JJ, DESCW,
468 $ 1 )
469 10 CONTINUE
470*
471 CALL PSGEMM( TRANS, 'n', NROWS, NRHS, NCOLS,
472 $ ONE, MEM( IPA ), 1, 1, DESCA,
473 $ MEM( IPW ), 1, 1, DESCW, ZERO,
474 $ MEM( IPX ), 1, 1, DESCX )
475*
476 IF( CHECK ) THEN
477*
478* check for memory overwrite
479*
480 CALL PSCHEKPAD( ICTXT, 'generation', MP,
481 $ NQ, MEM( IPA-IPREPAD ),
482 $ DESCA( LLD_ ), IPREPAD,
483 $ IPOSTPAD, PADVAL )
484 CALL PSCHEKPAD( ICTXT, 'generation', MNP,
485 $ NRHSQ, MEM( IPX-IPREPAD ),
486 $ DESCX( LLD_ ), IPREPAD,
487 $ IPOSTPAD, PADVAL )
488 IF( TPSD ) THEN
489 CALL PSCHEKPAD( ICTXT, 'generation',
490 $ MP, NRHSQ,
491 $ MEM( IPW-IPREPAD ),
492 $ DESCW( LLD_ ), IPREPAD,
493 $ IPOSTPAD, PADVAL )
494 ELSE
495 CALL PSCHEKPAD( ICTXT, 'generation',
496 $ NP, NRHSQ,
497 $ MEM( IPW-IPREPAD ),
498 $ DESCW( LLD_ ), IPREPAD,
499 $ IPOSTPAD, PADVAL )
500 END IF
501*
502* Allocate space for copy of rhs
503*
504 IPB = IPW
505*
506 IF( TPSD ) THEN
507 CALL DESCINIT( DESCB, N, NRHS, NB,
508 $ NBRHS, 0, 0, ICTXT,
509 $ MAX( 1, NP ) + IMIDPAD,
510 $ IERR( 1 ) )
511 ELSE
512 CALL DESCINIT( DESCB, M, NRHS, NB,
513 $ NBRHS, 0, 0, ICTXT,
514 $ MAX( 1, MP ) + IMIDPAD,
515 $ IERR( 1 ) )
516 END IF
517*
518* Check all processes for an error
519*
520 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1,
521 $ IERR, 1, -1, 0 )
522*
523.LT. IF( IERR( 1 )0 ) THEN
524.EQ. IF( IAM0 )
525 $ WRITE( NOUT, FMT = 9997 )
526 $ 'descriptor'
527 kskip = kskip + 1
528 GO TO 30
529 END IF
530*
531 ipw = ipb + descb( lld_ )*nrhsq +
532 $ ipostpad + iprepad
533*
534 END IF
535*
536* Calculate the amount of workspace for PSGELS
537*
538 IF( m.GE.n ) THEN
539 ltau = numroc( min(m,n), nb, mycol, 0,
540 $ npcol )
541 lwf = nb * ( mp + nq + nb )
542 lws = max( ( nb*( nb - 1 ) ) / 2,
543 $ ( mp + nrhsq ) * nb ) + nb*nb
544 ELSE
545 lcm = ilcm( nprow, npcol )
546 lcmp = lcm / nprow
547 ltau = numroc( min(m,n), nb, myrow, 0,
548 $ nprow )
549 lwf = nb * ( mp + nq + nb )
550 lws = max( ( nb*( nb - 1 ) ) / 2, ( np +
551 $ max( nq + numroc( numroc( n, nb, 0,
552 $ 0, nprow ), nb, 0, 0, lcmp ),
553 $ nrhsq ) ) * nb ) + nb*nb
554 END IF
555*
556 lwork = ltau + max( lwf, lws )
557 worksiz = lwork + ipostpad
558*
559* Check for adequate memory for problem size
560*
561 ierr( 1 ) = 0
562 IF( ipw+worksiz.GT.memsiz ) THEN
563 IF( iam.EQ.0 )
564 $ WRITE( nout, fmt = 9996 ) 'solve',
565 $ ( ipw+worksiz )*realsz
566 ierr( 1 ) = 1
567 END IF
568*
569* Check all processes for an error
570*
571 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
572 $ 1, -1, 0 )
573*
574 IF( ierr( 1 ).GT.0 ) THEN
575 IF( iam.EQ.0 )
576 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
577 kskip = kskip + 1
578 GO TO 30
579 END IF
580*
581 IF( check ) THEN
582*
583* Make the copy of the right hand side
584*
585 CALL pslacpy( 'All', nrows, nrhs,
586 $ mem( ipx ), 1, 1, descx,
587 $ mem( ipb ), 1, 1, descb )
588*
589 IF( tpsd ) THEN
590 CALL psfillpad( ictxt, np, nrhsq,
591 $ mem( ipb-iprepad ),
592 $ descb( lld_ ), iprepad,
593 $ ipostpad, padval )
594 ELSE
595 CALL psfillpad( ictxt, mp, nrhsq,
596 $ mem( ipb-iprepad ),
597 $ descb( lld_ ), iprepad,
598 $ ipostpad, padval )
599 END IF
600 CALL psfillpad( ictxt, lwork, 1,
601 $ mem( ipw-iprepad ),
602 $ lwork, iprepad,
603 $ ipostpad, padval )
604 END IF
605*
606 CALL slboot( )
607 CALL blacs_barrier( ictxt, 'All' )
608 CALL sltimer( 1 )
609*
610* Solve the LS or overdetermined system
611*
612 CALL psgels( trans, m, n, nrhs, mem( ipa ),
613 $ 1, 1, desca, mem( ipx ), 1, 1,
614 $ descx, mem( ipw ), lwork, info )
615*
616 CALL sltimer( 1 )
617*
618 IF( check ) THEN
619*
620* check for memory overwrite
621*
622 CALL pschekpad( ictxt, 'PSGELS', mp,
623 $ nq, mem( ipa-iprepad ),
624 $ desca( lld_ ), iprepad,
625 $ ipostpad, padval )
626 CALL pschekpad( ictxt, 'PSGELS', mnp,
627 $ nrhsq, mem( ipx-iprepad ),
628 $ descx( lld_ ), iprepad,
629 $ ipostpad, padval )
630 CALL pschekpad( ictxt, 'PSGELS', lwork,
631 $ 1, mem( ipw-iprepad ),
632 $ lwork, iprepad,
633 $ ipostpad, padval )
634 END IF
635*
636* Regenerate A in place for testing and next
637* iteration
638*
639 CALL psqrt13( iscale, m, n, mem( ipa ), 1, 1,
640 $ desca, anorm, iaseed,
641 $ mem( ipw ) )
642*
643* check the solution to rhs
644*
645 IF( check ) THEN
646*
647* Am I going to call PSQRT17 ?
648*
649 IF( ( m.GE.n .AND. ( .NOT.tpsd ) ) .OR.
650 $ ( m.LT.n .AND. tpsd ) ) THEN
651*
652* Call PSQRT17 first, A, X, and B remain
653* unchanged. Solving LS system
654*
655* Check amount of memory for PSQRT17
656*
657 IF( tpsd ) THEN
658 worksiz = np*nrhsq + nrhsp*mq
659 ipw2 = ipw + worksiz
660 worksiz = worksiz +
661 $ max( nq, max( mq, nrhsq ) ) +
662 $ ipostpad
663 ELSE
664 worksiz = mp*nrhsq + nrhsp*nq
665 ipw2 = ipw + worksiz
666 worksiz = worksiz +
667 $ max( nq, nrhsq ) +
668 $ ipostpad
669 END IF
670*
671* Check for adequate memory for problem
672* size
673*
674 ierr( 1 ) = 0
675 IF( ( ipw+worksiz ).GT.memsiz ) THEN
676 IF( iam.EQ.0 )
677 $ WRITE( nout, fmt = 9996 )
678 $ 'MEMORY', ( ipw+worksiz )*realsz
679 ierr( 1 ) = 1
680 END IF
681*
682* Check all processes for an error
683*
684 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
685 $ ierr, 1, -1, 0 )
686*
687 IF( ierr( 1 ).GT.0 ) THEN
688 IF( iam.EQ.0 )
689 $ WRITE( nout, fmt = 9997 )
690 $ 'MEMORY'
691 kskip = kskip + 1
692 GO TO 30
693 END IF
694*
695 CALL psfillpad( ictxt,
696 $ worksiz-ipostpad, 1,
697 $ mem( ipw-iprepad ),
698 $ worksiz-ipostpad,
699 $ iprepad, ipostpad,
700 $ padval )
701*
702 result( 2 ) = psqrt17( trans, 1, m, n,
703 $ nrhs,
704 $ mem( ipa ),
705 $ 1, 1, desca,
706 $ mem( ipx ), 1,
707 $ 1, descx,
708 $ mem( ipb ),
709 $ 1, 1, descb,
710 $ mem( ipw ),
711 $ mem( ipw2 ) )
712 sresid = result( 2 )
713*
714 CALL pschekpad( ictxt, 'PSQRT17',
715 $ mp, nq,
716 $ mem( ipa-iprepad ),
717 $ desca( lld_ ),
718 $ iprepad, ipostpad,
719 $ padval )
720 CALL pschekpad( ictxt, 'PSQRT17',
721 $ mnp, nrhsq,
722 $ mem( ipx-iprepad ),
723 $ descx( lld_ ), iprepad,
724 $ ipostpad, padval )
725 IF( tpsd ) THEN
726 CALL pschekpad( ictxt, 'PSQRT17',
727 $ np, nrhsq,
728 $ mem( ipb-iprepad ),
729 $ descb( lld_ ),
730 $ iprepad, ipostpad,
731 $ padval )
732 ELSE
733 CALL pschekpad( ictxt, 'PSQRT17',
734 $ mp, nrhsq,
735 $ mem( ipb-iprepad ),
736 $ descb( lld_ ),
737 $ iprepad, ipostpad,
738 $ padval )
739 END IF
740 CALL pschekpad( ictxt, 'PSQRT17',
741 $ worksiz-ipostpad, 1,
742 $ mem( ipw-iprepad ),
743 $ worksiz-ipostpad,
744 $ iprepad, ipostpad,
745 $ padval )
746 END IF
747*
748* Call PSQRT16, B will be destroyed.
749*
750 IF( tpsd ) THEN
751 worksiz = mp + ipostpad
752 ELSE
753 worksiz = nq + ipostpad
754 END IF
755*
756* Check for adequate memory for problem size
757*
758 ierr( 1 ) = 0
759 IF( ( ipw+worksiz ).GT.memsiz ) THEN
760 IF( iam.EQ.0 )
761 $ WRITE( nout, fmt = 9996 ) 'MEMORY',
762 $ ( ipw+worksiz )*realsz
763 ierr( 1 ) = 1
764 END IF
765*
766* Check all processes for an error
767*
768 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
769 $ ierr, 1, -1, 0 )
770*
771 IF( ierr( 1 ).GT.0 ) THEN
772 IF( iam.EQ.0 )
773 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
774 kskip = kskip + 1
775 GO TO 30
776 END IF
777*
778 CALL psfillpad( ictxt,
779 $ worksiz-ipostpad, 1,
780 $ mem( ipw-iprepad ),
781 $ worksiz-ipostpad,
782 $ iprepad, ipostpad,
783 $ padval )
784*
785 CALL psqrt16( trans, m, n, nrhs,
786 $ mem( ipa ), 1, 1, desca,
787 $ mem( ipx ), 1, 1, descx,
788 $ mem( ipb ), 1, 1, descb,
789 $ mem( ipw ), result( 1 ) )
790*
791 CALL pschekpad( ictxt, 'PSQRT16',
792 $ mp, nq,
793 $ mem( ipa-iprepad ),
794 $ desca( lld_ ),
795 $ iprepad, ipostpad,
796 $ padval )
797 CALL pschekpad( ictxt, 'PSQRT16',
798 $ mnp, nrhsq,
799 $ mem( ipx-iprepad ),
800 $ descx( lld_ ), iprepad,
801 $ ipostpad, padval )
802 IF( tpsd ) THEN
803 CALL pschekpad( ictxt, 'PSQRT16',
804 $ np, nrhsq,
805 $ mem( ipb-iprepad ),
806 $ descb( lld_ ),
807 $ iprepad, ipostpad,
808 $ padval )
809 ELSE
810 CALL pschekpad( ictxt, 'PSQRT16',
811 $ mp, nrhsq,
812 $ mem( ipb-iprepad ),
813 $ descb( lld_ ),
814 $ iprepad, ipostpad,
815 $ padval )
816 END IF
817 CALL pschekpad( ictxt, 'PSQRT16',
818 $ worksiz-ipostpad, 1,
819 $ mem( ipw-iprepad ),
820 $ worksiz-ipostpad,
821 $ iprepad, ipostpad,
822 $ padval )
823*
824* Call PSQRT14
825*
826 IF( ( m.GE.n .AND. tpsd ) .OR.
827 $ ( m.LT.n .AND. ( .NOT.tpsd ) ) ) THEN
828*
829 ipw = ipb
830*
831 IF( tpsd ) THEN
832*
833 nnrhsq = numroc( n+nrhs, nb, mycol,
834 $ 0, npcol )
835 ltau = numroc( min( m, n+nrhs ), nb,
836 $ mycol, 0, npcol )
837 lwf = nb * ( nb + mp + nnrhsq )
838 worksiz = mp * nnrhsq + ltau + lwf +
839 $ ipostpad
840*
841 ELSE
842*
843 mnrhsp = numroc( m+nrhs, nb, myrow,
844 $ 0, nprow )
845 ltau = numroc( min( m+nrhs, n ), nb,
846 $ myrow, 0, nprow )
847 lwf = nb * ( nb + mnrhsp + nq )
848 worksiz = mnrhsp * nq + ltau + lwf +
849 $ ipostpad
850*
851 END IF
852*
853* Check for adequate memory for problem
854* size
855*
856 ierr( 1 ) = 0
857 IF( ( ipw+worksiz ).GT.memsiz ) THEN
858 IF( iam.EQ.0 )
859 $ WRITE( nout, fmt = 9996 )
860 $ 'MEMORY', ( ipw+worksiz )*realsz
861 ierr( 1 ) = 1
862 END IF
863*
864* Check all processes for an error
865*
866 CALL igsum2d( ictxt, 'All', ' ', 1, 1,
867 $ ierr, 1, -1, 0 )
868*
869 IF( ierr( 1 ).GT.0 ) THEN
870 IF( iam.EQ.0 )
871 $ WRITE( nout, fmt = 9997 )
872 $ 'MEMORY'
873 kskip = kskip + 1
874 GO TO 30
875 END IF
876*
877 CALL psfillpad( ictxt,
878 $ worksiz-ipostpad, 1,
879 $ mem( ipw-iprepad ),
880 $ worksiz-ipostpad,
881 $ iprepad, ipostpad,
882 $ padval )
883*
884* Solve underdetermined system
885*
886 result( 2 ) = psqrt14( trans, m, n,
887 $ nrhs,
888 $ mem( ipa ), 1,
889 $ 1, desca,
890 $ mem( ipx ),
891 $ 1, 1, descx,
892 $ mem( ipw ) )
893 sresid = result( 2 )
894*
895 CALL pschekpad( ictxt, 'PSQRT14',
896 $ mp, nq,
897 $ mem( ipa-iprepad ),
898 $ desca( lld_ ),
899 $ iprepad, ipostpad,
900 $ padval )
901 CALL pschekpad( ictxt, 'PSQRT14',
902 $ mnp, nrhsq,
903 $ mem( ipx-iprepad ),
904 $ descx( lld_ ), iprepad,
905 $ ipostpad, padval )
906 CALL pschekpad( ictxt, 'PSQRT14',
907 $ worksiz-ipostpad, 1,
908 $ mem( ipw-iprepad ),
909 $ worksiz-ipostpad,
910 $ iprepad, ipostpad,
911 $ padval )
912 END IF
913*
914* Print information about the tests that
915* did not pass the threshold.
916*
917 passed = 'PASSED'
918 DO 20 ii = 1, 2
919 IF( ( result( ii ).GE.thresh ) .AND.
920 $ ( result( ii )-result( ii ).EQ.0.0e+0
921 $ ) ) THEN
922 IF( iam.EQ.0 )
923 $ WRITE( nout, fmt = 9986 )trans,
924 $ m, n, nrhs, nb, itype, ii,
925 $ result( ii )
926 kfail = kfail + 1
927 passed = 'FAILED'
928 ELSE
929 kpass = kpass + 1
930 END IF
931 20 CONTINUE
932*
933 ELSE
934*
935* By-pass the solve check
936*
937 kpass = kpass + 1
938 sresid = sresid - sresid
939 passed = 'BYPASS'
940*
941 END IF
942*
943* Gather maximum of all CPU and WALL clock
944* timings
945*
946 CALL slcombine( ictxt, 'All', '>', 'W', 1, 1,
947 $ wtime )
948 CALL slcombine( ictxt, 'All', '>', 'C', 1, 1,
949 $ ctime )
950*
951* Print results
952*
953 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
954 addfac = 1
955 mulfac = 1
956 IF( m.GE.n ) THEN
957*
958* NOPS = SOPLA( 'SGEQRF', M, N, 0, 0,
959* NB ) + SOPLA( 'SORMQR', M, NRHS, N,
960* 0, NB )
961*
962 mults = n*( ( ( 23.d0 / 6.d0 )+m+n /
963 $ 2.d0 )+ n*( m-n / 3.d0 ) ) +
964 $ n*nrhs*( 2.d0*m+2.d0-n )
965 adds = n*( ( 5.d0 / 6.d0 )+n*
966 $ ( 1.d0 / 2.d0+( m-n / 3.d0 ) ) )
967 $ + n*nrhs*( 2.d0*m+1.d0-n )
968 ELSE
969*
970* NOPS = SOPLA( 'SGELQF', M, N, 0, 0,
971* NB ) + SOPLA( 'SORMLQ', M,
972* NRHS, N, 0, NB )
973*
974 mults = m*( ( ( 29.d0 / 6.d0 )+2.d0*n-m
975 $ / 2.d0 )+m*( n-m / 3.d0 ) )
976 $ + n*nrhs*( 2.d0*m+2.d0-n )
977 adds = m*( ( 5.d0 / 6.d0 )+m / 2.d0+m*
978 $ ( n-m / 3.d0 ) )
979 $ + n*nrhs*( 2.d0*m+1.d0-n )
980 END IF
981 nops = addfac*adds + mulfac*mults
982*
983* Calculate total megaflops, for WALL and
984* CPU time, and print output
985*
986* Print WALL time if machine supports it
987*
988 IF( wtime( 1 ).GT.0.0d+0 ) THEN
989 tmflops = nops / ( wtime( 1 )*1.0d+6 )
990 ELSE
991 tmflops = 0.0d+0
992 END IF
993*
994 IF( wtime( 1 ).GE.0.0d+0 )
995 $ WRITE( nout, fmt = 9993 )
996 $ 'WALL', trans, m, n, nb, nrhs,
997 $ nbrhs, nprow, npcol, wtime( 1 ),
998 $ tmflops, passed
999*
1000* Print CPU time if machine supports it
1001*
1002 IF( ctime( 1 ).GT.0.0d+0 ) THEN
1003 tmflops = nops / ( ctime( 1 )*1.0d+6 )
1004 ELSE
1005 tmflops = 0.0d+0
1006 END IF
1007*
1008 IF( ctime( 1 ).GE.0.0d+0 )
1009 $ WRITE( nout, fmt = 9993 )
1010 $ 'CPU ', trans, m, n, nb, nrhs,
1011 $ nbrhs, nprow, npcol, ctime( 1 ),
1012 $ tmflops, passed
1013 END IF
1014 30 CONTINUE
1015 40 CONTINUE
1016 50 CONTINUE
1017 60 CONTINUE
1018 70 CONTINUE
1019 80 CONTINUE
1020 CALL blacs_gridexit( ictxt )
1021 90 CONTINUE
1022*
1023* Print out ending messages and close output file
1024*
1025 IF( iam.EQ.0 ) THEN
1026 ktests = kpass + kfail + kskip
1027 WRITE( nout, fmt = * )
1028 WRITE( nout, fmt = 9992 ) ktests
1029 IF( check ) THEN
1030 WRITE( nout, fmt = 9991 ) kpass
1031 WRITE( nout, fmt = 9989 ) kfail
1032 ELSE
1033 WRITE( nout, fmt = 9990 ) kpass
1034 END IF
1035 WRITE( nout, fmt = 9988 ) kskip
1036 WRITE( nout, fmt = * )
1037 WRITE( nout, fmt = * )
1038 WRITE( nout, fmt = 9987 )
1039 IF( nout.NE.6 .AND. nout.NE.0 )
1040 $ CLOSE ( nout )
1041 END IF
1042*
1043 CALL blacs_exit( 0 )
1044*
1045 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
1046 $ '; It should be at least 1' )
1047 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
1048 $ i4 )
1049 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
1050 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
1051 $ i11 )
1052 9995 FORMAT( 'Time TRANS M N NB NRHS NBRHS P Q ',
1053 $ 'LS Time MFLOPS CHECK' )
1054 9994 FORMAT( '---- ----- ------ ------ --- ----- ----- ----- ----- ',
1055 $ '--------- -------- ------' )
1056 9993 FORMAT( a4, 3x, a1, 3x, i6, 1x, i6, 1x, i3, 1x, i5, 1x, i5, 1x,
1057 $ i5, 1x, i5, 1x, f9.2, 1x, f8.2, 1x, a6 )
1058 9992 FORMAT( 'Finished', i6, ' tests, with the following results:' )
1059 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
1060 9990 FORMAT( i5, ' tests completed without checking.' )
1061 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
1062 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
1063 9987 FORMAT( 'END OF TESTS.' )
1064 9986 FORMAT( ' TRANS=''', a1, ''', M=', i5, ', N=', i5, ', NRHS=', i4,
1065 $ ', NB=', i4, ', type', i2, ', test(', i2, ')=', g12.5 )
1066*
1067 stop
1068*
1069* End of PSLSDRIVER
1070*
1071 END
subroutine psmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition psmatgen.f:4
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function ilcm(m, n)
Definition ilcm.f:2
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine blacs_gridinit(cntxt, c, nprow, npcol)
Definition mpi.f:745
subroutine psnrm2(n, norm2, x, ix, jx, descx, incx)
Definition mpi.f:1254
real function pslange(norm, m, n, a, ia, ja, desca, work)
Definition mpi.f:1299
subroutine psscal(n, alpha, x, ix, jx, descx, incx)
Definition mpi.f:989
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition mpi.f:777
subroutine blacs_gridexit(cntxt)
Definition mpi.f:762
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine pschekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pschekpad.f:3
subroutine psfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition psfillpad.f:2
subroutine psgels(trans, m, n, nrhs, a, ia, ja, desca, b, ib, jb, descb, work, lwork, info)
Definition psgels.f:3
subroutine pslacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
Definition pslacpy.f:3
program pslsdriver
Definition pslsdriver.f:1
subroutine pslsinfo(summry, nout, nmat, mval, ldmval, nval, ldnval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pslsinfo.f:6
subroutine psqrt13(scale, m, n, a, ia, ja, desca, norma, iseed, work)
Definition psqrt13.f:3
real function psqrt14(trans, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, work)
Definition psqrt14.f:3
subroutine psqrt16(trans, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, b, ib, jb, descb, rwork, resid)
Definition psqrt16.f:3
real function psqrt17(trans, iresid, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, b, ib, jb, descb, work, rwork)
Definition psqrt17.f:5
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267