1 SUBROUTINE pzttrdtester( IAM, NPROCS, CHECK, NOUT, THRESH, NVAL,
2 $ NMAT, MEM, TOTMEM, KPASS, KFAIL, KSKIP )
11 INTEGER IAM, KFAIL, KPASS, KSKIP, NMAT, NOUT, NPROCS,
71 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
72 $ mb_, nb_, rsrc_, csrc_, lld_
73 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
74 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
75 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
76 INTEGER DBLESZ, ZPLXSZ
78 parameter( dblesz = 8, zplxsz = 16,
79 $ padval = ( -9923.0d+0, -9924.0d+0 ) )
81 parameter( timetests = 11 )
83 parameter( tests = 8 )
85 parameter( mintimen = 8 )
91 INTEGER DUMMY, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD,
92 $ ipe, ipostpad, iprepad, ipt, ipw, itemp, j, k,
93 $ lcm, lwmin, maxtests, memsiz, mycol, myrow, n,
94 $ nb, ndiag, ngrids, nn, noffd, np, npcol, nprow,
95 $ nps, nq, splitstimed, worksiz, worktrd
96 DOUBLE PRECISION ANORM, FRESID, NOPS, TMFLOPS
99 INTEGER ANBTEST( TESTS ), ANBTIME( ),
100 $ baltest( tests ), baltime( timetests ),
101 $ desca( dlen_ ), descd( dlen_ ), ierr( 1 ),
102 $ intertest( tests ), intertime( timetests ),
103 $ pnbtest( tests ), pnbtime( timetests ),
104 $ twogemmtest( tests ), twogemmtime( timetests )
105 DOUBLE PRECISION CTIME( 100 ), WTIME( 100 )
116 INTEGER ICEIL, ILCM, NUMROC, PJLAENV
117 DOUBLE PRECISION PZLANHE
118 EXTERNAL lsame, iceil, ilcm, numroc, pjlaenv, pzlanhe
121 INTRINSIC dble, int,
max, sqrt
125 INTEGER ANB, BALANCED, BCKBLOCK, GSTBLOCK, INTERLEAVE,
126 $ lltblock, minsz, pnb, timeinternals, timing,
130 COMMON / blocksizes / gstblock, lltblock, bckblock,
132 COMMON / minsize / minsz
133 COMMON / pjlaenvtiming / timing
134 COMMON / tailoredopts / pnb, anb, interleave,
136 COMMON / timecontrol / timeinternals
139 DATA baltime / 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0 /
140 DATA intertime / 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1 /
141 DATA twogemmtime / 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0 /
142 DATA anbtime / 16, 16, 16, 16, 16, 8, 8, 32, 32, 16,
144 DATA pnbtime / 32, 32, 32, 32, 32, 32, 32, 32, 32,
146 DATA baltest / 0, 0, 0, 0, 1, 1, 1, 1 /
147 DATA intertest / 0, 0, 1, 1, 0, 0, 1, 1 /
148 DATA twogemmtest / 0, 1, 0, 1, 0, 1, 0, 1 /
149 DATA anbtest / 1, 2, 3, 16, 1, 2, 3, 16 /
150 DATA pnbtest / 1, 16, 8, 1, 16, 8, 1, 16 /
154 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
162 memsiz = totmem / zplxsz
167 WRITE( nout, fmt = * )
168 WRITE( nout, fmt = 9995 )
169 WRITE( nout, fmt = 9994
171 WRITE( nout, fmt = * )
176 ngrids = int( sqrt( dble( nprocs ) ) )
186 CALL blacs_get( -1, 0, ictxt )
192 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
204 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
210 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
212 IF( ierr( 1 ).GT.0 )
THEN
214 $
WRITE( nout, fmt = 9997 )
'matrix'
221 IF( n.GT.mintimen )
THEN
232 maxtests = timetests + 2
239 DO 10 k = 1, maxtests
242 IF( k.GE.maxtests-1 )
THEN
258 dummy = pjlaenv( ictxt, 3,
'PZHETTRD',
'L', 0, 0,
265 balanced = baltime( k )
266 interleave = intertime( k )
267 twogemms = twogemmtime( k )
274 balanced = baltest( k )
275 interleave = intertest( k )
276 twogemms = twogemmtest( k )
284 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
285 CALL igebs2d( ictxt, 'all
', ' ', 1, 1, SPLITSTIMED,
288 CALL IGEBR2D( ICTXT, 'all
', ' ', 1, 1, SPLITSTIMED, 1,
293.EQ..AND..EQ.
IF( SPLITSTIMED0 KMAXTESTS )
305 NP = NUMROC( N, NB, MYROW, 0, NPROW )
306 NQ = NUMROC( N, NB, MYCOL, 0, NPCOL )
308 IPREPAD = MAX( NB, NP )
310 IPOSTPAD = MAX( NB, NQ )
320 CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT,
321 $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) )
323 CALL DESCINIT( DESCD, 1, N, NB, NB, 0, 0, ICTXT, 1,
328 CALL IGSUM2D( ICTXT, 'all
', ' ', 1, 1, IERR, 1, -1, 0 )
330.LT.
IF( IERR( 1 )0 ) THEN
332 $ WRITE( NOUT, FMT = 9997 )'descriptor
'
341 IF( LSAME( UPLO, 'u
' ) ) THEN
344 NOFFD = NUMROC( N-1, NB, MYCOL, 0, NPCOL )
346 NDIAG = ICEIL( DBLESZ*NDIAG, ZPLXSZ )
347 NOFFD = ICEIL( DBLESZ*NOFFD, ZPLXSZ )
350 IPD = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD
351 IPE = IPD + NDIAG + IPOSTPAD + IPREPAD
352 IPT = IPE + NOFFD + IPOSTPAD + IPREPAD
353 IPW = IPT + NQ + IPOSTPAD + IPREPAD
358 NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB )
359 LWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + NPS
361 WORKTRD = LWMIN + IPOSTPAD
368.NE.
IF( NPROWNPCOL ) THEN
369 LCM = ILCM( NPROW, NPCOL )
370 ITEMP = NB*ICEIL( ICEIL( NP, NB ), LCM / NPROW ) +
373 ITEMP = MAX( ICEIL( DBLESZ*ITEMP, ZPLXSZ ),
375 WORKSIZ = MAX( LWMIN, ITEMP ) + IPOSTPAD
381.GT.
IF( IPW+WORKSIZMEMSIZ ) THEN
383 $ WRITE( NOUT, FMT = 9996 )'tridiagonal reduction
',
384 $ ( IPW+WORKSIZ )*ZPLXSZ
390 CALL IGSUM2D( ICTXT, 'all
', ' ', 1, 1, IERR, 1, -1, 0 )
392.GT.
IF( IERR( 1 )0 ) THEN
394 $ WRITE( NOUT, FMT = 9997 )'memory
'
403 CALL PZMATGEN( ICTXT, 'hemm
', 'n
', DESCA( M_ ),
404 $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ),
405 $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ),
406 $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ,
407 $ MYROW, MYCOL, NPROW, NPCOL )
413 CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ),
414 $ DESCA( LLD_ ), IPREPAD, IPOSTPAD,
416 CALL PZFILLPAD( ICTXT, NDIAG, 1, MEM( IPD-IPREPAD ),
417 $ NDIAG, IPREPAD, IPOSTPAD, PADVAL )
418 CALL PZFILLPAD( ICTXT, NOFFD, 1, MEM( IPE-IPREPAD ),
419 $ NOFFD, IPREPAD, IPOSTPAD, PADVAL )
420 CALL PZFILLPAD( ICTXT, NQ, 1, MEM( IPT-IPREPAD ), NQ,
421 $ IPREPAD, IPOSTPAD, PADVAL )
422 CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1,
423 $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD,
424 $ IPREPAD, IPOSTPAD, PADVAL )
425 ANORM = PZLANHE( 'i
', UPLO, N, MEM( IPA ), 1, 1,
426 $ DESCA, MEM( IPW ) )
427 CALL PZCHEKPAD( ICTXT, 'pzlanhe
', NP, NQ,
428 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ),
429 $ IPREPAD, IPOSTPAD, PADVAL )
430 CALL PZCHEKPAD( ICTXT, 'pzlanhe
', WORKSIZ-IPOSTPAD, 1,
431 $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD,
432 $ IPREPAD, IPOSTPAD, PADVAL )
433 CALL PZFILLPAD( ICTXT, WORKTRD-IPOSTPAD, 1,
434 $ MEM( IPW-IPREPAD ), WORKTRD-IPOSTPAD,
435 $ IPREPAD, IPOSTPAD, PADVAL )
439 CALL BLACS_BARRIER( ICTXT, 'all
' )
444 CALL PZHETTRD( UPLO, N, MEM( IPA ), 1, 1, DESCA,
445 $ MEM( IPD ), MEM( IPE ), MEM( IPT ),
446 $ MEM( IPW ), LWMIN, INFO )
454 CALL PZCHEKPAD( ICTXT, 'pzhettrd', NP, NQ,
455 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ),
456 $ IPREPAD, IPOSTPAD, PADVAL )
457 CALL PZCHEKPAD( ICTXT, 'pzhettrd', NDIAG, 1,
458 $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD,
461 CALL PZCHEKPAD( ICTXT, 'pzhettrdc
', NOFFD, 1,
462 $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD,
464 CALL PZCHEKPAD( ICTXT, 'pzhettrdd
', NQ, 1,
465 $ MEM( IPT-IPREPAD ), NQ, IPREPAD,
467 CALL PZCHEKPAD( ICTXT, 'pzhettrde
', WORKTRD-IPOSTPAD,
468 $ 1, MEM( IPW-IPREPAD ),
469 $ WORKTRD-IPOSTPAD, IPREPAD, IPOSTPAD,
471 CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1,
472 $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD,
473 $ IPREPAD, IPOSTPAD, PADVAL )
477 CALL PZHETDRV( UPLO, N, MEM( IPA ), 1, 1, DESCA,
478 $ MEM( IPD ), MEM( IPE ), MEM( IPT ),
479 $ MEM( IPW ), IERR( 1 ) )
485 CALL PZLATRAN( N, 1, MEM( IPA ), 1, 1, DESCA,
487 CALL PZLAFCHK( 'hemm
', 'no
', N, N, MEM( IPA ), 1, 1,
488 $ DESCA, IASEED, ANORM, FRESID,
493 CALL PZCHEKPAD( ICTXT, 'pzhetdrvf
', NP, NQ,
494 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ),
495 $ IPREPAD, IPOSTPAD, PADVAL )
496 CALL PZCHEKPAD( ICTXT, 'pzhetdrvg
', NDIAG, 1,
497 $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD,
499 CALL PZCHEKPAD( ICTXT, 'pzhetdrvh
', NOFFD, 1,
500 $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD,
502 CALL PZCHEKPAD( ICTXT, 'pzhetdrvi
', WORKSIZ-IPOSTPAD,
503 $ 1, MEM( IPW-IPREPAD ),
504 $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD,
509.LE..AND..EQ.
IF( FRESIDTHRESH FRESID-FRESID
510.AND..EQ.
$ 0.0D+0 IERR( 1 )0 ) THEN
514.EQ..AND..EQ.
IF( MYROW0 MYCOL0 )
515 $ WRITE( NOUT, FMT = 9991 )FRESID
523.EQ..AND..EQ..AND..NE.
IF( MYROW0 MYCOL0 IERR( 1 )0 )
524 $ WRITE( NOUT, FMT = * )'d or e copies incorrect ...
'
530 FRESID = FRESID - FRESID
536 CALL SLCOMBINE( ICTXT, 'all
', '>
', 'w
', 50, 1, WTIME )
537 CALL SLCOMBINE( ICTXT, 'all
', '>
', 'c
', 50, 1, CTIME )
541.EQ..AND..EQ.
IF( MYROW0 MYCOL0 ) THEN
546 NOPS = ( 16.0D+0 / 3.0D+0 )*NOPS**3
551.GT.
IF( WTIME( 1 )0.0D+0 ) THEN
552 TMFLOPS = NOPS / WTIME( 1 )
556.GE.
IF( WTIME( 1 )0.0D+0 )
557 $ WRITE( NOUT, FMT = 9992 )'wall
', N, INTERLEAVE,
558 $ TWOGEMMS, BALANCED, ANB, PNB, NPROW*NPCOL,
559 $ WTIME( 1 ), TMFLOPS, FRESID, PASSED
563.GT.
IF( CTIME( 1 )0.0D+0 ) THEN
564 TMFLOPS = NOPS / CTIME( 1 )
568.GE.
IF( CTIME( 1 )0.0D+0 )
569 $ WRITE( NOUT, FMT = 9992 )'cpu
', N, INTERLEAVE,
570 $ TWOGEMMS, BALANCED, ANB, PNB, NPROW*NPCOL,
571 $ CTIME( 1 ), TMFLOPS, FRESID, PASSED
577.GT..OR.
IF( WTIME( 13 )+WTIME( 15 )+WTIME( 16 )0.0D+0
578.GT.
$ CTIME( 13 )+CTIME( 15 )+CTIME( 16 )0.0D+0 )
582.EQ.
IF( SPLITSTIMED1 ) THEN
583 WRITE( NOUT, FMT = 9990 )WTIME( 10 ), WTIME( 11 ),
584 $ WTIME( 12 ), WTIME( 13 ), WTIME( 14 ),
586 WRITE( NOUT, FMT = 9989 )WTIME( 16 ), WTIME( 17 ),
587 $ WTIME( 18 ), WTIME( 19 ), WTIME( 20 ),
590 WRITE( NOUT, FMT = 9988 )CTIME( 10 ), CTIME( 11 ),
591 $ CTIME( 12 ), CTIME( 13 ), CTIME( 14 ),
593 WRITE( NOUT, FMT = 9987 )CTIME( 16 ), CTIME( 17 ),
594 $ CTIME( 18 ), CTIME( 19 ), CTIME( 20 ),
596 WRITE( NOUT, FMT = 9986 )N, NPROW*NPCOL, PNB, ANB,
597 $ INTERLEAVE, BALANCED, TWOGEMMS, TIMEINTERNALS
603.EQ..AND..EQ.
IF( MYROW0 MYCOL0 ) THEN
604.EQ.
IF( SPLITSTIMED1 ) THEN
605 WRITE( NOUT, FMT = 9985 )
606 WRITE( NOUT, FMT = 9984 )
607 WRITE( NOUT, FMT = 9983 )
608 WRITE( NOUT, FMT = 9982 )
609 WRITE( NOUT, FMT = 9981 )
610 WRITE( NOUT, FMT = 9980 )
611 WRITE( NOUT, FMT = 9979 )
612 WRITE( NOUT, FMT = 9978 )
613 WRITE( NOUT, FMT = 9977 )
614 WRITE( NOUT, FMT = 9976 )
615 WRITE( NOUT, FMT = 9975 )
616 WRITE( NOUT, FMT = 9974 )
617 WRITE( NOUT, FMT = 9973 )
622 CALL BLACS_GRIDEXIT( ICTXT )
626 9999 FORMAT( 'illegal
', A6, ':
', A5, ' =
', I3,
627 $ '; it should be at least 1
' )
628 9998 FORMAT( 'illegal grid: nprow*npcol =
', I4, '. it can be at most
',
630 9997 FORMAT( 'bad
', A6, ' parameters: going on to next test case.
' )
631 9996 FORMAT( 'unable to perform
', A, ': need totmem of at least
',
634 9995 FORMAT( 'pzhettrd, tailored reduction to tridiagonal form, test.
'
636 9994 FORMAT( 'time n int 2gm bal anb pnb prcs trd time
',
637 $ ' mflops residual check
' )
638 9993 FORMAT( '---- ---- --- --- --- --- --- ---- --------
',
639 $ '----------- -------- ------
' )
640 9992 FORMAT( A4, 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 1X,
641 $ I5, 1X, F9.2, 1X, F11.2, 1X, F8.2, 1X, A6 )
642 9991 FORMAT( '||a - q*t*q
''|| / (||a|| * n * eps) =
', G25.7 )
643 9990 FORMAT( 'wsplit1=[wsplit1;
', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2,
644 $ 1X, F9.2, 1X, F9.2, ' ];
' )
645 9989 FORMAT( 'wsplit2=[wsplit2;
', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2,
646 $ 1X, F9.2, 1X, F9.2, ' ];
' )
647 9988 FORMAT( 'csplit1=[csplit1;
', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2,
648 $ 1X, F9.2, 1X, F9.2, ' ];
' )
649 9987 FORMAT( 'csplit2=[csplit2;
', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2,
650 $ 1X, F9.2, 1X, F9.2, ' ];
' )
651 9986 FORMAT( 'size_opts=[size_opts;
', I4, 1X, I4, 1X, I4, 1X, I4, 1X,
652 $ I4, 1X, I4, 1X, I4, 1X, I4, 1X, ' ];
' )
653 9985 FORMAT( 'n=1; nprocs=2; pnb=3; anb=4; interleave=5; balanced=6;
',
654 $ ' twogemms=7; timeinternals=8;
' )
655 9984 FORMAT( 's1_overhead = 1; % Should be mainly cost of barrier
' )
656 9983 FORMAT( 's1_barrier = 2; % Cost of barrier
' )
657 9982 FORMAT( 's1_updcurcol = 3; % Update
the current column
' )
658 9981 FORMAT( 's1_house = 4; % Compute
the householder vector
' )
659 9980 FORMAT( 's1_spread = 5; % Spread across
' )
660 9979 FORMAT( 's1_transpose = 6; % Transpose
' )
661 9978 FORMAT( 's2_updcurblk = 1; % Update
the current block column
' )
662 9977 FORMAT( 's2_trmvt = 2; % TRMVT v = a * h; vt = ht * a
'' ' )
663 9976 FORMAT( 's2_upd_v = 3; % v = v + v * ht * h + h * vt * h ' )
664 9975
FORMAT(
'S2_TRANS_SUM = 4; % v = v + vt'' ' )
665 9974
FORMAT(
'S2_DOT = 5; % c = v'' * h ' )
666 9973
FORMAT(
'S2_R2K = 6; % A = A - v * h'' - h * v'' ' )