75 parameter( totmem = 3000000 )
76 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77 $ lld_, mb_, m_, nb_, n_, rsrc_
78 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
83 INTEGER memsiz, ntests,
85 parameter( zplxsz = 16,
86 $ memsiz = totmem / zplxsz, ntests = 20,
87 $ padval = ( -9923.0d+0, -9923.0d+0 ),
90 parameter( int_one = 1 )
97 INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
98 $ i, iam, iaseed, ibseed, ictxt, ictxtb,
99 $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
100 $ iprepad, ipw, ipw_size, ,
101 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
102 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
103 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
104 $ nnr, nout, np, npcol, nprocs, nprocs_real,
105 $ nprow, nq, nrhs, n_first, n_last, worksiz
107 DOUBLE PRECISION anorm, nops, nops2, sresid, tmflops,
111 INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
112 $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
113 $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
114 $ nrval( ntests ), nval( ntests ),
115 $ pval( ntests ), qval( ntests )
116 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
117 COMPLEX*16 mem( memsiz )
120 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
134 INTRINSIC dble,
max,
min, mod
137 DATA kfail, kpass, kskip, ktests / 4*0 /
146 CALL blacs_pinfo( iam, nprocs )
150 CALL pzdbinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
151 $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
152 $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
153 $ ntests, qval, ntests, thresh, mem, iam, nprocs )
155 check = ( thresh.GE.0.0d+0 )
160 WRITE( nout, fmt = * )
161 WRITE( nout, fmt = 9995 )
162 WRITE( nout, fmt = 9994 )
163 WRITE( nout, fmt = * )
176 IF( nprow.LT.1 )
THEN
178 $
WRITE( nout, fmt = 9999 )
'GRID',
'nprow', nprow
180 ELSE IF( npcol.LT.1 )
THEN
182 $
WRITE( nout, fmt = 9999 )
'GRID',
'npcol', npcol
184 ELSE IF( nprow*npcol.GT.nprocs )
THEN
186 $
WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
190 IF( ierr( 1 ).GT.0 )
THEN
192 $
WRITE( nout, fmt = 9997 )
'grid'
199 CALL blacs_get( -1, 0, ictxt )
205 CALL blacs_get( -1, 0, ictxtb )
213 IF( myrow.LT.0 .OR. mycol.LT.0 )
THEN
227 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
233 CALL igsum2d( ictxt, 'all
', ' ', 1, 1, IERR, 1,
236.GT.
IF( IERR( 1 )0 ) THEN
238 $ WRITE( NOUT, FMT = 9997 ) 'size
'
244 DO 45 BW_NUM = 1, NBW
248 BWL = BWLVAL( BW_NUM )
251 $ WRITE( NOUT, FMT = 9999 ) 'lower band
', 'bwl
', BWL
255 BWU = BWUVAL( BW_NUM )
258 $ WRITE( NOUT, FMT = 9999 ) 'upper band
', 'bwu
', BWU
262.GT.
IF( BWLN-1 ) THEN
268.GT.
IF( BWUN-1 ) THEN
276 CALL IGSUM2D( ICTXT, 'all
', ' ', 1, 1, IERR, 1,
279.GT.
IF( IERR( 1 )0 ) THEN
290 NB =( (N-(NPCOL-1)*MAX(BWL,BWU)-1)/NPCOL + 1 )
292 NB = MAX( NB, 2*MAX(BWL,BWU) )
299.LT.
IF( NBMIN( 2*MAX(BWL,BWU), N ) ) THEN
305 CALL IGSUM2D( ICTXT, 'all
', ' ', 1, 1, IERR, 1,
308.GT.
IF( IERR( 1 )0 ) THEN
315 NP = NUMROC( (BWL+BWU+1), (BWL+BWU+1),
317 NQ = NUMROC( N, NB, MYCOL, 0, NPCOL )
320 IPREPAD = ((BWL+BWU+1)+10)
322 IPOSTPAD = ((BWL+BWU+1)+10)
331 CALL DESCINIT( DESCA2D, (BWL+BWU+1), N,
332 $ (BWL+BWU+1), NB, 0, 0,
333 $ ICTXT,((BWL+BWU+1)+10), IERR( 1 ) )
342 DESCA( 6 ) = ((BWL+BWU+1)+10)
345 IERR_TEMP = IERR( 1 )
347 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP )
351 CALL IGSUM2D( ICTXT, 'all
', ' ', 1, 1, IERR, 1, -1, 0 )
353.LT.
IF( IERR( 1 )0 ) THEN
355 $ WRITE( NOUT, FMT = 9997 ) 'descriptor
'
367 FREE_PTR = FREE_PTR + IPREPAD
370 FREE_PTR = FREE_PTR + DESCA2D( LLD_ )*
383 $ NB*(BWL+BWU)+6*MAX(BWL,BWU)*MAX(BWL,BWU)
387 FREE_PTR = FREE_PTR + IPREPAD
389 FREE_PTR = FREE_PTR + FILLIN_SIZE
397 IPW_SIZE = MAX(BWL,BWU)*MAX(BWL,BWU)
402 FREE_PTR = FREE_PTR + IPW_SIZE
407.GT.
IF( FREE_PTRMEMSIZ ) THEN
409 $ WRITE( NOUT, FMT = 9996 )
410 $ 'divide and conquer factorization
',
417 CALL IGSUM2D( ICTXT, 'all
', ' ', 1, 1, IERR,
420.GT.
IF( IERR( 1 )0 ) THEN
422 $ WRITE( NOUT, FMT = 9997 ) 'memory
'
428 WORKSIZ = MAX( ((BWL+BWU+1)+10), NB )
436 WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) )
439 WORKSIZ = MAX( WORKSIZ,
440 $ MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB )
443 FREE_PTR = FREE_PTR + IPREPAD
444 IP_DRIVER_W = FREE_PTR
445 FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD
451.GT.
IF( FREE_PTRMEMSIZ ) THEN
453 $ WRITE( NOUT, FMT = 9996 ) 'factorization
',
454 $ ( FREE_PTR )*ZPLXSZ
460 CALL IGSUM2D( ICTXT, 'all
', ' ', 1, 1, IERR,
463.GT.
IF( IERR( 1 )0 ) THEN
465 $ WRITE( NOUT, FMT = 9997 ) 'memory
'
470 CALL PZBMATGEN( ICTXT, 'g
', 'd
', BWL, BWU, N,
471 $ (BWL+BWU+1), NB, MEM( IPA ),
472 $ ((BWL+BWU+1)+10), 0, 0, IASEED, MYROW,
473 $ MYCOL, NPROW, NPCOL )
475 CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ),
476 $ ((BWL+BWU+1)+10), IPREPAD, IPOSTPAD,
479 CALL PZFILLPAD( ICTXT, WORKSIZ, 1,
480 $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ,
481 $ IPREPAD, IPOSTPAD, PADVAL )
487 ANORM = PZLANGE( '1
', (BWL+BWU+1),
488 $ N, MEM( IPA ), 1, 1,
489 $ DESCA2D, MEM( IP_DRIVER_W ) )
490 CALL PZCHEKPAD( ICTXT, 'pzlange', NP, NQ,
491 $ MEM( IPA-IPREPAD ), ((BWL+BWU+1)+10),
492 $ IPREPAD, IPOSTPAD, PADVAL )
493 CALL PZCHEKPAD( ICTXT, 'pzlange',
495 $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ,
496 $ IPREPAD, IPOSTPAD, PADVAL )
501 CALL BLACS_BARRIER( ICTXT, 'all
' )
507 CALL PZDBTRF( N, BWL, BWU, MEM( IPA ), 1, DESCA,
508 $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ),
515 WRITE( NOUT, FMT = * ) 'pzdbtrf', INFO
525 CALL PZCHEKPAD( ICTXT, 'pzdbtrf', NP,
526 $ NQ, MEM( IPA-IPREPAD ), ((BWL+BWU+1)+10),
527 $ IPREPAD, IPOSTPAD, PADVAL )
541 CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0,
542 $ ICTXTB, NB+10, IERR( 1 ) )
551 DESCB( 6 ) = DESCB2D( LLD_ )
560 FREE_PTR = FREE_PTR + IPREPAD
562 FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ )
567 IPW_SOLVE_SIZE = (MAX(BWL,BWU)*NRHS)
570 FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE
573.GT.
IF( FREE_PTRMEMSIZ ) THEN
575 $ WRITE( NOUT, FMT = 9996 )'solve
',
576 $ ( FREE_PTR )*ZPLXSZ
582 CALL IGSUM2D( ICTXT, 'all
', ' ', 1, 1,
585.GT.
IF( IERR( 1 )0 ) THEN
587 $ WRITE( NOUT, FMT = 9997 ) 'memory
'
592 MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL )
596 CALL PZMATGEN(ICTXTB, 'no',
'No',
597 $ descb2d( m_ ), descb2d( n_ ),
598 $ descb2d( mb_ ), descb2d( nb_ ),
600 $ descb2d( lld_ ), descb2d( rsrc_ ),
602 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
603 $ myrow, npcol, nprow )
607 $ mem( ipb-iprepad ),
612 $ mem( ip_driver_w-iprepad ),
618 CALL blacs_barrier( ictxt, 'all
')
623 CALL PZDBTRS( TRANS, N, BWL, BWU, NRHS, MEM( IPA ),
624 $ 1, DESCA, MEM( IPB ), 1, DESCB,
625 $ MEM( IP_FILLIN ), FILLIN_SIZE,
626 $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE,
633 $ WRITE( NOUT, FMT = * ) 'pzdbtrs info=
', INFO
643 CALL PZCHEKPAD( ICTXT, 'pzdbtrs-work
',
645 $ MEM( IP_DRIVER_W-IPREPAD ),
653 CALL PZDBLASCHK( 'n
', 'd
', TRANS,
655 $ MEM( IPB ), 1, 1, DESCB2D,
656 $ IASEED, MEM( IPA ), 1, 1, DESCA2D,
657 $ IBSEED, ANORM, SRESID,
658 $ MEM( IP_DRIVER_W ), WORKSIZ )
661.GT.
IF( SRESIDTHRESH )
662 $ WRITE( NOUT, FMT = 9985 ) SRESID
667.LE..AND.
IF( ( SRESIDTHRESH )
668.EQ.
$ ( (SRESID-SRESID)0.0D+0 ) ) THEN
683 CALL SLCOMBINE( ICTXT, 'all
', '>
', 'w
', 2, 1,
685 CALL SLCOMBINE( ICTXT, 'all
', '>
', 'c
', 2, 1,
690.EQ..AND..EQ.
IF( MYROW0 MYCOL0 ) THEN
696 NPROCS_REAL = ( N-1 )/NB + 1
697 N_LAST = MOD( N-1, NB ) + 1
702 NOPS = 2*(DBLE(N)*DBLE(BWL)*
704 $ (DBLE(N)*DBLE(BWL))
709 $ 2 * (DBLE(N)*(DBLE(BWL)+DBLE(BWU))
714 NOPS = NOPS * DBLE(4)
721 NOPS2 = 2*( (DBLE(N_FIRST)*
722 $ DBLE(BWL)*DBLE(BWU)))
724.GT.
IF ( NPROCS_REAL 1) THEN
730 $ 8*( (DBLE(N_LAST)*DBLE(BWL)
734.GT.
IF ( NPROCS_REAL 2) THEN
738 NOPS2 = NOPS2 + (NPROCS_REAL-2)*
739 $ 8*( (DBLE(NB)*DBLE(BWL)
746 $ 2*( NPROCS_REAL-1 ) *
748.GT.
IF( NPROCS_REAL 1 ) THEN
750 $ 2*( NPROCS_REAL-2 ) *
764 $ ( DBLE(BWL)+DBLE(BWU))
766.GT.
IF ( NPROCS_REAL 1 ) THEN
774 $ (DBLE(N_LAST)*(DBLE(BWL)+
775 $ DBLE(BWU)))*DBLE(NRHS)
778.GT.
IF ( NPROCS_REAL 2 ) THEN
785 $ ( NPROCS_REAL-2)*2*
786 $ ( (DBLE(NB)*(DBLE(BWL)+
787 $ DBLE(BWU)))*DBLE(NRHS) )
793 $ NRHS*( NPROCS_REAL-1)*2*(BWL*BWU )
794.GT.
IF( NPROCS_REAL 1 ) THEN
796 $ NRHS*( NPROCS_REAL-2 ) *
803 NOPS2 = NOPS2 * DBLE(4)
810.GT.
IF( WTIME( 1 ) + WTIME( 2 ) 0.0D+0 ) THEN
812 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
817.GT.
IF( WTIME( 1 )+WTIME( 2 )0.0D+0 ) THEN
819 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
824.GE.
IF( WTIME( 2 )0.0D+0 )
825 $ WRITE( NOUT, FMT = 9993 ) 'wall
', TRANS,
828 $ NB, NRHS, NPROW, NPCOL,
829 $ WTIME( 1 ), WTIME( 2 ), TMFLOPS,
834.GT.
IF( CTIME( 1 )+CTIME( 2 )0.0D+0 ) THEN
836 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
841.GT.
IF( CTIME( 1 )+CTIME( 2 )0.0D+0 ) THEN
843 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
848.GE.
IF( CTIME( 2 )0.0D+0 )
849 $ WRITE( NOUT, FMT = 9993 ) 'cpu
', TRANS,
852 $ NB, NRHS, NPROW, NPCOL,
853 $ CTIME( 1 ), CTIME( 2 ), TMFLOPS,
869 CALL BLACS_GRIDEXIT( ICTXT )
870 CALL BLACS_GRIDEXIT( ICTXTB )
880 KTESTS = KPASS + KFAIL + KSKIP
881 WRITE( NOUT, FMT = * )
882 WRITE( NOUT, FMT = 9992 ) KTESTS
884 WRITE( NOUT, FMT = 9991 ) KPASS
885 WRITE( NOUT, FMT = 9989 ) KFAIL
887 WRITE( NOUT, FMT = 9990 ) KPASS
889 WRITE( NOUT, FMT = 9988 ) KSKIP
890 WRITE( NOUT, FMT = * )
891 WRITE( NOUT, FMT = * )
892 WRITE( NOUT, FMT = 9987 )
893.NE..AND..NE.
IF( NOUT6 NOUT0 )
899 9999 FORMAT( 'illegal
', A6, ':
', A5, ' =
', I3,
900 $ '; it should be at least 1
' )
901 9998 FORMAT( 'illegal grid: nprow*npcol =
', I4, '. it can be at most
',
903 9997 FORMAT( 'bad
', A6, ' parameters: going on to next test case.
' )
904 9996 FORMAT( 'unable to perform
', A, ': need totmem of at least
',
906 9995 FORMAT( 'time tr n bwl bwu nb nrhs p q l*u time
',
907 $ 'slv time mflops mflop2 check
' )
908 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- --------
',
909 $ '-------- -------- -------- ------
' )
910 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5,
911 $ 1X,I4,1X,I4,1X,F9.3,
912 $ F9.4, F9.2, F9.2, 1X, A6 )
913 9992 FORMAT( 'finished
', I6, ' tests, with
the following results:
' )
914 9991 FORMAT( I5, ' tests completed and passed residual checks.
' )
915 9990 FORMAT( I5, ' tests completed without checking.
' )
916 9989 FORMAT( I5, ' tests completed and failed residual checks.
' )
917 9988 FORMAT( I5, ' tests skipped because of illegal input values.
' )
918 9987 FORMAT( 'END OF TESTS.
' )
919 9986 FORMAT( '||A -
', A4, '|| / (||A|| * N * eps) =
', G25.7 )
920 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N)
', F25.7 )
subroutine pzmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
end diagonal values have been computed in the(sparse) matrix id.SOL
logical function lsame(ca, cb)
LSAME
subroutine blacs_gridinit(cntxt, c, nprow, npcol)
double precision function pzlange(norm, m, n, a, ia, ja, desca, work)
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
subroutine blacs_gridexit(cntxt)
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine pzbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
subroutine pzchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
subroutine pzdbinfo(summry, nout, trans, nmat, nval, ldnval, nbw, bwlval, bwuval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
subroutine pzdblaschk(symm, uplo, trans, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
subroutine pzdbtrf(n, bwl, bwu, a, ja, desca, af, laf, work, lwork, info)
subroutine pzdbtrs(trans, n, bwl, bwu, nrhs, a, ja, desca, b, ib, descb, af, laf, work, lwork, info)
subroutine pzfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)