74 parameter( totmem = 3000000 )
75 INTEGER , csrc_, ctxt_, dlen_, dtype_,
76 $ lld_, mb_, m_, nb_, n_,
77 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
78 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
79 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
82 INTEGER memsiz, ntests, realsz
84 parameter( realsz = 4,
85 $ memsiz = totmem / realsz, ntests = 20,
86 $ padval = -9923.0e+0, zero = 0.0e+0 )
88 parameter( int_one = 1 )
95 INTEGER bw, bw_num, fillin_size, free_ptr, h, hh, i,
96 $ iam, iaseed, ibseed, ictxt, ictxtb, ierr_temp,
97 $ imidpad, info, int_temp, , ipb, ipostpad,
98 $ iprepad, ipw, ipw_size, ipw_solve,
99 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
100 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
101 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
102 $ nnr, nout, np, npcol, nprocs, nprocs_real,
103 $ nprow, nq, nrhs, n_first, n_last, worksiz
105 DOUBLE PRECISION nops, nops2, tmflops, tmflops2
108 INTEGER bwval( ntests ), desca( 7 ), desca2d( dlen_ ),
109 $ descb( 7 ), descb2d( dlen_ ), ierr( 1 ),
110 $ nbrval( ntests ), nbval( ntests ),
111 $ nrval( ntests ), nval( ntests ),
112 $ pval( ntests ), qval( ntests )
114 DOUBLE PRECISION ctime( 2 ), wtime( 2 )
117 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
131 INTRINSIC dble,
max,
min, mod
134 DATA kfail, kpass, kskip, ktests / 4*0 /
143 CALL blacs_pinfo( iam, nprocs )
147 CALL psptinfo( outfile, nout, uplo, nmat, nval, ntests, nbw,
148 $ bwval, ntests, nnb, nbval, ntests, nnr, nrval,
149 $ ntests, nnbr, nbrval, ntests, ngrids, pval, ntests,
150 $ qval, ntests, thresh, mem, iam, nprocs )
152 check = ( thresh.GE.0.0e+0 )
157 WRITE( nout, fmt = * )
158 WRITE( nout, fmt = 9995 )
159 WRITE( nout, fmt = 9994 )
160 WRITE( nout, fmt = * )
173 IF( nprow.LT.1 )
THEN
175 $
WRITE( nout, fmt = 9999 )
'GRID',
'nprow', nprow
177 ELSE IF( npcol.LT.1 )
THEN
179 $
WRITE( nout, fmt = 9999 )
'GRID',
'npcol', npcol
181 ELSE IF( nprow*npcol.GT.nprocs )
THEN
183 $
WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
187 IF( ierr( 1 ).GT.0 )
THEN
189 $
WRITE( nout, fmt = 9997 )
'grid'
196 CALL blacs_get( -1, 0, ictxt )
202 CALL blacs_get( -1, 0, ictxtb )
210 IF( myrow.LT.0 .OR. mycol.LT.0 )
THEN
224 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
230 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
233 IF( ierr( 1 ).GT.0 )
THEN
235 $
WRITE( nout, fmt = 9997 )
'size'
241 DO 45 bw_num = 1, nbw
248 $
WRITE( nout, fmt = 9999 )
'Band',
'bw', bw
258 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
261 IF( ierr( 1 ).GT.0 )
THEN
272 nb =( (n-(npcol-1)*int_one-1)/npcol + 1 )
274 nb =
max( nb, 2*int_one )
281 IF( nb.LT.
min( 2*int_one, n ) )
THEN
287 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
290 IF( ierr( 1 ).GT.0 )
THEN
299 nq =
numroc( n, nb, mycol, 0, npcol )
315 $ ictxtb, nb+10, ierr( 1 ) )
324 desca( 6 ) = ((2)+10)
327 ierr_temp = ierr( 1 )
329 ierr( 1 ) =
min( ierr( 1 ), ierr_temp )
333 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
335 IF( ierr( 1 ).LT.0 )
THEN
337 $
WRITE( nout, fmt = 9997 )
'descriptor'
349 free_ptr = free_ptr + iprepad
352 free_ptr = free_ptr + (nb+10)*(2)
368 free_ptr = free_ptr + iprepad
370 free_ptr = free_ptr + fillin_size
383 free_ptr = free_ptr + ipw_size
388 IF( free_ptr.GT.memsiz )
THEN
390 $
WRITE( nout, fmt = 9996 )
391 $
'divide and conquer factorization',
398 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
401 IF( ierr( 1 ).GT.0 )
THEN
403 $
WRITE( nout, fmt = 9997 )
'MEMORY'
409 worksiz =
max( ((2)+10), nb )
417 worksiz =
max( worksiz, desca2d( nb_ ) )
420 worksiz =
max( worksiz,
424 free_ptr = free_ptr + iprepad
425 ip_driver_w = free_ptr
426 free_ptr = free_ptr + worksiz + ipostpad
432 IF( free_ptr.GT.memsiz )
THEN
434 $
WRITE( nout, fmt = 9996 )
'factorization',
435 $ ( free_ptr )*realsz
441 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
444 IF( ierr( 1 ).GT.0 )
THEN
446 $
WRITE( nout, fmt = 9997 )
'MEMORY'
451 CALL psbmatgen( ictxt, uplo,
'T', bw, bw, n, (2), nb,
452 $ mem( ipa ), nb+10, 0, 0, iaseed, myrow,
453 $ mycol, nprow, npcol )
454 CALL psfillpad( ictxt, nq, np, mem( ipa-iprepad ),
455 $ nb+10, iprepad, ipostpad,
467 $ (2), mem( ipa ), 1, 1,
468 $ desca2d, mem( ip_driver_w ) )
469 CALL pschekpad( ictxt,
'PSLANGE', nq, np,
470 $ mem( ipa-iprepad ), nb+10,
471 $ iprepad, ipostpad, padval )
475 $ iprepad, ipostpad, padval )
478 IF(
lsame( uplo,
'L' ) )
THEN
481 int_temp = desca2d( lld_ )
486 CALL blacs_barrier( ictxt,
'All' )
493 $ mem( ipa+1*( nb+10-int_temp ) ), 1, desca,
501 WRITE( nout, fmt = * )
'PSPTTRF INFO=', info
513 $ iprepad, ipostpad, padval )
527 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
528 $ ictxtb, nb+10, ierr( 1 ) )
537 descb( 6 ) = descb2d( lld_ )
542 IF( ipb .GT. 0 )
THEN
546 free_ptr = free_ptr + iprepad
548 free_ptr = free_ptr + nrhs*descb2d( lld_ )
553 ipw_solve_size = (10+2*
min(100,nrhs))*npcol+4*nrhs
556 free_ptr = free_ptr + ipw_solve_size
559 IF( free_ptr.GT.memsiz )
THEN
561 $
WRITE( nout, fmt = 9996 )
'solve',
562 $ ( free_ptr )*realsz
568 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
571 IF( ierr( 1 ).GT.0 )
THEN
573 $
WRITE( nout, fmt = 9997 )
'MEMORY'
578 myrhs_size =
numroc( n, nb, mycol, 0, npcol )
583 $ descb2d( m_ ), descb2d( n_ ),
584 $ descb2d( mb_ ), descb2d( nb_ ),
586 $ descb2d( lld_ ), descb2d( rsrc_ ),
588 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
589 $ myrow, npcol, nprow )
593 $ mem( ipb-iprepad ),
598 $ mem( ip_driver_w-iprepad ),
604 CALL blacs_barrier( ictxt,
'All')
609 CALL pspttrs( n, nrhs, mem( ipa+int_temp ),
610 $ mem( ipa+1*( nb+10-int_temp ) ), 1,
611 $ desca, mem( ipb ), 1, descb,
612 $ mem( ip_fillin ), fillin_size,
613 $ mem( ipw_solve ), ipw_solve_size,
620 $
WRITE( nout, fmt = * )
'PSPTTRS INFO=', info
630 CALL PSCHEKPAD( ICTXT, 'pspttrs-work
',
632 $ MEM( IP_DRIVER_W-IPREPAD ),
643 CALL DESCINIT( DESCA2D, (2), N,
645 $ ICTXT, (2), IERR( 1 ) )
646 CALL PSPTLASCHK( 's
', UPLO, N, BW, BW, NRHS,
647 $ MEM( IPB ), 1, 1, DESCB2D,
648 $ IASEED, MEM( IPA ), 1, 1, DESCA2D,
649 $ IBSEED, ANORM, SRESID,
650 $ MEM( IP_DRIVER_W ), WORKSIZ )
653.GT.
IF( SRESIDTHRESH )
654 $ WRITE( NOUT, FMT = 9985 ) SRESID
659.LE..AND.
IF( ( SRESIDTHRESH )
660.EQ.
$ ( (SRESID-SRESID)0.0E+0 ) ) THEN
675 CALL SLCOMBINE( ICTXT, 'all
', '>
', 'w
', 2, 1,
677 CALL SLCOMBINE( ICTXT, 'all
', '>
', 'c
', 2, 1,
682.EQ..AND..EQ.
IF( MYROW0 MYCOL0 ) THEN
688 NPROCS_REAL = ( N-1 )/NB + 1
689 N_LAST = MOD( N-1, NB ) + 1
692 NOPS = NOPS + DBLE(BW)*( -2.D0 / 3.D0+DBLE(BW)*
693 $ ( -1.D0+DBLE(BW)*( -1.D0 / 3.D0 ) ) ) +
694 $ DBLE(N)*( 1.D0+DBLE(BW)*( 3.D0 /
695 $ 2.D0+DBLE(BW)*( 1.D0 / 2.D0 ) ) )
696 NOPS = NOPS + DBLE(BW)*( -1.D0 / 6.D0+DBLE(BW)
697 $ *( -1.D0 /2.D0+DBLE(BW)
698 $ *( -1.D0 / 3.D0 ) ) ) +
699 $ DBLE(N)*( DBLE(BW) /
700 $ 2.D0*( 1.D0+DBLE(BW) ) )
703 $ DBLE(NRHS)*( ( 2*DBLE(N)-DBLE(BW) )*
704 $ ( DBLE(BW)+1.D0 ) )+ DBLE(NRHS)*
705 $ ( DBLE(BW)*( 2*DBLE(N)-
706 $ ( DBLE(BW)+1.D0 ) ) )
713 NOPS2 = ( (DBLE(N_FIRST))* DBLE(BW)**2 )
715.GT.
IF ( NPROCS_REAL 1) THEN
720 $ 4*( (DBLE(N_LAST)*DBLE(BW)**2) )
723.GT.
IF ( NPROCS_REAL 2) THEN
727 NOPS2 = NOPS2 + (NPROCS_REAL-2)*
728 $ 4*( (DBLE(NB)*DBLE(BW)**2) )
734 $ ( NPROCS_REAL-1 ) * ( BW*BW*BW/3 )
735.GT.
IF( NPROCS_REAL 1 ) THEN
737 $ ( NPROCS_REAL-2 ) * ( 2 * BW*BW*BW )
744 $ ( 4.0D+0*(DBLE(N_FIRST)*DBLE(BW))*DBLE(NRHS) )
746.GT.
IF ( NPROCS_REAL 1 ) THEN
751 $ 2*( 4.0D+0*(DBLE(N_LAST)*DBLE(BW))*DBLE(NRHS) )
754.GT.
IF ( NPROCS_REAL 2 ) THEN
759 $ ( NPROCS_REAL-2)*2*
760 $ ( 4.0D+0*(DBLE(NB)*DBLE(BW))*DBLE(NRHS) )
766 $ NRHS*( NPROCS_REAL-1 ) * ( BW*BW )
767.GT.
IF( NPROCS_REAL 1 ) THEN
769 $ NRHS*( NPROCS_REAL-2 ) * ( 3 * BW*BW )
778.GT.
IF( WTIME( 1 ) + WTIME( 2 ) 0.0D+0 ) THEN
780 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
785.GT.
IF( WTIME( 1 )+WTIME( 2 )0.0D+0 ) THEN
787 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
792.GE.
IF( WTIME( 2 )0.0D+0 )
793 $ WRITE( NOUT, FMT = 9993 ) 'wall
', UPLO,
796 $ NB, NRHS, NPROW, NPCOL,
797 $ WTIME( 1 ), WTIME( 2 ), TMFLOPS,
802.GT.
IF( CTIME( 1 )+CTIME( 2 )0.0D+0 ) THEN
804 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
809.GT.
IF( CTIME( 1 )+CTIME( 2 )0.0D+0 ) THEN
811 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
816.GE.
IF( CTIME( 2 )0.0D+0 )
817 $ WRITE( NOUT, FMT = 9993 ) 'cpu
', UPLO,
820 $ NB, NRHS, NPROW, NPCOL,
821 $ CTIME( 1 ), CTIME( 2 ), TMFLOPS,
837 CALL BLACS_GRIDEXIT( ICTXT )
838 CALL BLACS_GRIDEXIT( ICTXTB )
848 KTESTS = KPASS + KFAIL + KSKIP
849 WRITE( NOUT, FMT = * )
850 WRITE( NOUT, FMT = 9992 ) KTESTS
852 WRITE( NOUT, FMT = 9991 ) KPASS
853 WRITE( NOUT, FMT = 9989 ) KFAIL
855 WRITE( NOUT, FMT = 9990 ) KPASS
857 WRITE( NOUT, FMT = 9988 ) KSKIP
858 WRITE( NOUT, FMT = * )
859 WRITE( NOUT, FMT = * )
860 WRITE( NOUT, FMT = 9987 )
861.NE..AND..NE.
IF( NOUT6 NOUT0 )
867 9999 FORMAT( 'illegal ', a6,
': ', a5,
' = ', i3,
868 $
'; It should be at least 1' )
869 9998
FORMAT(
'ILLEGAL GRID: nprow*npcol = ', i4,
'. It can be at most',
871 9997
FORMAT(
'Bad ', a6,
' parameters: going on to next test case.' )
872 9996
FORMAT(
'Unable to perform ', a,
': need TOTMEM of at least',
874 9995
FORMAT(
'TIME UL N BW NB NRHS P Q L*U Time ',
875 $
'Slv Time MFLOPS MFLOP2 CHECK' )
876 9994
FORMAT(
'---- -- ------ --- ---- ----- -- ---- -------- ',
877 $
'-------- ------ ------ ------' )
878 9993
FORMAT( a4, 2x, a1, 1x, i6, 1x, i3, 1x, i4, 1x,
880 $ i4, 1x, f8.3, f9.4, f9.2, f9.2, 1x, a6 )
881 9992
FORMAT(
'Finished ', i6,
' tests, with the following results:' )
882 9991
FORMAT( i5,
' tests completed and passed residual checks.'
883 9990
FORMAT( i5,
' tests completed without checking.' )
884 9989
FORMAT( i5,
' tests completed and failed residual checks.' )
885 9988
FORMAT( i5,
' tests skipped because of illegal input values.' )
886 9987
FORMAT(
'END OF TESTS.' )
887 9986
FORMAT(
'||A - ', a4,
'|| / (||A|| * N * eps) = ', g25.7
888 9985
FORMAT(
'||Ax-b||/(||x||*||A||*eps*N) ', f25.7 )
subroutine psmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
logical function lsame(ca, cb)
LSAME
subroutine blacs_gridinit(cntxt, c, nprow, npcol)
real function pslange(norm, m, n, a, ia, ja, desca, work)
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
subroutine blacs_gridexit(cntxt)
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine psbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
subroutine pschekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
subroutine psfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
subroutine psptinfo(summry, nout, uplo, nmat, nval, ldnval, nbw, bwval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
subroutine psptlaschk(symm, uplo, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
subroutine pspttrf(n, d, e, ja, desca, af, laf, work, lwork, info)
subroutine pspttrs(n, nrhs, d, e, ja, desca, b, ib, descb, af, laf, work, lwork, info)
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)