75 parameter( totmem = 3000000 )
76 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
77 $ lld_, mb_, m_, nb_, n_, rsrc_
78 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
79 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
80 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
83 INTEGER cplxsz, memsiz, ntests
85 parameter( cplxsz = 8,
86 $ memsiz = totmem / cplxsz, ntests = 20,
87 $ padval = ( -9923.0e+0, -9923.0e+0 ),
90 parameter( int_one = 1 )
97 INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, , hh,
98 $ i, iam, iaseed, ibseed, ictxt, ictxtb,
99 $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
100 $ iprepad, ipw, ipw_size, ipw_solve,
101 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
102 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
103 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
104 $ nnr, nout, np, npcol, nprocs, nprocs_real,
105 $ nprow, nq, nrhs, n_first, n_last, worksiz
106 REAL anorm, sresid, thresh
107 DOUBLE PRECISION nops, nops2, tmflops, tmflops2
110 INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
115DOUBLE PRECISION ctime( 2 ), wtime( 2 )
116 COMPLEX mem( memsiz )
119 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
133 INTRINSIC dble,
max,
min, mod
136 DATA kfail, kpass, kskip, ktests / 4*0 /
145 CALL blacs_pinfo( iam, nprocs )
149 CALL pcdbinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
150 $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
151 $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
152 $ ntests, qval, ntests, thresh, mem, iam, nprocs )
154 check = ( thresh.GE.0.0e+0 )
159 WRITE( nout, fmt = * )
160 WRITE( nout, fmt = 9995 )
161 WRITE( nout, fmt = 9994 )
162 WRITE( nout, fmt = * )
175 IF( nprow.LT.1 )
THEN
177 $
WRITE( nout, fmt = 9999 )
'GRID',
'nprow', nprow
179 ELSE IF( npcol.LT.1 )
THEN
181 $
WRITE( nout, fmt = 9999 )
'GRID',
'npcol', npcol
183 ELSE IF( nprow*npcol.GT.nprocs )
THEN
185 $
WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
189 IF( ierr( 1 ).GT.0 )
THEN
191 $
WRITE( nout, fmt = 9997 )
'grid'
198 CALL blacs_get( -1, 0, ictxt )
204 CALL blacs_get( -1, 0, ictxtb )
212 IF( myrow.LT.0 .OR. mycol.LT.0 )
THEN
226 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
232 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
235 IF( ierr( 1 ).GT.0 )
THEN
237 $
WRITE( nout, fmt = 9997 )
'size'
243 DO 45 bw_num = 1, nbw
247 bwl = bwlval( bw_num )
250 $
WRITE( nout, fmt = 9999 )
'Lower Band',
'bwl', bwl
254 bwu = bwuval( bw_num )
257 $
WRITE( nout, fmt = 9999 )
'Upper Band',
'bwu', bwu
261 IF( bwl.GT.n-1 )
THEN
267 IF( bwu.GT.n-1 )
THEN
275 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
278 IF( ierr( 1 ).GT.0 )
THEN
289 nb =( (n-(npcol-1)*
max(bwl,bwu)-1)/npcol + 1 )
291 nb =
max( nb, 2*
max(bwl,bwu) )
298 IF( nb.LT.
min( 2*
max(bwl,bwu), n ) )
THEN
304 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
307 IF( ierr( 1 ).GT.0 )
THEN
314 np =
numroc( (bwl+bwu+1), (bwl+bwu+1),
316 nq =
numroc( n, nb, mycol, 0, npcol )
319 iprepad = ((bwl+bwu+1)+10)
321 ipostpad = ((bwl+bwu+1)+10)
330 CALL descinit( desca2d, (bwl+bwu+1), n,
331 $ (bwl+bwu+1), nb, 0, 0,
332 $ ictxt,((bwl+bwu+1)+10), ierr( 1 ) )
341 desca( 6 ) = ((bwl+bwu+1)+10)
344 ierr_temp = ierr( 1 )
346 ierr( 1 ) =
min( ierr( 1 ), ierr_temp )
350 CALL igsum2d( ictxt,
'All', '
', 1, 1, IERR, 1, -1, 0 )
352.LT.
IF( IERR( 1 )0 ) THEN
354 $ WRITE( NOUT, FMT = 9997 ) 'descriptor
'
366 FREE_PTR = FREE_PTR + IPREPAD
369 FREE_PTR = FREE_PTR + DESCA2D( LLD_ )*
382 $ NB*(BWL+BWU)+6*MAX(BWL,BWU)*MAX(BWL,BWU)
386 FREE_PTR = FREE_PTR + IPREPAD
388 FREE_PTR = FREE_PTR + FILLIN_SIZE
396 IPW_SIZE = MAX(BWL,BWU)*MAX(BWL,BWU)
401 FREE_PTR = FREE_PTR + IPW_SIZE
406.GT.
IF( FREE_PTRMEMSIZ ) THEN
408 $ WRITE( NOUT, FMT = 9996 )
409 $ 'divide and conquer factorization
',
416 CALL IGSUM2D( ICTXT, 'all
', ' ', 1, 1, ierr,
419 IF( ierr( 1 ).GT.0 )
THEN
421 $
WRITE( nout, fmt = 9997 )
'MEMORY'
427 worksiz =
max( ((bwl+bwu+1)+10), nb )
435 worksiz =
max( worksiz, desca2d( nb_ ) )
438 worksiz =
max( worksiz,
442 free_ptr = free_ptr + iprepad
443 ip_driver_w = free_ptr
444 free_ptr = free_ptr + worksiz + ipostpad
450 IF( free_ptr.GT.memsiz )
THEN
452 $
WRITE( nout, fmt = 9996 )
'factorization',
453 $ ( free_ptr )*cplxsz
459 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr,
462 IF( ierr( 1 ).GT.0 )
THEN
464 $
WRITE( nout, fmt = 9997 )
'MEMORY'
469 CALL pcbmatgen( ictxt,
'G',
'D', bwl, bwu, n,
470 $ (bwl+bwu+1), nb, mem( ipa ),
471 $ ((bwl+bwu+1)+10), 0, 0, iaseed, myrow,
472 $ mycol, nprow, npcol )
474 CALL pcfillpad( ictxt, np, nq, mem( ipa-iprepad ),
475 $ ((bwl+bwu+1)+10), iprepad, ipostpad,
479 $ mem( ip_driver_w-iprepad ), worksiz,
480 $ iprepad, ipostpad, padval )
486 anorm =
pclange(
'1', (bwl+bwu+1),
487 $ n, mem( ipa ), 1, 1,
488 $ desca2d, mem( ip_driver_w ) )
489 CALL pcchekpad( ictxt,
'PCLANGE', np, nq,
490 $ mem( ipa-iprepad ), ((bwl+bwu+1)+10),
491 $ iprepad, ipostpad, padval )
494 $ mem( ip_driver_w-iprepad ), worksiz,
495 $ iprepad, ipostpad, padval )
500 CALL blacs_barrier( ictxt,
'All' )
506 CALL pcdbtrf( n, bwl, bwu, mem( ipa ), 1, desca,
507 $ mem( ip_fillin ), fillin_size, mem( ipw ),
514 WRITE( nout, fmt = * )
'PCDBTRF INFO=', info
525 $ nq, mem( ipa-iprepad ), ((bwl+bwu+1)+10),
526 $ iprepad, ipostpad, padval )
540 CALL descinit( descb2d, n, nrhs, nb, 1, 0, 0,
541 $ ictxtb, nb+10, ierr( 1 ) )
550 descb( 6 ) = descb2d( lld_ )
555 IF( ipb .GT. 0 )
THEN
559 free_ptr = free_ptr + iprepad
561 free_ptr = free_ptr + nrhs*descb2d( lld_ )
566 ipw_solve_size = (
max(bwl,bwu)*nrhs)
569 free_ptr = free_ptr + ipw_solve_size
572 IF( free_ptr.GT.memsiz )
THEN
574 $
WRITE( nout, fmt = 9996 )
'solve',
575 $ ( free_ptr )*cplxsz
581 CALL igsum2d( ictxt,
'All',
' ', 1, 1,
584 IF( ierr( 1 ).GT.0 )
THEN
586 $
WRITE( nout, fmt = 9997 )
'MEMORY'
591 myrhs_size =
numroc( n, nb, mycol, 0, npcol )
596 $ descb2d( m_ ), descb2d( n_ ),
597 $ descb2d( mb_ ), descb2d( nb_ ),
599 $ descb2d( lld_ ), descb2d( rsrc_ ),
601 $ ibseed, 0, myrhs_size, 0, nrhs, mycol,
602 $ myrow, npcol, nprow )
606 $ mem( ipb-iprepad ),
611 $ mem( ip_driver_w-iprepad ),
617 CALL blacs_barrier( ictxt,
'All')
622 CALL pcdbtrs( trans, n, bwl, bwu, nrhs, mem( ipa ),
623 $ 1, desca, mem( ipb ), 1, descb,
624 $ mem( ip_fillin ), fillin_size,
625 $ mem( ipw_solve ), ipw_solve_size,
632 $
WRITE( nout, fmt = * )
'PCDBTRS INFO=', info
644 $ mem( ip_driver_w-iprepad ),
654 $ mem( ipb ), 1, 1, descb2d,
655 $ iaseed, mem( ipa ), 1, 1, desca2d,
656 $ ibseed, anorm, sresid,
657 $ mem( ip_driver_w ), worksiz )
660 IF( sresid.GT.thresh )
661 $
WRITE( nout, fmt = 9985 ) sresid
666 IF( ( sresid.LE.thresh ).AND.
667 $ ( (sresid-sresid).EQ.0.0e+0 ) )
THEN
682 CALL slcombine( ictxt,
'All',
'>',
'W', 2, 1,
684 CALL slcombine( ictxt, 'all
', '>
', 'c
', 2, 1,
689.EQ..AND..EQ.
IF( MYROW0 MYCOL0 ) THEN
695 NPROCS_REAL = ( N-1 )/NB + 1
696 N_LAST = MOD( N-1, NB ) + 1
701 NOPS = 2*(DBLE(N)*DBLE(BWL)*
703 $ (DBLE(N)*DBLE(BWL))
708 $ 2 * (DBLE(N)*(DBLE(BWL)+DBLE(BWU))
713 NOPS = NOPS * DBLE(4)
720 NOPS2 = 2*( (DBLE(N_FIRST)*
721 $ DBLE(BWL)*DBLE(BWU)))
723.GT.
IF ( NPROCS_REAL 1) THEN
729 $ 8*( (DBLE(N_LAST)*DBLE(BWL)
733.GT.
IF ( NPROCS_REAL 2) THEN
737 NOPS2 = NOPS2 + (NPROCS_REAL-2)*
738 $ 8*( (DBLE(NB)*DBLE(BWL)
745 $ 2*( NPROCS_REAL-1 ) *
747.GT.
IF( NPROCS_REAL 1 ) THEN
749 $ 2*( NPROCS_REAL-2 ) *
763 $ ( DBLE(BWL)+DBLE(BWU))
765.GT.
IF ( NPROCS_REAL 1 ) THEN
773 $ (DBLE(N_LAST)*(DBLE(BWL)+
774 $ DBLE(BWU)))*DBLE(NRHS)
777.GT.
IF ( NPROCS_REAL 2 ) THEN
784 $ ( NPROCS_REAL-2)*2*
785 $ ( (DBLE(NB)*(DBLE(BWL)+
786 $ DBLE(BWU)))*DBLE(NRHS) )
792 $ NRHS*( NPROCS_REAL-1)*2*(BWL*BWU )
793.GT.
IF( NPROCS_REAL 1 ) THEN
795 $ NRHS*( NPROCS_REAL-2 ) *
802 NOPS2 = NOPS2 * DBLE(4)
809.GT.
IF( WTIME( 1 ) + WTIME( 2 ) 0.0D+0 ) THEN
811 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
816.GT.
IF( WTIME( 1 )+WTIME( 2 )0.0D+0 ) THEN
818 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
823.GE.
IF( WTIME( 2 )0.0D+0 )
824 $ WRITE( NOUT, FMT = 9993 ) 'wall
', TRANS,
827 $ NB, NRHS, NPROW, NPCOL,
828 $ WTIME( 1 ), WTIME( 2 ), TMFLOPS,
833.GT.
IF( CTIME( 1 )+CTIME( 2 )0.0D+0 ) THEN
835 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
840.GT.
IF( CTIME( 1 )+CTIME( 2 )0.0D+0 ) THEN
842 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
847.GE.
IF( CTIME( 2 )0.0D+0 )
848 $ WRITE( NOUT, FMT = 9993 ) 'cpu
', TRANS,
851 $ NB, NRHS, NPROW, NPCOL,
852 $ CTIME( 1 ), CTIME( 2 ), TMFLOPS,
868 CALL BLACS_GRIDEXIT( ICTXT )
869 CALL BLACS_GRIDEXIT( ICTXTB )
879 KTESTS = KPASS + KFAIL + KSKIP
880 WRITE( NOUT, FMT = * )
881 WRITE( NOUT, FMT = 9992 ) KTESTS
883 WRITE( NOUT, FMT = 9991 ) KPASS
884 WRITE( NOUT, FMT = 9989 ) KFAIL
886 WRITE( NOUT, FMT = 9990 ) KPASS
888 WRITE( NOUT, FMT = 9988 ) KSKIP
889 WRITE( NOUT, FMT = * )
890 WRITE( NOUT, FMT = * )
891 WRITE( NOUT, FMT = 9987 )
892.NE..AND..NE.
IF( NOUT6 NOUT0 )
898 9999 FORMAT( 'illegal
', A6, ':
', A5, ' =
', I3,
899 $ '; it should be at least 1
' )
900 9998 FORMAT( 'illegal grid: nprow*npcol =
', I4, '. it can be at most
',
902 9997 FORMAT( 'bad
', A6, ' parameters: going on to next test case.
' )
903 9996 FORMAT( 'unable to perform
', A, ': need totmem of at least
',
905 9995 FORMAT( 'time tr n bwl bwu nb nrhs p q l*u time
',
906 $ 'slv time mflops mflop2 check
' )
907 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- --------
',
908 $ '-------- -------- -------- ------
' )
909 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5,
910 $ 1X,I4,1X,I4,1X,F9.3,
911 $ F9.4, F9.2, F9.2, 1X, A6 )
912 9992 FORMAT( 'finished
', I6, ' tests, with
the following results:
' )
913 9991 FORMAT( I5, ' tests completed and passed residual checks.
' )
914 9990 FORMAT( I5, ' tests completed without checking.
' )
915 9989 FORMAT( I5, ' tests completed and failed residual checks.
' )
916 9988 FORMAT( I5, ' tests skipped because of illegal input values.
' )
917 9987 FORMAT( 'END OF TESTS.
' )
918 9986 FORMAT( '||A -
', A4, '|| / (||A|| * N * eps) =
', G25.7 )
919 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N)
', F25.7 )
subroutine pcmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
end diagonal values have been computed in the(sparse) matrix id.SOL
logical function lsame(ca, cb)
LSAME
subroutine blacs_gridinit(cntxt, c, nprow, npcol)
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
real function pclange(norm, m, n, a, ia, ja, desca, work)
subroutine blacs_gridexit(cntxt)
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine pcbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
subroutine pcchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
subroutine pcdbinfo(summry, nout, trans, nmat, nval, ldnval, nbw, bwlval, bwuval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
subroutine pcdblaschk(symm, uplo, trans, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
subroutine pcdbtrf(n, bwl, bwu, a, ja, desca, af, laf, work, lwork, info)
subroutine pcdbtrs(trans, n, bwl, bwu, nrhs, a, ja, desca, b, ib, descb, af, laf, work, lwork, info)
subroutine pcfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)