1 SUBROUTINE pdpbtrsv( UPLO, TRANS, N, BW, NRHS, A, JA, DESCA, B,
2 $ IB, DESCB, AF, LAF, WORK, LWORK, INFO )
10 INTEGER BW, IB, INFO, JA, LAF, LWORK, N, NRHS
13 INTEGER DESCA( * ), DESCB( * )
14 DOUBLE PRECISION A( * ), AF( * ), B( * ), WORK( * )
369 parameter( one = 1.0d+0 )
370 DOUBLE PRECISION ZERO
371 parameter( zero = 0.0d+0 )
373 parameter( int_one = 1 )
374 INTEGER DESCMULT, BIGNUM
375 parameter( descmult = 100, bignum = descmult*descmult )
376 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
377 $ lld_, mb_, m_, nb_, n_, rsrc_
378 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
379 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
380 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
383 INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE
385 $ lldb, mbw2, mycol, myrow, my_num_cols, nb, np,
386 $ npcol, nprow, np_save, odd_size, ofst,
387 $ part_offset, part_size, return_code, store_m_b,
388 $ store_n_a, work_size_min
391 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
392 $ param_check( 17, 3 )
403 EXTERNAL lsame, numroc
422 IF( return_code.NE.0 )
THEN
428 IF( return_code.NE.0 )
THEN
435 IF( desca_1xp( 2 ).NE.descb_px1( 2 ) )
THEN
443 IF( desca_1xp( 4 ).NE.descb_px1( 4 ) )
THEN
449 IF( desca_1xp( 5 ).NE.descb_px1( 5 ) )
THEN
455 ictxt = desca_1xp( 2 )
456 csrc = desca_1xp( 5 )
458 llda = desca_1xp( 6 )
459 store_n_a = desca_1xp( 3 )
460 lldb = descb_px1( 6 )
461 store_m_b = descb_px1( 3 )
475 IF( lsame( uplo,
'U' ) )
THEN
477 ELSE IF( lsame( uplo,
'L' ) )
THEN
483 IF( lsame( trans,
'N' ) )
THEN
485 ELSE IF( lsame( trans,
'T' ) )
THEN
487 ELSE IF( lsame( trans,
'C' ) )
THEN
493 IF( lwork.LT.-1 )
THEN
495 ELSE IF( lwork.EQ.-1 )
THEN
505 IF( n+ja-1.GT.store_n_a )
THEN
509 IF( ( bw.GT.n-1 ) .OR. ( bw.LT.0 ) )
THEN
513 IF( llda.LT.( bw+1 ) )
THEN
521 IF( n+ib-1.GT.store_m_b )
THEN
525 IF( lldb.LT.nb )
THEN
541 IF( nprow.NE.1 )
THEN
545 IF( n.GT.np*nb-mod( ja-1, nb ) )
THEN
548 $
'PDPBTRSV, D&C alg.: only 1 block per proc',
553 IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.2*bw ) )
THEN
555 CALL pxerbla( ictxt,
'PDPBTRSV, D&C alg.: NB too small',
561 work_size_min = bw*nrhs
563 work( 1 ) = work_size_min
565 IF( lwork.LT.work_size_min )
THEN
566 IF( lwork.NE.-1 )
THEN
568 CALL pxerbla( ictxt,
'PDPBTRSV: worksize error', -info )
575 param_check( 17, 1 ) = descb( 5 )
576 param_check( 16, 1 ) = descb( 4 )
577 param_check( 15, 1 ) = descb( 3 )
578 param_check( 14, 1 ) = descb( 2 )
579 param_check( 13, 1 ) = descb( 1 )
580 param_check( 12, 1 ) = ib
581 param_check( 11, 1 ) = desca( 5 )
582 param_check( 10, 1 ) = desca( 4 )
583 param_check( 9, 1 ) = desca( 3 )
584 param_check( 8, 1 ) = desca( 1 )
585 param_check( 7, 1 ) = ja
586 param_check( 6, 1 ) = nrhs
587 param_check( 5, 1 ) = bw
588 param_check( 4, 1 ) = n
589 param_check( 3, 1 ) = idum3
590 param_check( 2, 1 ) = idum2
591 param_check( 1, 1 ) = idum1
593 param_check( 17, 2 ) = 1105
594 param_check( 16, 2 ) = 1104
595 param_check( 15, 2 ) = 1103
596 param_check( 14, 2 ) = 1102
597 param_check( 13, 2 ) = 1101
598 param_check( 12, 2 ) = 10
599 param_check( 11, 2 ) = 805
600 param_check( 10, 2 ) = 804
601 param_check( 9, 2 ) = 803
602 param_check( 8, 2 ) = 801
603 param_check( 7, 2 ) = 7
604 param_check( 6, 2 ) = 5
605 param_check( 5, 2 ) = 4
606 param_check( 4, 2 ) = 3
607 param_check( 3, 2 ) = 14
608 param_check( 2, 2 ) = 2
609 param_check( 1, 2 ) = 1
617 ELSE IF( info.LT.-descmult )
THEN
620 info = -info*descmult
625 CALL globchk( ictxt, 17, param_check, 17, param_check( 1, 3 ),
631 IF( info.EQ.bignum )
THEN
633 ELSE IF( mod( info, descmult ).EQ.0 )
THEN
634 info = -info / descmult
640 CALL pxerbla( ictxt,
'PDPBTRSV', -info )
656 part_offset = nb*( ( ja-1 ) / ( npcol*nb ) )
658 IF( ( mycol-csrc ).LT.( ja-part_offset-1 ) / nb )
THEN
659 part_offset = part_offset + nb
662 IF( mycol.LT.csrc )
THEN
663 part_offset = part_offset - nb
672 first_proc = mod( ( ja-1 ) / nb+csrc, npcol )
676 ja_new = mod( ja-1, nb ) + 1
681 np = ( ja_new+n-2 ) / nb + 1
685 CALL reshape( ictxt, int_one, ictxt_new, int_one, first_proc,
692 desca_1xp( 2 ) = ictxt_new
693 descb_px1( 2 ) = ictxt_new
701 IF( myrow.LT.0 )
THEN
714 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
718 IF( mycol.EQ.0 )
THEN
719 part_offset = part_offset + mod( ja_new-1, part_size )
720 my_num_cols = my_num_cols - mod( ja_new-1, part_size )
725 ofst = part_offset*llda
729 odd_size = my_num_cols
730 IF( mycol.LT.np-1 )
THEN
731 odd_size = odd_size - bw
738 IF( lsame( uplo,
'L' ) )
THEN
740 IF( lsame( trans,
'N' ) )
THEN
751 CALL dtbtrs( uplo,
'N',
'N', odd_size, bw, nrhs,
752 $ a( ofst+1 ), llda, b( part_offset+1 ), lldb,
756 IF( mycol.LT.np-1 )
THEN
764 CALL dlamov(
'N', bw, nrhs,
765 $ b( part_offset+odd_size-bw+1 ), lldb,
768 CALL dtrmm(
'L',
'U',
'N',
'N', bw, nrhs, -one,
769 $ a( ( ofst+( bw+1 )+( odd_size-bw )*llda ) ),
770 $ llda-1, work( 1 ), bw )
772 CALL dmatadd( bw, nrhs, one, work( 1 ), bw, one,
773 $ b( part_offset+odd_size+1 ), lldb )
778 IF( mycol.NE.0 )
THEN
782 CALL dgemm(
'T',
'N', bw, nrhs, odd_size, -one, af( 1 ),
783 $ odd_size, b( part_offset+1 ), lldb, zero,
784 $ work( 1+bw-bw ), bw )
795 IF( mycol.GT.0 )
THEN
797 CALL dgesd2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
804 IF( mycol.LT.npcol-1 )
THEN
806 CALL dgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
811 CALL dmatadd( bw, nrhs, one, work( 1 ), bw, one,
812 $ b( part_offset+odd_size+1 ), lldb )
819 IF( mycol.EQ.npcol-1 )
THEN
834 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
839 IF( mycol-level_dist.GE.0 )
THEN
841 CALL dgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
844 CALL dmatadd( bw, nrhs, one, work( 1 ), bw, one,
845 $ b( part_offset+odd_size+1 ), lldb )
851 IF( mycol+level_dist.LT.npcol-1 )
THEN
853 CALL dgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
856 CALL dmatadd( bw, nrhs, one, work( 1 ), bw, one,
857 $ b( part_offset+odd_size+1 ), lldb )
861 level_dist = level_dist*2
874 CALL dtrtrs(
'L',
'N',
'N', bw, nrhs,
875 $ af( odd_size*bw+mbw2+1 ), bw
876 $ b( part_offset+odd_size+1 ), lldb, info )
885 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
889 CALL dgemm(
'T',
'N', bw, nrhs, bw, -one,
890 $ af( ( odd_size )*bw+1 ), bw,
891 $ b( part_offset+odd_size+1 ), lldb, zero,
896 CALL dgesd2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
903 IF( ( mycol / level_dist.GT.0 ) .AND.
904 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
911 CALL dgemm(
'N',
'N', bw, nrhs, bw, -one,
912 $ af( odd_size*bw+2*mbw2+1 ), bw,
913 $ b( part_offset+odd_size+1 ), lldb, zero,
918 CALL dgesd2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
938 IF( mycol.EQ.npcol-1 )
THEN
946 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
949 level_dist = level_dist*2
955 IF( ( mycol / level_dist.GT.0 ) .AND.
956 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
961 CALL dgerv2d( ictxt, bw, nrhs, work( 1 ), bw, 0,
968 CALL dgemm( 't
', 'n
', BW, NRHS, BW, -ONE,
969 $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, WORK( 1 ),
970 $ BW, ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB )
975.LE.
IF( MYCOL / LEVEL_DIST( NPCOL-1 ) / LEVEL_DIST-2 ) THEN
979 CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0,
984 CALL DGEMM( 'n
', 'n
', BW, NRHS, BW, -ONE,
985 $ AF( ( ODD_SIZE )*BW+1 ), BW, WORK( 1 ), BW,
986 $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB )
994 CALL DTRTRS( 'l
', 't
', 'n
', BW, NRHS,
995 $ AF( ODD_SIZE*BW+MBW2+1 ), BW,
996 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO )
1007.EQ.
IF( LEVEL_DIST1 )
1010 LEVEL_DIST = LEVEL_DIST / 2
1014.LT.
IF( MYCOL+LEVEL_DISTNPCOL-1 ) THEN
1016 CALL DGESD2D( ICTXT, BW, NRHS,
1017 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0,
1018 $ MYCOL+LEVEL_DIST )
1024.GE.
IF( MYCOL-LEVEL_DIST0 ) THEN
1026 CALL DGESD2D( ICTXT, BW, NRHS,
1027 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0,
1028 $ MYCOL-LEVEL_DIST )
1046.LT.
IF( MYCOLNPCOL-1 ) THEN
1048 CALL DGESD2D( ICTXT, BW, NRHS,
1049 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0,
1056.GT.
IF( MYCOL0 ) THEN
1058 CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0,
1069.NE.
IF( MYCOL0 ) THEN
1073 CALL DGEMM( 'n
', 'n
', ODD_SIZE, NRHS, BW, -ONE, AF( 1 ),
1074 $ ODD_SIZE, WORK( 1+BW-BW ), BW, ONE,
1075 $ B( PART_OFFSET+1 ), LLDB )
1080.LT.
IF( MYCOLNP-1 ) THEN
1088 CALL DLAMOV( 'n
', BW, NRHS, B( PART_OFFSET+ODD_SIZE+1 ),
1089 $ LLDB, WORK( 1+BW-BW ), BW )
1091 CALL DTRMM( 'l
', 'u
', 't
', 'n
', BW, NRHS, -ONE,
1092 $ A( ( OFST+( BW+1 )+( ODD_SIZE-BW )*LLDA ) ),
1093 $ LLDA-1, WORK( 1+BW-BW ), BW )
1095 CALL DMATADD( BW, NRHS, ONE, WORK( 1+BW-BW ), BW, ONE,
1096 $ B( PART_OFFSET+ODD_SIZE-BW+1 ), LLDB )
1102 CALL DTBTRS( UPLO, 't
', 'n
', ODD_SIZE, BW, NRHS,
1103 $ A( OFST+1 ), LLDA, B( PART_OFFSET+1 ), LLDB,
1114 IF( LSAME( TRANS, 't
' ) ) THEN
1125 CALL DTBTRS( UPLO, 't
', 'n
', ODD_SIZE, BW, NRHS,
1126 $ A( OFST+1 ), LLDA, B( PART_OFFSET+1 ), LLDB,
1130.LT.
IF( MYCOLNP-1 ) THEN
1138 CALL DLAMOV( 'n
', BW, NRHS,
1139 $ B( PART_OFFSET+ODD_SIZE-BW+1 ), LLDB,
1142 CALL DTRMM( 'l
', 'l
', 't
', 'n
', BW, NRHS, -ONE,
1143 $ A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1,
1146 CALL DMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE,
1147 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB )
1152.NE.
IF( MYCOL0 ) THEN
1156 CALL DGEMM( 't
', 'n
', BW, NRHS, ODD_SIZE, -ONE, AF( 1 ),
1157 $ ODD_SIZE, B( PART_OFFSET+1 ), LLDB, ZERO,
1158 $ WORK( 1+BW-BW ), BW )
1169.GT.
IF( MYCOL0 ) THEN
1171 CALL DGESD2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0,
1178.LT.
IF( MYCOLNPCOL-1 ) THEN
1180 CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0,
1185 CALL DMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE,
1186 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB )
1193.EQ.
IF( MYCOLNPCOL-1 ) THEN
1208.NE.
IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 )0 )
1213.GE.
IF( MYCOL-LEVEL_DIST0 ) THEN
1215 CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0,
1216 $ MYCOL-LEVEL_DIST )
1218 CALL DMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE,
1219 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB )
1225.LT.
IF( MYCOL+LEVEL_DISTNPCOL-1 ) THEN
1227 CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0,
1228 $ MYCOL+LEVEL_DIST )
1230 CALL DMATADD( BW, NRHS, ONE, WORK( 1 ), BW, ONE,
1231 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB )
1235 LEVEL_DIST = LEVEL_DIST*2
1248 CALL DTRTRS( 'l
', 'n
', 'n
', BW, NRHS,
1249 $ AF( ODD_SIZE*BW+MBW2+1 ), BW,
1250 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO )
1252.NE.
IF( INFO0 ) THEN
1259.LE.
IF( MYCOL / LEVEL_DIST( NPCOL-1 ) / LEVEL_DIST-2 ) THEN
1263 CALL DGEMM( 't
', 'n
', BW, NRHS, BW, -ONE,
1264 $ AF( ( ODD_SIZE )*BW+1 ), BW,
1265 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO,
1270 CALL DGESD2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0,
1271 $ MYCOL+LEVEL_DIST )
1277.GT..AND.
IF( ( MYCOL / LEVEL_DIST0 )
1278.LE.
$ ( MYCOL / LEVEL_DIST( NPCOL-1 ) / LEVEL_DIST-1 ) )
1285 CALL DGEMM( 'n
', 'n
', BW, NRHS, BW, -ONE,
1286 $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW,
1287 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO,
1292 CALL DGESD2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0,
1293 $ MYCOL-LEVEL_DIST )
1312.EQ.
IF( MYCOLNPCOL-1 ) THEN
1320.NE.
IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 )0 )
1323 LEVEL_DIST = LEVEL_DIST*2
1329.GT..AND.
IF( ( MYCOL / LEVEL_DIST0 )
1330.LE.
$ ( MYCOL / LEVEL_DIST( NPCOL-1 ) / LEVEL_DIST-1 ) )
1335 CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0,
1336 $ MYCOL-LEVEL_DIST )
1342 CALL DGEMM( 't
', 'n
', BW, NRHS, BW, -ONE,
1343 $ AF( ODD_SIZE*BW+2*MBW2+1 ), BW, WORK( 1 ),
1344 $ BW, ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB )
1349.LE.
IF( MYCOL / LEVEL_DIST( NPCOL-1 ) / LEVEL_DIST-2 ) THEN
1353 CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0,
1354 $ MYCOL+LEVEL_DIST )
1358 CALL DGEMM( 'n
', 'n
', BW, NRHS, BW, -ONE,
1359 $ AF( ( ODD_SIZE )*BW+1 ), BW, WORK( 1 ), BW,
1360 $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB )
1368 CALL DTRTRS( 'l
', 't
', 'n
', BW, NRHS,
1369 $ AF( ODD_SIZE*BW+MBW2+1 ), BW,
1370 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO )
1372.NE.
IF( INFO0 ) THEN
1381.EQ.
IF( LEVEL_DIST1 )
1384 LEVEL_DIST = LEVEL_DIST / 2
1388.LT.
IF( MYCOL+LEVEL_DISTNPCOL-1 ) THEN
1390 CALL DGESD2D( ICTXT, BW, NRHS,
1391 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0,
1392 $ MYCOL+LEVEL_DIST )
1398.GE.
IF( MYCOL-LEVEL_DIST0 ) THEN
1400 CALL DGESD2D( ICTXT, BW, NRHS,
1401 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0,
1402 $ MYCOL-LEVEL_DIST )
1420.LT.
IF( MYCOLNPCOL-1 ) THEN
1422 CALL DGESD2D( ICTXT, BW, NRHS,
1423 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0,
1430.GT.
IF( MYCOL0 ) THEN
1432 CALL DGERV2D( ICTXT, BW, NRHS, WORK( 1 ), BW, 0,
1443.NE.
IF( MYCOL0 ) THEN
1447 CALL DGEMM( 'n
', 'n
', ODD_SIZE, NRHS, BW, -ONE, AF( 1 ),
1448 $ ODD_SIZE, WORK( 1+BW-BW ), BW, ONE,
1449 $ B( PART_OFFSET+1 ), LLDB )
1454.LT.
IF( MYCOLNP-1 ) THEN
1462 CALL DLAMOV( 'n
', BW, NRHS, B( PART_OFFSET+ODD_SIZE+1 ),
1463 $ LLDB, WORK( 1+BW-BW ), BW )
1465 CALL DTRMM( 'l
', 'l
', 'n
', 'n
', BW, NRHS, -ONE,
1466 $ A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1,
1467 $ WORK( 1+BW-BW ), BW )
1469 CALL DMATADD( BW, NRHS, ONE, WORK( 1+BW-BW ), BW, ONE,
1470 $ B( PART_OFFSET+ODD_SIZE-BW+1 ), LLDB )
1476 CALL DTBTRS( UPLO, 'n
', 'n
', ODD_SIZE, BW, NRHS,
1477 $ A( OFST+1 ), LLDA, B( PART_OFFSET+1 ), LLDB,
1491.NE.
IF( ICTXT_SAVEICTXT_NEW ) THEN
1492 CALL BLACS_GRIDEXIT( ICTXT_NEW )
1504 WORK( 1 ) = WORK_SIZE_MIN