1 SUBROUTINE psdttrsv( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA,
2 $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO )
11 INTEGER IB, INFO, JA, LAF, LWORK, N, NRHS
14 INTEGER DESCA( * ), DESCB( * )
15 REAL AF( * ), B( * ), D( * ), DL( * ), DU( * ),
387 parameter( one = 1.0e+0 )
391 parameter( int_one = 1 )
394 INTEGER BLOCK_CYCLIC_2D, , CTXT_, DLEN_, DTYPE_,
395 $ lld_, mb_, m_, nb_, n_, rsrc_
396 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
397 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
398 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
401 INTEGER CSRC, , ICTXT, ICTXT_NEW, ICTXT_SAVE,
402 $ idum1, idum2, idum3, ja_new, level_dist, llda,
403 $ lldb, mycol, myrow, my_num_cols, nb, np, npcol,
404 $ nprow, np_save, odd_size, part_offset,
405 $ part_size, return_code, store_m_b, store_n_a,
406 $ temp, work_size_min, work_u
409 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
410 $ param_check( 16, 3 )
420 EXTERNAL lsame, numroc
423 INTRINSIC ichar,
min, mod
437 temp = desca( dtype_ )
438 IF( temp.EQ.502 )
THEN
440 desca( dtype_ ) = 501
445 desca( dtype_ ) = temp
447 IF( return_code.NE.0 )
THEN
453 IF( return_code.NE.0 )
THEN
460 IF( desca_1xp( 2 ).NE.descb_px1( 2 ) )
THEN
468 IF( desca_1xp( 4 ).NE.descb_px1( 4 ) )
THEN
474 IF( desca_1xp( 5 ).NE.descb_px1( 5 ) )
THEN
480 ictxt = desca_1xp( 2 )
481 csrc = desca_1xp( 5 )
483 llda = desca_1xp( 6 )
484 store_n_a = desca_1xp( 3 )
485 lldb = descb_px1( 6 )
486 store_m_b = descb_px1( 3 )
496 IF( lsame( uplo,
'U' ) )
THEN
498 ELSE IF( lsame( uplo,
'L' ) )
THEN
504 IF( lsame( trans,
'N' ) )
THEN
506 ELSE IF( lsame( trans,
'T' ) )
THEN
508 ELSE IF( lsame( trans,
'C' ) )
THEN
514 IF( lwork.LT.-1 )
THEN
516 ELSE IF( lwork.EQ.-1 )
THEN
526 IF( n+ja-1.GT.store_n_a )
THEN
530 IF( n+ib-1.GT.store_m_b )
THEN
534 IF( lldb.LT.nb )
THEN
550 IF( nprow.NE.1 )
THEN
554 IF( n.GT.np*nb-mod( ja-1, nb ) )
THEN
557 $
'PSDTTRSV, D&C alg.: only 1 block per proc',
562 IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.2*int_one ) )
THEN
564 CALL pxerbla( ictxt,
'PSDTTRSV, D&C alg.: NB too small',
570 work_size_min = int_one*nrhs
572 work( 1 ) = work_size_min
574 IF( lwork.LT.work_size_min )
THEN
575 IF( lwork.NE.-1 )
THEN
577 CALL pxerbla( ictxt,
'PSDTTRSV: worksize error', -info )
584 param_check( 16, 1 ) = descb( 5 )
585 param_check( 15, 1 ) = descb( 4 )
586 param_check( 14, 1 ) = descb( 3 )
587 param_check( 13, 1 ) = descb( 2 )
588 param_check( 12, 1 ) = descb( 1 )
589 param_check( 11, 1 ) = ib
590 param_check( 10, 1 ) = desca( 5 )
591 param_check( 9, 1 ) = desca( 4 )
592 param_check( 8, 1 ) = desca( 3 )
593 param_check( 7, 1 ) = desca( 1 )
594 param_check( 6, 1 ) = ja
595 param_check( 5, 1 ) = nrhs
596 param_check( 4, 1 ) = n
597 param_check( 3, 1 ) = idum3
598 param_check( 2, 1 ) = idum2
599 param_check( 1, 1 ) = idum1
601 param_check( 16, 2 ) = 1205
602 param_check( 15, 2 ) = 1204
603 param_check( 14, 2 ) = 1203
604 param_check( 13, 2 ) = 1202
605 param_check( 12, 2 ) = 1201
606 param_check( 11, 2 ) = 11
607 param_check( 10, 2 ) = 905
608 param_check( 9, 2 ) = 904
609 param_check( 8, 2 ) = 903
610 param_check( 7, 2 ) = 901
611 param_check( 6, 2 ) = 8
612 param_check( 5, 2 ) = 4
613 param_check( 4, 2 ) = 3
614 param_check( 3, 2 ) = 16
615 param_check( 2, 2 ) = 2
616 param_check( 1, 2 ) = 1
624 ELSE IF( info.LT.-descmult )
THEN
627 info = -info*descmult
632 CALL globchk( ictxt, 16, param_check, 16, param_check( 1, 3 ),
638 IF( info.EQ.bignum )
THEN
640 ELSE IF( mod( info, descmult ).EQ.0 )
THEN
641 info = -info / descmult
647 CALL pxerbla( ictxt,
'PSDTTRSV', -info )
663 part_offset = nb*( ( ja-1 ) / ( npcol*nb ) )
665 IF( ( mycol-csrc ).LT.( ja-part_offset-1 ) / nb )
THEN
666 part_offset = part_offset + nb
669 IF( mycol.LT.csrc )
THEN
670 part_offset = part_offset - nb
679 first_proc = mod( ( ja-1 ) / nb+csrc, npcol )
683 ja_new = mod( ja-1, nb ) + 1
688 np = ( ja_new+n-2 ) / nb + 1
692 CALL reshape( ictxt, int_one, ictxt_new, int_one, first_proc,
699 desca_1xp( 2 ) = ictxt_new
700 descb_px1( 2 ) = ictxt_new
708 IF( myrow.LT.0 )
THEN
721 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
725 IF( mycol.EQ.0 )
THEN
726 part_offset = part_offset + mod( ja_new-1, part_size )
727 my_num_cols = my_num_cols - mod( ja_new-1, part_size )
732 odd_size = my_num_cols
733 IF( mycol.LT.np-1 )
THEN
734 odd_size = odd_size - int_one
739 work_u = int_one*odd_size + 3
745 IF( lsame( uplo,
'L' ) )
THEN
747 IF( lsame( trans,
'N' ) )
THEN
758 CALL sdttrsv( uplo,
'N', odd_size, nrhs,
759 $ dl( part_offset+2 ), d( part_offset+1 ),
760 $ du( part_offset+1 ), b( part_offset+1 ), lldb
764 IF( mycol.LT.np-1 )
THEN
768 CALL saxpy( nrhs, -dl( part_offset+odd_size+1 ),
769 $ b( part_offset+odd_size ), lldb,
770 $ b( part_offset+odd_size+1 ), lldb )
775 IF( mycol.NE.0 )
THEN
779 CALL sgemm(
'T',
'N', int_one, nrhs, odd_size, -one,
780 $ af( 1 ), odd_size, b( part_offset+1 ), lldb,
781 $ zero, work( 1+int_one-int_one ), int_one )
792 IF( mycol.GT.0 )
THEN
794 CALL sgesd2d( ictxt, int_one, nrhs, work( 1 ), int_one,
801 IF( mycol.LT.npcol-1 )
THEN
803 CALL sgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
808 CALL smatadd( int_one, nrhs, one, work( 1 ), int_one,
809 $ one, b( part_offset+odd_size+1 ), lldb )
816 IF( mycol.EQ.npcol-1 )
THEN
831 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
836 IF( mycol-level_dist.GE.0 )
THEN
838 CALL sgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
839 $ 0, mycol-level_dist )
841 CALL smatadd( int_one, nrhs, one, work( 1 ), int_one,
842 $ one, b( part_offset+odd_size+1 ), lldb )
848 IF( mycol+level_dist.LT.npcol-1 )
THEN
850 CALL sgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
851 $ 0, mycol+level_dist )
853 CALL smatadd( int_one, nrhs, one, work( 1 ), int_one,
854 $ one, b( part_offset+odd_size+1 ), lldb )
858 level_dist = level_dist*2
871 CALL stbtrs(
'L',
'N',
'U', int_one,
872 $
min( int_one, int_one-1 ), nrhs,
873 $ af( odd_size+2 ), int_one+1,
874 $ b( part_offset+odd_size+1 ), lldb, info )
883 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
887 CALL sgemm(
'T',
'N', int_one, nrhs, int_one, -one,
888 $ af( ( odd_size )*int_one+1 ), int_one,
889 $ b( part_offset+odd_size+1 ), lldb, zero,
890 $ work( 1 ), int_one )
894 CALL sgesd2d( ictxt, int_one, nrhs, work( 1 ), int_one,
895 $ 0, mycol+level_dist )
901 IF( ( mycol / level_dist.GT.0 ) .AND.
902 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
909 CALL sgemm(
'N',
'N', int_one, nrhs, int_one, -one,
910 $ af( odd_size*int_one+2+1 ), int_one,
911 $ b( part_offset+odd_size+1 ), lldb, zero,
912 $ work( 1 ), int_one )
916 CALL sgesd2d( ictxt, int_one, nrhs, work( 1 ), int_one,
917 $ 0, mycol-level_dist )
936 IF( mycol.EQ.npcol-1 )
THEN
944 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
947 level_dist = level_dist*2
953 IF( ( mycol / level_dist.GT.0 ) .AND.
954 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
959 CALL sgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
960 $ 0, mycol-level_dist )
966 CALL sgemm(
'T',
'N', int_one, nrhs, int_one, -one,
967 $ af( odd_size*int_one+2+1 ), int_one,
968 $ work( 1 ), int_one, one,
969 $ b( part_offset+odd_size+1 ), lldb )
974 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
978 CALL sgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
979 $ 0, mycol+level_dist )
983 CALL sgemm(
'N',
'N', int_one, nrhs, int_one, -one,
984 $ af( ( odd_size )*int_one+1 ), int_one,
985 $ work( 1 ), int_one, one,
986 $ b( part_offset+odd_size+1 ), lldb )
994 CALL stbtrs(
'L',
'T',
'U', int_one,
995 $
min( int_one, int_one-1 ), nrhs,
996 $ af( odd_size+2 ), int_one+1,
997 $ b( part_offset+odd_size+1 ), lldb, info )
1008 IF( level_dist.EQ.1 )
1011 level_dist = level_dist / 2
1015 IF( mycol+level_dist.LT.npcol-1 )
THEN
1017 CALL sgesd2d( ictxt, int_one, nrhs,
1018 $ b( part_offset+odd_size+1 ), lldb, 0,
1019 $ mycol+level_dist )
1025 IF( mycol-level_dist.GE.0 )
THEN
1027 CALL sgesd2d( ictxt, int_one, nrhs,
1028 $ b( part_offset+odd_size+1 ), lldb, 0,
1029 $ mycol-level_dist )
1047 IF( mycol.LT.npcol-1 )
THEN
1049 CALL sgesd2d( ictxt, int_one, nrhs,
1050 $ b( part_offset+odd_size+1 ), lldb, 0,
1057 IF( mycol.GT.0 )
THEN
1059 CALL sgerv2d( ictxt, int_one, nrhs, work( 1 ), int_one,
1070 IF( mycol.NE.0 )
THEN
1074 CALL sgemm(
'N',
'N', odd_size, nrhs, int_one, -one,
1075 $ af( 1 ), odd_size, work( 1+int_one-int_one ),
1076 $ int_one, one, b( part_offset+1 ), lldb )
1081 IF( mycol.LT.np-1 )
THEN
1085 CALL saxpy( nrhs, -( dl( part_offset+odd_size+1 ) ),
1086 $ b( part_offset+odd_size+1 ), lldb,
1087 $ b( part_offset+odd_size ), lldb )
1093 CALL sdttrsv( uplo,
'T', odd_size, nrhs,
1094 $ dl( part_offset+2 ), d( part_offset+1 ),
1095 $ du( part_offset+1 ), b( part_offset+1 ), lldb,
1106 IF( lsame( trans, 't
' ) ) THEN
1117 CALL SDTTRSV( UPLO, 't
', ODD_SIZE, NRHS,
1118 $ DL( PART_OFFSET+2 ), D( PART_OFFSET+1 ),
1119 $ DU( PART_OFFSET+1 ), B( PART_OFFSET+1 ), LLDB,
1123.LT.
IF( MYCOLNP-1 ) THEN
1127 CALL SAXPY( NRHS, -( DU( PART_OFFSET+ODD_SIZE ) ),
1128 $ B( PART_OFFSET+ODD_SIZE ), LLDB,
1129 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB )
1134.NE.
IF( MYCOL0 ) THEN
1138 CALL SGEMM( 't
', 'n
', INT_ONE, NRHS, ODD_SIZE, -ONE,
1139 $ AF( WORK_U+1 ), ODD_SIZE, B( PART_OFFSET+1 ),
1140 $ LLDB, ZERO, WORK( 1+INT_ONE-INT_ONE ),
1152.GT.
IF( MYCOL0 ) THEN
1154 CALL SGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE,
1161.LT.
IF( MYCOLNPCOL-1 ) THEN
1163 CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE,
1168 CALL SMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE,
1169 $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB )
1176.EQ.
IF( MYCOLNPCOL-1 ) THEN
1191.NE.
IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 )0 )
1196.GE.
IF( MYCOL-LEVEL_DIST0 ) THEN
1198 CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE,
1199 $ 0, MYCOL-LEVEL_DIST )
1201 CALL SMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE,
1202 $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB )
1208.LT.
IF( MYCOL+LEVEL_DISTNPCOL-1 ) THEN
1210 CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE,
1211 $ 0, MYCOL+LEVEL_DIST )
1213 CALL SMATADD( INT_ONE, NRHS, ONE, WORK( 1 ), INT_ONE,
1214 $ ONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB )
1218 LEVEL_DIST = LEVEL_DIST*2
1231 CALL STBTRS( 'u
', 't
', 'n
', INT_ONE,
1232 $ MIN( INT_ONE, INT_ONE-1 ), NRHS,
1233 $ AF( ODD_SIZE+2 ), INT_ONE+1,
1234 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO )
1236.NE.
IF( INFO0 ) THEN
1243.LE.
IF( MYCOL / LEVEL_DIST( NPCOL-1 ) / LEVEL_DIST-2 ) THEN
1247 CALL SGEMM( 't
', 'n
', INT_ONE, NRHS, INT_ONE, -ONE,
1248 $ AF( WORK_U+( ODD_SIZE )*INT_ONE+1 ), INT_ONE,
1249 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO,
1250 $ WORK( 1 ), INT_ONE )
1254 CALL SGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE,
1255 $ 0, MYCOL+LEVEL_DIST )
1261.GT..AND.
IF( ( MYCOL / LEVEL_DIST0 )
1262.LE.
$ ( MYCOL / LEVEL_DIST( NPCOL-1 ) / LEVEL_DIST-1 ) )
1269 CALL SGEMM( 'n
', 'n
', INT_ONE, NRHS, INT_ONE, -ONE,
1270 $ AF( WORK_U+ODD_SIZE*INT_ONE+2+1 ), INT_ONE,
1271 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, ZERO,
1272 $ WORK( 1 ), INT_ONE )
1276 CALL SGESD2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE,
1277 $ 0, MYCOL-LEVEL_DIST )
1296.EQ.
IF( MYCOLNPCOL-1 ) THEN
1304.NE.
IF( MOD( ( MYCOL+1 ) / LEVEL_DIST, 2 )0 )
1307 LEVEL_DIST = LEVEL_DIST*2
1313.GT..AND.
IF( ( MYCOL / LEVEL_DIST0 )
1314.LE.
$ ( MYCOL / LEVEL_DIST( NPCOL-1 ) / LEVEL_DIST-1 ) )
1319 CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE,
1320 $ 0, MYCOL-LEVEL_DIST )
1326 CALL SGEMM( 't
', 'n
', INT_ONE, NRHS, INT_ONE, -ONE,
1327 $ AF( WORK_U+ODD_SIZE*INT_ONE+2+1 ), INT_ONE,
1328 $ WORK( 1 ), INT_ONE, ONE,
1329 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB )
1334.LE.
IF( MYCOL / LEVEL_DIST( NPCOL-1 ) / LEVEL_DIST-2 ) THEN
1338 CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE,
1339 $ 0, MYCOL+LEVEL_DIST )
1343 CALL SGEMM( 'n
', 'n
', INT_ONE, NRHS, INT_ONE, -ONE,
1344 $ AF( WORK_U+( ODD_SIZE )*INT_ONE+1 ), INT_ONE,
1345 $ WORK( 1 ), INT_ONE, ONE,
1346 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB )
1354 CALL STBTRS( 'u
', 'n
', 'n
', INT_ONE,
1355 $ MIN( INT_ONE, INT_ONE-1 ), NRHS,
1356 $ AF( ODD_SIZE+2 ), INT_ONE+1,
1357 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO )
1359.NE.
IF( INFO0 ) THEN
1368.EQ.
IF( LEVEL_DIST1 )
1371 LEVEL_DIST = LEVEL_DIST / 2
1375.LT.
IF( MYCOL+LEVEL_DISTNPCOL-1 ) THEN
1377 CALL SGESD2D( ICTXT, INT_ONE, NRHS,
1378 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0,
1379 $ MYCOL+LEVEL_DIST )
1385.GE.
IF( MYCOL-LEVEL_DIST0 ) THEN
1387 CALL SGESD2D( ICTXT, INT_ONE, NRHS,
1388 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0,
1389 $ MYCOL-LEVEL_DIST )
1407.LT.
IF( MYCOLNPCOL-1 ) THEN
1409 CALL SGESD2D( ICTXT, INT_ONE, NRHS,
1410 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, 0,
1417.GT.
IF( MYCOL0 ) THEN
1419 CALL SGERV2D( ICTXT, INT_ONE, NRHS, WORK( 1 ), INT_ONE,
1430.NE.
IF( MYCOL0 ) THEN
1434 CALL SGEMM( 'n
', 'n
', ODD_SIZE, NRHS, INT_ONE, -ONE,
1435 $ AF( WORK_U+1 ), ODD_SIZE,
1436 $ WORK( 1+INT_ONE-INT_ONE ), INT_ONE, ONE,
1437 $ B( PART_OFFSET+1 ), LLDB )
1442.LT.
IF( MYCOLNP-1 ) THEN
1446 CALL SAXPY( NRHS, -( DU( PART_OFFSET+ODD_SIZE ) ),
1447 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB,
1448 $ B( PART_OFFSET+ODD_SIZE ), LLDB )
1454 CALL SDTTRSV( UPLO, 'n', odd_size, nrhs,
1455 $ du( part_offset+2 ), d( part_offset+1 ),
1456 $ du( part_offset+1 ), b( part_offset+1 ), lldb,
1470 IF( ictxt_save.NE.ictxt_new )
THEN
1483 work( 1 ) = work_size_min