1 SUBROUTINE pcdbtrsv( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA,
2 $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO )
10 INTEGER BWL, BWU, IB, INFO, JA, LAF, LWORK, N, NRHS
13 INTEGER DESCA( * ), DESCB( * )
14 COMPLEX A( * ), AF( * ), B( * ), ( * )
374 parameter( one = 1.0e+0 )
375 parameter( zero = 0.0e+0 )
377 parameter( cone = ( 1.0e+0, 0.0e+0 ) )
378 parameter( czero = ( 0.0e+0, 0.0e+0 ) )
380 parameter( int_one = 1 )
381 INTEGER DESCMULT, BIGNUM
382 parameter(descmult = 100, bignum = descmult * descmult)
383 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, ,
384 $ lld_, mb_, m_, nb_, n_, rsrc_
385 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
386 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
387 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
390 INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
391 $ idum1, idum2, idum3, ja_new, level_dist, llda,
392 $ lldb, max_bw, mbw2, mycol, myrow, my_num_cols,
394 $ part_offset, part_size
395 $ store_n_a, work_size_min, work_u
398 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
399 $ param_check( 18, 3 )
410 EXTERNAL lsame, numroc
413 INTRINSIC ichar,
min, mod
429 IF( return_code .NE. 0)
THEN
430 info = -( 9*100 + 2 )
435 IF( return_code .NE. 0)
THEN
436 info = -( 12*100 + 2 )
442 IF( desca_1xp( 2 ) .NE. descb_px1( 2 ) )
THEN
443 info = -( 12*100 + 2 )
450 IF( desca_1xp( 4 ) .NE. descb_px1( 4 ) )
THEN
451 info = -( 12*100 + 4 )
456 IF( desca_1xp( 5 ) .NE. descb_px1( 5 ) )
THEN
457 info = -( 12*100 + 5 )
462 ictxt = desca_1xp( 2 )
463 csrc = desca_1xp( 5 )
465 llda = desca_1xp( 6 )
466 store_n_a = desca_1xp( 3 )
467 lldb = descb_px1( 6 )
468 store_m_b = descb_px1( 3 )
475 max_bw =
max(bwl,bwu)
476 mbw2 = max_bw * max_bw
483 IF( lsame( uplo,
'U' ) )
THEN
485 ELSE IF ( lsame( uplo,
'L' ) )
THEN
491 IF( lsame( trans,
'N' ) )
THEN
493 ELSE IF ( lsame( trans,
'C' ) )
THEN
499 IF( lwork .LT. -1)
THEN
501 ELSE IF ( lwork .EQ. -1 )
THEN
511 IF( n+ja-1 .GT. store_n_a )
THEN
512 info = -( 9*100 + 6 )
515 IF(( bwl .GT. n-1 ) .OR.
516 $ ( bwl .LT. 0 ) )
THEN
520 IF(( bwu .GT. n-1 ) .OR.
521 $ ( bwu .LT. 0 ) )
THEN
525 IF( llda .LT. (bwl+bwu+1) )
THEN
526 info = -( 9*100 + 6 )
530 info = -( 9*100 + 4 )
533 IF( n+ib-1 .GT. store_m_b )
THEN
534 info = -( 12*100 + 3 )
537 IF( lldb .LT. nb )
THEN
538 info = -( 12*100 + 6 )
541 IF( nrhs .LT. 0 )
THEN
553 IF( nprow .NE. 1 )
THEN
557 IF( n .GT. np*nb-mod( ja-1, nb ))
THEN
560 $
'PCDBTRSV, D&C alg.: only 1 block per proc',
565 IF((ja+n-1.GT.nb) .AND. ( nb.LT.2*
max(bwl,bwu) ))
THEN
568 $
'PCDBTRSV, D&C alg.: NB too small',
577 work( 1 ) = work_size_min
579 IF( lwork .LT. work_size_min )
THEN
580 IF( lwork .NE. -1 )
THEN
583 $
'PCDBTRSV: worksize error',
591 param_check( 18, 1 ) = descb(5)
592 param_check( 17, 1 ) = descb(4)
593 param_check( 16, 1 ) = descb(3)
594 param_check( 15, 1 ) = descb(2)
595 param_check( 14, 1 ) = descb(1)
596 param_check( 13, 1 ) = ib
597 param_check( 12, 1 ) = desca(5)
598 param_check( 11, 1 ) = desca(4)
599 param_check( 10, 1 ) = desca(3)
600 param_check( 9, 1 ) = desca(1)
601 param_check( 8, 1 ) = ja
602 param_check( 7, 1 ) = nrhs
603 param_check( 6, 1 ) = bwu
604 param_check( 5, 1 ) = bwl
605 param_check( 4, 1 ) = n
606 param_check( 3, 1 ) = idum3
607 param_check( 2, 1 ) = idum2
608 param_check( 1, 1 ) = idum1
610 param_check( 18, 2 ) = 1205
611 param_check( 17, 2 ) = 1204
612 param_check( 16, 2 ) = 1203
613 param_check( 15, 2 ) = 1202
614 param_check( 14, 2 ) = 1201
615 param_check( 13, 2 ) = 11
616 param_check( 12, 2 ) = 905
617 param_check( 11, 2 ) = 904
618 param_check( 10, 2 ) = 903
619 param_check( 9, 2 ) = 901
620 param_check( 8, 2 ) = 8
621 param_check( 7, 2 ) = 6
622 param_check( 6, 2 ) = 5
623 param_check( 5, 2 ) = 4
624 param_check( 4, 2 ) = 3
625 param_check( 3, 2 ) = 16
626 param_check( 2, 2 ) = 2
627 param_check( 1, 2 ) = 1
635 ELSE IF( info.LT.-descmult )
THEN
638 info = -info * descmult
643 CALL globchk( ictxt, 18, param_check, 18,
644 $ param_check( 1, 3 ), info )
649 IF( info.EQ.bignum )
THEN
651 ELSE IF( mod( info, descmult ) .EQ. 0 )
THEN
652 info = -info / descmult
658 CALL pxerbla( ictxt,
'PCDBTRSV', -info )
674 part_offset = nb*( (ja-1)/(npcol*nb) )
676 IF ( (mycol-csrc) .LT. (ja-part_offset-1)/nb )
THEN
677 part_offset = part_offset + nb
680 IF ( mycol .LT. csrc )
THEN
681 part_offset = part_offset - nb
690 first_proc = mod( ( ja-1 )/nb+csrc, npcol )
694 ja_new = mod( ja-1, nb ) + 1
699 np = ( ja_new+n-2 )/nb + 1
703 CALL reshape( ictxt, int_one, ictxt_new, int_one,
704 $ first_proc, int_one, np )
710 desca_1xp( 2 ) = ictxt_new
711 descb_px1( 2 ) = ictxt_new
719 IF( myrow .LT. 0 )
THEN
732 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
736 IF ( mycol .EQ. 0 )
THEN
737 part_offset = part_offset+mod( ja_new-1, part_size )
738 my_num_cols = my_num_cols - mod(ja_new-1, part_size )
743 ofst = part_offset*llda
747 odd_size = my_num_cols
748 IF ( mycol .LT. np-1 )
THEN
749 odd_size = odd_size - max_bw
754 work_u = bwu*odd_size + 3*mbw2
760 IF ( lsame( uplo,
'L' ) )
THEN
762 IF ( lsame( trans,
'N' ) )
THEN
773 CALL ctbtrs( uplo,
'N', 'u
', ODD_SIZE,
775 $ A( OFST+1+BWU ), LLDA,
776 $ B( PART_OFFSET+1 ), LLDB, INFO )
779.LT.
IF ( MYCOL NP-1 ) THEN
787 CALL CLAMOV( 'n
', BWL, NRHS,
788 $ B( PART_OFFSET+ODD_SIZE-BWL+1), LLDB,
789 $ WORK( 1 ), MAX_BW )
791 CALL CTRMM( 'l
', 'u
', 'n
', 'n
', BWL, NRHS, -CONE,
792 $ A(( OFST+(BWL+BWU+1)+(ODD_SIZE-BWL)*LLDA )),
793 $ LLDA-1, WORK( 1 ), MAX_BW )
795 CALL CMATADD( BWL, NRHS, CONE, WORK( 1 ), MAX_BW,
796 $ CONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB )
802 DO 10 IDUM1=1, WORK_SIZE_MIN
807.NE.
IF ( MYCOL 0 ) THEN
811 CALL CGEMM( 'c
', 'n
', BWU, NRHS, ODD_SIZE, -CONE, AF( 1 ),
812 $ ODD_SIZE, B( PART_OFFSET+1 ), LLDB, CZERO,
813 $ WORK( 1+MAX_BW-BWU ), MAX_BW )
824.GT.
IF( MYCOL 0) THEN
826 CALL CGESD2D( ICTXT, MAX_BW, NRHS,
834.LT.
IF( MYCOL NPCOL-1) THEN
836 CALL CGERV2D( ICTXT, MAX_BW, NRHS,
842 CALL CMATADD( MAX_BW, NRHS, CONE,
843 $ WORK( 1 ), MAX_BW, CONE,
844 $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB )
851.EQ.
IF( MYCOL NPCOL-1 ) THEN
866.NE.
IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) 0 ) GOTO 11
870.GE.
IF( MYCOL-LEVEL_DIST 0 ) THEN
872 CALL CGERV2D( ICTXT, MAX_BW, NRHS,
874 $ MAX_BW, 0, MYCOL-LEVEL_DIST )
876 CALL CMATADD( MAX_BW, NRHS, CONE,
877 $ WORK( 1 ), MAX_BW, CONE,
878 $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB )
884.LT.
IF( MYCOL+LEVEL_DIST NPCOL-1 ) THEN
886 CALL CGERV2D( ICTXT, MAX_BW, NRHS,
888 $ MAX_BW, 0, MYCOL+LEVEL_DIST )
890 CALL CMATADD( MAX_BW, NRHS, CONE,
891 $ WORK( 1 ), MAX_BW, CONE,
892 $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB )
896 LEVEL_DIST = LEVEL_DIST*2
909 CALL CTBTRS( 'l
', 'n
', 'u
', MAX_BW, MIN( BWL, MAX_BW-1 ), NRHS,
910 $ AF( ODD_SIZE*BWU+MBW2+1 ), MAX_BW+1,
911 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO )
920.LE.
IF( MYCOL/LEVEL_DIST (NPCOL-1)/LEVEL_DIST-2 )THEN
924 CALL CGEMM( 'c
', 'n
', MAX_BW, NRHS, MAX_BW, -CONE,
925 $ AF( (ODD_SIZE)*BWU+1 ),
927 $ B( PART_OFFSET+ODD_SIZE+1 ),
934 CALL CGESD2D( ICTXT, MAX_BW, NRHS,
936 $ MAX_BW, 0, MYCOL+LEVEL_DIST )
942.GT..AND.
IF( (MYCOL/LEVEL_DIST 0 )
943.LE.
$ ( MYCOL/LEVEL_DIST (NPCOL-1)/LEVEL_DIST-1 ) ) THEN
949 CALL CGEMM( 'n
', 'n
', MAX_BW, NRHS, MAX_BW, -CONE,
950 $ AF( ODD_SIZE*BWU+2*MBW2+1 ),
952 $ B( PART_OFFSET+ODD_SIZE+1 ),
959 CALL CGESD2D( ICTXT, MAX_BW, NRHS,
961 $ MAX_BW, 0, MYCOL-LEVEL_DIST )
980.EQ.
IF( MYCOL NPCOL-1 ) THEN
988.NE.
IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) 0 ) GOTO 26
990 LEVEL_DIST = LEVEL_DIST*2
996.GT..AND.
IF( (MYCOL/LEVEL_DIST 0 )
997.LE.
$ ( MYCOL/LEVEL_DIST (NPCOL-1)/LEVEL_DIST-1 ) ) THEN
1001 CALL CGERV2D( ICTXT, MAX_BW, NRHS,
1003 $ MAX_BW, 0, MYCOL-LEVEL_DIST )
1009 CALL CGEMM( 'c
', 'n
', MAX_BW, NRHS, MAX_BW, -CONE,
1010 $ AF( ODD_SIZE*BWU+2*MBW2+1 ),
1014 $ B( PART_OFFSET+ODD_SIZE+1 ),
1020.LE.
IF( MYCOL/LEVEL_DIST (NPCOL-1)/LEVEL_DIST-2 )THEN
1024 CALL CGERV2D( ICTXT, MAX_BW, NRHS,
1026 $ MAX_BW, 0, MYCOL+LEVEL_DIST )
1030 CALL CGEMM( 'n
', 'n
', MAX_BW, NRHS, MAX_BW, -CONE,
1031 $ AF( (ODD_SIZE)*BWU+1 ),
1035 $ B( PART_OFFSET+ODD_SIZE+1 ),
1044 CALL CTBTRS( 'l
', 'c
', 'u
', MAX_BW, MIN( BWL, MAX_BW-1 ), NRHS,
1045 $ AF( ODD_SIZE*BWU+MBW2+1 ), MAX_BW+1,
1046 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO )
1048.NE.
IF( INFO0 ) THEN
1057.EQ.
IF( LEVEL_DIST 1 ) GOTO 21
1059 LEVEL_DIST = LEVEL_DIST/2
1063.LT.
IF( MYCOL+LEVEL_DIST NPCOL-1 ) THEN
1065 CALL CGESD2D( ICTXT, MAX_BW, NRHS,
1066 $ B( PART_OFFSET+ODD_SIZE+1 ),
1067 $ LLDB, 0, MYCOL+LEVEL_DIST )
1073.GE.
IF( MYCOL-LEVEL_DIST 0 ) THEN
1075 CALL CGESD2D( ICTXT, MAX_BW, NRHS,
1076 $ B( PART_OFFSET+ODD_SIZE+1 ),
1077 $ LLDB, 0, MYCOL-LEVEL_DIST )
1095.LT.
IF( MYCOL NPCOL-1) THEN
1097 CALL CGESD2D( ICTXT, MAX_BW, NRHS,
1098 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB,
1105.GT.
IF( MYCOL 0) THEN
1107 CALL CGERV2D( ICTXT, MAX_BW, NRHS,
1108 $ WORK( 1 ), MAX_BW,
1119.NE.
IF ( MYCOL 0 ) THEN
1123 CALL CGEMM( 'n
', 'n
', ODD_SIZE, NRHS, BWU, -CONE, AF( 1 ),
1124 $ ODD_SIZE, WORK( 1+MAX_BW-BWU ), MAX_BW, CONE,
1125 $ B( PART_OFFSET+1 ), LLDB )
1130.LT.
IF ( MYCOL NP-1 ) THEN
1138 CALL CLAMOV( 'n
', BWL, NRHS, B( PART_OFFSET+ODD_SIZE+1), LLDB,
1139 $ WORK( 1+MAX_BW-BWL ), MAX_BW )
1141 CALL CTRMM( 'l
', 'u
', 'c
', 'n
', BWL, NRHS, -CONE,
1142 $ A(( OFST+(BWL+BWU+1)+(ODD_SIZE-BWL)*LLDA )),
1143 $ LLDA-1, WORK( 1+MAX_BW-BWL ), MAX_BW )
1145 CALL CMATADD( BWL, NRHS, CONE, WORK( 1+MAX_BW-BWL ), MAX_BW,
1146 $ CONE, B( PART_OFFSET+ODD_SIZE-BWL+1 ), LLDB )
1152 CALL CTBTRS( UPLO, 'c
', 'u
', ODD_SIZE,
1155 $ LLDA, B( PART_OFFSET+1 ),
1166 IF ( LSAME( TRANS, 'c
' ) ) THEN
1177 CALL CTBTRS( UPLO, 'c
', 'n
', ODD_SIZE,
1179 $ A( OFST+1 ), LLDA,
1180 $ B( PART_OFFSET+1 ), LLDB, INFO )
1183.LT.
IF ( MYCOL NP-1 ) THEN
1191 CALL CLAMOV( 'n
', BWU, NRHS,
1192 $ B( PART_OFFSET+ODD_SIZE-BWU+1), LLDB,
1193 $ WORK( 1 ), MAX_BW )
1195 CALL CTRMM( 'l
', 'l
', 'c
', 'n
', BWU, NRHS, -CONE,
1196 $ A(( OFST+1+ODD_SIZE*LLDA )), LLDA-1, WORK( 1 ),
1199 CALL CMATADD( BWU, NRHS, CONE, WORK( 1 ), MAX_BW,
1200 $ CONE, B( PART_OFFSET+ODD_SIZE+1 ), LLDB )
1206 DO 20 IDUM1=1, WORK_SIZE_MIN
1211.NE.
IF ( MYCOL 0 ) THEN
1215 CALL CGEMM( 'c
', 'n
', BWL, NRHS, ODD_SIZE, -CONE,
1216 $ AF( WORK_U+1 ), ODD_SIZE, B( PART_OFFSET+1 ),
1217 $ LLDB, CZERO, WORK( 1+MAX_BW-BWL ), MAX_BW )
1228.GT.
IF( MYCOL 0) THEN
1230 CALL CGESD2D( ICTXT, MAX_BW, NRHS,
1231 $ WORK( 1 ), MAX_BW,
1238.LT.
IF( MYCOL NPCOL-1) THEN
1240 CALL CGERV2D( ICTXT, MAX_BW, NRHS,
1241 $ WORK( 1 ), MAX_BW,
1246 CALL CMATADD( MAX_BW, NRHS, CONE,
1247 $ WORK( 1 ), MAX_BW, CONE,
1248 $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB )
1255.EQ.
IF( MYCOL NPCOL-1 ) THEN
1270.NE.
IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) 0 ) GOTO 41
1274.GE.
IF( MYCOL-LEVEL_DIST 0 ) THEN
1276 CALL CGERV2D( ICTXT, MAX_BW, NRHS,
1278 $ MAX_BW, 0, MYCOL-LEVEL_DIST )
1280 CALL CMATADD( MAX_BW, NRHS, CONE,
1281 $ WORK( 1 ), MAX_BW, CONE,
1282 $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB )
1288.LT.
IF( MYCOL+LEVEL_DIST NPCOL-1 ) THEN
1290 CALL CGERV2D( ICTXT, MAX_BW, NRHS,
1292 $ MAX_BW, 0, MYCOL+LEVEL_DIST )
1294 CALL CMATADD( MAX_BW, NRHS, CONE,
1295 $ WORK( 1 ), MAX_BW, CONE,
1296 $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB )
1300 LEVEL_DIST = LEVEL_DIST*2
1313 CALL CTBTRS( 'u
', 'c
', 'n', max_bw,
min( bwu, max_bw-1 ), nrhs,
1314 $ af( odd_size*bwu+mbw2+1-
min( bwu, max_bw-1 ) ),
1315 $ max_bw+1, b( part_offset+odd_size+1 ), lldb,
1318 IF( info.NE.0 )
THEN
1325 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )
THEN
1329 CALL cgemm(
'C',
'N', max_bw, nrhs, max_bw, -cone,
1330 $ af( work_u+(odd_size)*bwl+1 ),
1332 $ b( part_offset+odd_size+1 ),
1339 CALL cgesd2d( ictxt, max_bw, nrhs,
1341 $ max_bw, 0, mycol+level_dist )
1347 IF( (mycol/level_dist .GT. 0 ).AND.
1348 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) )
THEN
1354 CALL cgemm(
'N',
'N', max_bw
1355 $ af( work_u+odd_size*bwl+2*mbw2+1 ),
1357 $ b( part_offset+odd_size+1 ),
1364 CALL cgesd2d( ictxt, max_bw, nrhs,
1366 $ max_bw, 0, mycol-level_dist )
1385 IF( mycol .EQ. npcol-1 )
THEN
1393 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 )
GOTO 56
1395 level_dist = level_dist*2
1401 IF( (mycol/level_dist .GT. 0 ).AND.
1402 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) )
THEN
1406 CALL cgerv2d( ictxt, max_bw, nrhs,
1408 $ max_bw, 0, mycol-level_dist )
1414 CALL cgemm(
'C',
'N', max_bw, nrhs, max_bw, -cone,
1415 $ af( work_u+odd_size*bwl+2*mbw2+1 ),
1419 $ b( part_offset+odd_size+1 ),
1425 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )
THEN
1429 CALL cgerv2d( ictxt, max_bw, nrhs,
1431 $ max_bw, 0, mycol+level_dist )
1435 CALL cgemm(
'N', 'n', max_bw, nrhs, max_bw, -cone,
1436 $ af( work_u+(odd_size)*bwl+1 ),
1440 $ b( part_offset+odd_size+1 ),
1449 CALL ctbtrs(
'U',
'N',
'N', max_bw,
min( bwu, max_bw-1 ), nrhs,
1450 $ af( odd_size*bwu+mbw2+1-
min( bwu, max_bw-1 ) ),
1451 $ max_bw+1, b( part_offset+odd_size+1 ), lldb,
1454 IF( info.NE.0 )
THEN
1463 IF( level_dist .EQ. 1 )
GOTO 51
1465 level_dist = level_dist/2
1469 IF( mycol+level_dist .LT. npcol-1 )
THEN
1471 CALL cgesd2d( ictxt, max_bw, nrhs,
1472 $ b( part_offset+odd_size+1 ),
1473 $ lldb, 0, mycol+level_dist )
1479 IF( mycol-level_dist .GE. 0 )
THEN
1481 CALL cgesd2d( ictxt, max_bw, nrhs,
1482 $ b( part_offset+odd_size+1 ),
1483 $ lldb, 0, mycol-level_dist )
1501 IF( mycol .LT. npcol-1)
THEN
1503 CALL cgesd2d( ictxt, max_bw, nrhs,
1511 IF( mycol .GT. 0)
THEN
1513 CALL cgerv2d( ictxt, max_bw, nrhs,
1514 $ work( 1 ), max_bw,
1525 IF ( mycol .NE. 0 )
THEN
1529 CALL cgemm(
'N',
'N', odd_size, nrhs, bwl, -cone,
1530 $ af( work_u+1 ), odd_size, work( 1+max_bw-bwl ),
1531 $ max_bw, cone, b( part_offset+1 ), lldb )
1536 IF ( mycol .LT. np-1 )
THEN
1544 CALL clamov(
'N', bwu, nrhs, b( part_offset+odd_size+1), lldb,
1545 $ work( 1+max_bw-bwu ), max_bw+bwl )
1547 CALL ctrmm(
'L',
'L',
'N',
'N', bwu
1548 $ a(( ofst+1+odd_size*llda )), llda-1,
1549 $ work( 1+max_bw-bwu ), max_bw+bwl )
1553 $ b( part_offset+odd_size-bwu+1 ), lldb )
1559 CALL ctbtrs( uplo,
'N',
'N', odd_size,
1562 $ llda, b( part_offset+1 ),
1576 IF( ictxt_save .NE. ictxt_new )
THEN
1589 work( 1 ) = work_size_min