1 SUBROUTINE pzdbtrsv( UPLO, TRANS, N, BWL, BWU, NRHS, A, JA, DESCA,
2 $ B, IB, DESCB, AF, LAF, WORK, LWORK, INFO )
10 INTEGER BWL, BWU, IB, INFO, , LAF, LWORK, N, NRHS
13 INTEGER DESCA( * ), DESCB( * )
14 COMPLEX*16 A( * ), AF( * ), B( * ), WORK( * )
373 DOUBLE PRECISION ONE, ZERO
374 parameter( one = 1.0d+0 )
375 parameter( zero = 0.0d+0 )
376 COMPLEX*16 CONE, CZERO
377 parameter( cone = ( 1.0d+0, 0.0d+0 ) )
378 parameter( czero = ( 0.0d+0, 0.0d+0 ) )
380 parameter( int_one = 1 )
382 parameter(descmult = 100, bignum = descmult * descmult)
383 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
384 $ lld_, mb_, m_, nb_, n_, rsrc_
385 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
386 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
387 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
390 INTEGER CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
391 $ idum1, idum2, idum3, ja_new, level_dist, llda,
392 $ lldb, max_bw, mbw2, mycol, myrow, my_num_cols,
393 $ nb, np, npcol, nprow, np_save, odd_size, ofst,
394 $ part_offset, part_size, return_code, store_m_b,
395 $ store_n_a, work_size_min, work_u
398 INTEGER DESCA_1XP( 7 ), DESCB_PX1( 7 ),
399 $ param_check( 18, 3 )
410 EXTERNAL lsame, numroc
413 INTRINSIC ichar,
min, mod
429 IF( return_code .NE. 0)
THEN
430 info = -( 9*100 + 2 )
435 IF( return_code .NE. 0)
THEN
436 info = -( 12*100 + 2 )
442 IF( desca_1xp( 2 ) .NE. descb_px1( 2 ) )
THEN
443 info = -( 12*100 + 2 )
450 IF( desca_1xp( 4 ) .NE. descb_px1( 4 ) )
THEN
451 info = -( 12*100 + 4 )
456 IF( desca_1xp( 5 ) .NE. descb_px1( 5 ) )
THEN
457 info = -( 12*100 + 5 )
462 ictxt = desca_1xp( 2 )
463 csrc = desca_1xp( 5 )
465 llda = desca_1xp( 6 )
466 store_n_a = desca_1xp( 3 )
467 lldb = descb_px1( 6 )
468 store_m_b = descb_px1( 3 )
475 max_bw =
max(bwl,bwu)
476 mbw2 = max_bw * max_bw
483 IF(
lsame( uplo,
'U' ) )
THEN
485 ELSE IF (
lsame( uplo,
'L' ) )
THEN
491 IF(
lsame( trans,
'N' ) )
THEN
493 ELSE IF (
lsame( trans,
'C' ) )
THEN
499 IF( lwork .LT. -1)
THEN
501 ELSE IF ( lwork .EQ. -1 )
THEN
511 IF( n+ja-1 .GT. store_n_a )
THEN
512 info = -( 9*100 + 6 )
515 IF(( bwl .GT. n-1 ) .OR.
516 $ ( bwl .LT. 0 ) )
THEN
520 IF(( bwu .GT. n-1 ) .OR.
521 $ ( bwu .LT. 0 ) )
THEN
525 IF( llda .LT. (bwl+bwu+1) )
THEN
526 info = -( 9*100 + 6 )
530 info = -( 9*100 + 4 )
533 IF( n+ib-1 .GT. store_m_b )
THEN
534 info = -( 12*100 + 3 )
537 IF( lldb .LT. nb )
THEN
538 info = -( 12*100 + 6 )
541 IF( nrhs .LT. 0 )
THEN
553 IF( nprow .NE. 1 )
THEN
557 IF( n .GT. np*nb-mod( ja-1, nb ))
THEN
560 $
'PZDBTRSV, D&C alg.: only 1 block per proc',
565 IF((ja+n-1.GT.nb) .AND. ( nb.LT.2*
max(bwl,bwu) ))
THEN
568 $
'PZDBTRSV, D&C alg.: NB too small',
577 work( 1 ) = work_size_min
579 IF( lwork .LT. work_size_min )
THEN
580 IF( lwork .NE. -1 )
THEN
583 $
'PZDBTRSV: worksize error',
591 param_check( 18, 1 ) = descb(5)
592 param_check( 17, 1 ) = descb(4)
593 param_check( 16, 1 ) = descb(3)
594 param_check( 15, 1 ) = descb(2)
595 param_check( 14, 1 ) = descb(1)
596 param_check( 13, 1 ) = ib
597 param_check( 12, 1 ) = desca(5)
598 param_check( 11, 1 ) = desca(4)
599 param_check( 10, 1 ) = desca(3)
600 param_check( 9, 1 ) = desca(1)
601 param_check( 8, 1 ) = ja
602 param_check( 7, 1 ) = nrhs
603 param_check( 6, 1 ) = bwu
604 param_check( 5, 1 ) = bwl
605 param_check( 4, 1 ) = n
606 param_check( 3, 1 ) = idum3
607 param_check( 2, 1 ) = idum2
608 param_check( 1, 1 ) = idum1
610 param_check( 18, 2 ) = 1205
611 param_check( 17, 2 ) = 1204
613 param_check( 15, 2 ) = 1202
614 param_check( 14, 2 ) = 1201
615 param_check( 13, 2 ) = 11
616 param_check( 12, 2 ) = 905
617 param_check( 11, 2 ) = 904
618 param_check( 10, 2 ) = 903
619 param_check( 9, 2 ) = 901
620 param_check( 8, 2 ) =
621 param_check( 7, 2 ) = 6
622 param_check( 6, 2 ) = 5
623 param_check( 5, 2 ) = 4
624 param_check( 4, 2 ) = 3
625 param_check( 3, 2 ) = 16
626 param_check( 2, 2 ) = 2
627 param_check( 1, 2 ) = 1
635 ELSE IF( info.LT.-descmult )
THEN
638 info = -info * descmult
643 CALL globchk( ictxt, 18, param_check, 18,
644 $ param_check( 1, 3 ), info )
649 IF( info.EQ.bignum )
THEN
651 ELSE IF( mod( info, descmult ) .EQ. 0 )
THEN
652 info = -info / descmult
658 CALL pxerbla( ictxt,
'PZDBTRSV', -info )
674 part_offset = nb*( (ja-1)/(npcol*nb) )
676 IF ( (mycol-csrc) .LT. (ja-part_offset-1)/nb )
THEN
677 part_offset = part_offset + nb
680 IF ( mycol .LT. csrc )
THEN
681 part_offset = part_offset - nb
690 first_proc = mod( ( ja-1 )/nb+csrc, npcol )
694 ja_new = mod( ja-1, nb ) + 1
699 np = ( ja_new+n-2 )/nb + 1
703 CALL reshape( ictxt, int_one, ictxt_new, int_one,
704 $ first_proc, int_one, np )
710 desca_1xp( 2 ) = ictxt_new
711 descb_px1( 2 ) = ictxt_new
719 IF( myrow .LT. 0 )
THEN
732 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
736 IF ( mycol .EQ. 0 )
THEN
737 part_offset = part_offset+mod( ja_new-1, part_size )
738 my_num_cols = my_num_cols - mod(ja_new-1, part_size )
743 ofst = part_offset*llda
747 odd_size = my_num_cols
748 IF ( mycol .LT. np-1 )
THEN
749 odd_size = odd_size - max_bw
754 work_u = bwu*odd_size + 3*mbw2
760 IF (
lsame( uplo,
'L' ) )
THEN
762 IF (
lsame( trans,
'N' ) )
THEN
773 CALL ztbtrs( uplo,
'N',
'U', odd_size,
775 $ a( ofst+1+bwu ), llda,
776 $ b( part_offset+1 ), lldb, info )
779 IF ( mycol .LT. np-1 )
THEN
787 CALL zlamov(
'N', bwl, nrhs,
788 $ b( part_offset+odd_size-bwl+1), lldb,
789 $ work( 1 ), max_bw )
791 CALL ztrmm(
'L',
'U',
'N',
'N', bwl, nrhs, -cone,
792 $ a(( ofst+(bwl+bwu+1)+(odd_size-bwl)*llda )),
793 $ llda-1, work( 1 ), max_bw )
795 CALL zmatadd( bwl, nrhs, cone, work( 1 ), max_bw,
796 $ cone, b( part_offset+odd_size+1 ), lldb )
802 DO 10 idum1=1, work_size_min
807 IF ( mycol .NE. 0 )
THEN
811 CALL zgemm( 'c
', 'n
', BWU, NRHS, ODD_SIZE, -CONE, AF( 1 ),
812 $ ODD_SIZE, B( PART_OFFSET+1 ), LLDB, CZERO,
813 $ WORK( 1+MAX_BW-BWU ), MAX_BW )
824.GT.
IF( MYCOL 0) THEN
826 CALL ZGESD2D( ICTXT, MAX_BW, NRHS,
834.LT.
IF( MYCOL NPCOL-1) THEN
836 CALL ZGERV2D( ICTXT, MAX_BW, NRHS,
842 CALL ZMATADD( MAX_BW, NRHS, CONE,
843 $ WORK( 1 ), MAX_BW, CONE,
844 $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB )
851.EQ.
IF( MYCOL NPCOL-1 ) THEN
866.NE.
IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) 0 ) GOTO 11
870.GE.
IF( MYCOL-LEVEL_DIST 0 ) THEN
872 CALL ZGERV2D( ICTXT, MAX_BW, NRHS,
874 $ MAX_BW, 0, MYCOL-LEVEL_DIST )
876 CALL ZMATADD( MAX_BW, NRHS, CONE,
877 $ WORK( 1 ), MAX_BW, CONE,
878 $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB )
884.LT.
IF( MYCOL+LEVEL_DIST NPCOL-1 ) THEN
886 CALL ZGERV2D( ICTXT, MAX_BW, NRHS,
888 $ MAX_BW, 0, MYCOL+LEVEL_DIST )
890 CALL ZMATADD( MAX_BW, NRHS, CONE,
891 $ WORK( 1 ), MAX_BW, CONE,
892 $ B( PART_OFFSET+ODD_SIZE + 1 ), LLDB )
896 LEVEL_DIST = LEVEL_DIST*2
909 CALL ZTBTRS( 'l
', 'n
', 'u
', MAX_BW, MIN( BWL, MAX_BW-1 ), NRHS,
910 $ AF( ODD_SIZE*BWU+MBW2+1 ), MAX_BW+1,
911 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO )
920.LE.
IF( MYCOL/LEVEL_DIST (NPCOL-1)/LEVEL_DIST-2 )THEN
924 CALL ZGEMM( 'c
', 'n
', MAX_BW, NRHS, MAX_BW, -CONE,
925 $ AF( (ODD_SIZE)*BWU+1 ),
927 $ B( PART_OFFSET+ODD_SIZE+1 ),
934 CALL ZGESD2D( ICTXT, MAX_BW, NRHS,
936 $ MAX_BW, 0, MYCOL+LEVEL_DIST )
942.GT..AND.
IF( (MYCOL/LEVEL_DIST 0 )
943.LE.
$ ( MYCOL/LEVEL_DIST (NPCOL-1)/LEVEL_DIST-1 ) ) THEN
949 CALL ZGEMM( 'n
', 'n
', MAX_BW, NRHS, MAX_BW, -CONE,
950 $ AF( ODD_SIZE*BWU+2*MBW2+1 ),
952 $ B( PART_OFFSET+ODD_SIZE+1 ),
959 CALL ZGESD2D( ICTXT, MAX_BW, NRHS,
961 $ MAX_BW, 0, MYCOL-LEVEL_DIST )
980.EQ.
IF( MYCOL NPCOL-1 ) THEN
988.NE.
IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) 0 ) GOTO 26
990 LEVEL_DIST = LEVEL_DIST*2
996.GT..AND.
IF( (MYCOL/LEVEL_DIST 0 )
997.LE.
$ ( MYCOL/LEVEL_DIST (NPCOL-1)/LEVEL_DIST-1 ) ) THEN
1001 CALL ZGERV2D( ICTXT, MAX_BW, NRHS,
1003 $ MAX_BW, 0, MYCOL-LEVEL_DIST )
1009 CALL ZGEMM( 'c
', 'n
', MAX_BW, NRHS, MAX_BW, -CONE,
1010 $ AF( ODD_SIZE*BWU+2*MBW2+1 ),
1014 $ B( PART_OFFSET+ODD_SIZE+1 ),
1020.LE.
IF( MYCOL/LEVEL_DIST (NPCOL-1)/LEVEL_DIST-2 )THEN
1024 CALL ZGERV2D( ICTXT, MAX_BW, NRHS,
1026 $ MAX_BW, 0, MYCOL+LEVEL_DIST )
1030 CALL ZGEMM( 'n
', 'n
', MAX_BW, NRHS, MAX_BW, -CONE,
1031 $ AF( (ODD_SIZE)*BWU+1 ),
1035 $ B( PART_OFFSET+ODD_SIZE+1 ),
1044 CALL ZTBTRS( 'l
', 'c
', 'u
', MAX_BW, MIN( BWL, MAX_BW-1 ), NRHS,
1045 $ AF( ODD_SIZE*BWU+MBW2+1 ), MAX_BW+1,
1046 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO )
1048.NE.
IF( INFO0 ) THEN
1057.EQ.
IF( LEVEL_DIST 1 ) GOTO 21
1059 LEVEL_DIST = LEVEL_DIST/2
1063.LT.
IF( MYCOL+LEVEL_DIST NPCOL-1 ) THEN
1065 CALL ZGESD2D( ICTXT, MAX_BW, NRHS,
1066 $ B( PART_OFFSET+ODD_SIZE+1 ),
1067 $ LLDB, 0, MYCOL+LEVEL_DIST )
1073.GE.
IF( MYCOL-LEVEL_DIST 0 ) THEN
1075 CALL ZGESD2D( ICTXT, MAX_BW, NRHS,
1076 $ B( PART_OFFSET+ODD_SIZE+1 ),
1077 $ LLDB, 0, MYCOL-LEVEL_DIST )
1095.LT.
IF( MYCOL NPCOL-1) THEN
1097 CALL ZGESD2D( ICTXT, MAX_BW, NRHS,
1098 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB,
1105.GT.
IF( MYCOL 0) THEN
1107 CALL ZGERV2D( ICTXT, MAX_BW, NRHS,
1108 $ WORK( 1 ), MAX_BW,
1119.NE.
IF ( MYCOL 0 ) THEN
1123 CALL ZGEMM( 'n',
'N', odd_size, nrhs, bwu, -cone, af( 1 ),
1124 $ odd_size, work( 1+max_bw-bwu ), max_bw, cone,
1125 $ b( part_offset+1 ), lldb )
1130 IF ( mycol .LT. np-1 )
THEN
1138 CALL zlamov(
'N', bwl, nrhs, b( part_offset+odd_size+1), lldb,
1139 $ work( 1+max_bw-bwl ), max_bw )
1141 CALL ztrmm(
'L',
'U',
'C',
'N', bwl, nrhs, -cone,
1142 $ a(( ofst+(bwl+bwu+1)+(odd_size-bwl)*llda )),
1143 $ llda-1, work( 1+max_bw-bwl ), max_bw )
1145 CALL zmatadd( bwl, nrhs, cone, work( 1+max_bw-bwl ), max_bw,
1146 $ cone, b( part_offset+odd_size-bwl+1 ), lldb )
1152 CALL ztbtrs( uplo,
'C',
'U', odd_size,
1155 $ llda, b( part_offset+1 ),
1166 IF (
lsame( trans,
'C' ) )
THEN
1177 CALL ztbtrs( uplo,
'C',
'N', odd_size,
1179 $ a( ofst+1 ), llda,
1180 $ b( part_offset+1 ), lldb
1183 IF ( mycol .LT. np-1 )
THEN
1191 CALL zlamov(
'N', bwu, nrhs,
1192 $ b( part_offset+odd_size-bwu+1), lldb,
1193 $ work( 1 ), max_bw )
1195 CALL ztrmm(
'L',
'L',
'C',
'N', bwu, nrhs, -cone,
1196 $ a(( ofst+1+odd_size*llda )), llda-1, work( 1 ),
1199 CALL zmatadd( bwu, nrhs, cone, work( 1 ), max_bw,
1200 $ cone, b( part_offset+odd_size+1 ), lldb )
1206 DO 20 idum1=1, work_size_min
1211 IF ( mycol .NE. 0 )
THEN
1215 CALL zgemm(
'C',
'N', bwl, nrhs, odd_size, -cone,
1216 $ af( work_u+1 ), odd_size, b
1217 $ lldb, czero, work( 1+max_bw-bwl ), max_bw )
1228 IF( mycol .GT. 0)
THEN
1230 CALL zgesd2d( ictxt, max_bw, nrhs,
1231 $ work( 1 ), max_bw,
1238 IF( mycol .LT. npcol-1)
THEN
1240 CALL zgerv2d( ictxt, max_bw, nrhs,
1241 $ work( 1 ), max_bw,
1246 CALL zmatadd( max_bw, nrhs, cone,
1247 $ work( 1 ), max_bw, cone,
1248 $ b( part_offset+odd_size + 1 ), lldb )
1255 IF( mycol .EQ. npcol-1 )
THEN
1270 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 )
GOTO 41
1274 IF( mycol-level_dist .GE. 0 )
THEN
1276 CALL zgerv2d( ictxt, max_bw, nrhs,
1278 $ max_bw, 0, mycol-level_dist )
1280 CALL zmatadd( max_bw, nrhs, cone,
1281 $ work( 1 ), max_bw, cone,
1282 $ b( part_offset+odd_size + 1 ), lldb )
1288 IF( mycol+level_dist .LT. npcol-1 )
THEN
1290 CALL zgerv2d( ictxt, max_bw, nrhs,
1292 $ max_bw, 0, mycol+level_dist )
1294 CALL zmatadd( max_bw, nrhs, cone,
1295 $ work( 1 ), max_bw, cone,
1296 $ b( part_offset+odd_size + 1 ), lldb )
1300 level_dist = level_dist*2
1314 $ af( odd_size*bwu+mbw2+1-
min( bwu, max_bw-1 ) ),
1315 $ max_bw+1, b( part_offset+odd_size+1 ), lldb,
1318 IF( info.NE.0 )
THEN
1325 IF( mycol/level_dist .LE. (npcol-1)/level_dist-2 )
THEN
1330 $ af( work_u+(odd_size)*bwl+1 ),
1339 CALL zgesd2d( ictxt, max_bw, nrhs,
1341 $ max_bw, 0, mycol+level_dist )
1347 IF( (mycol/level_dist .GT. 0 ).AND.
1348 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) )
THEN
1354 CALL zgemm(
'N',
'N', max_bw, nrhs, max_bw, -cone,
1355 $ af( work_u+odd_size*bwl+2*mbw2+1 ),
1357 $ b( part_offset+odd_size+1 ),
1364 CALL zgesd2d( ictxt, max_bw, nrhs,
1366 $ max_bw, 0, mycol-level_dist )
1385 IF( mycol .EQ. npcol-1 )
THEN
1393 IF( mod( (mycol+1)/level_dist, 2) .NE. 0 )
GOTO 56
1395 level_dist = level_dist*2
1401 IF( (mycol/level_dist .GT. 0 ).AND.
1402 $ ( mycol/level_dist .LE. (npcol-1)/level_dist-1 ) )
THEN
1406 CALL zgerv2d( ictxt, max_bw, nrhs,
1408 $ max_bw, 0, mycol-level_dist )
1414 CALL zgemm(
'C', 'n
', MAX_BW, NRHS, MAX_BW, -CONE,
1415 $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ),
1419 $ B( PART_OFFSET+ODD_SIZE+1 ),
1425.LE.
IF( MYCOL/LEVEL_DIST (NPCOL-1)/LEVEL_DIST-2 )THEN
1429 CALL ZGERV2D( ICTXT, MAX_BW, NRHS,
1431 $ MAX_BW, 0, MYCOL+LEVEL_DIST )
1435 CALL ZGEMM( 'n
', 'n
', MAX_BW, NRHS, MAX_BW, -CONE,
1436 $ AF( WORK_U+(ODD_SIZE)*BWL+1 ),
1440 $ B( PART_OFFSET+ODD_SIZE+1 ),
1449 CALL ZTBTRS( 'u
', 'n
', 'n
', MAX_BW, MIN( BWU, MAX_BW-1 ), NRHS,
1450 $ AF( ODD_SIZE*BWU+MBW2+1-MIN( BWU, MAX_BW-1 ) ),
1451 $ MAX_BW+1, B( PART_OFFSET+ODD_SIZE+1 ), LLDB,
1454.NE.
IF( INFO0 ) THEN
1463.EQ.
IF( LEVEL_DIST 1 ) GOTO 51
1465 LEVEL_DIST = LEVEL_DIST/2
1469.LT.
IF( MYCOL+LEVEL_DIST NPCOL-1 ) THEN
1471 CALL ZGESD2D( ICTXT, MAX_BW, NRHS,
1472 $ B( PART_OFFSET+ODD_SIZE+1 ),
1473 $ LLDB, 0, MYCOL+LEVEL_DIST )
1479.GE.
IF( MYCOL-LEVEL_DIST 0 ) THEN
1481 CALL ZGESD2D( ICTXT, MAX_BW, NRHS,
1482 $ B( PART_OFFSET+ODD_SIZE+1 ),
1483 $ LLDB, 0, MYCOL-LEVEL_DIST )
1501.LT.
IF( MYCOL NPCOL-1) THEN
1503 CALL ZGESD2D( ICTXT, MAX_BW, NRHS,
1504 $ B( PART_OFFSET+ODD_SIZE+1 ), LLDB,
1511.GT.
IF( MYCOL 0) THEN
1513 CALL ZGERV2D( ICTXT, MAX_BW, NRHS,
1514 $ WORK( 1 ), MAX_BW,
1525.NE.
IF ( MYCOL 0 ) THEN
1529 CALL ZGEMM( 'n
', 'n
', ODD_SIZE, NRHS, BWL, -CONE,
1530 $ AF( WORK_U+1 ), ODD_SIZE, WORK( 1+MAX_BW-BWL ),
1531 $ MAX_BW, CONE, B( PART_OFFSET+1 ), LLDB )
1536.LT.
IF ( MYCOL NP-1 ) THEN
1544 CALL ZLAMOV( 'n
', BWU, NRHS, B( PART_OFFSET+ODD_SIZE+1), LLDB,
1545 $ WORK( 1+MAX_BW-BWU ), MAX_BW+BWL )
1547 CALL ZTRMM( 'l
', 'l
', 'n', 'n
', BWU, NRHS, -CONE,
1548 $ A(( OFST+1+ODD_SIZE*LLDA )), LLDA-1,
1549 $ WORK( 1+MAX_BW-BWU ), MAX_BW+BWL )
1551 CALL ZMATADD( BWU, NRHS, CONE, WORK( 1+MAX_BW-BWU ),
1553 $ B( PART_OFFSET+ODD_SIZE-BWU+1 ), LLDB )
1559 CALL ZTBTRS( UPLO, 'n
', 'n
', ODD_SIZE,
1562 $ LLDA, B( PART_OFFSET+1 ),
1576.NE.
IF( ICTXT_SAVE ICTXT_NEW ) THEN
1577 CALL BLACS_GRIDEXIT( ICTXT_NEW )
1589 WORK( 1 ) = WORK_SIZE_MIN