1 SUBROUTINE pdlarzb( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V,
2 $ IV, JV, DESCV, T, C, IC, JC, DESCC, WORK )
9 CHARACTER DIRECT, SIDE, STOREV, TRANS
10 INTEGER IC, IV, JC, JV, K, L, M, N
13 INTEGER DESCC( * ), DESCV( * )
14 DOUBLE PRECISION C( * ), T( * ), V( * ), WORK( * )
221 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
222 $ lld_, mb_, m_, nb_, n_, rsrc_
223 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
224 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
225 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
226 DOUBLE PRECISION ONE, ZERO
227 parameter( one = 1.0d+0, zero = 0.0d+0 )
231 CHARACTER COLBTOP, TRANST
232 INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV,
233 $ icrow1, icrow2, ictxt, iibeg, iic1, iic2,
234 $ iiend, iinxt, iiv, ileft, info, ioffc2, ioffv,
235 $ ipt, ipv, ipw, iroffc1, iroffc2, itop, ivcol,
236 $ ivrow, jjbeg, jjend, jjnxt, jjc1, jjc2, jjv,
237 $ ldc, ldv, lv, lw, mbc
238 $ mqv, mqv0, mycol, mydist, myrow, nbc, nbv,
239 $ npcol, nprow, nqc1, nqc2, nqcall, nqv
252 INTEGER ICEIL, NUMROC
253 EXTERNAL iceil, lsame, numroc
259 IF( m.LE.0 .OR. n.LE.0 .OR. k.LE.0 )
264 ictxt = descc( ctxt_ )
270 IF( .NOT.lsame( direct, 'b
' ) ) THEN
272.NOT.
ELSE IF( LSAME( STOREV, 'r
' ) ) THEN
276 CALL PXERBLA( ICTXT, 'pdlarzb', -INFO )
277 CALL BLACS_ABORT( ICTXT, 1 )
281 LEFT = LSAME( SIDE, 'l
' )
282 IF( LSAME( TRANS, 'n
' ) ) THEN
288 CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV,
292 ICOFFV = MOD( JV-1, NBV )
293 NQV = NUMROC( L+ICOFFV, NBV, MYCOL, IVCOL, NPCOL )
297 IIV = MIN( IIV, LDV )
298 JJV = MIN( JJV, MAX( 1, NUMROC( DESCV( N_ ), NBV, MYCOL,
299 $ DESCV( CSRC_ ), NPCOL ) ) )
300 IOFFV = IIV + ( JJV-1 ) * LDV
303 NQCALL = NUMROC( DESCC( N_ ), NBC, MYCOL, DESCC( CSRC_ ), NPCOL )
304 CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC1,
305 $ JJC1, ICROW1, ICCOL1 )
307 IIC1 = MIN( IIC1, LDC )
308 JJC1 = MIN( JJC1, MAX( 1, NQCALL ) )
311 IROFFC1 = MOD( IC-1, MBC )
312 MPC1 = NUMROC( K+IROFFC1, MBC, MYROW, ICROW1, NPROW )
313.EQ.
IF( MYROWICROW1 )
314 $ MPC1 = MPC1 - IROFFC1
315 ICOFFC1 = MOD( JC-1, NBC )
316 NQC1 = NUMROC( N+ICOFFC1, NBC, MYCOL, ICCOL1, NPCOL )
317.EQ.
IF( MYCOLICCOL1 )
318 $ NQC1 = NQC1 - ICOFFC1
319 CALL INFOG2L( IC+M-L, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL,
320 $ IIC2, JJC2, ICROW2, ICCOL2 )
321 IROFFC2 = MOD( IC+M-L-1, MBC )
322 MPC2 = NUMROC( L+IROFFC2, MBC, MYROW, ICROW2, NPROW )
323.EQ.
IF( MYROWICROW2 )
324 $ MPC2 = MPC2 - IROFFC2
328 IROFFC1 = MOD( IC-1, MBC )
329 MPC1 = NUMROC( M+IROFFC1, MBC, MYROW, ICROW1, NPROW )
330.EQ.
IF( MYROWICROW1 )
331 $ MPC1 = MPC1 - IROFFC1
332 ICOFFC1 = MOD( JC-1, NBC )
333 NQC1 = NUMROC( K+ICOFFC1, NBC, MYCOL, ICCOL1, NPCOL )
334.EQ.
IF( MYCOLICCOL1 )
335 $ NQC1 = NQC1 - ICOFFC1
336 CALL INFOG2L( IC, JC+N-L, DESCC, NPROW, NPCOL, MYROW, MYCOL,
337 $ IIC2, JJC2, ICROW2, ICCOL2 )
340 ICOFFC2 = MOD( JC+N-L-1, NBC )
341 NQC2 = NUMROC( L+ICOFFC2, NBC, MYCOL, ICCOL2, NPCOL )
342.EQ.
IF( MYCOLICCOL2 )
343 $ NQC2 = NQC2 - ICOFFC2
345 IIC2 = MIN( IIC2, LDC )
346 JJC2 = MIN( JJC2, NQCALL )
347 IOFFC2 = IIC2 + ( JJC2-1 ) * LDC
349 IF( LSAME( SIDE, 'l
' ) ) THEN
356 MQV0 = NUMROC( M+ICOFFV, NBV, MYCOL, IVCOL, NPCOL )
357.EQ.
IF( MYCOLIVCOL ) THEN
362.EQ.
IF( MYROWICROW2 ) THEN
363 MPC20 = MPC2 + IROFFC2
374 IPW = IPV + MPC20 * K
379.EQ.
IF( MYROWIVROW ) THEN
380.EQ.
IF( MYCOLIVCOL ) THEN
381 CALL DLAMOV( 'all
', K, MQV, V( IOFFV ), LDV,
382 $ WORK( IPW+ICOFFV*LW ), LW )
384 CALL DLAMOV( 'all
', K, MQV, V( IOFFV ), LDV,
391 CALL PBDTRAN( ICTXT, 'rowwise
', 'transpose
', K, M+ICOFFV,
392 $ DESCV( NB_ ), WORK( IPW ), LW, ZERO,
393 $ WORK( IPV ), LV, IVROW, IVCOL, ICROW2, -1,
398.EQ.
IF( MYROWICROW2 )
399 $ IPV = IPV + IROFFC2
407 CALL DGEMM( 'transpose
', 'no transpose
', NQC2, K, MPC2,
408 $ ONE, C( IOFFC2 ), LDC, WORK( IPV ), LV, ZERO,
411 CALL DLASET( 'all
', NQC2, K, ZERO, ZERO, WORK( IPW ), LW )
417 MYDIST = MOD( MYROW-ICROW1+NPROW, NPROW )
418 ITOP = MAX( 0, MYDIST * MBC - IROFFC1 )
420 IIEND = IIC1 + MPC1 - 1
421 IINXT = MIN( ICEIL( IIBEG, MBC ) * MBC, IIEND )
424.LE.
IF( IIBEGIINXT ) THEN
425 CALL PBDMATADD( ICTXT, 'transpose
', NQC2, IINXT-IIBEG+1,
426 $ ONE, C( IIBEG+(JJC1-1)*LDC ), LDC, ONE,
427 $ WORK( IPW+ITOP ), LW )
428 MYDIST = MYDIST + NPROW
429 ITOP = MYDIST * MBC - IROFFC1
431 IINXT = MIN( IINXT+MBC, IIEND )
436 CALL DGSUM2D( ICTXT, 'columnwise
', ' ', NQC2, K, WORK( IPW ),
441.EQ.
IF( MYROWIVROW ) THEN
442.EQ.
IF( MYCOLIVCOL ) THEN
446 CALL DTRBS2D( ICTXT, 'rowwise
', ' ', 'lower
', 'non unit
',
449 CALL DTRBR2D( ICTXT, 'rowwise
', ' ', 'lower
', 'non unit
',
450 $ K, K, T, MBV, MYROW, IVCOL )
452 CALL DTRMM( 'right
', 'lower
', TRANST, 'non unit
', NQC2, K,
453 $ ONE, T, MBV, WORK( IPW ), LW )
455 CALL DGEBS2D( ICTXT, 'columnwise
', ' ', NQC2, K,
458 CALL DGEBR2D( ICTXT, 'columnwise
', ' ', NQC2, K,
459 $ WORK( IPW ), LW, IVROW, MYCOL )
465 MYDIST = MOD( MYROW-ICROW1+NPROW, NPROW )
466 ITOP = MAX( 0, MYDIST * MBC - IROFFC1 )
468 IIEND = IIC1 + MPC1 - 1
469 IINXT = MIN( ICEIL( IIBEG, MBC ) * MBC, IIEND )
472.LE.
IF( IIBEGIINXT ) THEN
473 CALL PBDMATADD( ICTXT, 'transpose
', IINXT-IIBEG+1, NQC2,
474 $ -ONE, WORK( IPW+ITOP ), LW, ONE,
475 $ C( IIBEG+(JJC1-1)*LDC ), LDC )
476 MYDIST = MYDIST + NPROW
477 ITOP = MYDIST * MBC - IROFFC1
479 IINXT = MIN( IINXT+MBC, IIEND )
488 CALL DGEMM( 'no transpose
', 'transpose
', MPC2, NQC2, K, -ONE,
489 $ WORK( IPV ), LV, WORK( IPW ), LW, ONE,
507 CALL PB_TOPGET( ICTXT, 'broadcast
', 'columnwise
', COLBTOP )
508.EQ.
IF( MYROWIVROW ) THEN
509 CALL DGEBS2D( ICTXT, 'columnwise
', COLBTOP, K, NQC2,
512 $ CALL DTRBS2D( ICTXT, 'columnwise
', COLBTOP, 'lower
',
513 $ 'non unit
', K, K, T, MBV )
514 CALL DLAMOV( 'all
', K, NQC2, V( IOFFV ), LDV, WORK( IPV ),
517 CALL DGEBR2D( ICTXT, 'columnwise
', COLBTOP, K, NQC2,
518 $ WORK( IPV ), LV, IVROW, MYCOL )
520 $ CALL DTRBR2D( ICTXT, 'columnwise
', COLBTOP, 'lower
',
521 $ 'non unit
', K, K, T, MBV, IVROW, MYCOL )
528 CALL DGEMM( 'no transpose',
'Transpose', mpc2, k, nqc2,
529 $ one, c( ioffc2 ), ldc, work( ipv ), lv, zero,
532 CALL dlaset(
'All', mpc2, k, zero, zero, work( ipw ), lw )
538 mydist = mod( mycol-iccol1+npcol, npcol )
539 ileft =
max( 0, mydist * nbc - icoffc1 )
541 jjend = jjc1 + nqc1 - 1
542 jjnxt =
min( iceil( jjbeg, nbc ) * nbc, jjend )
545 IF( jjbeg.LE.jjnxt )
THEN
546 CALL pbdmatadd( ictxt,
'No transpose', mpc2,
547 $ jjnxt-jjbeg+1, one,
548 $ c( iic1+(jjbeg-1)*ldc ), ldc, one,
549 $ work( ipw+ileft*lw ), lw )
550 mydist = mydist + npcol
551 ileft = mydist * nbc - icoffc1
553 jjnxt =
min( jjnxt+nbc, jjend )
558 CALL dgsum2d( ictxt,
'Rowwise',
' ', mpc2, k, work( ipw ),
563 IF( mycol.EQ.ivcol )
THEN
564 CALL dtrmm(
'Right', 'lower
', TRANS, 'non unit
', MPC2, K,
565 $ ONE, T, MBV, WORK( IPW ), LW )
566 CALL DGEBS2D( ICTXT, 'rowwise
', ' ', MPC2, K, WORK( IPW ),
569 CALL DGEBR2D( ICTXT, 'rowwise
', ' ', MPC2, K, WORK( IPW ),
576 MYDIST = MOD( MYCOL-ICCOL1+NPCOL, NPCOL )
577 ILEFT = MAX( 0, MYDIST * NBC - ICOFFC1 )
579 JJEND = JJC1 + NQC1 - 1
580 JJNXT = MIN( ICEIL( JJBEG, NBC ) * NBC, JJEND )
583.LE.
IF( JJBEGJJNXT ) THEN
584 CALL PBDMATADD( ICTXT, 'no transpose
', MPC2,
585 $ JJNXT-JJBEG+1, -ONE,
586 $ WORK( IPW+ILEFT*LW ), LW, ONE,
587 $ C( IIC1+(JJBEG-1)*LDC ), LDC )
588 MYDIST = MYDIST + NPCOL
589 ILEFT = MYDIST * NBC - ICOFFC1
591 JJNXT = MIN( JJNXT+NBC, JJEND )
601 $ CALL DGEMM( 'no transpose
', 'no transpose
', MPC2, NQC2, K,
602 $ -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE,
subroutine pbdtran(icontxt, adist, trans, m, n, nb, a, lda, beta, c, ldc, iarow, iacol, icrow, iccol, work)
subroutine pdlarzb(side, trans, direct, storev, m, n, k, l, v, iv, jv, descv, t, c, ic, jc, descc, work)