pslaqr2_8f_source.html

      SUBROUTINE PSLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, A, DESCA,
     $                    ILOZ, IHIZ, Z, DESCZ, NS, ND, SR, SI, T, LDT,
     $                    V, LDV, WR, WI, WORK, LWORK )
*
*     Contribution from the Department of Computing Science and HPC2N,
*     Umea University, Sweden
*
*  -- ScaLAPACK routine (version 2.0.2) --
*     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
*     May 1 2012
*
      IMPLICIT NONE
*
*     .. Scalar Arguments ..
      INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDT, LDV, LWORK, N, ND,
     $                   NS, NW
      LOGICAL            WANTT, WANTZ
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCZ( * )
      REAL               A( * ), SI( KBOT ), SR( KBOT ), T( LDT, * ),
     $                   V( LDV, * ), WORK( * ), WI( * ), WR( * ),
     $                   Z( * )
*     ..
*
*  Purpose
*  =======
*
*  Aggressive early deflation:
*
*  PSLAQR2 accepts as input an upper Hessenberg matrix A and performs an
*  orthogonal similarity transformation designed to detect and deflate
*  fully converged eigenvalues from a trailing principal submatrix.  On
*  output A has been overwritten by a new Hessenberg matrix that is a
*  perturbation of an orthogonal similarity transformation of A.  It is
*  to be hoped that the final version of A has many zero subdiagonal
*  entries.
*
*  This routine handles small deflation windows that are affordable on
*  one processor: the NW-by-NW trailing window is gathered onto the
*  process that owns its top-left entry, SLAQR3 is run there, and the
*  resulting orthogonal factor V is broadcast and applied to the
*  off-window parts of A (and to Z).  Normally, it is called by
*  PSLAQR1.  All the inputs are assumed to be valid without checking.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  WANTT   (global input) LOGICAL
*          If .TRUE., then the Hessenberg matrix A is fully updated
*          (rows 1:KBOT-NW of the window columns and columns
*          KBOT+1:N of the window rows) so that the quasi-triangular
*          Schur factor may be computed (in cooperation with the
*          calling subroutine).
*          If .FALSE., then only rows KTOP:KBOT-NW of the window
*          columns are updated, which is enough to preserve the
*          eigenvalues.
*
*  WANTZ   (global input) LOGICAL
*          If .TRUE., then the orthogonal matrix Z is updated so
*          that the orthogonal Schur factor may be computed
*          (in cooperation with the calling subroutine).
*          If .FALSE., then Z is not referenced.
*
*  N       (global input) INTEGER
*          The order of the matrix A and (if WANTZ is .TRUE.) the
*          order of the orthogonal matrix Z.  The routine returns
*          immediately when N = 0.
*
*  KTOP    (global input) INTEGER
*  KBOT    (global input) INTEGER
*          It is assumed without a check that either
*          KBOT = N or A(KBOT+1,KBOT)=0.  KBOT and KTOP together
*          determine an isolated block along the diagonal of the
*          Hessenberg matrix. However, A(KTOP,KTOP-1)=0 is not
*          strictly necessary if WANTT is .TRUE. .
*
*  NW      (global input) INTEGER
*          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
*          Normally NW .GE. 3 if PSLAQR2 is called by PSLAQR1.
*
*  A       (local input/output) REAL             array, dimension
*          (DESCA(LLD_),*)
*          On input the initial N-by-N section of A stores the
*          Hessenberg matrix undergoing aggressive early deflation.
*          On output A has been transformed by an orthogonal
*          similarity transformation, perturbed, and then returned
*          to Hessenberg form that (it is to be hoped) has some
*          zero subdiagonal entries.
*          A is only modified when ND > 0.
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  ILOZ    (global input) INTEGER
*  IHIZ    (global input) INTEGER
*          Specify the rows of Z to which transformations must be
*          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
*
*  Z       (input/output) REAL             array, dimension
*          (DESCZ(LLD_),*)
*          IF WANTZ is .TRUE., then on output, the orthogonal
*          similarity transformation mentioned above has been
*          accumulated into Z(ILOZ:IHIZ,KBOT-NW+1:KBOT) from the
*          right.
*          If WANTZ is .FALSE., then Z is unreferenced.
*
*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix Z.
*
*  NS      (output) INTEGER
*          The number of unconverged (ie approximate) eigenvalues
*          that may be used as shifts by the calling subroutine.
*          NOTE(review): NS is assigned only by the SLAQR3 call on
*          the process that owns the deflation window; this routine
*          broadcasts ND but not NS.  Confirm how the caller uses NS
*          on the other processes.
*
*  ND      (global output) INTEGER
*          The number of converged eigenvalues uncovered by this
*          subroutine.  Broadcast to all processes in the grid.
*
*  SR      (global output) REAL             array, dimension KBOT
*  SI      (global output) REAL             array, dimension KBOT
*          On proc #0, the real and imaginary parts of converged
*          eigenvalues are stored in SR(KBOT-ND+1) through SR(KBOT) and
*          SI(KBOT-ND+1) through SI(KBOT), respectively. On other
*          processors, these entries are set to zero.
*          No other entries of SR and SI are written here.
*          NOTE(review): earlier documentation stated that the NS
*          shift estimates are returned in SR/SI(KBOT-ND-NS+1:
*          KBOT-ND); the code below does not store them.  Also, the
*          converged values are read from WR/WI on proc #0 although
*          SLAQR3 fills WR/WI only on the window owner - confirm the
*          caller's handling.
*
*  T       (local workspace) REAL             array, dimension LDT*NW.
*          Receives the NW-by-NW deflation window on the process
*          that owns A(KBOT-NW+1,KBOT-NW+1).
*
*  LDT     (local input) INTEGER
*          The leading dimension of the array T.
*          LDT >= NW.
*
*  V       (local workspace) REAL             array, dimension LDV*NW.
*          Holds the NW-by-NW orthogonal factor produced by SLAQR3;
*          it is broadcast to every process.
*
*  LDV     (local input) INTEGER
*          The leading dimension of the array V.
*          LDV >= NW.
*
*  WR      (local workspace) REAL             array, dimension KBOT.
*  WI      (local workspace) REAL             array, dimension KBOT.
*          Passed to SLAQR3 as its SR/SI arguments; this routine
*          itself indexes entries NW-ND+1 through NW.
*
*  WORK    (local workspace) REAL             array, dimension LWORK.
*
*  LWORK   (local input) INTEGER
*          The length of WORK.  WORK(1:3*NW*NW) is handed to SLAQR3
*          as three NW-by-NW panels and WORK(3*NW*NW+1:LWORK) as its
*          general workspace, so LWORK must exceed 3*NW*NW by at
*          least the amount SLAQR3 requires.  The update loops
*          process panels of LWORK/NW rows or columns at a time.
*
*  ================================================================
*  Implemented by
*        Meiyue Shao, Department of Computing Science and HPC2N,
*        Umea University, Sweden
*
*  ================================================================
*  References:
*        B. Kagstrom, D. Kressner, and M. Shao,
*        On Aggressive Early Deflation in Parallel Variants of the QR
*        Algorithm.
*        Para 2010, to appear.
*
*  ================================================================
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      REAL               ZERO, ONE
      PARAMETER          ( ZERO = 0.0, ONE = 1.0 )
*     ..
*     .. Local Scalars ..
      INTEGER            CONTXT, HBL, I, IAFIRST, ICOL, ICOL1, INFO, II,
     $                   IROW, IROW1, ITMP1, ITMP2, JAFIRST, JJ, L, LDA,
     $                   LDZ, LLDTMP, MYCOL, MYROW, NODE, NPCOL, NPROW,
     $                   DBLK, HSTEP, VSTEP, KKROW, KKCOL, KLN, LTOP,
     $                   LEFT, RIGHT, UP, DOWN, D1, D2
*     ..
*     .. Local Arrays ..
      INTEGER            DESCT( 9 ), DESCV( 9 ), DESCWH( 9 ),
     $                   DESCWV( 9 )
*     ..
*     .. External Functions ..
      INTEGER            NUMROC
      EXTERNAL           NUMROC
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, INFOG2L, SLASET,
     $                   SLAQR3, DESCINIT, PSGEMM, PSGEMR2D, SGEMM,
     $                   SLAMOV, SGESD2D, SGERV2D, SGEBS2D, SGEBR2D,
     $                   IGEBS2D, IGEBR2D
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN, MOD
*     ..
*     .. Executable Statements ..
*
      INFO = 0
*
*     Quick return for an empty matrix.
*
      IF( N.EQ.0 )
     $   RETURN
*
*     NODE (IAFIRST,JAFIRST) OWNS A(1,1)
*
      HBL = DESCA( MB_ )
      CONTXT = DESCA( CTXT_ )
      LDA = DESCA( LLD_ )
      IAFIRST = DESCA( RSRC_ )
      JAFIRST = DESCA( CSRC_ )
      LDZ = DESCZ( LLD_ )
      CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     Linear rank of this process and its four cyclic neighbours.
*
      NODE = MYROW*NPCOL + MYCOL
      LEFT = MOD( MYCOL+NPCOL-1, NPCOL )
      RIGHT = MOD( MYCOL+1, NPCOL )
      UP = MOD( MYROW+NPROW-1, NPROW )
      DOWN = MOD( MYROW+1, NPROW )
*
*     LTOP is the first row of A to which the column transformation
*     must be applied: row 1 when the full Schur form is wanted,
*     otherwise the top of the active block.
*
      I = KBOT
      L = KTOP
      IF( WANTT ) THEN
         LTOP = 1
      ELSE
         LTOP = L
      END IF
*
*     Begin Aggressive Early Deflation.
*
*     (II,JJ) are the grid coordinates of the process that owns the
*     top-left entry of the DBLK-by-DBLK deflation window.  T and V
*     are described as single-block matrices living on that process;
*     other process rows hold no rows of them, so they pass a
*     leading dimension of 1.
*
      DBLK = NW
      CALL INFOG2L( I-DBLK+1, I-DBLK+1, DESCA, NPROW, NPCOL, MYROW,
     $     MYCOL, IROW, ICOL, II, JJ )
      IF ( MYROW .EQ. II ) THEN
         CALL DESCINIT( DESCT, DBLK, DBLK, DBLK, DBLK, II, JJ, CONTXT,
     $        LDT, INFO )
         CALL DESCINIT( DESCV, DBLK, DBLK, DBLK, DBLK, II, JJ, CONTXT,
     $        LDV, INFO )
      ELSE
         CALL DESCINIT( DESCT, DBLK, DBLK, DBLK, DBLK, II, JJ, CONTXT,
     $        1, INFO )
         CALL DESCINIT( DESCV, DBLK, DBLK, DBLK, DBLK, II, JJ, CONTXT,
     $        1, INFO )
      END IF
*
*     Gather the window A(I-DBLK+1:I,I-DBLK+1:I) into the local T.
*
      CALL PSGEMR2D( DBLK, DBLK, A, I-DBLK+1, I-DBLK+1, DESCA, T, 1, 1,
     $     DESCT, CONTXT )
      IF ( MYROW .EQ. II .AND. MYCOL .EQ. JJ ) THEN
*
*        Owner: run the serial deflation on T, accumulating the
*        transformation in V (initialised to the identity).  WORK is
*        split into three DBLK*DBLK panels plus SLAQR3's workspace.
*        Then broadcast V and the deflation count ND to everybody.
*
         CALL SLASET( 'All', DBLK, DBLK, ZERO, ONE, V, LDV )
         CALL SLAQR3( .TRUE., .TRUE., DBLK, 1, DBLK, DBLK-1, T, LDT, 1,
     $        DBLK, V, LDV, NS, ND, WR, WI, WORK, DBLK, DBLK,
     $        WORK( DBLK*DBLK+1 ), DBLK, DBLK, WORK( 2*DBLK*DBLK+1 ),
     $        DBLK, WORK( 3*DBLK*DBLK+1 ), LWORK-3*DBLK*DBLK )
         CALL SGEBS2D( CONTXT, 'All', ' ', DBLK, DBLK, V, LDV )
         CALL IGEBS2D( CONTXT, 'All', ' ', 1, 1, ND, 1 )
      ELSE
         CALL SGEBR2D( CONTXT, 'All', ' ', DBLK, DBLK, V, LDV, II, JJ )
         CALL IGEBR2D( CONTXT, 'All', ' ', 1, 1, ND, 1, II, JJ )
      END IF
*
*     Nothing is written back unless at least one eigenvalue
*     deflated.
*
      IF( ND .GT. 0 ) THEN
*
*        Copy the local matrix back to the diagonal block.
*
         CALL PSGEMR2D( DBLK, DBLK, T, 1, 1, DESCT, A, I-DBLK+1,
     $        I-DBLK+1, DESCA, CONTXT )
*
*        Update the off-window parts of A, and Z.
*
         IF( MOD( I-DBLK, HBL )+DBLK .LE. HBL ) THEN
*
*           Simplest case: the deflation window is located on one
*           processor.
*           Call SGEMM directly to perform the update.
*           WORK holds one panel of at most HSTEP columns (or VSTEP
*           rows) at a time.
*
            HSTEP = LWORK / DBLK
            VSTEP = HSTEP
*
*           Update horizontal slab in A:
*           A(window rows, I+1:N) := V**T * A(window rows, I+1:N).
*
            IF( WANTT ) THEN
               CALL INFOG2L( I-DBLK+1, I+1, DESCA, NPROW, NPCOL, MYROW,
     $              MYCOL, IROW, ICOL, II, JJ )
               IF( MYROW .EQ. II ) THEN
                  ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
                  DO 10 KKCOL = ICOL, ICOL1, HSTEP
                     KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
                     CALL SGEMM( 'T', 'N', DBLK, KLN, DBLK, ONE, V,
     $                    LDV, A( IROW+(KKCOL-1)*LDA ), LDA, ZERO, WORK,
     $                    DBLK )
                     CALL SLAMOV( 'A', DBLK, KLN, WORK, DBLK,
     $                    A( IROW+(KKCOL-1)*LDA ), LDA )
   10             CONTINUE
               END IF
            END IF
*
*           Update vertical slab in A:
*           A(LTOP:I-DBLK, window cols) := A(...) * V.
*           IROW1 is the last local row at or above global row
*           I-DBLK; INFOG2L returns the next local row when this
*           process does not own that row, hence the decrement.
*
            CALL INFOG2L( LTOP, I-DBLK+1, DESCA, NPROW, NPCOL, MYROW,
     $           MYCOL, IROW, ICOL, II, JJ )
            IF( MYCOL .EQ. JJ ) THEN
               CALL INFOG2L( I-DBLK, I-DBLK+1, DESCA, NPROW, NPCOL,
     $              MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
               IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
               DO 20 KKROW = IROW, IROW1, VSTEP
                  KLN = MIN( VSTEP, IROW1-KKROW+1 )
                  CALL SGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE,
     $                 A( KKROW+(ICOL-1)*LDA ), LDA, V, LDV, ZERO, WORK,
     $                 KLN )
                  CALL SLAMOV( 'A', KLN, DBLK, WORK, KLN,
     $                 A( KKROW+(ICOL-1)*LDA ), LDA )
   20          CONTINUE
            END IF
*
*           Update vertical slab in Z:
*           Z(ILOZ:IHIZ, window cols) := Z(...) * V.
*
            IF( WANTZ ) THEN
               CALL INFOG2L( ILOZ, I-DBLK+1, DESCZ, NPROW, NPCOL, MYROW,
     $              MYCOL, IROW, ICOL, II, JJ )
               IF( MYCOL .EQ. JJ ) THEN
                  CALL INFOG2L( IHIZ, I-DBLK+1, DESCZ, NPROW, NPCOL,
     $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
                  IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
                  DO 30 KKROW = IROW, IROW1, VSTEP
                     KLN = MIN( VSTEP, IROW1-KKROW+1 )
                     CALL SGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE,
     $                    Z( KKROW+(ICOL-1)*LDZ ), LDZ, V, LDV, ZERO,
     $                    WORK, KLN )
                     CALL SLAMOV( 'A', KLN, DBLK, WORK, KLN,
     $                    Z( KKROW+(ICOL-1)*LDZ ), LDZ )
   30             CONTINUE
               END IF
            END IF
*
         ELSE IF( MOD( I-DBLK, HBL )+DBLK .LE. 2*HBL ) THEN
*
*           More complicated case: the deflation window lies on a 2x2
*           processor mesh.
*           Call SGEMM locally and communicate by pair.
*
*           D1 is the number of window rows/columns in the first
*           distribution block and D2 the number in the second.
*           Each process of a pair computes the partial product that
*           belongs to its partner, exchanges it, and then adds its
*           own contribution (BETA = ONE) to the part it received.
*
            D1 = HBL - MOD( I-DBLK, HBL )
            D2 = DBLK - D1
            HSTEP = LWORK / DBLK
            VSTEP = HSTEP
*
*           Update horizontal slab in A.
*
            IF( WANTT ) THEN
               CALL INFOG2L( I-DBLK+1, I+1, DESCA, NPROW, NPCOL, MYROW,
     $              MYCOL, IROW, ICOL, II, JJ )
               IF( MYROW .EQ. UP ) THEN
*
*                 Single process row: both row blocks are local, so
*                 the whole V**T is applied at once.  V is stored
*                 with leading dimension LDV.
*
                  IF( MYROW .EQ. II ) THEN
                     ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
                     DO 40 KKCOL = ICOL, ICOL1, HSTEP
                        KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
                        CALL SGEMM( 'T', 'N', DBLK, KLN, DBLK, ONE, V,
     $                       LDV, A( IROW+(KKCOL-1)*LDA ), LDA, ZERO,
     $                       WORK, DBLK )
                        CALL SLAMOV( 'A', DBLK, KLN, WORK, DBLK,
     $                       A( IROW+(KKCOL-1)*LDA ), LDA )
   40                CONTINUE
                  END IF
               ELSE
                  IF( MYROW .EQ. II ) THEN
*
*                    Upper process of the pair: owns the first D1
*                    window rows, exchanges with the process below.
*
                     ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
                     DO 50 KKCOL = ICOL, ICOL1, HSTEP
                        KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
                        CALL SGEMM( 'T', 'N', D2, KLN, D1, ONE,
     $                       V( 1, D1+1 ), LDV, A( IROW+(KKCOL-1)*LDA ),
     $                       LDA, ZERO, WORK( D1+1 ), DBLK )
                        CALL SGESD2D( CONTXT, D2, KLN, WORK( D1+1 ),
     $                       DBLK, DOWN, MYCOL )
                        CALL SGERV2D( CONTXT, D1, KLN, WORK, DBLK, DOWN,
     $                       MYCOL )
                        CALL SGEMM( 'T', 'N', D1, KLN, D1, ONE,
     $                       V, LDV, A( IROW+(KKCOL-1)*LDA ), LDA, ONE,
     $                       WORK, DBLK )
                        CALL SLAMOV( 'A', D1, KLN, WORK, DBLK,
     $                       A( IROW+(KKCOL-1)*LDA ), LDA )
   50                CONTINUE
                  ELSE IF( UP .EQ. II ) THEN
*
*                    Lower process of the pair: owns the last D2
*                    window rows, exchanges with the process above.
*
                     ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
                     DO 60 KKCOL = ICOL, ICOL1, HSTEP
                        KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
                        CALL SGEMM( 'T', 'N', D1, KLN, D2, ONE,
     $                       V( D1+1, 1 ), LDV, A( IROW+(KKCOL-1)*LDA ),
     $                       LDA, ZERO, WORK, DBLK )
                        CALL SGESD2D( CONTXT, D1, KLN, WORK, DBLK, UP,
     $                       MYCOL )
                        CALL SGERV2D( CONTXT, D2, KLN, WORK( D1+1 ),
     $                       DBLK, UP, MYCOL )
                        CALL SGEMM( 'T', 'N', D2, KLN, D2, ONE,
     $                       V( D1+1, D1+1 ), LDV,
     $                       A( IROW+(KKCOL-1)*LDA ), LDA, ONE,
     $                       WORK( D1+1 ), DBLK )
                        CALL SLAMOV( 'A', D2, KLN, WORK( D1+1 ), DBLK,
     $                       A( IROW+(KKCOL-1)*LDA ), LDA )
   60                CONTINUE
                  END IF
               END IF
            END IF
*
*           Update vertical slab in A.
*
            CALL INFOG2L( LTOP, I-DBLK+1, DESCA, NPROW, NPCOL, MYROW,
     $           MYCOL, IROW, ICOL, II, JJ )
            IF( MYCOL .EQ. LEFT ) THEN
*
*              Single process column: both column blocks are local.
*
               IF( MYCOL .EQ. JJ ) THEN
                  CALL INFOG2L( I-DBLK, I-DBLK+1, DESCA, NPROW, NPCOL,
     $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
                  IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
                  DO 70 KKROW = IROW, IROW1, VSTEP
                     KLN = MIN( VSTEP, IROW1-KKROW+1 )
                     CALL SGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE,
     $                    A( KKROW+(ICOL-1)*LDA ), LDA, V, LDV, ZERO,
     $                    WORK, KLN )
                     CALL SLAMOV( 'A', KLN, DBLK, WORK, KLN,
     $                    A( KKROW+(ICOL-1)*LDA ), LDA )
   70             CONTINUE
               END IF
            ELSE
               IF( MYCOL .EQ. JJ ) THEN
*
*                 Left process of the pair: owns the first D1 window
*                 columns, exchanges with the process to the right.
*
                  CALL INFOG2L( I-DBLK, I-DBLK+1, DESCA, NPROW, NPCOL,
     $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
                  IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
                  DO 80 KKROW = IROW, IROW1, VSTEP
                     KLN = MIN( VSTEP, IROW1-KKROW+1 )
                     CALL SGEMM( 'N', 'N', KLN, D2, D1, ONE,
     $                    A( KKROW+(ICOL-1)*LDA ), LDA,
     $                    V( 1, D1+1 ), LDV, ZERO, WORK( 1+D1*KLN ),
     $                    KLN )
                     CALL SGESD2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
     $                    KLN, MYROW, RIGHT )
                     CALL SGERV2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
     $                    RIGHT )
                     CALL SGEMM( 'N', 'N', KLN, D1, D1, ONE,
     $                    A( KKROW+(ICOL-1)*LDA ), LDA, V, LDV, ONE,
     $                    WORK, KLN )
                     CALL SLAMOV( 'A', KLN, D1, WORK, KLN,
     $                    A( KKROW+(ICOL-1)*LDA ), LDA )
   80             CONTINUE
               ELSE IF ( LEFT .EQ. JJ ) THEN
*
*                 Right process of the pair: owns the last D2 window
*                 columns, exchanges with the process to the left.
*
                  CALL INFOG2L( I-DBLK, I-DBLK+1, DESCA, NPROW, NPCOL,
     $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
                  IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
                  DO 90 KKROW = IROW, IROW1, VSTEP
                     KLN = MIN( VSTEP, IROW1-KKROW+1 )
                     CALL SGEMM( 'N', 'N', KLN, D1, D2, ONE,
     $                    A( KKROW+(ICOL-1)*LDA ), LDA, V( D1+1, 1 ),
     $                    LDV, ZERO, WORK, KLN )
                     CALL SGESD2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
     $                    LEFT )
                     CALL SGERV2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
     $                    KLN, MYROW, LEFT )
                     CALL SGEMM( 'N', 'N', KLN, D2, D2, ONE,
     $                    A( KKROW+(ICOL-1)*LDA ), LDA, V( D1+1, D1+1 ),
     $                    LDV, ONE, WORK( 1+D1*KLN ), KLN )
                     CALL SLAMOV( 'A', KLN, D2, WORK( 1+D1*KLN ), KLN,
     $                    A( KKROW+(ICOL-1)*LDA ), LDA )
   90             CONTINUE
               END IF
            END IF
*
*           Update vertical slab in Z (same scheme as for A).
*
            IF( WANTZ ) THEN
               CALL INFOG2L( ILOZ, I-DBLK+1, DESCZ, NPROW, NPCOL, MYROW,
     $              MYCOL, IROW, ICOL, II, JJ )
               IF( MYCOL .EQ. LEFT ) THEN
                  IF( MYCOL .EQ. JJ ) THEN
                     CALL INFOG2L( IHIZ, I-DBLK+1, DESCZ, NPROW, NPCOL,
     $                    MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
                     IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
                     DO 100 KKROW = IROW, IROW1, VSTEP
                        KLN = MIN( VSTEP, IROW1-KKROW+1 )
                        CALL SGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE,
     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ, V, LDV, ZERO,
     $                       WORK, KLN )
                        CALL SLAMOV( 'A', KLN, DBLK, WORK, KLN,
     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ )
  100                CONTINUE
                  END IF
               ELSE
                  IF( MYCOL .EQ. JJ ) THEN
                     CALL INFOG2L( IHIZ, I-DBLK+1, DESCZ, NPROW, NPCOL,
     $                    MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
                     IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
                     DO 110 KKROW = IROW, IROW1, VSTEP
                        KLN = MIN( VSTEP, IROW1-KKROW+1 )
                        CALL SGEMM( 'N', 'N', KLN, D2, D1, ONE,
     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ,
     $                       V( 1, D1+1 ), LDV, ZERO, WORK( 1+D1*KLN ),
     $                       KLN )
                        CALL SGESD2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
     $                       KLN, MYROW, RIGHT )
                        CALL SGERV2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
     $                       RIGHT )
                        CALL SGEMM( 'N', 'N', KLN, D1, D1, ONE,
     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ, V, LDV, ONE,
     $                       WORK, KLN )
                        CALL SLAMOV( 'A', KLN, D1, WORK, KLN,
     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ )
  110                CONTINUE
                  ELSE IF( LEFT .EQ. JJ ) THEN
                     CALL INFOG2L( IHIZ, I-DBLK+1, DESCZ, NPROW, NPCOL,
     $                    MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
                     IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
                     DO 120 KKROW = IROW, IROW1, VSTEP
                        KLN = MIN( VSTEP, IROW1-KKROW+1 )
                        CALL SGEMM( 'N', 'N', KLN, D1, D2, ONE,
     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ,
     $                       V( D1+1, 1 ), LDV, ZERO, WORK, KLN )
                        CALL SGESD2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
     $                       LEFT )
                        CALL SGERV2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
     $                       KLN, MYROW, LEFT )
                        CALL SGEMM( 'N', 'N', KLN, D2, D2, ONE,
     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ,
     $                       V( D1+1, D1+1 ), LDV, ONE,
     $                       WORK( 1+D1*KLN ), KLN )
                        CALL SLAMOV( 'A', KLN, D2, WORK( 1+D1*KLN ),
     $                       KLN, Z( KKROW+(ICOL-1)*LDZ ), LDZ )
  120                CONTINUE
                  END IF
               END IF
            END IF
*
         ELSE
*
*           Most complicated case: the deflation window lies across
*           the border of the processor mesh.
*           Treat V as a distributed matrix and call PSGEMM.
*
*           V is described as a single block owned by process (0,0)
*           (every process holds a copy after the broadcast).  Its
*           local leading dimension is LDV on process row 0; other
*           process rows hold no rows and pass 1.  WORK is described
*           as DBLK-by-HSTEP with column blocks of LWORK/DBLK, local
*           leading dimension LLDTMP.
*
            HSTEP = LWORK / DBLK * NPCOL
            VSTEP = LWORK / DBLK * NPROW
            LLDTMP = NUMROC( DBLK, DBLK, MYROW, 0, NPROW )
            LLDTMP = MAX( 1, LLDTMP )
            IF( MYROW .EQ. 0 ) THEN
               CALL DESCINIT( DESCV, DBLK, DBLK, DBLK, DBLK, 0, 0,
     $              CONTXT, LDV, INFO )
            ELSE
               CALL DESCINIT( DESCV, DBLK, DBLK, DBLK, DBLK, 0, 0,
     $              CONTXT, 1, INFO )
            END IF
            CALL DESCINIT( DESCWH, DBLK, HSTEP, DBLK, LWORK / DBLK, 0,
     $           0, CONTXT, LLDTMP, INFO )
*
*           Update horizontal slab in A.
*
            IF( WANTT ) THEN
               DO 130 KKCOL = I+1, N, HSTEP
                  KLN = MIN( HSTEP, N-KKCOL+1 )
                  CALL PSGEMM( 'T', 'N', DBLK, KLN, DBLK, ONE, V, 1, 1,
     $                 DESCV, A, I-DBLK+1, KKCOL, DESCA, ZERO, WORK, 1,
     $                 1, DESCWH )
                  CALL PSGEMR2D( DBLK, KLN, WORK, 1, 1, DESCWH, A,
     $                 I-DBLK+1, KKCOL, DESCA, CONTXT )
  130          CONTINUE
            END IF
*
*           Update vertical slab in A.  The WORK descriptor is
*           rebuilt per panel because the panel height KLN varies.
*
            DO 140 KKROW = LTOP, I-DBLK, VSTEP
               KLN = MIN( VSTEP, I-DBLK-KKROW+1 )
               LLDTMP = NUMROC( KLN, LWORK / DBLK, MYROW, 0, NPROW )
               LLDTMP = MAX( 1, LLDTMP )
               CALL DESCINIT( DESCWV, KLN, DBLK, LWORK / DBLK, DBLK, 0,
     $              0, CONTXT, LLDTMP, INFO )
               CALL PSGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE, A, KKROW,
     $              I-DBLK+1, DESCA, V, 1, 1, DESCV, ZERO, WORK, 1, 1,
     $              DESCWV )
               CALL PSGEMR2D( KLN, DBLK, WORK, 1, 1, DESCWV, A, KKROW,
     $              I-DBLK+1, DESCA, CONTXT )
  140       CONTINUE
*
*           Update vertical slab in Z.
*
            IF( WANTZ ) THEN
               DO 150 KKROW = ILOZ, IHIZ, VSTEP
                  KLN = MIN( VSTEP, IHIZ-KKROW+1 )
                  LLDTMP = NUMROC( KLN, LWORK / DBLK, MYROW, 0, NPROW )
                  LLDTMP = MAX( 1, LLDTMP )
                  CALL DESCINIT( DESCWV, KLN, DBLK, LWORK / DBLK, DBLK,
     $                 0, 0, CONTXT, LLDTMP, INFO )
                  CALL PSGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE, Z, KKROW,
     $                 I-DBLK+1, DESCZ, V, 1, 1, DESCV, ZERO, WORK, 1,
     $                 1, DESCWV )
                  CALL PSGEMR2D( KLN, DBLK, WORK, 1, 1, DESCWV, Z,
     $                 KKROW, I-DBLK+1, DESCZ, CONTXT )
  150          CONTINUE
            END IF
         END IF
*
*        Extract converged eigenvalues.
*
*        Walk upwards from the bottom of the window, II counting the
*        entries handled so far.  A nonzero WI marks a complex pair,
*        which is copied two entries at a time unless only one slot
*        of the ND converged entries remains.  Only process 0 stores
*        the values; all other processes store zeros.
*
         II = 0
  160    CONTINUE
            IF( II .EQ. ND-1 .OR. WI( DBLK-II ) .EQ. ZERO ) THEN
               IF( NODE .EQ. 0 ) THEN
                  SR( I-II ) = WR( DBLK-II )
               ELSE
                  SR( I-II ) = ZERO
               END IF
               SI( I-II ) = ZERO
               II = II + 1
            ELSE
               IF( NODE .EQ. 0 ) THEN
                  SR( I-II-1 ) = WR( DBLK-II-1 )
                  SR( I-II ) = WR( DBLK-II )
                  SI( I-II-1 ) = WI( DBLK-II-1 )
                  SI( I-II ) = WI( DBLK-II )
               ELSE
                  SR( I-II-1 ) = ZERO
                  SR( I-II ) = ZERO
                  SI( I-II-1 ) = ZERO
                  SI( I-II ) = ZERO
               END IF
               II = II + 2
            END IF
         IF( II .LT. ND ) GOTO 160
      END IF
*
*     END OF PSLAQR2
*
      END

slaset
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition slaset.f:110

slaqr3
subroutine slaqr3(wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz, ihiz, z, ldz, ns, nd, sr, si, v, ldv, nh, t, ldt, nv, wv, ldwv, work, lwork)
SLAQR3 performs the orthogonal similarity transformation of a Hessenberg matrix to detect and deflate...
Definition slaqr3.f:275

sgemm
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
Definition sgemm.f:187

min
#define min(a, b)
Definition macros.h:20

max
#define max(a, b)
Definition macros.h:21

sgebs2d
subroutine sgebs2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1072

sgebr2d
subroutine sgebr2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1113

infog2l
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition mpi.f:937

descinit
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition mpi.f:777

blacs_gridinfo
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754

pslaqr2
subroutine pslaqr2(wantt, wantz, n, ktop, kbot, nw, a, desca, iloz, ihiz, z, descz, ns, nd, sr, si, t, ldt, v, ldv, wr, wi, work, lwork)
Definition pslaqr2.f:4