pslahqr_8f_source.html

      SUBROUTINE pslahqr( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI,

     $                    ILOZ, IHIZ, Z, DESCZ, WORK, LWORK, IWORK,

     $                    ILWORK, INFO )

*

*  -- ScaLAPACK routine (version 2.0.2) --

*     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver

*     May 1 2012

*

*     .. Scalar Arguments ..

      LOGICAL            WANTT, WANTZ

      INTEGER            IHI, IHIZ, ILO, ILOZ, ILWORK, INFO, LWORK, N

*     ..

*     .. Array Arguments ..

      INTEGER            DESCA( * ), DESCZ( * ), IWORK( * )

      REAL               A( * ), WI( * ), WORK( * ), WR( * ), Z( * )

*     ..

*

*  Purpose

*  =======

*

*  PSLAHQR is an auxiliary routine used to find the Schur decomposition

*    and/or eigenvalues of a matrix already in Hessenberg form from

*    cols ILO to IHI.

*

*  Notes

*  =====

*

*  Each global data object is described by an associated description

*  vector.  This vector stores the information required to establish

*  the mapping between an object element and its corresponding process

*  and memory location.

*

*  Let A be a generic term for any 2D block cyclically distributed array.

*  Such a global array has an associated description vector DESCA.

*  In the following comments, the character _ should be read as

*  "of the global array".

*

*  NOTATION        STORED IN      EXPLANATION

*  --------------- -------------- --------------------------------------

*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,

*                                 DTYPE_A = 1.

*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating

*                                 the BLACS process grid A is distribu-

*                                 ted over. The context itself is glo-

*                                 bal, but the handle (the integer

*                                 value) may vary.

*  M_A    (global) DESCA( M_ )    The number of rows in the global

*                                 array A.

*  N_A    (global) DESCA( N_ )    The number of columns in the global

*                                 array A.

*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute

*                                 the rows of the array.

*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute

*                                 the columns of the array.

*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first

*                                 row of the array A is distributed.

*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the

*                                 first column of the array A is

*                                 distributed.

*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local

*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).

*

*  Let K be the number of rows or columns of a distributed matrix,

*  and assume that its process grid has dimension p x q.

*  LOCr( K ) denotes the number of elements of K that a process

*  would receive if K were distributed over the p processes of its

*  process column.

*  Similarly, LOCc( K ) denotes the number of elements of K that a

*  process would receive if K were distributed over the q processes of

*  its process row.

*  The values of LOCr() and LOCc() may be determined via a call to the

*  ScaLAPACK tool function, NUMROC:

*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),

*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).

*  An upper bound for these quantities may be computed by:

*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A

*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A

*

*  Arguments

*  =========

*

*  WANTT   (global input) LOGICAL

*          = .TRUE. : the full Schur form T is required;

*          = .FALSE.: only eigenvalues are required.

*

*  WANTZ   (global input) LOGICAL

*          = .TRUE. : the matrix of Schur vectors Z is required;

*          = .FALSE.: Schur vectors are not required.

*

*  N       (global input) INTEGER

*          The order of the Hessenberg matrix A (and Z if WANTZ).

*          N >= 0.

*

*  ILO     (global input) INTEGER

*  IHI     (global input) INTEGER

*          It is assumed that A is already upper quasi-triangular in

*          rows and columns IHI+1:N, and that A(ILO,ILO-1) = 0 (unless

*          ILO = 1). PSLAHQR works primarily with the Hessenberg

*          submatrix in rows and columns ILO to IHI, but applies

*          transformations to all of H if WANTT is .TRUE..

*          1 <= ILO <= max(1,IHI); IHI <= N.

*

*  A       (global input/output) REAL array, dimension

*          (DESCA(LLD_),*)

*          On entry, the upper Hessenberg matrix A.

*          On exit, if WANTT is .TRUE., A is upper quasi-triangular in

*          rows and columns ILO:IHI, with any 2-by-2 or larger diagonal

*          blocks not yet in standard form. If WANTT is .FALSE., the

*          contents of A are unspecified on exit.

*

*  DESCA   (global and local input) INTEGER array of dimension DLEN_.

*          The array descriptor for the distributed matrix A.

*

*  WR      (global replicated output) REAL array,

*                                                         dimension (N)

*  WI      (global replicated output) REAL array,

*                                                         dimension (N)

*          The real and imaginary parts, respectively, of the computed

*          eigenvalues ILO to IHI are stored in the corresponding

*          elements of WR and WI. If two eigenvalues are computed as a

*          complex conjugate pair, they are stored in consecutive

*          elements of WR and WI, say the i-th and (i+1)th, with

*          WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the

*          eigenvalues are stored in the same order as on the diagonal

*          of the Schur form returned in A.  A may be returned with

*          larger diagonal blocks until the next release.

*

*  ILOZ    (global input) INTEGER

*  IHIZ    (global input) INTEGER

*          Specify the rows of Z to which transformations must be

*          applied if WANTZ is .TRUE..

*          1 <= ILOZ <= ILO; IHI <= IHIZ <= N.

*

*  Z       (global input/output) REAL array.

*          If WANTZ is .TRUE., on entry Z must contain the current

*          matrix Z of transformations accumulated by PSHSEQR, and on

*          exit Z has been updated; transformations are applied only to

*          the submatrix Z(ILOZ:IHIZ,ILO:IHI).

*          If WANTZ is .FALSE., Z is not referenced.

*

*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.

*          The array descriptor for the distributed matrix Z.

*

*  WORK    (local output) REAL array of size LWORK

*

*  LWORK   (local input) INTEGER

*          WORK(LWORK) is a local array and LWORK is assumed big enough

*          so that LWORK >= 3*N +

*                MAX( 2*MAX(DESCZ(LLD_),DESCA(LLD_)) + 2*LOCc(N),

*                     7*Ceil(N/HBL)/LCM(NPROW,NPCOL) )

*

*  IWORK   (global and local input) INTEGER array of size ILWORK

*

*  ILWORK  (local input) INTEGER

*          This holds some of the IBLK integer arrays.  This is held

*          as a place holder for the next release.

*

*  INFO    (global output) INTEGER

*          < 0: parameter number -INFO incorrect or inconsistent

*          = 0: successful exit

*          > 0: PSLAHQR failed to compute all the eigenvalues ILO to IHI

*               in a total of 30*(IHI-ILO+1) iterations; if INFO = i,

*               elements i+1:ihi of WR and WI contain those eigenvalues

*               which have been successfully computed.

*

*  Logic:

*       This algorithm is very similar to _LAHQR.  Unlike _LAHQR,

*       instead of sending one double shift through the largest

*       unreduced submatrix, this algorithm sends multiple double shifts

*       and spaces them apart so that there can be parallelism across

*       several processor row/columns.  Another critical difference is

*       that this algorithm aggregates multiple transforms together in

*       order to apply them in a block fashion.

*

*  Important Local Variables:

*       IBLK = The maximum number of bulges that can be computed.

*           Currently fixed.  In future releases this won't be fixed.

*       HBL  = The square block size (HBL=DESCA(MB_)=DESCA(NB_))

*       ROTN = The number of transforms to block together

*       NBULGE = The number of bulges that will be attempted on the

*           current submatrix.

*       IBULGE = The current number of bulges started.

*       K1(*),K2(*) = The current bulge loops from K1(*) to K2(*).

*

*  Subroutines:

*       This routine calls:

*           PSLACONSB   -> To determine where to start each iteration

*           PSLAWIL   -> Given the shift, get the transformation

*           SLASORTE   -> Pair up eigenvalues so that reals are paired.

*           PSLACP3   -> Parallel array to local replicated array copy &

*                        back.

*           SLAREF   -> Row/column reflector applier.  Core routine

*                        here.

*           PSLASMSUB   -> Finds negligible subdiagonal elements.

*

*  Current Notes and/or Restrictions:

*       1.) This code requires the distributed block size to be square

*           and at least six (6); unlike simpler codes like LU, this

*           algorithm is extremely sensitive to block size.  Unwise

*           choices of too small a block size can lead to bad

*           performance.

*       2.) This code requires A and Z to be distributed identically

*           and have identical contexts.

*       3.) This release currently does not have a routine for

*           resolving the Schur blocks into regular 2x2 form after

*           this code is completed.  Because of this, a significant

*           performance impact is incurred while the deflation is done

*           sometimes by a single column of processors.

*       4.) This code does not currently block the initial transforms

*           so that none of the rows or columns for any bulge are

*           completed until all are started.  To offset pipeline

*           start-up it is recommended that at least 2*LCM(NPROW,NPCOL)

*           bulges are used (if possible)

*       5.) The maximum number of bulges currently supported is fixed at

*           32.  In future versions this will be limited only by the

*           incoming WORK array.

*       6.) The matrix A must be in upper Hessenberg form.  If elements

*           below the subdiagonal are nonzero, the resulting transforms

*           may be nonsimilar.  This is also true with the LAPACK

*           routine.

*       7.) For this release, it is assumed RSRC_=CSRC_=0

*       8.) Currently, all the eigenvalues are distributed to all the

*           nodes.  Future releases will probably distribute the

*           eigenvalues by the column partitioning.

*       9.) The internals of this routine are subject to change.

*

*  Implemented by:  G. Henry, November 17, 1996

*

*  =====================================================================

*

*     .. Parameters ..

      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,

     $                   LLD_, MB_, M_, NB_, N_, RSRC_

      parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,

     $                     ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,

     $                     rsrc_ = 7, csrc_ = 8, lld_ = 9 )

      REAL               ZERO, ONE, HALF

      PARAMETER          ( ZERO = 0.0, one = 1.0, half = 0.5 )

      REAL               CONST

      parameter( const = 1.50 )

      INTEGER            IBLK

      parameter( iblk = 32 )

*     ..

*     .. Local Scalars ..

      INTEGER            CONTXT, DOWN, HBL, I, I1, I2, IAFIRST, IBULGE,

     $                   ICBUF, ICOL, ICOL1, ICOL2, IDIA, IERR, II,

     $                   irbuf, irow, irow1, irow2, ispec, istart,

     $                   istartcol, istartrow, istop, isub, isup,

     $                   itermax, itmp1, itmp2, itn, its, j, jafirst,

     $                   jblk, jj, k, ki, l, lcmrc, lda, ldz, left,

     $                   lihih, lihiz, liloh, liloz, locali1, locali2,

     $                   localk, localm, m, modkm1, mycol, myrow,

     $                   nbulge, nh, node, npcol, nprow, nr, num, nz,

     $                   right, rotn, up, vecsidx

      REAL               AVE, DISC, H00, H10, H11, H12, H21, H22, H33,

     $                   H43H34, H44, OVFL, S, SMLNUM, SUM, T1, T1COPY,

     $                   t2, t3, ulp, unfl, v1save, v2, v2save, v3,

     $                   v3save, cs, sn

*     ..

*     .. Local Arrays ..

      INTEGER            ICURCOL( IBLK ), ICURROW( IBLK ), K1( IBLK ),

     $                   K2( IBLK ), KCOL( IBLK ), KP2COL( IBLK ),

     $                   kp2row( iblk ), krow( iblk ), localk2( iblk )

      REAL               S1( 2*IBLK, 2*IBLK ), SMALLA( 6, 6, IBLK ),

     $                   VCOPY( 3 )

*     ..

*     .. External Functions ..

      INTEGER            ILCM, NUMROC

      REAL               PSLAMCH

      EXTERNAL           ilcm, numroc, pslamch

*     ..

*     .. External Subroutines ..

      EXTERNAL           blacs_gridinfo, scopy, sgebr2d, sgebs2d,

     $                   sgerv2d, sgesd2d, sgsum2d, slahqr, slaref,

     $                   slarfg, slasorte, igamn2d, infog1l, infog2l,

     $                   pslabad, pslaconsb, pslacp3, pslasmsub,

     $                   pslawil, pxerbla, slanv2

*     ..

*     .. Intrinsic Functions ..

      INTRINSIC          abs, max, min, mod, sign, sqrt

*     ..

*     .. Executable Statements ..

*

      info = 0

*

      itermax = 30*( ihi-ilo+1 )

*     ITERMAX = 0

      IF( n.EQ.0 )

     $   RETURN

*

*     NODE (IAFIRST,JAFIRST) OWNS A(1,1)

*

      hbl = desca( mb_ )

      contxt = desca( ctxt_ )

      lda = desca( lld_ )

      iafirst = desca( rsrc_ )

      jafirst = desca( csrc_ )

      ldz = descz( lld_ )

      CALL blacs_gridinfo( contxt, nprow, npcol, myrow, mycol )

      node = myrow*npcol + mycol

      num = nprow*npcol

      left = mod( mycol+npcol-1, npcol )

      right = mod( mycol+1, npcol )

      up = mod( myrow+nprow-1, nprow )

      down = mod( myrow+1, nprow )

      lcmrc = ilcm( nprow, npcol )

*

*     Determine the number of columns we have so we can check workspace

*

      localk = numroc( n, hbl, mycol, jafirst, npcol )

      jj = n / hbl

      IF( jj*hbl.LT.n )

     $   jj = jj + 1

      jj = 7*jj / lcmrc

      IF( lwork.LT.3*n+max( 2*max( lda, ldz )+2*localk, jj ) ) THEN

         info = -15

      END IF

      IF( descz( ctxt_ ).NE.desca( ctxt_ ) ) THEN

         info = -( 1300+ctxt_ )

      END IF

      IF( desca( mb_ ).NE.desca( nb_ ) ) THEN

         info = -( 700+nb_ )

      END IF

      IF( descz( mb_ ).NE.descz( nb_ ) ) THEN

         info = -( 1300+nb_ )

      END IF

      IF( desca( mb_ ).NE.descz( mb_ ) ) THEN

         info = -( 1300+mb_ )

      END IF

      IF( ( desca( rsrc_ ).NE.0 ) .OR. ( desca( csrc_ ).NE.0 ) ) THEN

         info = -( 700+rsrc_ )

      END IF

      IF( ( descz( rsrc_ ).NE.0 ) .OR. ( descz( csrc_ ).NE.0 ) ) THEN

         info = -( 1300+rsrc_ )

      END IF

      IF( ( ilo.GT.n ) .OR. ( ilo.LT.1 ) ) THEN

         info = -4

      END IF

      IF( ( ihi.GT.n ) .OR. ( ihi.LT.1 ) ) THEN

         info = -5

      END IF

      IF( hbl.LT.5 ) THEN

         info = -( 700+mb_ )

      END IF

      CALL igamn2d( contxt, 'ALL', ' ', 1, 1, info, 1, itmp1, itmp2, -1,

     $              -1, -1 )

      IF( info.LT.0 ) THEN

         CALL pxerbla( contxt, 'PSLAHQR', -info )

         RETURN

      END IF

*

*     Set work array indices

*

      vecsidx = 0

      idia = 3*n

      isub = 3*n

      isup = 3*n

      irbuf = 3*n

      icbuf = 3*n

*

*     Find a value for ROTN

*

      rotn = hbl / 3

      rotn = max( rotn, hbl-2 )

      rotn = min( rotn, 1 )

*

      IF( ilo.EQ.ihi ) THEN

         CALL infog2l( ilo, ilo, desca, nprow, npcol, myrow, mycol,

     $                 irow, icol, ii, jj )

         IF( ( myrow.EQ.ii ) .AND. ( mycol.EQ.jj ) ) THEN

            wr( ilo ) = a( ( icol-1 )*lda+irow )

         ELSE

            wr( ilo ) = zero

         END IF

         wi( ilo ) = zero

         RETURN

      END IF

*

      nh = ihi - ilo + 1

      nz = ihiz - iloz + 1

*

      CALL infog1l( iloz, hbl, nprow, myrow, 0, liloz, lihiz )

      lihiz = numroc( ihiz, hbl, myrow, 0, nprow )

*

*     Set machine-dependent constants for the stopping criterion.

*     If NORM(H) <= SQRT(OVFL), overflow should not occur.

*

      unfl = pslamch( contxt, 'SAFE MINIMUM' )

      ovfl = one / unfl

      CALL pslabad( contxt, unfl, ovfl )

      ulp = pslamch( contxt, 'PRECISION' )

      smlnum = unfl*( nh / ulp )

*

*     I1 and I2 are the indices of the first row and last column of H

*     to which transformations must be applied. If eigenvalues only are

*     being computed, I1 and I2 are set inside the main loop.

*

      IF( wantt ) THEN

         i1 = 1

         i2 = n

      END IF

*

*     ITN is the total number of QR iterations allowed.

*

      itn = itermax

*

*     The main loop begins here. I is the loop index and decreases from

*     IHI to ILO in steps of our Schur block size (<=2*IBLK). Each

*     iteration of the loop works  with the active submatrix in rows

*     and columns L to I.   Eigenvalues I+1 to IHI have already

*     converged. Either L = ILO or the global A(L,L-1) is negligible

*     so that the matrix splits.

*

      i = ihi

   10 CONTINUE

      l = ilo

      IF( i.LT.ilo )

     $   GO TO 450

*

*     Perform QR iterations on rows and columns ILO to I until a

*     submatrix of order 1 or 2 splits off at the bottom because a

*     subdiagonal element has become negligible.

*

      DO 420 its = 0, itn

*

*        Look for a single small subdiagonal element.

*

         CALL pslasmsub( a, desca, i, l, k, smlnum, work( irbuf+1 ),

     $                   lwork-irbuf )

         l = k

*

         IF( l.GT.ilo ) THEN

*

*           H(L,L-1) is negligible

*

            CALL infog2l( l, l-1, desca, nprow, npcol, myrow, mycol,

     $                    irow, icol, itmp1, itmp2 )

            IF( ( myrow.EQ.itmp1 ) .AND. ( mycol.EQ.itmp2 ) ) THEN

               a( ( icol-1 )*lda+irow ) = zero

            END IF

            work( isub+l-1 ) = zero

         END IF

*

*        Exit from loop if a submatrix of order 1 or 2 has split off.

*

         m = l - 10

*        IF ( L .GE. I - (2*IBLK-1) )

*         IF ( L .GE. I - MAX(2*IBLK-1,HBL) )

         IF( l.GE.i-1 )

     $      GO TO 430

*

*        Now the active submatrix is in rows and columns L to I. If

*        eigenvalues only are being computed, only the active submatrix

*        need be transformed.

*

         IF( .NOT.wantt ) THEN

            i1 = l

            i2 = i

         END IF

*

*        Copy submatrix of size 2*JBLK and prepare to do generalized

*           Wilkinson shift or an exceptional shift

*

         jblk = min( iblk, ( ( i-l+1 ) / 2 )-1 )

         IF( jblk.GT.lcmrc ) THEN

*

*           Make sure it's divisible by LCM (we want even workloads!)

*

            jblk = jblk - mod( jblk, lcmrc )

         END IF

         jblk = min( jblk, 2*lcmrc )

         jblk = max( jblk, 1 )

*

         CALL pslacp3( 2*jblk, i-2*jblk+1, a, desca, s1, 2*iblk, -1, -1,

     $                 0 )

         IF( its.EQ.20 .OR. its.EQ.40 ) THEN

*

*           Exceptional shift.

*

            DO 20 ii = 2*jblk, 2, -1

               s1( ii, ii ) = const*( abs( s1( ii, ii ) )+

     $                        abs( s1( ii, ii-1 ) ) )

               s1( ii, ii-1 ) = zero

               s1( ii-1, ii ) = zero

   20       CONTINUE

            s1( 1, 1 ) = const*abs( s1( 1, 1 ) )

         ELSE

            CALL slahqr( .false., .false., 2*jblk, 1, 2*jblk, s1,

     $                   2*iblk, work( irbuf+1 ), work( icbuf+1 ), 1,

     $                   2*jblk, z, ldz, ierr )

*

*           Prepare to use Wilkinson's double shift

*

            h44 = s1( 2*jblk, 2*jblk )

            h33 = s1( 2*jblk-1, 2*jblk-1 )

            h43h34 = s1( 2*jblk-1, 2*jblk )*s1( 2*jblk, 2*jblk-1 )

            IF( ( jblk.GT.1 ) .AND. ( its.GT.30 ) ) THEN

               s = s1( 2*jblk-1, 2*jblk-2 )

               disc = ( h33-h44 )*half

               disc = disc*disc + h43h34

               IF( disc.GT.zero ) THEN

*

*                 Real roots: Use Wilkinson's shift twice

*

                  disc = sqrt( disc )

                  ave = half*( h33+h44 )

                  IF( abs( h33 )-abs( h44 ).GT.zero ) THEN

                     h33 = h33*h44 - h43h34

                     h44 = h33 / ( sign( disc, ave )+ave )

                  ELSE

                     h44 = sign( disc, ave ) + ave

                  END IF

                  h33 = h44

                  h43h34 = zero

               END IF

            END IF

         END IF

*

*        Look for two consecutive small subdiagonal elements:

*           PSLACONSB is the routine that does this.

*

c         CALL PSLACONSB( A, DESCA, I, L, M, H44, H33, H43H34,

c     $                   WORK( IRBUF+1 ), LWORK-IRBUF )

*

*        Skip small submatrices

*

*        IF ( M .GE. I - 5 )

*    $      GO TO 80

*

*        In principle PSLACONSB needs to check all shifts to decide

*        whether two consecutive small subdiagonal entries are suitable

*        as the starting position of the bulge chasing phase. It can be

*        dangerous to check the first pair of shifts only. Moreover it

*        is quite rare to obtain an M which is much larger than L. This

*        process is a bit expensive compared with the benefit.

*        Therefore it is sensible to abandon this routine. Total amount

*        of communications is saved on average.

*

         m = l

*        Double-shift QR step

*

*        NBULGE is the number of bulges that will be attempted

*

         istop = min( m+rotn-mod( m, rotn ), i-2 )

         istop = min( istop, m+hbl-3-mod( m-1, hbl ) )

         istop = min( istop, i2-2 )

         istop = max( istop, m )

         nbulge = ( i-1-istop ) / hbl

*

*        Do not exceed maximum determined.

*

         nbulge = min( nbulge, jblk )

         IF( nbulge.GT.lcmrc ) THEN

*

*           Make sure it's divisible by LCM (we want even workloads!)

*

            nbulge = nbulge - mod( nbulge, lcmrc )

         END IF

         nbulge = max( nbulge, 1 )

*

         IF( ( its.NE.20 ) .AND. ( its.NE.40 ) .AND. ( nbulge.GT.1 ) )

     $        THEN

*

*           sort the eigenpairs so that they are in twos for double

*           shifts.  only call if several need sorting

*

            CALL slasorte( s1( 2*( jblk-nbulge )+1,

     $                     2*( jblk-nbulge )+1 ), 2*iblk, 2*nbulge,

     $                     work( irbuf+1 ), ierr )

         END IF

*

*        IBULGE is the number of bulges going so far

*

         ibulge = 1

*

*        "A" row defs : main row transforms from LOCALK to LOCALI2

*

         CALL infog1l( m, hbl, npcol, mycol, 0, itmp1, localk )

         localk = numroc( n, hbl, mycol, 0, npcol )

         CALL infog1l( 1, hbl, npcol, mycol, 0, icol1, locali2 )

         locali2 = numroc( i2, hbl, mycol, 0, npcol )

*

*        "A" col defs : main col transforms from LOCALI1 to LOCALM

*

         CALL infog1l( i1, hbl, nprow, myrow, 0, locali1, icol1 )

         icol1 = numroc( n, hbl, myrow, 0, nprow )

         CALL infog1l( 1, hbl, nprow, myrow, 0, localm, icol1 )

         icol1 = numroc( min( m+3, i ), hbl, myrow, 0, nprow )

*

*        Which row & column will start the bulges

*

         istartrow = mod( ( m+1 ) / hbl, nprow ) + iafirst

         istartcol = mod( ( m+1 ) / hbl, npcol ) + jafirst

*

         CALL infog1l( m, hbl, nprow, myrow, 0, ii, itmp2 )

         itmp2 = numroc( n, hbl, myrow, 0, nprow )

         CALL infog1l( m, hbl, npcol, mycol, 0, jj, itmp2 )

         itmp2 = numroc( n, hbl, mycol, 0, npcol )

         CALL infog1l( 1, hbl, nprow, myrow, 0, istop, kp2row( 1 ) )

         kp2row( 1 ) = numroc( m+2, hbl, myrow, 0, nprow )

         CALL infog1l( 1, hbl, npcol, mycol, 0, istop, kp2col( 1 ) )

         kp2col( 1 ) = numroc( m+2, hbl, mycol, 0, npcol )

*

*        Set all values for bulges.  All bulges are stored in

*          intermediate steps as loops over KI.  Their current "task"

*          over the global M to I-1 values is always K1(KI) to K2(KI).

*          However, because there are many bulges, K1(KI) & K2(KI) might

*          go past that range while later bulges (KI+1,KI+2,etc..) are

*          finishing up.

*

*        Rules:

*              If MOD(K1(KI)-1,HBL) < HBL-2 then MOD(K2(KI)-1,HBL)<HBL-2

*              If MOD(K1(KI)-1,HBL) = HBL-2 then MOD(K2(KI)-1,HBL)=HBL-2

*              If MOD(K1(KI)-1,HBL) = HBL-1 then MOD(K2(KI)-1,HBL)=HBL-1

*              K2(KI)-K1(KI) <= ROTN

*

*        We first hit a border when MOD(K1(KI)-1,HBL)=HBL-2 and we hit

*        it again when MOD(K1(KI)-1,HBL)=HBL-1.

*

         DO 30 ki = 1, nbulge

            k1( ki ) = m

            istop = min( m+rotn-mod( m, rotn ), i-2 )

            istop = min( istop, m+hbl-3-mod( m-1, hbl ) )

            istop = min( istop, i2-2 )

            istop = max( istop, m )

            k2( ki ) = istop

            icurrow( ki ) = istartrow

            icurcol( ki ) = istartcol

            localk2( ki ) = itmp1

            krow( ki ) = ii

            kcol( ki ) = jj

            IF( ki.GT.1 )

     $         kp2row( ki ) = kp2row( 1 )

            IF( ki.GT.1 )

     $         kp2col( ki ) = kp2col( 1 )

   30    CONTINUE

*

*        Get first transform on node who owns M+2,M+2

*

         DO 31 itmp1 = 1, 3

            vcopy(itmp1) = zero

   31    CONTINUE

         itmp1 = istartrow

         itmp2 = istartcol

         CALL pslawil( itmp1, itmp2, m, a, desca, h44, h33, h43h34,

     $                 vcopy )

         v1save = vcopy( 1 )

         v2save = vcopy( 2 )

         v3save = vcopy( 3 )

         IF( k2( ibulge ).LE.i-1 ) THEN

   40       CONTINUE

            IF( ( k1( ibulge ).GE.m+5 ) .AND. ( ibulge.LT.nbulge ) )

     $           THEN

               IF( ( mod( k2( ibulge )+2, hbl ).EQ.mod( k2( ibulge+1 )+

     $             2, hbl ) ) .AND. ( k1( 1 ).LE.i-1 ) ) THEN

                  h44 = s1( 2*jblk-2*ibulge, 2*jblk-2*ibulge )

                  h33 = s1( 2*jblk-2*ibulge-1, 2*jblk-2*ibulge-1 )

                  h43h34 = s1( 2*jblk-2*ibulge-1, 2*jblk-2*ibulge )*

     $                     s1( 2*jblk-2*ibulge, 2*jblk-2*ibulge-1 )

                  itmp1 = istartrow

                  itmp2 = istartcol

                  CALL pslawil( itmp1, itmp2, m, a, desca, h44, h33,

     $                          h43h34, vcopy )

                  v1save = vcopy( 1 )

                  v2save = vcopy( 2 )

                  v3save = vcopy( 3 )

                  ibulge = ibulge + 1

               END IF

            END IF

*

*        When we hit a border, there are row and column transforms that

*          overlap over several processors and the code gets very

*          "congested."  As a remedy, when we first hit a border, a 6x6

*          *local* matrix is generated on one node (called SMALLA) and

*          work is done on that.  At the end of the border, the data is

*          passed back and everything stays a lot simpler.

*

            DO 80 ki = 1, ibulge

*

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

               k = istart

               modkm1 = mod( k-1, hbl )

               IF( ( modkm1.GE.hbl-2 ) .AND. ( k.LE.i-1 ) ) THEN

                  DO 81 itmp1 = 1, 6

                     DO 82 itmp2 = 1, 6

                        smalla(itmp1, itmp2, ki) = zero

   82                CONTINUE

   81             CONTINUE

                  IF( ( modkm1.EQ.hbl-2 ) .AND. ( k.LT.i-1 ) ) THEN

*

*                 Copy 6 elements from global A(K-1:K+4,K-1:K+4)

*

                     CALL infog2l( k+2, k+2, desca, nprow, npcol, myrow,

     $                             mycol, irow1, icol1, itmp1, itmp2 )

                     CALL pslacp3( min( 6, n-k+2 ), k-1, a, desca,

     $                             smalla( 1, 1, ki ), 6, itmp1, itmp2,

     $                             0 )

                  END IF

                  IF( modkm1.EQ.hbl-1 ) THEN

*

*                 Copy 6 elements from global A(K-2:K+3,K-2:K+3)

*

                     CALL infog2l( k+1, k+1, desca, nprow, npcol, myrow,

     $                             mycol, irow1, icol1, itmp1, itmp2 )

                     CALL pslacp3( min( 6, n-k+3 ), k-2, a, desca,

     $                             smalla( 1, 1, ki ), 6, itmp1, itmp2,

     $                             0 )

                  END IF

               END IF

*

*           SLAHQR used to have a single row application and a single

*              column application to H.  Here we do something a little

*              more clever.  We break each transformation down into 3

*              parts:

*                  1.) The minimum amount of work it takes to determine

*                        a group of ROTN transformations (this is on

*                        the critical path.) (Loops 130-180)

*                  2.) The small work it takes so that each of the rows

*                        and columns is at the same place.  For example,

*                        all ROTN row transforms are all complete

*                        through some column TMP.  (Loops within 190)

*                  3.) The majority of the row and column transforms

*                        are then applied in a block fashion.

*                        (Loops 290 on.)

*

*           Each of these three parts is further subdivided into 3

*           parts:

*               A.) Work at the start of a border when

*                       MOD(ISTART-1,HBL) = HBL-2

*               B.) Work at the end of a border when

*                       MOD(ISTART-1,HBL) = HBL-1

*               C.) Work in the middle of the block when

*                       MOD(ISTART-1,HBL) < HBL-2

*

               IF( ( myrow.EQ.icurrow( ki ) ) .AND.

     $             ( mycol.EQ.icurcol( ki ) ) .AND.

     $             ( modkm1.EQ.hbl-2 ) .AND.

     $             ( istart.LT.min( i-1, istop+1 ) ) ) THEN

                  k = istart

                  nr = min( 3, i-k+1 )

                  IF( k.GT.m ) THEN

                     CALL scopy( nr, smalla( 2, 1, ki ), 1, vcopy, 1 )

                  ELSE

                     vcopy( 1 ) = v1save

                     vcopy( 2 ) = v2save

                     vcopy( 3 ) = v3save

                  END IF

                  CALL slarfg( nr, vcopy( 1 ), vcopy( 2 ), 1, t1copy )

                  IF( k.GT.m ) THEN

                     smalla( 2, 1, ki ) = vcopy( 1 )

                     smalla( 3, 1, ki ) = zero

                     IF( k.LT.i-1 )

     $                  smalla( 4, 1, ki ) = zero

                  ELSE IF( m.GT.l ) THEN

                     smalla( 2, 1, ki ) = -smalla( 2, 1, ki )

                  END IF

                  v2 = vcopy( 2 )

                  t2 = t1copy*v2

                  work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )

                  work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )

                  work( vecsidx+( k-1 )*3+3 ) = t1copy

               END IF

*

               IF( ( mod( istop-1, hbl ).EQ.hbl-1 ) .AND.

     $             ( myrow.EQ.icurrow( ki ) ) .AND.

     $             ( mycol.EQ.icurcol( ki ) ) .AND.

     $             ( istart.LE.min( i, istop ) ) ) THEN

                  k = istart

                  nr = min( 3, i-k+1 )

                  IF( k.GT.m ) THEN

                     CALL scopy( nr, smalla( 3, 2, ki ), 1, vcopy, 1 )

                  ELSE

                     vcopy( 1 ) = v1save

                     vcopy( 2 ) = v2save

                     vcopy( 3 ) = v3save

                  END IF

                  CALL slarfg( nr, vcopy( 1 ), vcopy( 2 ), 1, t1copy )

                  IF( k.GT.m ) THEN

                     smalla( 3, 2, ki ) = vcopy( 1 )

                     smalla( 4, 2, ki ) = zero

                     IF( k.LT.i-1 )

     $                  smalla( 5, 2, ki ) = zero

*

*                 Set a subdiagonal to zero now if it's possible

*

*                 H11 = SMALLA(1,1,KI)

*                 H10 = SMALLA(2,1,KI)

*                 H22 = SMALLA(2,2,KI)

*                 IF ( ABS(H10) .LE. MAX(ULP*(ABS(H11)+ABS(H22)),

*    $                                    SMLNUM) ) THEN

*                    SMALLA(2,1,KI) = ZERO

*     WORK(ISUB+K-2) = ZERO

*                 END IF

                  ELSE IF( m.GT.l ) THEN

                     smalla( 3, 2, ki ) = -smalla( 3, 2, ki )

                  END IF

                  v2 = vcopy( 2 )

                  t2 = t1copy*v2

                  work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )

                  work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )

                  work( vecsidx+( k-1 )*3+3 ) = t1copy

               END IF

*

               IF( ( modkm1.EQ.0 ) .AND. ( istart.LE.i-1 ) .AND.

     $             ( myrow.EQ.icurrow( ki ) ) .AND.

     $             ( right.EQ.icurcol( ki ) ) ) THEN

*

*              (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)

*

                  irow1 = krow( ki )

                  icol1 = localk2( ki )

                  IF( istart.GT.m ) THEN

                     vcopy( 1 ) = smalla( 4, 3, ki )

                     vcopy( 2 ) = smalla( 5, 3, ki )

                     vcopy( 3 ) = smalla( 6, 3, ki )

                     nr = min( 3, i-istart+1 )

                     CALL slarfg( nr, vcopy( 1 ), vcopy( 2 ), 1,

     $                            t1copy )

                     a( ( icol1-2 )*lda+irow1 ) = vcopy( 1 )

                     a( ( icol1-2 )*lda+irow1+1 ) = zero

                     IF( istart.LT.i-1 ) THEN

                        a( ( icol1-2 )*lda+irow1+2 ) = zero

                     END IF

                  ELSE

                     IF( m.GT.l ) THEN

                        a( ( icol1-2 )*lda+irow1 ) = -a( ( icol1-2 )*

     $                     lda+irow1 )

                     END IF

                  END IF

               END IF

*

               IF( ( myrow.EQ.icurrow( ki ) ) .AND.

     $             ( mycol.EQ.icurcol( ki ) ) .AND.

     $             ( ( ( modkm1.EQ.hbl-2 ) .AND. ( istart.EQ.i-

     $             1 ) ) .OR. ( ( modkm1.LT.hbl-2 ) .AND. ( istart.LE.i-

     $             1 ) ) ) ) THEN

*

*           (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)

*

                  irow1 = krow( ki )

                  icol1 = localk2( ki )

                  DO 70 k = istart, istop

*

*              Create and do these transforms

*

                     nr = min( 3, i-k+1 )

                     IF( k.GT.m ) THEN

                        IF( mod( k-1, hbl ).EQ.0 ) THEN

                           vcopy( 1 ) = smalla( 4, 3, ki )

                           vcopy( 2 ) = smalla( 5, 3, ki )

                           vcopy( 3 ) = smalla( 6, 3, ki )

                        ELSE

                           vcopy( 1 ) = a( ( icol1-2 )*lda+irow1 )

                           vcopy( 2 ) = a( ( icol1-2 )*lda+irow1+1 )

                           IF( nr.EQ.3 ) THEN

                              vcopy( 3 ) = a( ( icol1-2 )*lda+irow1+2 )

                           END IF

                        END IF

                     ELSE

                        vcopy( 1 ) = v1save

                        vcopy( 2 ) = v2save

                        vcopy( 3 ) = v3save

                     END IF

                     CALL slarfg( nr, vcopy( 1 ), vcopy( 2 ), 1,

     $                            t1copy )

                     IF( k.GT.m ) THEN

                        IF( mod( k-1, hbl ).GT.0 ) THEN

                           a( ( icol1-2 )*lda+irow1 ) = vcopy( 1 )

                           a( ( icol1-2 )*lda+irow1+1 ) = zero

                           IF( k.LT.i-1 ) THEN

                              a( ( icol1-2 )*lda+irow1+2 ) = zero

                           END IF

*

*                    Set a subdiagonal to zero now if it's possible

*

*                    IF ( (IROW1.GT.2) .AND. (ICOL1.GT.2) .AND.

*    $                    (MOD(K-1,HBL) .GT. 1) ) THEN

*                       H11 = A((ICOL1-3)*LDA+IROW1-2)

*                       H10 = A((ICOL1-3)*LDA+IROW1-1)

*                       H22 = A((ICOL1-2)*LDA+IROW1-1)

*                       IF ( ABS(H10).LE.MAX(ULP*(ABS(H11)+ABS(H22)),

*    $                                       SMLNUM) ) THEN

*                           A((ICOL1-3)*LDA+IROW1-1) = ZERO

*                       END IF

*                    END IF

                        END IF

                     ELSE IF( m.GT.l ) THEN

                        IF( mod( k-1, hbl ).GT.0 ) THEN

                           a( ( icol1-2 )*lda+irow1 ) = -a( ( icol1-2 )*

     $                        lda+irow1 )

                        END IF

                     END IF

                     v2 = vcopy( 2 )

                     t2 = t1copy*v2

                     work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )

                     work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )

                     work( vecsidx+( k-1 )*3+3 ) = t1copy

                     t1 = t1copy

                     IF( k.LT.istop ) THEN

*

*                 Do some work so next step is ready...

*

                        v3 = vcopy( 3 )

                        t3 = t1*v3

                        DO 50 j = icol1, min( k2( ki )+1, i-1 ) +

     $                          icol1 - k

                           sum = a( ( j-1 )*lda+irow1 ) +

     $                           v2*a( ( j-1 )*lda+irow1+1 ) +

     $                           v3*a( ( j-1 )*lda+irow1+2 )

                           a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*lda+

     $                        irow1 ) - sum*t1

                           a( ( j-1 )*lda+irow1+1 ) = a( ( j-1 )*lda+

     $                        irow1+1 ) - sum*t2

                           a( ( j-1 )*lda+irow1+2 ) = a( ( j-1 )*lda+

     $                        irow1+2 ) - sum*t3

   50                   CONTINUE

                        itmp1 = localk2( ki )

                        DO 60 j = irow1 + 1, irow1 + 3

                           sum = a( ( icol1-1 )*lda+j ) +

     $                           v2*a( icol1*lda+j ) +

     $                           v3*a( ( icol1+1 )*lda+j )

                           a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*lda+

     $                        j ) - sum*t1

                           a( icol1*lda+j ) = a( icol1*lda+j ) - sum*t2

                           a( ( icol1+1 )*lda+j ) = a( ( icol1+1 )*lda+

     $                        j ) - sum*t3

   60                   CONTINUE

                     END IF

                     irow1 = irow1 + 1

                     icol1 = icol1 + 1

   70             CONTINUE

               END IF

*

               IF( modkm1.EQ.hbl-2 ) THEN

                  IF( ( down.EQ.icurrow( ki ) ) .AND.

     $                ( right.EQ.icurcol( ki ) ) .AND. ( num.GT.1 ) )

     $                 THEN

                     CALL sgerv2d( contxt, 3, 1,

     $                             work( vecsidx+( istart-1 )*3+1 ), 3,

     $                             down, right )

                  END IF

                  IF( ( myrow.EQ.icurrow( ki ) ) .AND.

     $                ( mycol.EQ.icurcol( ki ) ) .AND. ( num.GT.1 ) )

     $                 THEN

                     CALL sgesd2d( contxt, 3, 1,

     $                             work( vecsidx+( istart-1 )*3+1 ), 3,

     $                             up, left )

                  END IF

                  IF( ( down.EQ.icurrow( ki ) ) .AND.

     $                ( npcol.GT.1 ) .AND. ( istart.LE.istop ) ) THEN

                     jj = mod( icurcol( ki )+npcol-1, npcol )

                     IF( mycol.NE.jj ) THEN

                        CALL sgebr2d( contxt, 'ROW', ' ',

     $                                3*( istop-istart+1 ), 1,

     $                                work( vecsidx+( istart-1 )*3+1 ),

     $                                3*( istop-istart+1 ), myrow, jj )

                     ELSE

                        CALL sgebs2d( contxt, 'ROW', ' ',

     $                                3*( istop-istart+1 ), 1,

     $                                work( vecsidx+( istart-1 )*3+1 ),

     $                                3*( istop-istart+1 ) )

                     END IF

                  END IF

               END IF

*

*           Broadcast Householder information from the block

*

               IF( ( myrow.EQ.icurrow( ki ) ) .AND. ( npcol.GT.1 ) .AND.

     $             ( istart.LE.istop ) ) THEN

                  IF( mycol.NE.icurcol( ki ) ) THEN

                     CALL sgebr2d( contxt, 'ROW', ' ',

     $                             3*( istop-istart+1 ), 1,

     $                             work( vecsidx+( istart-1 )*3+1 ),

     $                             3*( istop-istart+1 ), myrow,

     $                             icurcol( ki ) )

                  ELSE

                     CALL sgebs2d( contxt, 'ROW', ' ',

     $                             3*( istop-istart+1 ), 1,

     $                             work( vecsidx+( istart-1 )*3+1 ),

     $                             3*( istop-istart+1 ) )

                  END IF

               END IF

   80       CONTINUE

*

*        Now do column transforms and finish work

*

            DO 90 ki = 1, ibulge

*

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

*

               IF( mod( istart-1, hbl ).EQ.hbl-2 ) THEN

                  IF( ( right.EQ.icurcol( ki ) ) .AND.

     $                ( nprow.GT.1 ) .AND. ( istart.LE.istop ) ) THEN

                     jj = mod( icurrow( ki )+nprow-1, nprow )

                     IF( myrow.NE.jj ) THEN

                        CALL sgebr2d( contxt, 'COL', ' ',

     $                                3*( istop-istart+1 ), 1,

     $                                work( vecsidx+( istart-1 )*3+1 ),

     $                                3*( istop-istart+1 ), jj, mycol )

                     ELSE

                        CALL sgebs2d( contxt, 'COL', ' ',

     $                                3*( istop-istart+1 ), 1,

     $                                work( vecsidx+( istart-1 )*3+1 ),

     $                                3*( istop-istart+1 ) )

                     END IF

                  END IF

               END IF

*

               IF( ( mycol.EQ.icurcol( ki ) ) .AND. ( nprow.GT.1 ) .AND.

     $             ( istart.LE.istop ) ) THEN

                  IF( myrow.NE.icurrow( ki ) ) THEN

                     CALL sgebr2d( contxt, 'COL', ' ',

     $                             3*( istop-istart+1 ), 1,

     $                             work( vecsidx+( istart-1 )*3+1 ),

     $                             3*( istop-istart+1 ), icurrow( ki ),

     $                             mycol )

                  ELSE

                     CALL sgebs2d( contxt, 'COL', ' ',

     $                             3*( istop-istart+1 ), 1,

     $                             work( vecsidx+( istart-1 )*3+1 ),

     $                             3*( istop-istart+1 ) )

                  END IF

               END IF

   90       CONTINUE

*

*        Now do the make-up work so that things are in block fashion

*

            DO 150 ki = 1, ibulge

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

*

               modkm1 = mod( istart-1, hbl )

               IF( ( myrow.EQ.icurrow( ki ) ) .AND.

     $             ( mycol.EQ.icurcol( ki ) ) .AND.

     $             ( modkm1.EQ.hbl-2 ) .AND. ( istart.LT.i-1 ) ) THEN

                  k = istart

*

*              Catch up on column & border work

*

                  nr = min( 3, i-k+1 )

                  v2 = work( vecsidx+( k-1 )*3+1 )

                  v3 = work( vecsidx+( k-1 )*3+2 )

                  t1 = work( vecsidx+( k-1 )*3+3 )

                  IF( nr.EQ.3 ) THEN

*

*                 Do some work so next step is ready...

*

*                 V3 = VCOPY( 3 )

                     t2 = t1*v2

                     t3 = t1*v3

                     itmp1 = min( 6, i2+2-k )

                     itmp2 = max( i1-k+2, 1 )

                     DO 100 j = 2, itmp1

                        sum = smalla( 2, j, ki ) +

     $                        v2*smalla( 3, j, ki ) +

     $                        v3*smalla( 4, j, ki )

                        smalla( 2, j, ki ) = smalla( 2, j, ki ) - sum*t1

                        smalla( 3, j, ki ) = smalla( 3, j, ki ) - sum*t2

                        smalla( 4, j, ki ) = smalla( 4, j, ki ) - sum*t3

  100                CONTINUE

                     DO 110 j = itmp2, 5

                        sum = smalla( j, 2, ki ) +

     $                        v2*smalla( j, 3, ki ) +

     $                        v3*smalla( j, 4, ki )

                        smalla( j, 2, ki ) = smalla( j, 2, ki ) - sum*t1

                        smalla( j, 3, ki ) = smalla( j, 3, ki ) - sum*t2

                        smalla( j, 4, ki ) = smalla( j, 4, ki ) - sum*t3

  110                CONTINUE

                  END IF

               END IF

*

               IF( ( mod( istart-1, hbl ).EQ.hbl-1 ) .AND.

     $             ( istart.LE.istop ) .AND.

     $             ( myrow.EQ.icurrow( ki ) ) .AND.

     $             ( mycol.EQ.icurcol( ki ) ) ) THEN

                  k = istop

*

*              Catch up on column & border work

*

                  nr = min( 3, i-k+1 )

                  v2 = work( vecsidx+( k-1 )*3+1 )

                  v3 = work( vecsidx+( k-1 )*3+2 )

                  t1 = work( vecsidx+( k-1 )*3+3 )

                  IF( nr.EQ.3 ) THEN

*

*                 Do some work so next step is ready...

*

*                 V3 = VCOPY( 3 )

                     t2 = t1*v2

                     t3 = t1*v3

                     itmp1 = min( 6, i2-k+3 )

                     itmp2 = max( i1-k+3, 1 )

                     DO 120 j = 3, itmp1

                        sum = smalla( 3, j, ki ) +

     $                        v2*smalla( 4, j, ki ) +

     $                        v3*smalla( 5, j, ki )

                        smalla( 3, j, ki ) = smalla( 3, j, ki ) - sum*t1

                        smalla( 4, j, ki ) = smalla( 4, j, ki ) - sum*t2

                        smalla( 5, j, ki ) = smalla( 5, j, ki ) - sum*t3

  120                CONTINUE

                     DO 130 j = itmp2, 6

                        sum = smalla( j, 3, ki ) +

     $                        v2*smalla( j, 4, ki ) +

     $                        v3*smalla( j, 5, ki )

                        smalla( j, 3, ki ) = smalla( j, 3, ki ) - sum*t1

                        smalla( j, 4, ki ) = smalla( j, 4, ki ) - sum*t2

                        smalla( j, 5, ki ) = smalla( j, 5, ki ) - sum*t3

  130                CONTINUE

                  END IF

               END IF

*

               modkm1 = mod( istart-1, hbl )

               IF( ( myrow.EQ.icurrow( ki ) ) .AND.

     $             ( mycol.EQ.icurcol( ki ) ) .AND.

     $             ( ( ( modkm1.EQ.hbl-2 ) .AND. ( istart.EQ.i-

     $             1 ) ) .OR. ( ( modkm1.LT.hbl-2 ) .AND. ( istart.LE.i-

     $             1 ) ) ) ) THEN

*

*           (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)

*

                  irow1 = krow( ki )

                  icol1 = localk2( ki )

                  DO 140 k = istart, istop

*

*              Catch up on column & border work

*

                     nr = min( 3, i-k+1 )

                     v2 = work( vecsidx+( k-1 )*3+1 )

                     v3 = work( vecsidx+( k-1 )*3+2 )

                     t1 = work( vecsidx+( k-1 )*3+3 )

                     IF( k.LT.istop ) THEN

*

*                 Do some work so next step is ready...

*

                        t2 = t1*v2

                        t3 = t1*v3

                        CALL slaref( 'Col', a, lda, .false., z, ldz,

     $                               .false., icol1, icol1, istart,

     $                               istop, min( istart+1, i )-k+irow1,

     $                               irow1, liloz, lihiz,

     $                               work( vecsidx+1 ), v2, v3, t1, t2,

     $                               t3 )

                        irow1 = irow1 + 1

                        icol1 = icol1 + 1

                     ELSE

                        IF( ( nr.EQ.3 ) .AND. ( mod( k-1,

     $                      hbl ).LT.hbl-2 ) ) THEN

                           t2 = t1*v2

                           t3 = t1*v3

                           CALL slaref( 'Row', a, lda, .false., z, ldz,

     $                                  .false., irow1, irow1, istart,

     $                                  istop, icol1, min( min( k2( ki )

     $                                  +1, i-1 ), i2 )-k+icol1, liloz,

     $                                  lihiz, work( vecsidx+1 ), v2,

     $                                  v3, t1, t2, t3 )

                        END IF

                     END IF

  140             CONTINUE

               END IF

*

*           Send SMALLA back again.

*

               k = istart

               modkm1 = mod( k-1, hbl )

               IF( ( modkm1.GE.hbl-2 ) .AND. ( k.LE.i-1 ) ) THEN

                  IF( ( modkm1.EQ.hbl-2 ) .AND. ( k.LT.i-1 ) ) THEN

*

*                 Copy SMALLA back to global A(K-1:K+4,K-1:K+4)

*

                     CALL infog2l( k+2, k+2, desca, nprow, npcol, myrow,

     $                             mycol, irow1, icol1, itmp1, itmp2 )

                     CALL pslacp3( min( 6, n-k+2 ), k-1, a, desca,

     $                             smalla( 1, 1, ki ), 6, itmp1, itmp2,

     $                             1 )

*

                  END IF

                  IF( modkm1.EQ.hbl-1 ) THEN

*

*                 Copy SMALLA back to global A(K-2:K+3,K-2:K+3)

*

                     CALL infog2l( k+1, k+1, desca, nprow, npcol, myrow,

     $                             mycol, irow1, icol1, itmp1, itmp2 )

                     CALL pslacp3( min( 6, n-k+3 ), k-2, a, desca,

     $                             smalla( 1, 1, ki ), 6, itmp1, itmp2,

     $                             1 )

                  END IF

               END IF

*

  150       CONTINUE

*

*        Now start major set of block ROW reflections

*

            DO 160 ki = 1, ibulge

               IF( ( myrow.NE.icurrow( ki ) ) .AND.

     $             ( down.NE.icurrow( ki ) ) )GO TO 160

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

*

               IF( ( istop.GT.istart ) .AND.

     $             ( mod( istart-1, hbl ).LT.hbl-2 ) .AND.

     $             ( icurrow( ki ).EQ.myrow ) ) THEN

                  irow1 = min( k2( ki )+1, i-1 ) + 1

                  CALL infog1l( irow1, hbl, npcol, mycol, 0, itmp1,

     $                          itmp2 )

                  itmp2 = numroc( i2, hbl, mycol, 0, npcol )

                  ii = krow( ki )

                  CALL slaref( 'Row', a, lda, wantz, z, ldz, .true., ii,

     $                         ii, istart, istop, itmp1, itmp2, liloz,

     $                         lihiz, work( vecsidx+1 ), v2, v3, t1, t2,

     $                         t3 )

               END IF

  160       CONTINUE

*

            DO 180 ki = 1, ibulge

               IF( krow( ki ).GT.kp2row( ki ) )

     $            GO TO 180

               IF( ( myrow.NE.icurrow( ki ) ) .AND.

     $             ( down.NE.icurrow( ki ) ) )GO TO 180

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

               IF( ( istart.EQ.istop ) .OR.

     $             ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.

     $             ( icurrow( ki ).NE.myrow ) ) THEN

                  DO 170 k = istart, istop

                     v2 = work( vecsidx+( k-1 )*3+1 )

                     v3 = work( vecsidx+( k-1 )*3+2 )

                     t1 = work( vecsidx+( k-1 )*3+3 )

                     nr = min( 3, i-k+1 )

                     IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.

     $                   kp2row( ki ) ) ) THEN

                        IF( ( k.LT.istop ) .AND.

     $                      ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN

                           itmp1 = min( k2( ki )+1, i-1 ) + 1

                        ELSE

                           IF( mod( k-1, hbl ).LT.hbl-2 ) THEN

                              itmp1 = min( k2( ki )+1, i-1 ) + 1

                           END IF

                           IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN

                              itmp1 = min( k+4, i2 ) + 1

                           END IF

                           IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN

                              itmp1 = min( k+3, i2 ) + 1

                           END IF

                        END IF

*

*                    Find local coordinates of rows K through K+2

*

                        irow1 = krow( ki )

                        irow2 = kp2row( ki )

                        CALL infog1l( itmp1, hbl, npcol, mycol, 0,

     $                                icol1, icol2 )

                        icol2 = numroc( i2, hbl, mycol, 0, npcol )

                        IF( ( mod( k-1, hbl ).LT.hbl-2 ) .OR.

     $                      ( nprow.EQ.1 ) ) THEN

                           t2 = t1*v2

                           t3 = t1*v3

                           CALL slaref( 'Row', a, lda, wantz, z, ldz,

     $                                  .false., irow1, irow1, istart,

     $                                  istop, icol1, icol2, liloz,

     $                                  lihiz, work( vecsidx+1 ), v2,

     $                                  v3, t1, t2, t3 )

                        END IF

                        IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.

     $                      ( nprow.GT.1 ) ) THEN

                           IF( irow1.EQ.irow2 ) THEN

                              CALL sgesd2d( contxt, 1, icol2-icol1+1,

     $                                      a( ( icol1-1 )*lda+irow2 ),

     $                                      lda, up, mycol )

                           END IF

                        END IF

                        IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.

     $                      ( nprow.GT.1 ) ) THEN

                           IF( irow1.EQ.irow2 ) THEN

                              CALL sgesd2d( contxt, 1, icol2-icol1+1,

     $                                      a( ( icol1-1 )*lda+irow1 ),

     $                                      lda, down, mycol )

                           END IF

                        END IF

                     END IF

  170             CONTINUE

               END IF

  180       CONTINUE

*

            DO 220 ki = 1, ibulge

               IF( krow( ki ).GT.kp2row( ki ) )

     $            GO TO 220

               IF( ( myrow.NE.icurrow( ki ) ) .AND.

     $             ( down.NE.icurrow( ki ) ) )GO TO 220

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

               IF( ( istart.EQ.istop ) .OR.

     $             ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.

     $             ( icurrow( ki ).NE.myrow ) ) THEN

                  DO 210 k = istart, istop

                     v2 = work( vecsidx+( k-1 )*3+1 )

                     v3 = work( vecsidx+( k-1 )*3+2 )

                     t1 = work( vecsidx+( k-1 )*3+3 )

                     nr = min( 3, i-k+1 )

                     IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.

     $                   kp2row( ki ) ) ) THEN

                        IF( ( k.LT.istop ) .AND.

     $                      ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN

                           itmp1 = min( k2( ki )+1, i-1 ) + 1

                        ELSE

                           IF( mod( k-1, hbl ).LT.hbl-2 ) THEN

                              itmp1 = min( k2( ki )+1, i-1 ) + 1

                           END IF

                           IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN

                              itmp1 = min( k+4, i2 ) + 1

                           END IF

                           IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN

                              itmp1 = min( k+3, i2 ) + 1

                           END IF

                        END IF

*

                        irow1 = krow( ki ) + k - istart

                        irow2 = kp2row( ki ) + k - istart

                        CALL infog1l( itmp1, hbl, npcol, mycol, 0,

     $                                icol1, icol2 )

                        icol2 = numroc( i2, hbl, mycol, 0, npcol )

                        IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.

     $                      ( nprow.GT.1 ) ) THEN

                           IF( irow1.NE.irow2 ) THEN

                              CALL sgerv2d( contxt, 1, icol2-icol1+1,

     $                                      work( irbuf+1 ), 1, down,

     $                                      mycol )

                              t2 = t1*v2

                              t3 = t1*v3

                              DO 190 j = icol1, icol2

                                 sum = a( ( j-1 )*lda+irow1 ) +

     $                                 v2*a( ( j-1 )*lda+irow1+1 ) +

     $                                 v3*work( irbuf+j-icol1+1 )

                                 a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*

     $                              lda+irow1 ) - sum*t1

                                 a( ( j-1 )*lda+irow1+1 ) = a( ( j-1 )*

     $                              lda+irow1+1 ) - sum*t2

                                 work( irbuf+j-icol1+1 ) = work( irbuf+

     $                              j-icol1+1 ) - sum*t3

  190                         CONTINUE

                              CALL sgesd2d( contxt, 1, icol2-icol1+1,

     $                                      work( irbuf+1 ), 1, down,

     $                                      mycol )

                           END IF

                        END IF

                        IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.

     $                      ( nprow.GT.1 ) ) THEN

                           IF( irow1.NE.irow2 ) THEN

                              CALL sgerv2d( contxt, 1, icol2-icol1+1,

     $                                      work( irbuf+1 ), 1, up,

     $                                      mycol )

                              t2 = t1*v2

                              t3 = t1*v3

                              DO 200 j = icol1, icol2

                                 sum = work( irbuf+j-icol1+1 ) +

     $                                 v2*a( ( j-1 )*lda+irow1 ) +

     $                                 v3*a( ( j-1 )*lda+irow1+1 )

                                 work( irbuf+j-icol1+1 ) = work( irbuf+

     $                              j-icol1+1 ) - sum*t1

                                 a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*

     $                              lda+irow1 ) - sum*t2

                                 a( ( j-1 )*lda+irow1+1 ) = a( ( j-1 )*

     $                              lda+irow1+1 ) - sum*t3

  200                         CONTINUE

                              CALL sgesd2d( contxt, 1, icol2-icol1+1,

     $                                      work( irbuf+1 ), 1, up,

     $                                      mycol )

                           END IF

                        END IF

                     END IF

  210             CONTINUE

               END IF

  220       CONTINUE

*

            DO 240 ki = 1, ibulge

               IF( krow( ki ).GT.kp2row( ki ) )

     $            GO TO 240

               IF( ( myrow.NE.icurrow( ki ) ) .AND.

     $             ( down.NE.icurrow( ki ) ) )GO TO 240

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

               IF( ( istart.EQ.istop ) .OR.

     $             ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.

     $             ( icurrow( ki ).NE.myrow ) ) THEN

                  DO 230 k = istart, istop

                     v2 = work( vecsidx+( k-1 )*3+1 )

                     v3 = work( vecsidx+( k-1 )*3+2 )

                     t1 = work( vecsidx+( k-1 )*3+3 )

                     nr = min( 3, i-k+1 )

                     IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.

     $                   kp2row( ki ) ) ) THEN

                        IF( ( k.LT.istop ) .AND.

     $                      ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN

                           itmp1 = min( k2( ki )+1, i-1 ) + 1

                        ELSE

                           IF( mod( k-1, hbl ).LT.hbl-2 ) THEN

                              itmp1 = min( k2( ki )+1, i-1 ) + 1

                           END IF

                           IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN

                              itmp1 = min( k+4, i2 ) + 1

                           END IF

                           IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN

                              itmp1 = min( k+3, i2 ) + 1

                           END IF

                        END IF

*

                        irow1 = krow( ki ) + k - istart

                        irow2 = kp2row( ki ) + k - istart

                        CALL infog1l( itmp1, hbl, npcol, mycol, 0,

     $                                icol1, icol2 )

                        icol2 = numroc( i2, hbl, mycol, 0, npcol )

                        IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.

     $                      ( nprow.GT.1 ) ) THEN

                           IF( irow1.EQ.irow2 ) THEN

                              CALL sgerv2d( contxt, 1, icol2-icol1+1,

     $                                      a( ( icol1-1 )*lda+irow2 ),

     $                                      lda, up, mycol )

                           END IF

                        END IF

                        IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.

     $                      ( nprow.GT.1 ) ) THEN

                           IF( irow1.EQ.irow2 ) THEN

                              CALL sgerv2d( contxt, 1, icol2-icol1+1,

     $                                      a( ( icol1-1 )*lda+irow1 ),

     $                                      lda, down, mycol )

                           END IF

                        END IF

                     END IF

  230             CONTINUE

               END IF

  240       CONTINUE

  250       CONTINUE

*

*        Now start major set of block COL reflections

*

            DO 260 ki = 1, ibulge

               IF( ( mycol.NE.icurcol( ki ) ) .AND.

     $             ( right.NE.icurcol( ki ) ) )GO TO 260

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

*

               IF( ( ( mod( istart-1, hbl ).LT.hbl-2 ) .OR. ( npcol.EQ.

     $             1 ) ) .AND. ( icurcol( ki ).EQ.mycol ) .AND.

     $             ( i-istop+1.GE.3 ) ) THEN

                  k = istart

                  IF( ( k.LT.istop ) .AND. ( mod( k-1,

     $                hbl ).LT.hbl-2 ) ) THEN

                     itmp1 = min( istart+1, i ) - 1

                  ELSE

                     IF( mod( k-1, hbl ).LT.hbl-2 ) THEN

                        itmp1 = min( k+3, i )

                     END IF

                     IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN

                        itmp1 = max( i1, k-1 ) - 1

                     END IF

                     IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN

                        itmp1 = max( i1, k-2 ) - 1

                     END IF

                  END IF

*

                  icol1 = kcol( ki )

                  CALL infog1l( i1, hbl, nprow, myrow, 0, irow1, irow2 )

                  irow2 = numroc( itmp1, hbl, myrow, 0, nprow )

                  IF( irow1.LE.irow2 ) THEN

                     itmp2 = irow2

                  ELSE

                     itmp2 = -1

                  END IF

                  CALL slaref( 'Col', a, lda, wantz, z, ldz, .true.,

     $                         icol1, icol1, istart, istop, irow1,

     $                         irow2, liloz, lihiz, work( vecsidx+1 ),

     $                         v2, v3, t1, t2, t3 )

                  k = istop

                  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN

*

*                 Do from ITMP1+1 to MIN(K+3,I)

*

                     IF( mod( k-1, hbl ).LT.hbl-3 ) THEN

                        irow1 = itmp2 + 1

                        IF( mod( ( itmp1 / hbl ), nprow ).EQ.myrow )

     $                       THEN

                           IF( itmp2.GT.0 ) THEN

                              irow2 = itmp2 + min( k+3, i ) - itmp1

                           ELSE

                              irow2 = irow1 - 1

                           END IF

                        ELSE

                           irow2 = irow1 - 1

                        END IF

                     ELSE

                        CALL infog1l( itmp1+1, hbl, nprow, myrow, 0,

     $                                irow1, irow2 )

                        irow2 = numroc( min( k+3, i ), hbl, myrow, 0,

     $                          nprow )

                     END IF

                     v2 = work( vecsidx+( k-1 )*3+1 )

                     v3 = work( vecsidx+( k-1 )*3+2 )

                     t1 = work( vecsidx+( k-1 )*3+3 )

                     t2 = t1*v2

                     t3 = t1*v3

                     icol1 = kcol( ki ) + istop - istart

                     CALL slaref( 'Col', a, lda, .false., z, ldz,

     $                            .false., icol1, icol1, istart, istop,

     $                            irow1, irow2, liloz, lihiz,

     $                            work( vecsidx+1 ), v2, v3, t1, t2,

     $                            t3 )

                  END IF

               END IF

  260       CONTINUE

*

            DO 320 ki = 1, ibulge

               IF( kcol( ki ).GT.kp2col( ki ) )

     $            GO TO 320

               IF( ( mycol.NE.icurcol( ki ) ) .AND.

     $             ( right.NE.icurcol( ki ) ) )GO TO 320

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

               IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN

*

*              INFO is found in a buffer

*

                  ispec = 1

               ELSE

*

*              All INFO is local

*

                  ispec = 0

               END IF

*

               DO 310 k = istart, istop

*

                  v2 = work( vecsidx+( k-1 )*3+1 )

                  v3 = work( vecsidx+( k-1 )*3+2 )

                  t1 = work( vecsidx+( k-1 )*3+3 )

                  nr = min( 3, i-k+1 )

                  IF( ( nr.EQ.3 ) .AND. ( kcol( ki ).LE.kp2col( ki ) ) )

     $                 THEN

*

                     IF( ( k.LT.istop ) .AND.

     $                   ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN

                        itmp1 = min( istart+1, i ) - 1

                     ELSE

                        IF( mod( k-1, hbl ).LT.hbl-2 ) THEN

                           itmp1 = min( k+3, i )

                        END IF

                        IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN

                           itmp1 = max( i1, k-1 ) - 1

                        END IF

                        IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN

                           itmp1 = max( i1, k-2 ) - 1

                        END IF

                     END IF

                     icol1 = kcol( ki ) + k - istart

                     icol2 = kp2col( ki ) + k - istart

                     CALL infog1l( i1, hbl, nprow, myrow, 0, irow1,

     $                             irow2 )

                     irow2 = numroc( itmp1, hbl, myrow, 0, nprow )

                     IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.

     $                   ( npcol.GT.1 ) ) THEN

                        IF( icol1.EQ.icol2 ) THEN

                           CALL sgesd2d( contxt, irow2-irow1+1, 1,

     $                                   a( ( icol1-1 )*lda+irow1 ),

     $                                   lda, myrow, left )

                           CALL sgerv2d( contxt, irow2-irow1+1, 1,

     $                                   a( ( icol1-1 )*lda+irow1 ),

     $                                   lda, myrow, left )

                        ELSE

                           CALL sgerv2d( contxt, irow2-irow1+1, 1,

     $                                   work( icbuf+1 ), irow2-irow1+1,

     $                                   myrow, right )

                           t2 = t1*v2

                           t3 = t1*v3

                           DO 270 j = irow1, irow2

                              sum = a( ( icol1-1 )*lda+j ) +

     $                              v2*a( icol1*lda+j ) +

     $                              v3*work( icbuf+j-irow1+1 )

                              a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*

     $                           lda+j ) - sum*t1

                              a( icol1*lda+j ) = a( icol1*lda+j ) -

     $                                           sum*t2

                              work( icbuf+j-irow1+1 ) = work( icbuf+j-

     $                           irow1+1 ) - sum*t3

  270                      CONTINUE

                           CALL sgesd2d( contxt, irow2-irow1+1, 1,

     $                                   work( icbuf+1 ), irow2-irow1+1,

     $                                   myrow, right )

                        END IF

                     END IF

                     IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.

     $                   ( npcol.GT.1 ) ) THEN

                        IF( icol1.EQ.icol2 ) THEN

                           CALL sgesd2d( contxt, irow2-irow1+1, 1,

     $                                   a( ( icol1-1 )*lda+irow1 ),

     $                                   lda, myrow, right )

                           CALL sgerv2d( contxt, irow2-irow1+1, 1,

     $                                   a( ( icol1-1 )*lda+irow1 ),

     $                                   lda, myrow, right )

                        ELSE

                           CALL sgerv2d( contxt, irow2-irow1+1, 1,

     $                                   work( icbuf+1 ), irow2-irow1+1,

     $                                   myrow, left )

                           t2 = t1*v2

                           t3 = t1*v3

                           DO 280 j = irow1, irow2

                              sum = work( icbuf+j-irow1+1 ) +

     $                              v2*a( ( icol1-1 )*lda+j ) +

     $                              v3*a( icol1*lda+j )

                              work( icbuf+j-irow1+1 ) = work( icbuf+j-

     $                           irow1+1 ) - sum*t1

                              a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*

     $                           lda+j ) - sum*t2

                              a( icol1*lda+j ) = a( icol1*lda+j ) -

     $                                           sum*t3

  280                      CONTINUE

                           CALL sgesd2d( contxt, irow2-irow1+1, 1,

     $                                   work( icbuf+1 ), irow2-irow1+1,

     $                                   myrow, left )

                        END IF

                     END IF

*

*                 If we want Z and we haven't already done any Z

                     IF( ( wantz ) .AND. ( mod( k-1,

     $                   hbl ).GE.hbl-2 ) .AND. ( npcol.GT.1 ) ) THEN

*

*                    Accumulate transformations in the matrix Z

*

                        irow1 = liloz

                        irow2 = lihiz

                        IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN

                           IF( icol1.EQ.icol2 ) THEN

                              CALL sgesd2d( contxt, irow2-irow1+1, 1,

     $                                      z( ( icol1-1 )*ldz+irow1 ),

     $                                      ldz, myrow, left )

                              CALL sgerv2d( contxt, irow2-irow1+1, 1,

     $                                      z( ( icol1-1 )*ldz+irow1 ),

     $                                      ldz, myrow, left )

                           ELSE

                              CALL sgerv2d( contxt, irow2-irow1+1, 1,

     $                                      work( icbuf+1 ),

     $                                      irow2-irow1+1, myrow,

     $                                      right )

                              t2 = t1*v2

                              t3 = t1*v3

                              icol1 = ( icol1-1 )*ldz

                              DO 290 j = irow1, irow2

                                 sum = z( icol1+j ) +

     $                                 v2*z( icol1+j+ldz ) +

     $                                 v3*work( icbuf+j-irow1+1 )

                                 z( j+icol1 ) = z( j+icol1 ) - sum*t1

                                 z( j+icol1+ldz ) = z( j+icol1+ldz ) -

     $                                              sum*t2

                                 work( icbuf+j-irow1+1 ) = work( icbuf+

     $                              j-irow1+1 ) - sum*t3

  290                         CONTINUE

                              CALL sgesd2d( contxt, irow2-irow1+1, 1,

     $                                      work( icbuf+1 ),

     $                                      irow2-irow1+1, myrow,

     $                                      right )

                           END IF

                        END IF

                        IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN

                           IF( icol1.EQ.icol2 ) THEN

                              CALL sgesd2d( contxt, irow2-irow1+1, 1,

     $                                      z( ( icol1-1 )*ldz+irow1 ),

     $                                      ldz, myrow, right )

                              CALL sgerv2d( contxt, irow2-irow1+1, 1,

     $                                      z( ( icol1-1 )*ldz+irow1 ),

     $                                      ldz, myrow, right )

                           ELSE

                              CALL sgerv2d( contxt, irow2-irow1+1, 1,

     $                                      work( icbuf+1 ),

     $                                      irow2-irow1+1, myrow, left )

                              t2 = t1*v2

                              t3 = t1*v3

                              icol1 = ( icol1-1 )*ldz

                              DO 300 j = irow1, irow2

                                 sum = work( icbuf+j-irow1+1 ) +

     $                                 v2*z( j+icol1 ) +

     $                                 v3*z( j+icol1+ldz )

                                 work( icbuf+j-irow1+1 ) = work( icbuf+

     $                              j-irow1+1 ) - sum*t1

                                 z( j+icol1 ) = z( j+icol1 ) - sum*t2

                                 z( j+icol1+ldz ) = z( j+icol1+ldz ) -

     $                                              sum*t3

  300                         CONTINUE

                              CALL sgesd2d( contxt, irow2-irow1+1, 1,

     $                                      work( icbuf+1 ),

     $                                      irow2-irow1+1, myrow, left )

                           END IF

                        END IF

                     END IF

                     IF( icurcol( ki ).EQ.mycol ) THEN

                        IF( ( ispec.EQ.0 ) .OR. ( npcol.EQ.1 ) ) THEN

                           localk2( ki ) = localk2( ki ) + 1

                        END IF

                     ELSE

                        IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.

     $                      ( icurcol( ki ).EQ.right ) ) THEN

                           IF( k.GT.m ) THEN

                              localk2( ki ) = localk2( ki ) + 2

                           ELSE

                              localk2( ki ) = localk2( ki ) + 1

                           END IF

                        END IF

                        IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.

     $                      ( i-k.EQ.2 ) .AND. ( icurcol( ki ).EQ.

     $                      right ) ) THEN

                           localk2( ki ) = localk2( ki ) + 2

                        END IF

                     END IF

                  END IF

  310          CONTINUE

  320       CONTINUE

*

*        Column work done

*

  330       CONTINUE

*

*        Now do NR=2 work

*

            DO 410 ki = 1, ibulge

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

               IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN

*

*              INFO is found in a buffer

*

                  ispec = 1

               ELSE

*

*              All INFO is local

*

                  ispec = 0

               END IF

*

               DO 400 k = istart, istop

*

                  v2 = work( vecsidx+( k-1 )*3+1 )

                  v3 = work( vecsidx+( k-1 )*3+2 )

                  t1 = work( vecsidx+( k-1 )*3+3 )

                  nr = min( 3, i-k+1 )

                  IF( nr.EQ.2 ) THEN

                     IF ( icurrow( ki ).EQ.myrow ) THEN

                        t2 = t1*v2

                     END IF

                     IF ( icurcol( ki ).EQ.mycol ) THEN

                        t2 = t1*v2

                     END IF

*

*              Apply G from the left to transform the rows of the matrix

*              in columns K to I2.

*

                     CALL infog1l( k, hbl, npcol, mycol, 0, liloh,

     $                             lihih )

                     lihih = numroc( i2, hbl, mycol, 0, npcol )

                     CALL infog1l( 1, hbl, nprow, myrow, 0, itmp2,

     $                             itmp1 )

                     itmp1 = numroc( k+1, hbl, myrow, 0, nprow )

                     IF( icurrow( ki ).EQ.myrow ) THEN

                        IF( ( ispec.EQ.0 ) .OR. ( nprow.EQ.1 ) .OR.

     $                      ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN

                           itmp1 = itmp1 - 1

                           DO 340 j = ( liloh-1 )*lda,

     $                             ( lihih-1 )*lda, lda

                              sum = a( itmp1+j ) + v2*a( itmp1+1+j )

                              a( itmp1+j ) = a( itmp1+j ) - sum*t1

                              a( itmp1+1+j ) = a( itmp1+1+j ) - sum*t2

  340                      CONTINUE

                        ELSE

                           IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN

                              CALL sgerv2d( contxt, 1, lihih-liloh+1,

     $                                      work( irbuf+1 ), 1, up,

     $                                      mycol )

                              DO 350 j = liloh, lihih

                                 sum = work( irbuf+j-liloh+1 ) +

     $                                 v2*a( ( j-1 )*lda+itmp1 )

                                 work( irbuf+j-liloh+1 ) = work( irbuf+

     $                              j-liloh+1 ) - sum*t1

                                 a( ( j-1 )*lda+itmp1 ) = a( ( j-1 )*

     $                              lda+itmp1 ) - sum*t2

  350                         CONTINUE

                              CALL sgesd2d( contxt, 1, lihih-liloh+1,

     $                                      work( irbuf+1 ), 1, up,

     $                                      mycol )

                           END IF

                        END IF

                     ELSE

                        IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.

     $                      ( icurrow( ki ).EQ.down ) ) THEN

                           CALL sgesd2d( contxt, 1, lihih-liloh+1,

     $                                   a( ( liloh-1 )*lda+itmp1 ),

     $                                   lda, down, mycol )

                           CALL sgerv2d( contxt, 1, lihih-liloh+1,

     $                                   a( ( liloh-1 )*lda+itmp1 ),

     $                                   lda, down, mycol )

                        END IF

                     END IF

*

*              Apply G from the right to transform the columns of the

*              matrix in rows I1 to MIN(K+3,I).

*

                     CALL infog1l( i1, hbl, nprow, myrow, 0, liloh,

     $                             lihih )

                     lihih = numroc( i, hbl, myrow, 0, nprow )

*

                     IF( icurcol( ki ).EQ.mycol ) THEN

*                 LOCAL A(LILOZ:LIHIZ,LOCALK2:LOCALK2+2)

                        IF( ( ispec.EQ.0 ) .OR. ( npcol.EQ.1 ) .OR.

     $                      ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN

                           CALL infog1l( k, hbl, npcol, mycol, 0, itmp1,

     $                                   itmp2 )

                           itmp2 = numroc( k+1, hbl, mycol, 0, npcol )

                           DO 360 j = liloh, lihih

                              sum = a( ( itmp1-1 )*lda+j ) +

     $                              v2*a( itmp1*lda+j )

                              a( ( itmp1-1 )*lda+j ) = a( ( itmp1-1 )*

     $                           lda+j ) - sum*t1

                              a( itmp1*lda+j ) = a( itmp1*lda+j ) -

     $                                           sum*t2

  360                      CONTINUE

                        ELSE

                           itmp1 = localk2( ki )

                           IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN

                              CALL sgerv2d( contxt, lihih-liloh+1, 1,

     $                                      work( icbuf+1 ),

     $                                      lihih-liloh+1, myrow, left )

                              DO 370 j = liloh, lihih

                                 sum = work( icbuf+j ) +

     $                                 v2*a( ( itmp1-1 )*lda+j )

                                 work( icbuf+j ) = work( icbuf+j ) -

     $                                             sum*t1

                                 a( ( itmp1-1 )*lda+j )

     $                              = a( ( itmp1-1 )*lda+j ) - sum*t2

  370                         CONTINUE

                              CALL sgesd2d( contxt, lihih-liloh+1, 1,

     $                                      work( icbuf+1 ),

     $                                      lihih-liloh+1, myrow, left )

                           END IF

                        END IF

                     ELSE

                        IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.

     $                      ( icurcol( ki ).EQ.right ) ) THEN

                           itmp1 = kcol( ki )

                           CALL sgesd2d( contxt, lihih-liloh+1, 1,

     $                                   a( ( itmp1-1 )*lda+liloh ),

     $                                   lda, myrow, right )

                           CALL infog1l( k, hbl, npcol, mycol, 0, itmp1,

     $                                   itmp2 )

                           itmp2 = numroc( k+1, hbl, mycol, 0, npcol )

                           CALL sgerv2d( contxt, lihih-liloh+1, 1,

     $                                   a( ( itmp1-1 )*lda+liloh ),

     $                                   lda, myrow, right )

                        END IF

                     END IF

*

                     IF( wantz ) THEN

*

*                 Accumulate transformations in the matrix Z

*

                        IF( icurcol( ki ).EQ.mycol ) THEN

*                    LOCAL Z(LILOZ:LIHIZ,LOCALK2:LOCALK2+2)

                           IF( ( ispec.EQ.0 ) .OR. ( npcol.EQ.1 ) .OR.

     $                         ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN

                              itmp1 = kcol( ki ) + k - istart

                              itmp1 = ( itmp1-1 )*ldz

                              DO 380 j = liloz, lihiz

                                 sum = z( j+itmp1 ) +

     $                                 v2*z( j+itmp1+ldz )

                                 z( j+itmp1 ) = z( j+itmp1 ) - sum*t1

                                 z( j+itmp1+ldz ) = z( j+itmp1+ldz ) -

     $                                              sum*t2

  380                         CONTINUE

                              localk2( ki ) = localk2( ki ) + 1

                           ELSE

                              itmp1 = localk2( ki )

*                       IF WE ACTUALLY OWN COLUMN K

                              IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN

                                 CALL sgerv2d( contxt, lihiz-liloz+1, 1,

     $                                         work( icbuf+1 ), ldz,

     $                                         myrow, left )

                                 itmp1 = ( itmp1-1 )*ldz

                                 DO 390 j = liloz, lihiz

                                    sum = work( icbuf+j ) +

     $                                    v2*z( j+itmp1 )

                                    work( icbuf+j ) = work( icbuf+j ) -

     $                                 sum*t1

                                    z( j+itmp1 ) = z( j+itmp1 ) - sum*t2

  390                            CONTINUE

                                 CALL sgesd2d( contxt, lihiz-liloz+1, 1,

     $                                         work( icbuf+1 ), ldz,

     $                                         myrow, left )

                                 localk2( ki ) = localk2( ki ) + 1

                              END IF

                           END IF

                        ELSE

*

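*                    No local arithmetic in this branch, but the
*                    bookkeeping must still advance: send our Z column
*                    to the process on the right, receive it back, and
*                    bump LOCALK2.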

*

                           IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.

     $                         ( icurcol( ki ).EQ.right ) ) THEN

                              itmp1 = kcol( ki )

                              itmp1 = ( itmp1-1 )*ldz

                              CALL sgesd2d( contxt, lihiz-liloz+1, 1,

     $                                      z( liloz+itmp1 ), ldz,

     $                                      myrow, right )

                              CALL sgerv2d( contxt, lihiz-liloz+1, 1,

     $                                      z( liloz+itmp1 ), ldz,

     $                                      myrow, right )

                              localk2( ki ) = localk2( ki ) + 1

                           END IF

                        END IF

                     END IF

                  END IF

  400          CONTINUE

*

*        Adjust local information for this bulge

*

               IF( nprow.EQ.1 ) THEN

                  krow( ki ) = krow( ki ) + k2( ki ) - k1( ki ) + 1

                  kp2row( ki ) = kp2row( ki ) + k2( ki ) - k1( ki ) + 1

               END IF

               IF( ( mod( k1( ki )-1, hbl ).LT.hbl-2 ) .AND.

     $             ( icurrow( ki ).EQ.myrow ) .AND. ( nprow.GT.1 ) )

     $              THEN

                  krow( ki ) = krow( ki ) + k2( ki ) - k1( ki ) + 1

               END IF

               IF( ( mod( k2( ki ), hbl ).LT.hbl-2 ) .AND.

     $             ( icurrow( ki ).EQ.myrow ) .AND. ( nprow.GT.1 ) )

     $              THEN

                  kp2row( ki ) = kp2row( ki ) + k2( ki ) - k1( ki ) + 1

               END IF

               IF( ( mod( k1( ki )-1, hbl ).GE.hbl-2 ) .AND.

     $             ( ( myrow.EQ.icurrow( ki ) ) .OR. ( down.EQ.

     $             icurrow( ki ) ) ) .AND. ( nprow.GT.1 ) ) THEN

                  CALL infog1l( k2( ki )+1, hbl, nprow, myrow, 0,

     $                          krow( ki ), itmp2 )

                  itmp2 = numroc( n, hbl, myrow, 0, nprow )

               END IF

               IF( ( mod( k2( ki ), hbl ).GE.hbl-2 ) .AND.

     $             ( ( myrow.EQ.icurrow( ki ) ) .OR. ( up.EQ.

     $             icurrow( ki ) ) ) .AND. ( nprow.GT.1 ) ) THEN

                  CALL infog1l( 1, hbl, nprow, myrow, 0, itmp2,

     $                          kp2row( ki ) )

                  kp2row( ki ) = numroc( k2( ki )+3, hbl, myrow, 0,

     $                           nprow )

               END IF

               IF( npcol.EQ.1 ) THEN

                  kcol( ki ) = kcol( ki ) + k2( ki ) - k1( ki ) + 1

                  kp2col( ki ) = kp2col( ki ) + k2( ki ) - k1( ki ) + 1

               END IF

               IF( ( mod( k1( ki )-1, hbl ).LT.hbl-2 ) .AND.

     $             ( icurcol( ki ).EQ.mycol ) .AND. ( npcol.GT.1 ) )

     $              THEN

                  kcol( ki ) = kcol( ki ) + k2( ki ) - k1( ki ) + 1

               END IF

               IF( ( mod( k2( ki ), hbl ).LT.hbl-2 ) .AND.

     $             ( icurcol( ki ).EQ.mycol ) .AND. ( npcol.GT.1 ) )

     $              THEN

                  kp2col( ki ) = kp2col( ki ) + k2( ki ) - k1( ki ) + 1

               END IF

               IF( ( mod( k1( ki )-1, hbl ).GE.hbl-2 ) .AND.

     $             ( ( mycol.EQ.icurcol( ki ) ) .OR. ( right.EQ.

     $             icurcol( ki ) ) ) .AND. ( npcol.GT.1 ) ) THEN

                  CALL infog1l( k2( ki )+1, hbl, npcol, mycol, 0,

     $                          kcol( ki ), itmp2 )

                  itmp2 = numroc( n, hbl, mycol, 0, npcol )

               END IF

               IF( ( mod( k2( ki ), hbl ).GE.hbl-2 ) .AND.

     $             ( ( mycol.EQ.icurcol( ki ) ) .OR. ( left.EQ.

     $             icurcol( ki ) ) ) .AND. ( npcol.GT.1 ) ) THEN

                  CALL infog1l( 1, hbl, npcol, mycol, 0, itmp2,

     $                          kp2col( ki ) )

                  kp2col( ki ) = numroc( k2( ki )+3, hbl, mycol, 0,

     $                           npcol )

               END IF

               k1( ki ) = k2( ki ) + 1

               istop = min( k1( ki )+rotn-mod( k1( ki ), rotn ), i-2 )

               istop = min( istop, k1( ki )+hbl-3-

     $                 mod( k1( ki )-1, hbl ) )

               istop = min( istop, i2-2 )

               istop = max( istop, k1( ki ) )

*        ISTOP = MIN( ISTOP , I-1 )

               k2( ki ) = istop

               IF( k1( ki ).EQ.istop ) THEN

                  IF( ( mod( istop-1, hbl ).EQ.hbl-2 ) .AND.

     $                ( i-istop.GT.1 ) ) THEN

*

*              Next step switches rows & cols

*

                     icurrow( ki ) = mod( icurrow( ki )+1, nprow )

                     icurcol( ki ) = mod( icurcol( ki )+1, npcol )

                  END IF

               END IF

  410       CONTINUE

            IF( k2( ibulge ).LE.i-1 )

     $         GO TO 40

         END IF

*

  420 CONTINUE

*

*     Failure to converge in remaining number of iterations

*

      info = i

      RETURN

*

  430 CONTINUE

*

      IF( l.EQ.i ) THEN

*

*        H(I,I-1) is negligible: one eigenvalue has converged.

*

         CALL infog2l( i, i, desca, nprow, npcol, myrow, mycol, irow,

     $                 icol, itmp1, itmp2 )

         IF( ( myrow.EQ.itmp1 ) .AND. ( mycol.EQ.itmp2 ) ) THEN

            wr( i ) = a( ( icol-1 )*lda+irow )

         ELSE

            wr( i ) = zero

         END IF

         wi( i ) = zero

      ELSE IF( l.EQ.i-1 ) THEN

*

*        H(I-1,I-2) is negligible: a pair of eigenvalues has converged.

*

         CALL pselget( 'All', ' ', h11, a, l, l, desca )

         CALL pselget( 'All', ' ', h21, a, i, l, desca )

         CALL pselget( 'All', ' ', h12, a, l, i, desca )

         CALL pselget( 'All', ' ', h22, a, i, i, desca )

         CALL slanv2( h11, h12, h21, h22, wr( l ), wi( l ), wr( i ),

     $                wi( i ), cs, sn )

         IF( node .NE. 0 ) THEN

            wr( l ) = zero

            wr( i ) = zero

            wi( l ) = zero

            wi( i ) = zero

         ENDIF

      ELSE

*

*        Find the eigenvalues in H(L:I,L:I), L < I-1

*

         jblk = i - l + 1

         IF( jblk.LE.2*iblk ) THEN

            CALL pslacp3( i-l+1, l, a, desca, s1, 2*iblk, 0, 0, 0 )

            CALL slahqr( .false., .false., jblk, 1, jblk, s1, 2*iblk,

     $                   wr( l ), wi( l ), 1, jblk, z, ldz, ierr )

            IF( node.NE.0 ) THEN

*

*           Erase the eigenvalues

*

               DO 440 k = l, i

                  wr( k ) = zero

                  wi( k ) = zero

  440          CONTINUE

            END IF

         END IF

      END IF

*

*     Decrement number of remaining iterations, and return to start of

*     the main loop with new value of I.

*

      itn = itn - its

      IF( m.EQ.l-10 ) THEN

         i = l - 1

      ELSE

         i = m

      END IF

*     I = L - 1

      GO TO 10

*

  450 CONTINUE

      CALL sgsum2d( contxt, 'All', ' ', n, 1, wr, n, -1, -1 )

      CALL sgsum2d( contxt, 'All', ' ', n, 1, wi, n, -1, -1 )

      RETURN

*

*     END OF PSLAHQR

*


      END

slahqr
subroutine slahqr(wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, iloz, ihiz, z, ldz, info)
SLAHQR computes the eigenvalues and Schur factorization of an upper Hessenberg matrix,...
Definition slahqr.f:207

slanv2
subroutine slanv2(a, b, c, d, rt1r, rt1i, rt2r, rt2i, cs, sn)
SLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric matrix in standard form.
Definition slanv2.f:127

slarfg
subroutine slarfg(n, alpha, x, incx, tau)
SLARFG generates an elementary reflector (Householder matrix).
Definition slarfg.f:106

scopy
subroutine scopy(n, sx, incx, sy, incy)
SCOPY
Definition scopy.f:82

infog1l
subroutine infog1l(gindx, nb, nprocs, myroc, isrcproc, lindx, rocsrc)
Definition infog1l.f:3

min
#define min(a, b)
Definition macros.h:20

max
#define max(a, b)
Definition macros.h:21

sgebs2d
subroutine sgebs2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1072

pxerbla
subroutine pxerbla(contxt, srname, info)
Definition mpi.f:1600

sgebr2d
subroutine sgebr2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1113

infog2l
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition mpi.f:937

blacs_gridinfo
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754

pselget
subroutine pselget(scope, top, alpha, a, ia, ja, desca)
Definition pselget.f:2

pslabad
subroutine pslabad(ictxt, small, large)
Definition pslabad.f:2

pslaconsb
subroutine pslaconsb(a, desca, i, l, m, h44, h33, h43h34, buf, lwork)
Definition pslaconsb.f:3

pslacp3
subroutine pslacp3(m, i, a, desca, b, ldb, ii, jj, rev)
Definition pslacp3.f:2

pslahqr
subroutine pslahqr(wantt, wantz, n, ilo, ihi, a, desca, wr, wi, iloz, ihiz, z, descz, work, lwork, iwork, ilwork, info)
Definition pslahqr.f:4

pslasmsub
subroutine pslasmsub(a, desca, i, l, k, smlnum, buf, lwork)
Definition pslasmsub.f:2

pslawil
subroutine pslawil(ii, jj, m, a, desca, h44, h33, h43h34, v)
Definition pslawil.f:2

slaref
subroutine slaref(type, a, lda, wantz, z, ldz, block, irow1, icol1, istart, istop, itmp1, itmp2, liloz, lihiz, vecs, v2, v3, t1, t2, t3)
Definition slaref.f:4

slasorte
subroutine slasorte(s, lds, j, out, info)
Definition slasorte.f:2